예제 #1
0
def _QuitBrowserQuietly(browser):
    """Shut down a browser instance, ignoring any error raised while doing so."""
    try:
        browser.quit()
    except Exception:
        # Cleanup is best-effort: a browser that is already gone (or that
        # fails to stop) must not cause an otherwise resolved article to be
        # discarded.
        pass


def _ResolveReference(ref):
    """Build an Article for one reference entry by searching for its title.

    A dedicated Firefox instance is started for the search, as before, but
    it is now always shut down once the article's link has been extracted,
    so browsers no longer accumulate (one per reference).

    NOTE(review): assumes `webdriver` is Selenium's module, whose drivers
    expose quit(), and that GetHTMLSearchIEEEByName does not need the
    browser after GetSearchLinkFromArticleName has run - confirm.

    Returns the populated Article; any failure propagates to the caller.
    """
    article = Article()
    article.title = GetTitleFromRef(ref)

    browser = webdriver.Firefox()
    try:
        searchHtml = GetHTMLSearchIEEEByName(browser, article.title)
        article.link = GetSearchLinkFromArticleName(searchHtml, article.title)
    finally:
        _QuitBrowserQuietly(browser)

    article.identification = parseIdentificationFromLink(article.link)
    return article


def GetReferenceList(seedArticle, databaseFile = None, graphFile = None):
    """Return Article objects for the references listed on a seed article's page.

    Side effect: seedArticle.link is rewritten in place, replacing
    'articleDetails' with 'abstractReferences', and that page is fetched
    through the module-level `driver`.

    Each reference is resolved independently. A reference whose lookup
    raises is skipped so that one bad entry does not abort the whole list.
    Only Exception subclasses are skipped: KeyboardInterrupt and SystemExit
    now propagate, so a long-running scrape can be interrupted.

    Parameters:
        seedArticle:  object with a string `link` attribute (mutated, see above).
        databaseFile: accepted for interface compatibility; not used here.
        graphFile:    accepted for interface compatibility; not used here.

    Returns:
        list of Article objects, in the order the references were resolved.
    """
    seedArticle.link = seedArticle.link.replace('articleDetails', 'abstractReferences')
    html = GetHTMLFromLink(driver, seedArticle.link)
    references = GetReferencesFromHTML(html)
    articleList = []

    for ref in references:
        try:
            article = _ResolveReference(ref)
            articleList.append(article)
            # Progress output. Printing stays inside the try so that a
            # failure while printing (e.g. an encoding error on the title)
            # does not abort the run; the article is already recorded.
            # The single-argument parenthesised form prints the same text
            # whether `print` is a statement or a function.
            print(article.identification)
            print(article.title)
            print("\n")
        except Exception:
            # Best-effort: move on to the next reference.
            continue

    return articleList
예제 #2
0
def StartFromSeed(seedLink, seedTitle):
    """Record a seed article and its references in the CSV database.

    Builds an Article from the given link and title, collects its
    references with GetReferenceList, and hands the seed plus every
    reference (keyed by identification) to AppendDatabaseFromMap, writing
    to "database.csv" and "graph.csv". Prints 'done' when finished.

    Parameters:
        seedLink:  link of the seed article.
        seedTitle: title of the seed article.
    """
    dbPath = "database.csv"
    graphPath = "graph.csv"

    seed = Article()
    seed.link = seedLink
    seed.title = seedTitle
    # The identification is derived before GetReferenceList runs, because
    # that call rewrites the article's link.
    seed.identification = parseIdentificationFromLink(seed.link)

    seed.references = GetReferenceList(seed, databaseFile = dbPath, graphFile = graphPath)

    # Seed first, then references in order: a reference sharing an
    # identification with an earlier entry replaces it.
    articlesById = {seed.identification: seed}
    articlesById.update((ref.identification, ref) for ref in seed.references)

    AppendDatabaseFromMap(articlesById, dbPath, graphPath)

    print('done')