def GetReferenceList(seedArticle, databaseFile=None, graphFile=None):
    """Collect the articles referenced by ``seedArticle``.

    Any 'articleDetails' in ``seedArticle.link`` is replaced by
    'abstractReferences' (the attribute is overwritten in place), that page
    is loaded with the module-level ``driver``, and each reference extracted
    from it is resolved to an ``Article`` by searching for its title.

    Args:
        seedArticle: Article whose references are wanted. Its ``link``
            attribute is modified as described above.
        databaseFile: Currently unused by this function; accepted so that
            existing callers keep working.
        graphFile: Currently unused by this function; accepted so that
            existing callers keep working.

    Returns:
        A list of ``Article`` objects with ``title``, ``link`` and
        ``identification`` set, one per reference that could be resolved.
        A reference whose processing raises is skipped (best effort) and
        reported on stderr.
    """
    # Local import: the file's top-level import block is not visible here.
    import sys

    seedArticle.link = seedArticle.link.replace('articleDetails', 'abstractReferences')
    html = GetHTMLFromLink(driver, seedArticle.link)
    references = GetReferencesFromHTML(html)

    articleList = []
    for ref in references:
        try:
            article = Article()
            article.title = GetTitleFromRef(ref)
            # NOTE(review): a fresh Firefox instance is started for every
            # reference and is never quit in this function. Unless
            # GetHTMLSearchIEEEByName closes the browser it is given, this
            # leaks one browser process per reference -- confirm and either
            # reuse a single browser or quit it after the search.
            searchHtml = GetHTMLSearchIEEEByName(webdriver.Firefox(), article.title)
            article.link = GetSearchLinkFromArticleName(searchHtml, article.title)
            article.identification = parseIdentificationFromLink(article.link)
            articleList.append(article)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(article.identification)
            print(article.title)
            print("\n")
        except Exception as exc:
            # Best effort: one unresolvable reference must not abort the
            # crawl. Catching Exception (rather than a bare except) lets
            # KeyboardInterrupt and SystemExit still stop the program.
            sys.stderr.write("Skipping reference: %r\n" % (exc,))
            continue
    return articleList
def StartFromSeed(seedLink, seedTitle, databaseFile="database.csv", graphFile="graph.csv"):
    """Build the seed article, fetch its references and persist them all.

    Args:
        seedLink: URL of the seed article; also used to derive its
            identification via ``parseIdentificationFromLink``.
        seedTitle: Title stored on the seed article.
        databaseFile: Path passed to ``AppendDatabaseFromMap`` as the
            database file. Defaults to "database.csv".
        graphFile: Path passed to ``AppendDatabaseFromMap`` as the graph
            file. Defaults to "graph.csv".

    Returns:
        None. The seed article and its references are handed to
        ``AppendDatabaseFromMap`` and 'done' is printed afterwards.
    """
    seedArticle = Article()
    seedArticle.link = seedLink
    seedArticle.title = seedTitle
    seedArticle.identification = parseIdentificationFromLink(seedArticle.link)
    seedArticle.references = GetReferenceList(
        seedArticle, databaseFile=databaseFile, graphFile=graphFile)

    # Keyed by identification, so articles sharing an identification collapse
    # into one entry; a later reference replaces an earlier entry (including
    # the seed) that has the same key.
    mapToInsert = {seedArticle.identification: seedArticle}
    for referenced in seedArticle.references:
        mapToInsert[referenced.identification] = referenced

    AppendDatabaseFromMap(mapToInsert, databaseFile, graphFile)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('done')