class EcoLexCrawl(object):
    """Run an EcoLex search and store the results that are not yet stored.

    Constructing an instance performs the whole run: it obtains an API
    object through ``defineApi``, asks the user whether this is the first
    run, creates an ``EcoLexSearch`` and hands the links in
    ``search.entrylinks`` to the database layer.

    Attributes set by the constructor:
        gApi: value returned by ``defineApi``.
        firsttime: ``True`` when the user answered ``'1'`` to the first-run
            prompt, ``False`` otherwise.
        search: the ``EcoLexSearch`` instance created for this run.
        entriesAdded: number of links passed to ``addEntry`` so far.
    """

    def __init__(self, gApi=None):
        """Set up the API object, run the search and store new results.

        Args:
            gApi: optional value forwarded to ``defineApi``. When it is
                ``None`` (or ``defineApi`` returns ``None``), ``defineApi()``
                is called without arguments until it returns something
                other than ``None``.
        """
        # NOTE(review): the retry loop is assumed to follow the if/else
        # rather than live inside the else branch -- confirm against the
        # intended behaviour of defineApi(gApi) returning None.
        self.gApi = defineApi(gApi) if gApi is not None else None
        while self.gApi is None:
            self.gApi = defineApi()
        self.firstTime()
        self.search = EcoLexSearch()
        self.entriesAdded = 0
        self.enterNewResults()

    def firstTime(self):
        """Ask the user whether this is the first run and record the answer.

        Sets ``self.firsttime`` to ``True`` only when the user types ``1``;
        every other answer is treated as "not the first time".
        """
        first = raw_input("Is this the first time you run EcoLex Crawler? 1. 'Yes', 2. 'No' >> " )
        self.firsttime = (first == '1')

    def enterResults(self, links):
        """Pass every link in *links* to the database layer.

        For each link an ``EcoLexResult`` is built from the link and
        ``self.gApi`` and handed to ``addEntry`` together with the current
        ``firsttime`` flag. The flag is cleared after the first entry, and
        ``entriesAdded`` is incremented once per link.
        """
        for link in links:
            newentry = EcoLexResult(link, self.gApi)
            addEntry(newentry, self.firsttime)
            # Only the very first entry of a first run is flagged as such.
            if self.firsttime:
                self.firsttime = False
            self.entriesAdded += 1

    @staticmethod
    def _ecolexIdFromLink(link):
        """Return the value of the last ``id=`` parameter in *link*.

        The value runs up to the next ``&`` or, when no ``&`` follows, to
        the end of the string. Returns ``None`` when *link* contains no
        ``id=`` at all.
        """
        marker = link.rfind('id=')
        if marker == -1:
            return None
        start = marker + 3
        end = link.find('&', start)
        if end == -1:
            # No further parameter: the id is the rest of the link.
            return link[start:]
        return link[start:end]

    def _isStored(self, link):
        """Return ``True`` when the id found in *link* is already stored.

        Links without a recognisable id are reported as not stored, so they
        are still handed on to ``enterResults``.
        """
        ecolexID = self._ecolexIdFromLink(link)
        if ecolexID is None:
            return False
        session = startSession()
        matches = session.query(addEcoLexEntry.EcoLexEntry).filter(
            addEcoLexEntry.EcoLexEntry.ecolex_id == ecolexID
        ).count()
        session.commit()
        # "> 0" rather than "== 1": an id that is stored more than once is
        # still an id that must not be added again.
        return matches > 0

    def enterNewResults(self):
        """Drop links whose id is already stored, then store the rest.

        On a first run (``self.firsttime`` is true) no check is made and all
        links from ``self.search.entrylinks`` are passed on unchanged.
        """
        links = self.search.entrylinks
        if not self.firsttime:
            # Build the filtered list first and assign it back through a
            # slice: removing items while iterating skips the element after
            # each removal, while slice assignment still updates the search
            # object's list in place as before.
            links[:] = [link for link in links if not self._isStored(link)]
        self.enterResults(links)

    def printReport(self):
        """Print the search input followed by the number of entries added."""
        self.search.printSearchInput()
        # Parenthesised single expression: prints the same text under the
        # Python 2 print statement this file uses.
        print("This specification led to %i new entries in your data base." % self.entriesAdded)
def __init__(self, gApi=None):
    """Set up the API object, run the search and store new results.

    Args:
        gApi: optional value forwarded to ``defineApi``. When it is ``None``
            (or ``defineApi`` returns ``None``), ``defineApi()`` is called
            without arguments until it returns something other than
            ``None``.

    Side effects visible here: sets ``self.gApi``, ``self.search`` and
    ``self.entriesAdded``, and calls ``self.firstTime()`` and
    ``self.enterNewResults()``.
    """
    # Identity checks are the reliable way to test for None; "==" / "!="
    # can be overridden by the compared object.
    if gApi is not None:
        self.gApi = defineApi(gApi)
    else:
        self.gApi = None
    # NOTE(review): the retry loop is assumed to follow the if/else rather
    # than live inside the else branch -- confirm.
    while self.gApi is None:
        self.gApi = defineApi()
    self.firstTime()
    self.search = EcoLexSearch()
    self.entriesAdded = 0
    self.enterNewResults()