Exemple #1
0
class EcoLexCrawl(object):
    """Run one interactive EcoLex crawl and store the new results.

    Constructing an instance performs the whole run: it resolves the API
    handle through ``defineApi``, asks the user whether this is the first
    run, builds an ``EcoLexSearch`` and passes the links found in its
    ``entrylinks`` attribute on to the database.

    Attributes set by the run:
        gApi         -- value returned by ``defineApi``
        firsttime    -- True until the first entry has been added on a
                        run the user declared to be the first one
        search       -- the ``EcoLexSearch`` instance of this run
        entriesAdded -- number of entries passed to ``addEntry``
    """

    def __init__(self, gApi = None):
        """Set up the API handle, prompt the user and run the crawl.

        gApi -- optional value handed to ``defineApi``; when it is None
                ``defineApi()`` is called without arguments instead.
        """
        self.gApi = defineApi(gApi) if gApi is not None else None
        # Keep asking until defineApi hands back something other than None.
        while self.gApi is None:
            self.gApi = defineApi()

        self.firstTime()
        self.search = EcoLexSearch()
        self.entriesAdded = 0
        self.enterNewResults()

    def firstTime(self):
        """Prompts user about creating a new data base.

        Stores the answer as a boolean in ``self.firsttime``; only the
        literal answer '1' counts as "yes".
        """
        first = raw_input("Is this the first time you run EcoLex Crawler? 1. 'Yes', 2. 'No' >> " )
        self.firsttime = (first == '1')

    def enterResults(self, links):
        """Pass results to database.

        Builds an ``EcoLexResult`` for every link and hands it to
        ``addEntry`` together with the current ``firsttime`` flag, counting
        each one in ``self.entriesAdded``.
        """
        for link in links:
            newentry = EcoLexResult(link, self.gApi)
            addEntry(newentry, self.firsttime)
            # Only the very first entry of a first run is added with the
            # flag set; every later entry sees False.
            self.firsttime = False
            self.entriesAdded += 1

    @staticmethod
    def _ecolexIdFromLink(link):
        """Return the value of the last ``id=`` parameter in link, or None.

        The id ends at the last '&' that follows the marker; when no '&'
        follows, it runs to the end of the link.
        """
        marker = link.rfind('id=')
        if marker == -1:
            return None
        start = marker + 3
        end = link.rfind('&', start)
        if end == -1:
            # Nothing follows the id: slicing with -1 would cut off its
            # last character.
            return link[start:]
        return link[start:end]

    def _isStored(self, ecolexID):
        """Return True when at least one stored entry has this ecolex_id."""
        session = startSession()
        try:
            matches = session.query(addEcoLexEntry.EcoLexEntry).filter(
                addEcoLexEntry.EcoLexEntry.ecolex_id == ecolexID).count()
            session.commit()
        finally:
            # NOTE(review): assumes an SQLAlchemy-style session (it already
            # exposes query()/commit()); close it so one session per link
            # does not pile up open connections.
            session.close()
        return matches > 0

    def enterNewResults(self):
        """Checks whether scraped entries are in database already and passes new ones to database.

        On a first run every link is passed on unchecked. Otherwise links
        whose id is already stored are dropped from the search's
        ``entrylinks`` list (in place) before the rest is entered.
        """
        links = self.search.entrylinks
        if not self.firsttime:
            # Collect the survivors in a new list: removing items from the
            # list being iterated would skip the element after each removal.
            fresh = []
            for link in links:
                ecolexID = self._ecolexIdFromLink(link)
                # A link without a recognisable id cannot be matched, so it
                # is treated as new.
                if ecolexID is None or not self._isStored(ecolexID):
                    fresh.append(link)
            links[:] = fresh

        self.enterResults(links)

    def printReport(self):
        """Prints a report of the specified search."""
        self.search.printSearchInput()
        print("This specification led to %i new entries in your data base." % self.entriesAdded)
Exemple #2
0
    def __init__(self, gApi = None):
        """Set up the API handle, prompt the user and run the crawl.

        gApi -- optional value handed to ``defineApi``; when it is None
                ``defineApi()`` is called without arguments instead.
        """
        # Compare against None by identity rather than equality.
        self.gApi = defineApi(gApi) if gApi is not None else None
        # Keep asking until defineApi hands back something other than None.
        while self.gApi is None:
            self.gApi = defineApi()

        self.firstTime()
        self.search = EcoLexSearch()
        self.entriesAdded = 0
        self.enterNewResults()