コード例 #1
0
ファイル: EcoLexMain.py プロジェクト: mace84/EcolexCrawler
    def __init__(self, gApi=None):
        """Resolve the API handle, then run the start-up crawl steps.

        Args:
            gApi: optional value forwarded to ``defineApi``. When it is None,
                ``defineApi()`` is called without arguments instead
                (presumably prompting for a Google API key -- confirm
                against ``defineApi``).

        After the API handle is set, the constructor calls
        ``self.firstTime()``, creates an ``EcoLexSearch`` instance, resets
        the ``entriesAdded`` counter and calls ``self.enterNewResults()``.
        """
        # Compare against None by identity so an object with a custom
        # __eq__ cannot be mistaken for a missing value.
        self.gApi = defineApi(gApi) if gApi is not None else None

        # Keep asking until defineApi() yields something other than None;
        # this also covers defineApi(gApi) having returned None above.
        while self.gApi is None:
            self.gApi = defineApi()

        self.firstTime()
        self.search = EcoLexSearch()
        self.entriesAdded = 0
        self.enterNewResults()
コード例 #2
0
    def __init__(self, entryurl, gApi=None):
        """Populate the entry from its HTML table and add English text.

        Reads the table returned by ``self.getHTMLTable()``, copies the
        labelled fields into attributes, merges keywords and subjects into
        ``self.keywords`` and, when an abstract is present, detects its
        language and stores English versions of the abstract and keywords.

        Args:
            entryurl: stored as ``self.entryurl`` before the table is read
                (presumably the URL of the entry page -- confirm against
                ``getHTMLTable``).
            gApi: optional value forwarded to ``defineApi``; when None,
                ``defineApi()`` is called without arguments.

        Attributes set: ``gApi``, ``entryurl``, ``ecolex_id``, ``name``,
        ``country``, ``date``, ``legtype``, ``source``, ``fulltext``,
        ``abstract``, ``keywords``, ``language``, ``abstractEN`` and
        ``keywordsEN``. The last three are None when there is no abstract.
        """
        # check for google api key, request if necessary
        if gApi is not None:
            self.gApi = defineApi(gApi)
        else:
            self.gApi = defineApi()

        self.entryurl = entryurl
        table = self.getHTMLTable()
        fieldList = self.getFieldList(table)
        entryList = self.getEntryList(table)

        # NOTE(review): several labels below lack their final 't'
        # ('Title of tex', 'Date of tex', 'Type of documen', 'Abstrac').
        # They are left unchanged because getEntry's matching rules are not
        # visible here -- confirm they are intentional.
        self.ecolex_id = self.getEntry('Legislation ID number', fieldList, entryList)
        self.name = self.getEntry('Title of tex', fieldList, entryList)
        self.country = self.getEntry('Country', fieldList, entryList)
        self.date = self.getEntry('Date of tex', fieldList, entryList)
        self.legtype = self.getEntry('Type of documen', fieldList, entryList)
        self.source = self.getEntry('Source', fieldList, entryList)
        self.fulltext = self.getUrl('Link to full tex', fieldList, entryList)
        self.abstract = self.getEntry('Abstrac', fieldList, entryList)

        # concatenate keywords and subjects only if there are entries;
        # a single present value is used as-is, and None means neither exists
        keywordsA = self.getEntry('Keyword(s)', fieldList, entryList)
        keywordsB = self.getEntry('Subject(s)', fieldList, entryList)
        present = [kw for kw in (keywordsA, keywordsB) if kw is not None]
        self.keywords = '; '.join(present) if present else None

        # without an abstract there is nothing to detect the language from
        if self.abstract is None:
            self.language = None
            self.abstractEN = None
            self.keywordsEN = None
            return

        # check language and translate keywords and abstract if not english;
        # only the first five space-separated tokens are sent for detection
        languageSample = ' '.join(self.abstract.split(' ')[0:5])
        self.language = identify(languageSample, self.gApi)

        if self.language == 'en':
            self.abstractEN = self.abstract
            self.keywordsEN = self.keywords
            return

        translationAB = translate(self.abstract, self.language, 'en', self.gApi)
        self.abstractEN = to_unicode(translationAB)

        # Translate the keywords themselves rather than the abstract, and
        # skip the call entirely when the entry has no keywords.
        if self.keywords is not None:
            translationKW = translate(self.keywords, self.language, 'en', self.gApi)
            self.keywordsEN = to_unicode(translationKW)
        else:
            self.keywordsEN = None