Exemple #1
0
    def __init__(self, eid, phr, cc, is_valid):
        """Populate the taxon from an entity ID, a phrase and a country value.

        :param eid: stored as ``entity_id`` and reused as the taxon ``name``.
        :param phr: stored as ``phrase``; its lower-cased form is kept in
            ``phrasenorm``.
        :param cc: stored as ``country`` (presumably a country code --
            confirm with callers).
        :param is_valid: stored unchanged as ``is_valid``.
        """
        Taxon.__init__(self)

        # The entity ID doubles as the taxon ID/name.
        self.entity_id = self.name = eid
        self.country = cc

        # Keep the phrase as given plus a lower-cased copy.
        self.phrase = phr
        self.phrasenorm = phr.lower()

        self.is_valid = is_valid

        # Built last, once every field above has been assigned.
        self.tags = self._make_tags()
    def __init__(self, eid, variant_id, etype, lang, primary_name, ename):
        Taxon.__init__(self)

        # JRC original entity ID and type
        self.entity_id = eid
        self.entity_type = etype.upper()
        self.lang = lang
        self.phrase = ename
        self.phrasenorm = ename.lower()
        
        if self.phrase in FIXES:
            self.entity_type = FIXES.get(self.phrase)

        if apply_default_fixes:
          tokens = self.phrasenorm.split()
          if tokens[-1] in PLACE_ENDING_FIXES:
            # Place (T=terrain)
            self.entity_type = 'T'
            print "Place Phrase fixed", self.phrase
          elif tokens[0] in PLACE_STARTING_FIXES:
            self.entity_type = 'T'
            print "Place Phrase fixed", self.phrase
          elif tokens[-1] in ignore_provinces:
            self.entity_type = 'T'
            print "Ignore Province name in token", self.phrase

          if self.entity_type == 'P':
            for tok in tokens:
                if tok in ORG_FIXES: 
                    self.entity_type = 'O'
                    print "Org Phrase fixed", self.phrase
                    break;
        
        if self.entity_type in entity_map:
            self.entity_type = entity_map[self.entity_type]
        
        self.variant_id = variant_id
        # solr record ID:
        self.id = self._make_id()
        self.is_valid = True
        self.is_acronym = ename.isupper() and is_ascii(ename)
                
        # taxon ID/name:
        self.name = '%s.%s' % (self.entity_type, primary_name)
        
        self.tags = self._make_tags()