def __init__(self, eid, phr, cc, is_valid):
    """Initialise a taxon entry for a single entity phrase.

    Args:
        eid: Entity identifier. Stored as ``entity_id`` and reused
            unchanged as the taxon ``name``.
        phr: Phrase text. Kept verbatim in ``phrase``; a lower-cased
            copy is kept in ``phrasenorm``.
        cc: Value stored in ``country``.
        is_valid: Validity flag, stored as given.
    """
    Taxon.__init__(self)

    normalized = phr.lower()
    self.phrase = phr
    self.phrasenorm = normalized

    self.entity_id = eid
    self.country = cc
    self.is_valid = is_valid

    # taxon ID/name: the entity ID doubles as the taxon name.
    self.name = eid
    # Tags are built last, once every other attribute has been assigned
    # (_make_tags is defined outside this block).
    self.tags = self._make_tags()
def __init__(self, eid, variant_id, etype, lang, primary_name, ename):
    """Initialise a taxon entry for one JRC entity name variant.

    The entity type starts as the upper-cased ``etype`` and is then
    adjusted in three stages:

    1. If the exact phrase is a key of ``FIXES``, the mapped type is used.
    2. If ``apply_default_fixes`` is truthy, token heuristics run on the
       lower-cased phrase: a matching last/first token re-types the entry
       as ``'T'``; an entry still typed ``'P'`` that contains any token
       from ``ORG_FIXES`` is re-typed as ``'O'``.
    3. If the resulting type is a key of ``entity_map``, it is replaced by
       the mapped value.

    Each heuristic re-typing prints a one-line notice to stdout.

    Args:
        eid: JRC original entity ID; stored as ``entity_id``.
        variant_id: Stored as ``variant_id`` before ``_make_id()`` is
            called.
        etype: Entity type code; upper-cased before use.
        lang: Stored as ``lang``.
        primary_name: Used together with the final entity type to form
            the taxon ``name`` (``'<type>.<primary_name>'``).
        ename: The name variant; stored verbatim as ``phrase`` and
            lower-cased as ``phrasenorm``.

    Note:
        ``FIXES``, ``apply_default_fixes``, ``PLACE_ENDING_FIXES``,
        ``PLACE_STARTING_FIXES``, ``ignore_provinces``, ``ORG_FIXES``,
        ``entity_map`` and ``is_ascii`` are resolved from the enclosing
        scope; they are not defined in this method.
    """
    Taxon.__init__(self)

    # JRC original entity ID and type
    self.entity_id = eid
    self.entity_type = etype.upper()
    self.lang = lang
    self.phrase = ename
    self.phrasenorm = ename.lower()

    # Exact-phrase override is applied first; the heuristics below may
    # still replace the type afterwards.
    if self.phrase in FIXES:
        self.entity_type = FIXES.get(self.phrase)

    if apply_default_fixes:
        tokens = self.phrasenorm.split()
        # An empty or whitespace-only name splits to []; indexing
        # tokens[-1] / tokens[0] would raise IndexError, so the token
        # heuristics are skipped when there is nothing to inspect.
        if tokens:
            first_tok = tokens[0]
            last_tok = tokens[-1]

            # The single-argument print(...) form emits the same text as
            # the two-item print statement and parses on Python 2 and 3.
            if last_tok in PLACE_ENDING_FIXES:
                # Place (T=terrain)
                self.entity_type = 'T'
                print("%s %s" % ("Place Phrase fixed", self.phrase))
            elif first_tok in PLACE_STARTING_FIXES:
                self.entity_type = 'T'
                print("%s %s" % ("Place Phrase fixed", self.phrase))
            elif last_tok in ignore_provinces:
                self.entity_type = 'T'
                print("%s %s" % ("Ignore Province name in token", self.phrase))

            # NOTE(review): 'P' / 'O' presumably mean person / organization,
            # going by the "Org Phrase fixed" message -- confirm against the
            # JRC type codes.
            if self.entity_type == 'P':
                if any(tok in ORG_FIXES for tok in tokens):
                    self.entity_type = 'O'
                    print("%s %s" % ("Org Phrase fixed", self.phrase))

    # Translate the (possibly corrected) type code when a mapping exists;
    # unmapped codes are kept as they are.
    if self.entity_type in entity_map:
        self.entity_type = entity_map[self.entity_type]

    self.variant_id = variant_id

    # solr record ID:
    self.id = self._make_id()

    self.is_valid = True
    # Acronym: the raw name is all upper-case and passes is_ascii().
    self.is_acronym = ename.isupper() and is_ascii(ename)

    # taxon ID/name:
    self.name = '%s.%s' % (self.entity_type, primary_name)
    self.tags = self._make_tags()