Ejemplo n.º 1
0
    def __init__(self, eid, phr, cc, is_valid):
        '''
        Initialise the taxon from an entity ID, a phrase, a country value
        and a validity flag.

        :param eid: stored as ``entity_id`` and reused as the taxon name.
        :param phr: stored as ``phrase``; its lower-cased form becomes
            ``phrasenorm``.
        :param cc: stored as ``country``.
        :param is_valid: stored unchanged as ``is_valid``.
        '''
        Taxon.__init__(self)

        # Identity: the entity ID doubles as the taxon ID/name.
        self.entity_id = eid
        self.name = eid
        self.country = cc

        # Phrase as given, plus its lower-cased form.
        self.phrase = phr
        self.phrasenorm = phr.lower()

        self.is_valid = is_valid

        # Tags are built last, once every other attribute is in place.
        self.tags = self._make_tags()
Ejemplo n.º 2
0
    def __init__(self, eid, variant_id, etype, lang, primary_name, ename):
        '''
        Initialise the taxon for one name variant of a JRC entity.

        The entity type starts as the upper-cased ``etype`` and may then be
        overridden, in this order:

        1. by an exact-phrase entry in ``FIXES``;
        2. by the token-based default fixes, applied only when
           ``apply_default_fixes`` is truthy;
        3. by a final translation through ``entity_map``.

        ``FIXES``, ``apply_default_fixes``, ``PLACE_ENDING_FIXES``,
        ``PLACE_STARTING_FIXES``, ``ignore_provinces``, ``ORG_FIXES``,
        ``entity_map`` and ``is_ascii`` are resolved outside this method.

        :param eid: JRC original entity ID; stored as ``entity_id``.
        :param variant_id: stored as ``variant_id`` before the record ID is
            generated by ``_make_id()``.
        :param etype: JRC original entity type; upper-cased before use.
        :param lang: stored unchanged as ``lang``.
        :param primary_name: combined with the final entity type to form the
            taxon name ``'<entity_type>.<primary_name>'``.
        :param ename: the name variant; stored as ``phrase`` and, lower-cased,
            as ``phrasenorm``.
        '''
        Taxon.__init__(self)

        # JRC original entity ID and type
        self.entity_id = eid
        self.entity_type = etype.upper()
        self.lang = lang
        self.phrase = ename
        self.phrasenorm = ename.lower()

        # An exact-phrase override is applied before any token-based rule.
        if self.phrase in FIXES:
            self.entity_type = FIXES[self.phrase]

        if apply_default_fixes:
            tokens = self.phrasenorm.split()
            # An empty or whitespace-only name yields no tokens; skip the
            # positional checks rather than failing with IndexError.
            if tokens:
                note = None
                if tokens[-1] in PLACE_ENDING_FIXES:
                    note = "Place Phrase fixed"
                elif tokens[0] in PLACE_STARTING_FIXES:
                    note = "Place Phrase fixed"
                elif tokens[-1] in ignore_provinces:
                    note = "Ignore Province name in token"

                if note is not None:
                    # Place (T=terrain)
                    self.entity_type = 'T'
                    # Single-argument print() emits the same text under the
                    # Python 2 print statement and under Python 3.
                    print("%s %s" % (note, self.phrase))

                # NOTE(review): 'P' presumably marks a person and 'O' an
                # organisation -- confirm against the JRC type codes.
                if self.entity_type == 'P' and any(
                        tok in ORG_FIXES for tok in tokens):
                    self.entity_type = 'O'
                    print("%s %s" % ("Org Phrase fixed", self.phrase))

        # Translate the (possibly corrected) type code when a mapping exists.
        if self.entity_type in entity_map:
            self.entity_type = entity_map[self.entity_type]

        self.variant_id = variant_id
        # solr record ID:
        self.id = self._make_id()
        self.is_valid = True
        self.is_acronym = ename.isupper() and is_ascii(ename)

        # taxon ID/name:
        self.name = '%s.%s' % (self.entity_type, primary_name)

        self.tags = self._make_tags()
Ejemplo n.º 3
0
def create_entity(name):
    '''
    Build a taxon for a generic person name, as opposed to a specific
    personality/celebrity.

    The name is stripped of surrounding whitespace and lower-cased; that
    value is used for the phrase, the normalised phrase and, prefixed with
    ``person_name.``, the taxon name. The taxon is always marked valid and
    carries no tags.
    '''
    entity = Taxon()
    cleaned = name.strip().lower()

    entity.name = 'person_name.{}'.format(cleaned)
    entity.phrase = cleaned
    # The cleaned value is already normalised, so it is reused as-is.
    entity.phrasenorm = cleaned
    entity.tags = []
    # NOTE: a filter that marked names found in non_person_names as
    # invalid is currently disabled; every taxon is returned as valid.
    entity.is_valid = True

    return entity