def postproc_20131119():
    """Recompute the ``normalized`` value of every entity.

    Iterates over ``Entity.query``, stores ``normalize_text(entity.name)``
    in ``entity.normalized``, and commits the session once all entities
    have been processed. Each entity is echoed to stdout as a crude
    progress indicator.
    """
    # Imported at call time, exactly as the original did.
    from nomenklatura.model.text import normalize_text

    for entity in Entity.query:
        # Parenthesised form prints the same one-element list on Python 2
        # and is a valid call on Python 3 (the bare statement form is not).
        print([entity])
        entity.normalized = normalize_text(entity.name)
        db.session.add(entity)
    db.session.commit()
def postproc_20131119():
    """Backfill ``Entity.normalized`` from ``Entity.name`` for all entities.

    Every entity yielded by ``Entity.query`` gets its ``normalized``
    attribute set to ``normalize_text(entity.name)`` and is added to the
    session; the session is committed once after the loop. The entity is
    printed before it is updated so progress is visible on stdout.
    """
    # Kept as a function-scope import, matching the original.
    from nomenklatura.model.text import normalize_text

    for entity in Entity.query:
        # ``print(...)`` with a single argument behaves identically on
        # Python 2 and 3; ``print[entity]`` is a TypeError on Python 3.
        print([entity])
        entity.normalized = normalize_text(entity.name)
        db.session.add(entity)
    db.session.commit()
def by_name(cls, dataset, name):
    """Return the first entity of ``dataset`` matching ``name``, or ``None``.

    The comparison honours two dataset flags:

    * ``dataset.normalize_text`` - compare against ``Entity.normalized``
      and pass ``name`` through ``normalize_text`` first; otherwise compare
      against ``Entity.name``.
    * ``dataset.ignore_case`` - wrap the compared column in ``func.lower``
      and lower-case ``name`` when it is a string.
    """
    use_normalized = dataset.normalize_text
    column = Entity.normalized if use_normalized else Entity.name
    if use_normalized:
        name = normalize_text(name)

    if dataset.ignore_case:
        column = func.lower(column)
        # Non-string lookups (e.g. None) are compared as-is.
        if isinstance(name, basestring):
            name = name.lower()

    matches = cls.query.filter_by(dataset=dataset).filter(column == name)
    return matches.first()
def update(self, data, account):
    """Validate ``data`` and copy its fields onto this entity.

    ``data`` is run through ``EntitySchema().to_python`` with an
    ``EntityState`` built from this entity and its dataset. The validated
    values then overwrite ``name``, ``data``, ``reviewed``, ``invalid`` and
    ``canonical``; ``normalized`` is recomputed from the new name and
    ``account`` is recorded as the creator. The entity is added to the
    session but nothing is flushed or committed here.
    """
    context = EntityState(self.dataset, self)
    validated = EntitySchema().to_python(data, context)

    self.creator = account
    self.name = validated['name']
    self.normalized = normalize_text(self.name)
    for field in ('data', 'reviewed', 'invalid', 'canonical'):
        setattr(self, field, validated[field])

    # TODO: redirect all aliases of this entity
    db.session.add(self)
def create(cls, dataset, data, account):
    """Build, persist and return a new entity in ``dataset``.

    ``data`` is validated with ``EntitySchema().to_python`` using an
    ``EntityState`` that has no existing entity. The new instance takes its
    ``name``, ``reviewed``, ``invalid`` and ``canonical`` from the validated
    data, ``attributes`` defaulting to an empty dict when absent, and
    ``normalized`` from ``normalize_text`` applied to the name. The instance
    is added to the session and flushed (not committed) before it is
    returned.
    """
    context = EntityState(dataset, None)
    validated = EntitySchema().to_python(data, context)

    instance = cls()
    instance.dataset = dataset
    instance.creator = account
    instance.name = validated['name']
    instance.normalized = normalize_text(instance.name)
    instance.attributes = validated.get('attributes', {})
    for field in ('reviewed', 'invalid', 'canonical'):
        setattr(instance, field, validated[field])

    db.session.add(instance)
    db.session.flush()
    return instance
def update(self, data, account):
    """Validate ``data``, apply it to this entity and re-point its aliases.

    ``data`` is run through ``EntitySchema().to_python`` with an
    ``EntityState`` built from this entity and its dataset. The validated
    values overwrite ``name``, ``attributes``, ``reviewed``, ``invalid`` and
    ``canonical``; ``normalized`` is recomputed from the new name and
    ``account`` is recorded as the creator.

    When a canonical entity is set, alias chains are flattened:

    * if the chosen canonical entity is itself an alias of *this* entity,
      that back-reference is cleared to avoid a cycle;
    * if it is an alias of some other entity, this entity is pointed at
      that entity instead;
    * every alias of this entity is then redirected to the resulting
      canonical entity.

    The entity is added to the session; nothing is flushed or committed.
    """
    state = EntityState(self.dataset, self)
    data = EntitySchema().to_python(data, state)
    self.creator = account
    self.name = data['name']
    self.normalized = normalize_text(self.name)
    self.attributes = data['attributes']
    self.reviewed = data['reviewed']
    self.invalid = data['invalid']
    self.canonical = data['canonical']

    # redirect all aliases of this entity
    if self.canonical:
        if self.canonical.canonical_id:
            # Fixed: the original read ``self.canonial`` here, which raised
            # AttributeError whenever the canonical entity was an alias.
            if self.canonical.canonical_id == self.id:
                self.canonical.canonical = None
            else:
                self.canonical = self.canonical.canonical

        for alias in self.aliases:
            alias.canonical = self.canonical

    db.session.add(self)
def normalize(text, dataset):
    """Apply the dataset's matching rules to ``text`` and return the result.

    ``text`` is lower-cased when ``dataset.ignore_case`` is truthy, and the
    (possibly lower-cased) value is then passed through ``normalize_text``
    when ``dataset.normalize_text`` is truthy. With both flags off the
    input is returned unchanged.
    """
    result = text.lower() if dataset.ignore_case else text
    if not dataset.normalize_text:
        return result
    return normalize_text(result)