class Account(db.Model):
    """A user account, linked to a GitHub identity via ``github_id``.

    Owns datasets, uploads and entities through the ``datasets``,
    ``uploads`` and ``entities_created`` dynamic relationships.
    """
    __tablename__ = 'account'

    id = db.Column(db.Integer, primary_key=True)
    github_id = db.Column(db.Integer)
    login = db.Column(db.Unicode)
    email = db.Column(db.Unicode)
    # A key is generated by ``make_key`` when none is supplied on insert.
    api_key = db.Column(db.Unicode, default=make_key)
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, default=datetime.utcnow,
                           onupdate=datetime.utcnow)

    datasets = db.relationship('Dataset', backref='owner', lazy='dynamic')
    uploads = db.relationship('Upload', backref='creator', lazy='dynamic')
    entities_created = db.relationship('Entity', backref='creator',
                                       lazy='dynamic')

    def to_dict(self):
        """Return the account as a plain dict.

        Only the fields listed below are exported; ``email`` and
        ``api_key`` are not part of the result.
        """
        exported = ('id', 'github_id', 'login', 'created_at', 'updated_at')
        return dict((field, getattr(self, field)) for field in exported)

    @classmethod
    def by_id(cls, id):
        """Return the account with primary key ``id``, or None."""
        matches = cls.query.filter_by(id=id)
        return matches.first()

    @classmethod
    def by_api_key(cls, api_key):
        """Return the account holding ``api_key``, or None."""
        matches = cls.query.filter_by(api_key=api_key)
        return matches.first()

    @classmethod
    def by_github_id(cls, github_id):
        """Return the account with the given GitHub ID, or None."""
        matches = cls.query.filter_by(github_id=github_id)
        return matches.first()

    @classmethod
    def create(cls, data):
        """Create, add and flush a new account built from ``data``.

        ``data`` must provide ``'id'`` (stored as ``github_id``) and
        ``'login'``; ``'email'`` is optional and defaults to None.
        """
        fresh = cls()
        fresh.github_id = data['id']
        fresh.login = data['login']
        fresh.email = data.get('email')
        session = db.session
        session.add(fresh)
        # Flush without committing; the caller decides when to commit.
        session.flush()
        return fresh

    def update(self, data):
        """Overwrite ``login`` and ``email`` from ``data`` and add to the session.

        ``'login'`` is required, ``'email'`` is optional. No flush is issued.
        """
        self.login = data['login']
        self.email = data.get('email')
        db.session.add(self)
class Dataset(db.Model):
    """A named collection of entities owned by an account.

    The boolean columns are matching/editing options; ``Entity.by_name``
    reads ``normalize_text`` and ``ignore_case`` when looking up names.
    """
    __tablename__ = 'dataset'

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.Unicode)
    label = db.Column(db.Unicode)
    ignore_case = db.Column(db.Boolean, default=False)
    match_aliases = db.Column(db.Boolean, default=False)
    public_edit = db.Column(db.Boolean, default=False)
    normalize_text = db.Column(db.Boolean, default=True)
    enable_invalid = db.Column(db.Boolean, default=True)
    owner_id = db.Column(db.Integer, db.ForeignKey('account.id'))
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, default=datetime.utcnow,
                           onupdate=datetime.utcnow)

    entities = db.relationship('Entity', backref='dataset', lazy='dynamic')
    uploads = db.relationship('Upload', backref='dataset', lazy='dynamic')

    def to_dict(self):
        """Return the dataset as a plain dict, including entity statistics.

        Four COUNT queries are issued (aliases, unreviewed, total, invalid)
        and the owner is embedded via ``owner.to_dict()``.
        """
        # Imported here rather than at module level, as in the original.
        from nomenklatura.model.entity import Entity

        stats = {
            # `!= None` is a SQLAlchemy column comparison, not a Python
            # identity test, so it must not be rewritten as `is not None`.
            'num_aliases': Entity.all(self).filter(
                Entity.canonical_id != None).count(),  # noqa: E711
            'num_review': Entity.all(self).filter_by(reviewed=False).count(),
            'num_entities': Entity.all(self).count(),
            'num_invalid': Entity.all(self).filter_by(invalid=True).count(),
        }

        result = {
            'id': self.id,
            'name': self.name,
            'label': self.label,
            'owner': self.owner.to_dict(),
            'stats': stats,
        }
        for option in ('ignore_case', 'match_aliases', 'public_edit',
                       'normalize_text', 'enable_invalid',
                       'created_at', 'updated_at'):
            result[option] = getattr(self, option)
        return result

    @property
    def last_modified(self):
        """Latest ``updated_at`` among the dataset, its entities and aliases."""
        candidates = [self.updated_at]

        from nomenklatura.model.entity import Entity
        newest_entity = self.entities.order_by(
            Entity.updated_at.desc()).first()
        if newest_entity is not None:
            candidates.append(newest_entity.updated_at)

        # NOTE(review): no `aliases` relationship is declared on this class
        # in the code visible here; presumably it is supplied by a backref
        # on the Alias model - confirm, otherwise this raises AttributeError.
        from nomenklatura.model.alias import Alias
        newest_alias = self.aliases.order_by(Alias.updated_at.desc()).first()
        if newest_alias is not None:
            candidates.append(newest_alias.updated_at)

        return max(candidates)

    @classmethod
    def by_name(cls, name):
        """Return the dataset called ``name``, or None."""
        matches = cls.query.filter_by(name=name)
        return matches.first()

    @classmethod
    def find(cls, name):
        """Return the dataset called ``name``; raise NotFound if missing."""
        found = cls.by_name(name)
        if found is not None:
            return found
        raise NotFound("No such dataset: %s" % name)

    @classmethod
    def from_form(cls, form_data):
        """Validate ``form_data`` with FormDatasetSchema and return its
        ``'dataset'`` value (None if absent)."""
        return FormDatasetSchema().to_python(form_data).get('dataset')

    @classmethod
    def all(cls):
        """Return the unfiltered query over all datasets."""
        return cls.query

    @classmethod
    def create(cls, data, account):
        """Validate ``data`` with DatasetNewSchema, then create, add and
        flush a dataset owned by ``account``."""
        cleaned = DatasetNewSchema().to_python(data)
        fresh = cls()
        fresh.owner = account
        fresh.name = cleaned['name']
        fresh.label = cleaned['label']
        session = db.session
        session.add(fresh)
        session.flush()
        return fresh

    def update(self, data):
        """Validate ``data`` with DatasetEditSchema and apply the editable
        fields, then add and flush. ``name`` is not changed here."""
        cleaned = DatasetEditSchema().to_python(data)
        for option in ('label', 'normalize_text', 'ignore_case',
                       'public_edit', 'match_aliases', 'enable_invalid'):
            setattr(self, option, cleaned[option])
        session = db.session
        session.add(self)
        session.flush()
class Entity(db.Model):
    """A named value in a dataset, optionally an alias of another entity.

    An entity whose ``canonical_id`` is set points at its canonical entity
    through the self-referential ``canonical`` relationship; the reverse
    side is the dynamic ``aliases`` backref.
    """
    __tablename__ = 'entity'

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.Unicode)
    # Result of ``normalize_text(name)``, maintained by create()/update().
    normalized = db.Column(db.Unicode)
    attributes = db.Column(HSTORE)
    reviewed = db.Column(db.Boolean, default=False)
    invalid = db.Column(db.Boolean, default=False)
    canonical_id = db.Column(db.Integer, db.ForeignKey('entity.id'),
                             nullable=True)
    dataset_id = db.Column(db.Integer, db.ForeignKey('dataset.id'))
    creator_id = db.Column(db.Integer, db.ForeignKey('account.id'))
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, default=datetime.utcnow,
                           onupdate=datetime.utcnow)

    canonical = db.relationship('Entity',
                                backref=backref('aliases', lazy='dynamic'),
                                remote_side='Entity.id')

    def to_dict(self, shallow=False):
        """Return the entity as a plain dict.

        With ``shallow=True`` the creator, attributes and alias count are
        omitted (the alias count costs an extra query). Note that
        ``'canonical'`` holds the related Entity object itself (or None).
        """
        d = {
            'id': self.id,
            'name': self.name,
            'dataset': self.dataset.name,
            'reviewed': self.reviewed,
            'invalid': self.invalid,
            'canonical': self.canonical,
            #'normalized': self.normalized,
            'created_at': self.created_at,
            'updated_at': self.updated_at,
        }
        if not shallow:
            d['creator'] = self.creator.to_dict()
            d['attributes'] = self.attributes
            d['num_aliases'] = self.aliases.count()
        return d

    def to_row(self):
        """Return a flat dict: the attributes overlaid with the shallow
        ``to_dict()`` fields, with ``'canonical'`` replaced by the canonical
        entity's name when there is one."""
        # Copy so the update below never mutates the stored attributes.
        row = self.attributes or {}
        row = row.copy()
        row.update(self.to_dict(shallow=True))
        if self.canonical is not None:
            row['canonical'] = self.canonical.name
        return row

    @property
    def display_name(self):
        """Name to show for this entity; currently just ``name``."""
        return self.name

    @classmethod
    def by_name(cls, dataset, name):
        """Return the first entity in ``dataset`` matching ``name``, or None.

        Matching follows the dataset options: with ``normalize_text`` the
        normalized column is compared against ``normalize_text(name)``;
        with ``ignore_case`` both sides are lower-cased.
        """
        q = cls.query.filter_by(dataset=dataset)
        attr = Entity.name
        if dataset.normalize_text:
            attr = Entity.normalized
            name = normalize_text(name)
        if dataset.ignore_case:
            attr = func.lower(attr)
            # Only strings can be lower-cased; other values pass through.
            if isinstance(name, basestring):
                name = name.lower()
        q = q.filter(attr == name)
        return q.first()

    @classmethod
    def by_id(cls, id):
        """Return the entity with primary key ``id``, or None.

        ``id`` is coerced with ``int()``; values that cannot be coerced
        (including None) yield None instead of raising.
        """
        try:
            entity_id = int(id)
        except (ValueError, TypeError):
            # TypeError covers None and other non-numeric objects, which
            # previously escaped as an unhandled exception.
            return None
        return cls.query.filter_by(id=entity_id).first()

    @classmethod
    def id_map(cls, ids):
        """Return ``{entity.id: entity}`` for every entity whose id is in
        ``ids``."""
        entities = {}
        for entity in cls.query.filter(cls.id.in_(ids)):
            entities[entity.id] = entity
        return entities

    @classmethod
    def find(cls, dataset, id):
        """Return the entity with ``id``; raise NotFound if there is none.

        NOTE(review): ``dataset`` is accepted but not used to restrict the
        lookup, so an entity from another dataset can be returned - confirm
        whether that is intended.
        """
        entity = cls.by_id(id)
        if entity is None:
            raise NotFound("No such value ID: %s" % id)
        return entity

    @classmethod
    def all(cls, dataset=None, query=None, eager_aliases=False, eager=False):
        """Return a query over entities.

        :param dataset: restrict to this dataset when not None.
        :param query: case-insensitive substring filter on ``name``;
            ignored when None or blank.
        :param eager_aliases: join-load aliases (see note below).
        :param eager: join-load the ``dataset`` and ``creator`` relations.
        """
        q = cls.query
        if dataset is not None:
            q = q.filter_by(dataset=dataset)
        if query is not None and query.strip():
            q = q.filter(cls.name.ilike('%%%s%%' % query.strip()))
        if eager_aliases:
            # NOTE(review): `aliases_static` is not declared on this class
            # in the code visible here; this branch presumably raises
            # AttributeError unless it is defined elsewhere - confirm.
            q = q.options(joinedload_all(cls.aliases_static))
        if eager:
            q = q.options(db.joinedload('dataset'))
            q = q.options(db.joinedload('creator'))
        return q

    @classmethod
    def create(cls, dataset, data, account):
        """Validate ``data`` with EntitySchema, then create, add and flush
        an entity in ``dataset`` created by ``account``."""
        state = EntityState(dataset, None)
        data = EntitySchema().to_python(data, state)
        entity = cls()
        entity.dataset = dataset
        entity.creator = account
        entity.name = data['name']
        entity.normalized = normalize_text(entity.name)
        entity.attributes = data.get('attributes', {})
        entity.reviewed = data['reviewed']
        entity.invalid = data['invalid']
        entity.canonical = data['canonical']
        db.session.add(entity)
        db.session.flush()
        return entity

    def update(self, data, account):
        """Validate ``data`` with EntitySchema and apply it to this entity.

        ``account`` becomes the creator. When the entity ends up with a
        canonical entity, chains and cycles are collapsed and this entity's
        own aliases are re-pointed at that canonical entity. The entity is
        added to the session; no flush is issued.
        """
        state = EntityState(self.dataset, self)
        data = EntitySchema().to_python(data, state)
        self.creator = account
        self.name = data['name']
        self.normalized = normalize_text(self.name)
        self.attributes = data['attributes']
        self.reviewed = data['reviewed']
        self.invalid = data['invalid']
        self.canonical = data['canonical']

        # redirect all aliases of this entity
        if self.canonical:
            if self.canonical.canonical_id:
                # Fixed: this previously read `self.canonial`, which raised
                # AttributeError whenever the new canonical was an alias.
                if self.canonical.canonical_id == self.id:
                    # The canonical pointed back at us: break the cycle by
                    # making it a canonical entity in its own right.
                    self.canonical.canonical = None
                else:
                    # The canonical is itself an alias: follow the chain one
                    # step so we point at its canonical instead.
                    self.canonical = self.canonical.canonical

            for alias in self.aliases:
                alias.canonical = self.canonical

        db.session.add(self)
class Dataset(db.Model):
    """A named collection of entities and aliases owned by an account.

    ``create()`` and ``update()`` call ``flush_cache`` for the dataset
    after flushing the session.
    """
    __tablename__ = 'dataset'

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.Unicode)
    label = db.Column(db.Unicode)
    ignore_case = db.Column(db.Boolean, default=False)
    match_aliases = db.Column(db.Boolean, default=False)
    public_edit = db.Column(db.Boolean, default=False)
    normalize_text = db.Column(db.Boolean, default=True)
    enable_invalid = db.Column(db.Boolean, default=True)
    algorithm = db.Column(db.Unicode)
    owner_id = db.Column(db.Integer, db.ForeignKey('account.id'))
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, default=datetime.utcnow,
                           onupdate=datetime.utcnow)

    entities = db.relationship('Entity', backref='dataset', lazy='dynamic')
    aliases = db.relationship('Alias', backref='dataset', lazy='dynamic')

    def as_dict(self):
        """Return the dataset as a plain dict with the owner embedded.

        NOTE(review): relies on the owner object exposing ``as_dict()`` -
        confirm against the Account model in use.
        """
        payload = {
            'id': self.id,
            'name': self.name,
            'label': self.label,
            'owner': self.owner.as_dict(),
        }
        for option in ('ignore_case', 'match_aliases', 'public_edit',
                       'normalize_text', 'enable_invalid', 'algorithm',
                       'created_at', 'updated_at'):
            payload[option] = getattr(self, option)
        return payload

    @classmethod
    def by_name(cls, name):
        """Return the dataset called ``name``, or None."""
        matches = cls.query.filter_by(name=name)
        return matches.first()

    @classmethod
    def find(cls, name):
        """Return the dataset called ``name``; raise NotFound if missing."""
        found = cls.by_name(name)
        if found is not None:
            return found
        raise NotFound("No such dataset: %s" % name)

    @classmethod
    def all(cls):
        """Return the unfiltered query over all datasets."""
        return cls.query

    @classmethod
    def create(cls, data, account):
        """Validate ``data`` with DatasetNewSchema, then create, add and
        flush a dataset owned by ``account`` and call ``flush_cache``."""
        cleaned = DatasetNewSchema().to_python(data)
        fresh = cls()
        fresh.owner = account
        fresh.name = cleaned['name']
        fresh.label = cleaned['label']
        session = db.session
        session.add(fresh)
        session.flush()
        flush_cache(fresh)
        return fresh

    def update(self, data):
        """Validate ``data`` with DatasetEditSchema and apply the editable
        fields, then add, flush and call ``flush_cache``. ``name`` is not
        changed here."""
        cleaned = DatasetEditSchema().to_python(data)
        for option in ('label', 'normalize_text', 'ignore_case',
                       'public_edit', 'match_aliases', 'enable_invalid',
                       'algorithm'):
            setattr(self, option, cleaned[option])
        session = db.session
        session.add(self)
        session.flush()
        flush_cache(self)
class Entity(db.Model):
    """A named value in a dataset, with free-form ``data`` and aliases.

    Aliases are separate ``Alias`` rows reachable through the dynamic
    ``aliases`` relationship (backref ``entity``); ``aliases_static`` is
    the same relation without dynamic loading, used for eager joins.
    """
    __tablename__ = 'entity'

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.Unicode)
    data = db.Column(JsonType, default=dict)
    dataset_id = db.Column(db.Integer, db.ForeignKey('dataset.id'))
    creator_id = db.Column(db.Integer, db.ForeignKey('account.id'))
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, default=datetime.utcnow,
                           onupdate=datetime.utcnow)

    aliases = db.relationship('Alias', backref='entity', lazy='dynamic')
    aliases_static = db.relationship('Alias')

    def as_dict(self, shallow=False):
        """Return the entity as a plain dict.

        With ``shallow=True`` only id, name and timestamps are included;
        otherwise the creator, dataset name and ``data`` are added.
        """
        d = {
            'id': self.id,
            'name': self.name,
            'created_at': self.created_at,
            'updated_at': self.updated_at,
        }
        if not shallow:
            d['creator'] = self.creator.as_dict()
            # Fixed: stray trailing commas used to wrap both of these
            # values in 1-tuples.
            d['dataset'] = self.dataset.name
            d['data'] = self.data
        return d

    def as_row(self):
        """Return a flat dict: ``data`` overlaid with the shallow
        ``as_dict()`` fields."""
        # Copy so the update never mutates the stored data; tolerate a
        # NULL data column by starting from an empty dict.
        row = (self.data or {}).copy()
        row.update(self.as_dict(shallow=True))
        return row

    @property
    def display_name(self):
        """Name to show for this entity; currently just ``name``."""
        return self.name

    @classmethod
    def by_name(cls, dataset, name):
        """Return the entity in ``dataset`` with exactly ``name``, or None."""
        return cls.query.filter_by(dataset=dataset).\
            filter_by(name=name).first()

    @classmethod
    def by_id(cls, dataset, id):
        """Return the entity in ``dataset`` with primary key ``id``, or
        None."""
        return cls.query.filter_by(dataset=dataset).\
            filter_by(id=id).first()

    @classmethod
    def id_map(cls, dataset, ids):
        """Return ``{entity.id: entity}`` for the entities of ``dataset``
        whose id is in ``ids``."""
        entities = {}
        for entity in cls.query.filter_by(dataset=dataset).\
                filter(cls.id.in_(ids)):
            entities[entity.id] = entity
        return entities

    @classmethod
    def find(cls, dataset, id):
        """Return the entity in ``dataset`` with ``id``; raise NotFound if
        there is none."""
        entity = cls.by_id(dataset, id)
        if entity is None:
            raise NotFound("No such value ID: %s" % id)
        return entity

    @classmethod
    def all(cls, dataset, query=None, eager_aliases=False, eager=False):
        """Return a query over the entities of ``dataset``.

        :param query: case-insensitive substring filter on ``name``;
            ignored when None or blank.
        :param eager_aliases: join-load aliases via ``aliases_static``.
        :param eager: join-load the ``dataset`` and ``creator`` relations.
        """
        q = cls.query.filter_by(dataset=dataset)
        if query is not None and query.strip():
            q = q.filter(cls.name.ilike('%%%s%%' % query.strip()))
        if eager_aliases:
            q = q.options(joinedload_all(cls.aliases_static))
        if eager:
            q = q.options(db.joinedload('dataset'))
            q = q.options(db.joinedload('creator'))
        return q

    @classmethod
    def create(cls, dataset, data, account):
        """Validate ``data`` with EntitySchema, then create, add and flush
        an entity in ``dataset`` and register it via
        ``add_candidate_to_cache``."""
        state = EntityState(dataset, None)
        data = EntitySchema().to_python(data, state)
        entity = cls()
        entity.dataset = dataset
        entity.creator = account
        entity.name = data['name']
        entity.data = data['data']
        db.session.add(entity)
        # Flush first so entity.id is assigned before it goes in the cache.
        db.session.flush()
        add_candidate_to_cache(dataset, entity.name, entity.id)
        return entity

    def update(self, data, account):
        """Validate ``data`` with EntitySchema, apply name and data, call
        ``flush_cache`` for the dataset and add the entity to the session.

        ``account`` becomes the creator. No flush is issued.
        """
        state = EntityState(self.dataset, self)
        data = EntitySchema().to_python(data, state)
        self.creator = account
        self.name = data['name']
        self.data = data['data']
        flush_cache(self.dataset)
        db.session.add(self)

    def merge_into(self, data, account):
        """Merge this entity into the target named in ``data`` and commit.

        ``data`` is validated with EntityMergeSchema and must yield a
        ``'target'`` entity. Existing aliases are moved to the target, a
        new matched alias carrying this entity's name is created for it
        (with ``account`` as matcher), and this entity is deleted.

        :returns: the target entity.
        """
        from nomenklatura.model.alias import Alias

        # Read the dataset before this row is deleted and committed, so the
        # cache flush below does not touch a deleted instance.
        dataset = self.dataset
        state = EntityState(dataset, self)
        data = EntityMergeSchema().to_python(data, state)
        target = data.get('target')

        for alias in self.aliases:
            # Fixed: this assigned `alias.value`, but the relationship on
            # this class declares the backref as `entity` (and the new
            # alias below uses it), so aliases were never re-pointed.
            alias.entity = target

        alias = Alias()
        alias.name = self.name
        alias.creator = self.creator
        alias.matcher = account
        alias.entity = target
        alias.dataset = dataset
        alias.is_matched = True

        db.session.delete(self)
        db.session.add(alias)
        db.session.commit()
        flush_cache(dataset)
        return target