예제 #1
0
class Account(db.Model):
    """ORM model for rows of the ``account`` table.

    Holds a GitHub id, a login, an e-mail address and an API key (filled in
    by ``make_key`` when no value is supplied). It is the ``owner`` side of
    the Dataset relationship and the ``creator`` side of the Upload and
    Entity relationships declared below.
    """
    __tablename__ = 'account'

    id = db.Column(db.Integer, primary_key=True)
    github_id = db.Column(db.Integer)
    login = db.Column(db.Unicode)
    email = db.Column(db.Unicode)
    api_key = db.Column(db.Unicode, default=make_key)
    # Both timestamps default to datetime.utcnow; updated_at is also
    # refreshed whenever the row is updated.
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(
        db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Collections are declared lazy='dynamic'; each one also installs a
    # backref attribute on the related model.
    datasets = db.relationship('Dataset', lazy='dynamic', backref='owner')
    uploads = db.relationship('Upload', lazy='dynamic', backref='creator')
    entities_created = db.relationship(
        'Entity', lazy='dynamic', backref='creator')

    def to_dict(self):
        """Return the account as a plain dict.

        Only id, github_id, login and the two timestamps are included;
        ``email`` and ``api_key`` are left out.
        """
        exported = ('id', 'github_id', 'login', 'created_at', 'updated_at')
        return {field: getattr(self, field) for field in exported}

    @classmethod
    def by_id(cls, id):
        """Return the first account with primary key ``id``, or None."""
        matches = cls.query.filter_by(id=id)
        return matches.first()

    @classmethod
    def by_api_key(cls, api_key):
        """Return the first account whose API key equals ``api_key``, or None."""
        matches = cls.query.filter_by(api_key=api_key)
        return matches.first()

    @classmethod
    def by_github_id(cls, github_id):
        """Return the first account with the given GitHub id, or None."""
        matches = cls.query.filter_by(github_id=github_id)
        return matches.first()

    @classmethod
    def create(cls, data):
        """Build an account from ``data``, add it to the session and flush.

        ``data`` must provide ``'id'`` (stored as ``github_id``) and
        ``'login'``; ``'email'`` is optional. The new account is returned.
        """
        new_account = cls(
            github_id=data['id'],
            login=data['login'],
            email=data.get('email'),
        )
        session = db.session
        session.add(new_account)
        session.flush()
        return new_account

    def update(self, data):
        """Overwrite login and e-mail from ``data`` and re-add to the session.

        ``'login'`` is required, ``'email'`` is optional. No flush is issued.
        """
        self.login = data['login']
        self.email = data.get('email')
        session = db.session
        session.add(self)
예제 #2
0
class Dataset(db.Model):
    """ORM model for rows of the ``dataset`` table.

    A dataset has a name, a label, a set of boolean matching/editing options
    and an owning account (``owner_id``), and exposes its entities and
    uploads as dynamic relationships.
    """
    __tablename__ = 'dataset'

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.Unicode)
    label = db.Column(db.Unicode)
    ignore_case = db.Column(db.Boolean, default=False)
    match_aliases = db.Column(db.Boolean, default=False)
    public_edit = db.Column(db.Boolean, default=False)
    normalize_text = db.Column(db.Boolean, default=True)
    enable_invalid = db.Column(db.Boolean, default=True)
    owner_id = db.Column(db.Integer, db.ForeignKey('account.id'))
    # Both timestamps default to datetime.utcnow; updated_at is also
    # refreshed whenever the row is updated.
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(
        db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    entities = db.relationship('Entity', lazy='dynamic', backref='dataset')
    uploads = db.relationship('Upload', lazy='dynamic', backref='dataset')

    def to_dict(self):
        """Return the dataset as a plain dict, including entity statistics.

        Four COUNT queries are issued through ``Entity.all(self)``: entities
        that have a ``canonical_id``, entities not yet reviewed, all
        entities, and entities flagged invalid.
        """
        # Imported here rather than at module level, as in the original.
        from nomenklatura.model.entity import Entity

        # `!= None` is deliberate: on a column it builds a SQL expression,
        # whereas `is not None` would be evaluated by Python immediately.
        alias_count = Entity.all(self).filter(
            Entity.canonical_id != None).count()  # noqa: E711
        pending_count = Entity.all(self).filter_by(reviewed=False).count()
        entity_count = Entity.all(self).count()
        invalid_count = Entity.all(self).filter_by(invalid=True).count()

        summary = {
            'id': self.id,
            'name': self.name,
            'label': self.label,
            'owner': self.owner.to_dict(),
            'stats': {
                'num_aliases': alias_count,
                'num_entities': entity_count,
                'num_review': pending_count,
                'num_invalid': invalid_count
            },
        }
        plain_fields = (
            'ignore_case', 'match_aliases', 'public_edit', 'normalize_text',
            'enable_invalid', 'created_at', 'updated_at',
        )
        for field in plain_fields:
            summary[field] = getattr(self, field)
        return summary

    @property
    def last_modified(self):
        """Latest ``updated_at`` among the dataset, its entities and aliases."""
        candidates = [self.updated_at]

        from nomenklatura.model.entity import Entity
        newest_entity = self.entities.order_by(
            Entity.updated_at.desc()).first()
        if newest_entity is not None:
            candidates.append(newest_entity.updated_at)

        # NOTE(review): no `aliases` attribute is declared on this class in
        # the code shown here; presumably the Alias model supplies it as a
        # backref -- confirm.
        from nomenklatura.model.alias import Alias
        newest_alias = self.aliases.order_by(Alias.updated_at.desc()).first()
        if newest_alias is not None:
            candidates.append(newest_alias.updated_at)

        return max(candidates)

    @classmethod
    def by_name(cls, name):
        """Return the first dataset called ``name``, or None."""
        matches = cls.query.filter_by(name=name)
        return matches.first()

    @classmethod
    def find(cls, name):
        """Like ``by_name`` but raise ``NotFound`` when nothing matches."""
        found = cls.by_name(name)
        if found is not None:
            return found
        raise NotFound("No such dataset: %s" % name)

    @classmethod
    def from_form(cls, form_data):
        """Run ``form_data`` through ``FormDatasetSchema`` and return its
        ``'dataset'`` entry (None when that key is absent)."""
        return FormDatasetSchema().to_python(form_data).get('dataset')

    @classmethod
    def all(cls):
        """Return the unfiltered query over all datasets."""
        return cls.query

    @classmethod
    def create(cls, data, account):
        """Validate ``data`` with ``DatasetNewSchema`` and persist a dataset.

        The dataset is owned by ``account``, takes its name and label from
        the validated data, is added to the session and flushed, and is then
        returned.
        """
        cleaned = DatasetNewSchema().to_python(data)
        new_dataset = cls()
        new_dataset.owner = account
        new_dataset.name = cleaned['name']
        new_dataset.label = cleaned['label']
        session = db.session
        session.add(new_dataset)
        session.flush()
        return new_dataset

    def update(self, data):
        """Validate ``data`` with ``DatasetEditSchema`` and apply it.

        Overwrites the label and the five boolean options, then adds the
        dataset to the session and flushes. The name is not changed.
        """
        cleaned = DatasetEditSchema().to_python(data)
        self.label = cleaned['label']
        option_fields = (
            'normalize_text', 'ignore_case', 'public_edit',
            'match_aliases', 'enable_invalid',
        )
        for field in option_fields:
            setattr(self, field, cleaned[field])
        session = db.session
        session.add(self)
        session.flush()
예제 #3
0
class Entity(db.Model):
    """ORM model for rows of the ``entity`` table.

    An entity belongs to a dataset (``dataset_id``) and records the account
    that created it (``creator_id``). When ``canonical_id`` is set the
    entity points at another entity through ``canonical``; the reverse side
    of that self-referential link is the dynamic ``aliases`` collection.
    """
    __tablename__ = 'entity'

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.Unicode)
    normalized = db.Column(db.Unicode)
    attributes = db.Column(HSTORE)
    reviewed = db.Column(db.Boolean, default=False)
    invalid = db.Column(db.Boolean, default=False)
    canonical_id = db.Column(db.Integer,
                             db.ForeignKey('entity.id'), nullable=True)
    dataset_id = db.Column(db.Integer, db.ForeignKey('dataset.id'))
    creator_id = db.Column(db.Integer, db.ForeignKey('account.id'))
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime, default=datetime.utcnow,
                           onupdate=datetime.utcnow)

    canonical = db.relationship('Entity',
                                backref=backref('aliases', lazy='dynamic'),
                                remote_side='Entity.id')

    def to_dict(self, shallow=False):
        """Return the entity as a plain dict.

        ``'canonical'`` holds the related Entity object (or None), not a
        name. Unless ``shallow`` is true, the creator's dict, the attributes
        and the number of aliases (one COUNT query) are added as well.
        """
        d = {
            'id': self.id,
            'name': self.name,
            'dataset': self.dataset.name,
            'reviewed': self.reviewed,
            'invalid': self.invalid,
            'canonical': self.canonical,
            'created_at': self.created_at,
            'updated_at': self.updated_at,
        }
        if not shallow:
            d['creator'] = self.creator.to_dict()
            d['attributes'] = self.attributes
            d['num_aliases'] = self.aliases.count()
        return d

    def to_row(self):
        """Return a flat dict: a copy of the attributes overlaid with the
        shallow ``to_dict()`` fields, with ``'canonical'`` replaced by the
        canonical entity's name when there is one."""
        row = self.attributes or {}
        row = row.copy()
        row.update(self.to_dict(shallow=True))
        if self.canonical is not None:
            row['canonical'] = self.canonical.name
        return row

    @property
    def display_name(self):
        """The entity's name."""
        return self.name

    @classmethod
    def by_name(cls, dataset, name):
        """Return the first entity in ``dataset`` matching ``name``, or None.

        With ``dataset.normalize_text`` the comparison is made against the
        ``normalized`` column using ``normalize_text(name)``; with
        ``dataset.ignore_case`` both sides are lower-cased.
        """
        q = cls.query.filter_by(dataset=dataset)
        attr = Entity.name
        if dataset.normalize_text:
            attr = Entity.normalized
            name = normalize_text(name)
        if dataset.ignore_case:
            attr = func.lower(attr)
            if isinstance(name, basestring):
                name = name.lower()
        q = q.filter(attr == name)
        return q.first()

    @classmethod
    def by_id(cls, id):
        """Return the entity with primary key ``id``, or None.

        ``id`` is coerced with ``int()``; a value that cannot be converted
        (including None) yields None instead of raising.
        """
        try:
            entity_id = int(id)
        except (TypeError, ValueError):
            # int(None) raises TypeError rather than ValueError; treat both
            # as "no such entity".
            return None
        return cls.query.filter_by(id=entity_id).first()

    @classmethod
    def id_map(cls, ids):
        """Return a dict mapping id -> entity for every id in ``ids`` that
        exists."""
        return dict((entity.id, entity)
                    for entity in cls.query.filter(cls.id.in_(ids)))

    @classmethod
    def find(cls, dataset, id):
        """Like ``by_id`` but raise ``NotFound`` when nothing matches.

        ``dataset`` is accepted but does not take part in the lookup.
        """
        entity = cls.by_id(id)
        if entity is None:
            raise NotFound("No such value ID: %s" % id)
        return entity

    @classmethod
    def all(cls, dataset=None, query=None, eager_aliases=False, eager=False):
        """Return a query over entities.

        :param dataset: restrict to this dataset when not None.
        :param query: when non-blank, keep entities whose name contains the
            stripped text (case-insensitive ``ILIKE``).
        :param eager_aliases: add a joined load of ``aliases_static``.
        :param eager: add joined loads of ``dataset`` and ``creator``.
        """
        q = cls.query
        if dataset is not None:
            q = q.filter_by(dataset=dataset)
        if query is not None and query.strip():
            q = q.filter(cls.name.ilike('%%%s%%' % query.strip()))
        if eager_aliases:
            # NOTE(review): `aliases_static` is not declared on this class in
            # the code shown here; confirm it exists before relying on
            # eager_aliases=True.
            q = q.options(joinedload_all(cls.aliases_static))
        if eager:
            q = q.options(db.joinedload('dataset'))
            q = q.options(db.joinedload('creator'))
        return q

    @classmethod
    def create(cls, dataset, data, account):
        """Validate ``data`` with ``EntitySchema`` and persist a new entity.

        The entity is attached to ``dataset`` with ``account`` as creator,
        its ``normalized`` column is derived from the name, and it is added
        to the session and flushed before being returned.
        """
        state = EntityState(dataset, None)
        data = EntitySchema().to_python(data, state)
        entity = cls()
        entity.dataset = dataset
        entity.creator = account
        entity.name = data['name']
        entity.normalized = normalize_text(entity.name)
        entity.attributes = data.get('attributes', {})
        entity.reviewed = data['reviewed']
        entity.invalid = data['invalid']
        entity.canonical = data['canonical']
        db.session.add(entity)
        db.session.flush()
        return entity

    def update(self, data, account):
        """Validate ``data`` with ``EntitySchema`` and apply it to this entity.

        ``account`` becomes the creator. When a canonical entity is set, the
        link is kept one level deep and every alias of this entity is
        re-pointed at the same canonical entity. The entity is added to the
        session; no flush is issued here.
        """
        state = EntityState(self.dataset, self)
        data = EntitySchema().to_python(data, state)
        self.creator = account
        self.name = data['name']
        self.normalized = normalize_text(self.name)
        self.attributes = data['attributes']
        self.reviewed = data['reviewed']
        self.invalid = data['invalid']
        self.canonical = data['canonical']

        # redirect all aliases of this entity
        if self.canonical:
            if self.canonical.canonical_id:
                if self.canonical.canonical_id == self.id:
                    # The chosen canonical entity is currently an alias of
                    # this one: clear its link so the two do not point at
                    # each other.
                    self.canonical.canonical = None
                else:
                    # The chosen canonical entity is itself an alias: follow
                    # it to its own canonical entity.
                    self.canonical = self.canonical.canonical

            for alias in self.aliases:
                alias.canonical = self.canonical

        db.session.add(self)
예제 #4
0
class Dataset(db.Model):
    """ORM model for rows of the ``dataset`` table.

    A dataset has a name, a label, boolean matching/editing options, an
    ``algorithm`` string and an owning account (``owner_id``), and exposes
    its entities and aliases as dynamic relationships.
    """
    __tablename__ = 'dataset'

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.Unicode)
    label = db.Column(db.Unicode)
    ignore_case = db.Column(db.Boolean, default=False)
    match_aliases = db.Column(db.Boolean, default=False)
    public_edit = db.Column(db.Boolean, default=False)
    normalize_text = db.Column(db.Boolean, default=True)
    enable_invalid = db.Column(db.Boolean, default=True)
    algorithm = db.Column(db.Unicode)
    owner_id = db.Column(db.Integer, db.ForeignKey('account.id'))
    # Both timestamps default to datetime.utcnow; updated_at is also
    # refreshed whenever the row is updated.
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(
        db.DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    entities = db.relationship('Entity', lazy='dynamic', backref='dataset')
    aliases = db.relationship('Alias', lazy='dynamic', backref='dataset')

    def as_dict(self):
        """Return the dataset as a plain dict, with the owner expanded via
        its own ``as_dict()``."""
        payload = {
            'id': self.id,
            'name': self.name,
            'label': self.label,
            'owner': self.owner.as_dict(),
        }
        plain_fields = (
            'ignore_case', 'match_aliases', 'public_edit', 'normalize_text',
            'enable_invalid', 'algorithm', 'created_at', 'updated_at',
        )
        for field in plain_fields:
            payload[field] = getattr(self, field)
        return payload

    @classmethod
    def by_name(cls, name):
        """Return the first dataset called ``name``, or None."""
        matches = cls.query.filter_by(name=name)
        return matches.first()

    @classmethod
    def find(cls, name):
        """Like ``by_name`` but raise ``NotFound`` when nothing matches."""
        found = cls.by_name(name)
        if found is not None:
            return found
        raise NotFound("No such dataset: %s" % name)

    @classmethod
    def all(cls):
        """Return the unfiltered query over all datasets."""
        return cls.query

    @classmethod
    def create(cls, data, account):
        """Validate ``data`` with ``DatasetNewSchema`` and persist a dataset.

        The dataset is owned by ``account`` and takes its name and label
        from the validated data. It is added to the session and flushed,
        ``flush_cache`` is called for it, and it is returned.
        """
        cleaned = DatasetNewSchema().to_python(data)
        new_dataset = cls()
        new_dataset.owner = account
        new_dataset.name = cleaned['name']
        new_dataset.label = cleaned['label']
        session = db.session
        session.add(new_dataset)
        session.flush()
        flush_cache(new_dataset)
        return new_dataset

    def update(self, data):
        """Validate ``data`` with ``DatasetEditSchema`` and apply it.

        Overwrites the label, the five boolean options and the algorithm,
        then adds the dataset to the session, flushes and calls
        ``flush_cache``. The name is not changed.
        """
        cleaned = DatasetEditSchema().to_python(data)
        self.label = cleaned['label']
        remaining_fields = (
            'normalize_text', 'ignore_case', 'public_edit',
            'match_aliases', 'enable_invalid', 'algorithm',
        )
        for field in remaining_fields:
            setattr(self, field, cleaned[field])
        session = db.session
        session.add(self)
        session.flush()
        flush_cache(self)
예제 #5
0
class Entity(db.Model):
    """ORM model for rows of the ``entity`` table.

    An entity has a name and a JSON ``data`` payload, belongs to a dataset
    (``dataset_id``) and records the account that created it
    (``creator_id``). Its aliases are reachable both as a dynamic query
    (``aliases``) and as a plain collection (``aliases_static``).
    """
    __tablename__ = 'entity'

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.Unicode)
    data = db.Column(JsonType, default=dict)
    dataset_id = db.Column(db.Integer, db.ForeignKey('dataset.id'))
    creator_id = db.Column(db.Integer, db.ForeignKey('account.id'))
    created_at = db.Column(db.DateTime, default=datetime.utcnow)
    updated_at = db.Column(db.DateTime,
                           default=datetime.utcnow,
                           onupdate=datetime.utcnow)

    aliases = db.relationship('Alias', backref='entity', lazy='dynamic')
    aliases_static = db.relationship('Alias')

    def as_dict(self, shallow=False):
        """Return the entity as a plain dict.

        Always contains id, name and the two timestamps. Unless ``shallow``
        is true, the creator's dict, the dataset name and the ``data``
        payload are added as well.
        """
        d = {
            'id': self.id,
            'name': self.name,
            'created_at': self.created_at,
            'updated_at': self.updated_at,
        }
        if not shallow:
            d['creator'] = self.creator.as_dict()
            # No trailing commas here: they would wrap each value in a
            # one-element tuple.
            d['dataset'] = self.dataset.name
            d['data'] = self.data
        return d

    def as_row(self):
        """Return a flat dict: a copy of ``data`` overlaid with the shallow
        ``as_dict()`` fields."""
        # `data` can still be None on an instance whose column default has
        # not been applied yet.
        row = dict(self.data or {})
        row.update(self.as_dict(shallow=True))
        return row

    @property
    def display_name(self):
        """The entity's name."""
        return self.name

    @classmethod
    def by_name(cls, dataset, name):
        """Return the first entity in ``dataset`` named exactly ``name``,
        or None."""
        return cls.query.filter_by(dataset=dataset).\
            filter_by(name=name).first()

    @classmethod
    def by_id(cls, dataset, id):
        """Return the entity in ``dataset`` with primary key ``id``, or None."""
        return cls.query.filter_by(dataset=dataset).\
            filter_by(id=id).first()

    @classmethod
    def id_map(cls, dataset, ids):
        """Return a dict mapping id -> entity for every id in ``ids`` that
        exists in ``dataset``."""
        matches = cls.query.filter_by(dataset=dataset).\
            filter(cls.id.in_(ids))
        return dict((entity.id, entity) for entity in matches)

    @classmethod
    def find(cls, dataset, id):
        """Like ``by_id`` but raise ``NotFound`` when nothing matches."""
        entity = cls.by_id(dataset, id)
        if entity is None:
            raise NotFound("No such value ID: %s" % id)
        return entity

    @classmethod
    def all(cls, dataset, query=None, eager_aliases=False, eager=False):
        """Return a query over the entities of ``dataset``.

        :param query: when non-blank, keep entities whose name contains the
            stripped text (case-insensitive ``ILIKE``).
        :param eager_aliases: add a joined load of ``aliases_static``.
        :param eager: add joined loads of ``dataset`` and ``creator``.
        """
        q = cls.query.filter_by(dataset=dataset)
        if query is not None and query.strip():
            q = q.filter(cls.name.ilike('%%%s%%' % query.strip()))
        if eager_aliases:
            q = q.options(joinedload_all(cls.aliases_static))
        if eager:
            q = q.options(db.joinedload('dataset'))
            q = q.options(db.joinedload('creator'))
        return q

    @classmethod
    def create(cls, dataset, data, account):
        """Validate ``data`` with ``EntitySchema`` and persist a new entity.

        The entity is attached to ``dataset`` with ``account`` as creator,
        added to the session and flushed; it is then registered through
        ``add_candidate_to_cache`` and returned.
        """
        state = EntityState(dataset, None)
        data = EntitySchema().to_python(data, state)
        entity = cls()
        entity.dataset = dataset
        entity.creator = account
        entity.name = data['name']
        entity.data = data['data']
        db.session.add(entity)
        db.session.flush()
        add_candidate_to_cache(dataset, entity.name, entity.id)
        return entity

    def update(self, data, account):
        """Validate ``data`` with ``EntitySchema`` and apply it.

        ``account`` becomes the creator and name and data are overwritten.
        ``flush_cache`` is called for the dataset and the entity is added to
        the session; no flush is issued here.
        """
        state = EntityState(self.dataset, self)
        data = EntitySchema().to_python(data, state)
        self.creator = account
        self.name = data['name']
        self.data = data['data']
        flush_cache(self.dataset)
        db.session.add(self)

    def merge_into(self, data, account):
        """Merge this entity into the target named by ``data``.

        ``data`` is validated with ``EntityMergeSchema``; its ``'target'``
        entry is the entity to merge into. Every alias of this entity is
        moved to the target, a new matched alias carrying this entity's name
        is created for the target (``account`` recorded as matcher), this
        entity is deleted and the session is committed. ``flush_cache`` is
        then called for the dataset and the target is returned.
        """
        from nomenklatura.model.alias import Alias
        state = EntityState(self.dataset, self)
        data = EntityMergeSchema().to_python(data, state)
        target = data.get('target')
        # Read the dataset now so this instance does not have to be touched
        # again after it has been deleted and committed.
        dataset = self.dataset
        for alias in self.aliases:
            # `entity` is the attribute created by the `aliases` backref;
            # it is what actually moves the alias to the target.
            alias.entity = target
        alias = Alias()
        alias.name = self.name
        alias.creator = self.creator
        alias.matcher = account
        alias.entity = target
        alias.dataset = dataset
        alias.is_matched = True
        db.session.delete(self)
        db.session.add(alias)
        db.session.commit()
        flush_cache(dataset)
        return target