Exemple #1
0
class Source(db.Model):
    """Primary source of data, e.g. a journal.

    Holds the basic information of the source, its relation to the taxonomy
    and the information of the source as a repository of documents.
    """

    __tablename__ = 'iroko_sources'

    id = db.Column(db.Integer, primary_key=True)
    uuid = db.Column(UUIDType, default=uuid.uuid4)
    name = db.Column(
        db.String,
        unique=True,
        nullable=False,
    )
    source_type = db.Column(db.Enum(SourceType))
    source_status = db.Column(db.Enum(SourceStatus))

    # TODO: decide about this. Even though it duplicates information, it
    #  seems convenient to keep the relationships with the terms here (and
    #  to handle them accordingly in the apps). They live in the tables for
    #  convenience, but when "editing" a Source having them here looks
    #  easier.
    #: Data of the Source, dependent on the source type, including the
    #: relationships with Terms.
    data = db.Column(JSONType)

    # term_sources = db.relationship("Term_sources", back_populates="sources")

    def __str__(self):
        """Return the name of the source."""
        return self.name
Exemple #2
0
class Repository(db.Model):
    """Information of a Source related to its condition of repository.

    The harvest-related data (type, endpoint, last run, status, error log)
    is kept here.
    """

    __tablename__ = 'iroko_source_repositories'

    # Earlier integer-key / foreign-key layout, kept commented out:
    # id = db.Column( db.Integer, primary_key=True)

    # source_id = db.Column(db.Integer, db.ForeignKey(Source.id,
    # name='fk_iroko_source_repository_source_id'))
    # """ID of Source for this inclusion."""
    #
    # source = db.relationship("Source", backref=db.backref("repository",cascade="all,
    # delete-orphan", lazy='dynamic'))

    # Primary key of the table; presumably the uuid of the related Source,
    # but no foreign key is declared here -- TODO confirm.
    source_uuid = db.Column(UUIDType, primary_key=True)

    # Harvesting fields; all of them are nullable.
    harvest_type = db.Column(db.Enum(HarvestType))
    harvest_endpoint = db.Column(db.String)
    last_harvest_run = db.Column(db.DateTime, nullable=True)
    identifier = db.Column(db.String)
    # NOTE(review): uses the HarvestedItemStatus enum, not a repository
    # specific one -- confirm this is intended.
    status = db.Column(db.Enum(HarvestedItemStatus))
    error_log = db.Column(db.String)

    # Free-form JSON column.
    data = db.Column(JSONType)
    """Any relevant data, dependent of the harvest_type,
Exemple #3
0
class GroupRelationship(db.Model, Timestamp):
    """Relation of a given kind going from a source group to a target group.

    A (source, target, relation) triple is unique. Group relationships can
    also be linked to other group relationships through the
    ``GroupRelationshipM2M`` association table.
    """

    __tablename__ = 'grouprelationship'
    __table_args__ = (
        UniqueConstraint(
            'source_id', 'target_id', 'relation',
            name='uq_grouprelationship_source_target_relation',
        ),
        # TODO: Change to "index=True"
        Index('ix_grouprelationship_source', 'source_id'),
        Index('ix_grouprelationship_target', 'target_id'),
        Index('ix_grouprelationship_relation', 'relation'),
    )

    id = db.Column(UUIDType, primary_key=True, default=uuid.uuid4)
    type = db.Column(db.Enum(GroupType), nullable=False)
    relation = db.Column(db.Enum(Relation), nullable=False)

    # Both ends reference Group rows and follow their updates/deletions.
    source_id = db.Column(
        UUIDType,
        db.ForeignKey(Group.id, onupdate='CASCADE', ondelete='CASCADE'),
        nullable=False,
    )
    target_id = db.Column(
        UUIDType,
        db.ForeignKey(Group.id, onupdate='CASCADE', ondelete='CASCADE'),
        nullable=False,
    )

    # DB relationships. ``foreign_keys`` is required because two columns
    # point at the same Group table.
    source = db.relationship(
        Group, backref='sources', foreign_keys=[source_id])
    target = db.relationship(
        Group, backref='targets', foreign_keys=[target_id])

    # Self-referential many-to-many; the lambdas defer the lookup of
    # GroupRelationshipM2M until the mappers are configured.
    relationships = db.relationship(
        'GroupRelationship',
        secondary=lambda: GroupRelationshipM2M.__table__,
        primaryjoin=lambda: (
            GroupRelationship.id == GroupRelationshipM2M.relationship_id),
        secondaryjoin=lambda: (
            GroupRelationship.id == GroupRelationshipM2M.subrelationship_id),
    )

    # TODO:
    # We don't store 'deleted' as in the relation as most likely don't need
    # that as 'ground truth' in precomputed groups anyway

    def __repr__(self):
        """Return ``<source relation-name target>``."""
        return f'<{self.source} {self.relation.name} {self.target}>'
Exemple #4
0
class ObjectEvent(db.Model, Timestamp):
    """Link between an Event and an Identifier or Relationship.

    The four columns below form the composite primary key.
    """

    __tablename__ = 'objectevent'
    __table_args__ = (
        PrimaryKeyConstraint(
            'event_id', 'object_uuid', 'payload_type', 'payload_index',
            name='pk_objectevent',
        ),
    )

    event_id = db.Column(UUIDType, db.ForeignKey(Event.id), nullable=False)
    object_uuid = db.Column(UUIDType, nullable=False)
    payload_type = db.Column(db.Enum(PayloadType), nullable=False)
    payload_index = db.Column(db.Integer, nullable=False)

    event = db.relationship(Event, backref='object_events')

    @property
    def object(self) -> Union[Identifier, Relationship]:
        """Return the Identifier or Relationship ``object_uuid`` refers to.

        ``payload_type`` selects the model: ``PayloadType.Identifier`` maps to
        Identifier, anything else to Relationship.
        """
        if self.payload_type == PayloadType.Identifier:
            model = Identifier
        else:
            model = Relationship
        return model.query.get(self.object_uuid)

    def __repr__(self):
        """Return ``<event_id: object_uuid>``."""
        return f"<{self.event_id}: {self.object_uuid}>"
Exemple #5
0
class Event(db.Model, Timestamp):
    """Event model.

    Stores the JSON payload of an event, its status and, optionally, the
    user it belongs to.
    """

    __tablename__ = 'event'

    id = db.Column(UUIDType, default=uuid.uuid4, primary_key=True)
    payload = db.Column(JSONType)

    status = db.Column(db.Enum(EventStatus), nullable=False)
    user_id = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)

    user = db.relationship(User)

    @classmethod
    def get(cls, id: str = None, **kwargs):
        """Get the event from the database.

        :param id: Identifier of the event to look up.
        :param kwargs: Accepted for call compatibility and ignored.
        :returns: The matching event, or ``None`` if there is none.
        """
        return cls.query.filter_by(id=id).one_or_none()

    @classmethod
    def getStatsFromLastWeek(cls):
        """Count the events updated during the last 7 days, per status.

        :returns: A list of ``(status, count)`` rows.
        """
        # NOTE(review): this is naive local time; if the Timestamp mixin
        # stores UTC values the window is shifted by the UTC offset --
        # confirm.
        last_week = datetime.datetime.now() - datetime.timedelta(days=7)
        # Bind the datetime itself rather than its string form: the column
        # is a DateTime and some backends (e.g. SQLite) reject str values.
        return (
            db.session.query(cls.status, func.count('*'))
            .filter(cls.updated > last_week)
            .group_by(cls.status)
            .all()
        )

    def __repr__(self):
        """Return ``<id: created>``."""
        return f"<{self.id}: {self.created}>"
Exemple #6
0
class CDSRun(db.Model):
    """Record of one CDS run: when it started, how long it took, its status."""

    __tablename__ = "cds_runs"
    __table_args__ = (
        db.Index("ix_cds_runs_status_date", "status", "date"),
    )

    task_id = db.Column(UUIDType, primary_key=True)
    date = db.Column(db.DateTime)
    runtime = db.Column(db.Interval)
    status = db.Column(db.Enum(CDSRunStatus, name="enum_cds_run_status"))
    message = db.Column(db.UnicodeText, default="")

    @classmethod
    def get_last_successful_run(cls):
        """Return the most recent run with status FINISHED, or ``None``."""
        finished_runs = cls.query.filter_by(status=CDSRunStatus.FINISHED)
        return finished_runs.order_by(cls.date.desc()).first()

    @classmethod
    def new_run(cls):
        """Add a new RUNNING run to the session and return its task id.

        The session is not committed here.
        """
        new_task_id = uuid.uuid4()
        db.session.add(
            CDSRun(
                task_id=new_task_id,
                status=CDSRunStatus.RUNNING,
                date=datetime.now(),
            )
        )
        return new_task_id

    @classmethod
    def update_status(cls, task_id, status, message=None):
        """Set the status and the elapsed runtime of the run ``task_id``.

        ``message`` is stored only when it is truthy. The lookup uses
        ``.one()``, so it raises unless exactly one run matches. The session
        is not committed here.
        """
        run = cls.query.filter_by(task_id=task_id).one()
        if message:
            run.message = message

        run.status = status
        # Runtime is measured from the stored start date until now.
        run.runtime = datetime.now() - run.date
class GitWebhookSubscriber(db.Model):
    """Subscription of a record to the events of a git webhook.

    A (record, webhook) pair can appear only once.
    """

    __tablename__ = 'git_subscriber'
    # Explicit one-element tuple holding the uniqueness constraint.
    __table_args__ = (
        db.UniqueConstraint(
            'record_id', 'webhook_id',
            name='uq_git_webhook_subscriber_unique_constraint',
        ),
    )

    id = db.Column(db.Integer, primary_key=True)
    type = db.Column(
        db.Enum('notify', 'download', name='git_event_type'),
        nullable=False,
    )

    status = db.Column(
        db.Enum('active', 'deleted', name='git_webhook_status'),
        default='active',
        nullable=False,
    )

    record_id = db.Column(
        UUIDType, db.ForeignKey(RecordMetadata.id), nullable=False)
    # Subscriptions are deleted together with their record.
    record = db.relationship(
        RecordMetadata,
        backref=db.backref("webhooks", cascade="all, delete-orphan"),
    )

    webhook_id = db.Column(
        db.Integer, db.ForeignKey(GitWebhook.id), nullable=False)
    # Subscriptions are deleted together with their webhook.
    webhook = db.relationship(
        GitWebhook,
        backref=db.backref("subscribers", cascade="all, delete-orphan"),
    )

    user_id = db.Column(db.Integer, db.ForeignKey(User.id), nullable=False)
    user = db.relationship(User)

    @property
    def repo(self):
        """Return the ``repo`` of the subscribed webhook."""
        return self.webhook.repo
Exemple #8
0
class Notification(db.Model):
    """Notification addressed to a user."""

    __tablename__ = 'iroko_notification'

    id = db.Column(db.Integer, primary_key=True)
    classification = db.Column(db.Enum(NotificationType))
    description = db.Column(db.String)
    emiter = db.Column(db.String)
    # Notifications start as not viewed.
    viewed = db.Column(db.Boolean, default=False)
    receiver_id = db.Column(
        db.Integer,
        db.ForeignKey(User.id, name='fk_iroko_notifications_user_id'),
    )

    # Notifications are deleted together with the receiving user.
    receiver = db.relationship(
        User,
        backref=db.backref('notifications', cascade='all, delete-orphan'),
    )
    # Any data related to the notification.
    data = db.Column(JSONType)
Exemple #9
0
class HarvestedItem(db.Model):
    """Item harvested from a repository.

    An identifier can appear only once per repository.
    """

    __tablename__ = 'iroko_harvest_items'
    __table_args__ = (
        db.UniqueConstraint(
            'source_uuid', 'identifier', name='identifier_in_repository'),
    )

    id = db.Column(db.Integer, primary_key=True)

    # Items are removed by the database when their repository row is deleted.
    source_uuid = db.Column(
        UUIDType,
        db.ForeignKey(
            'iroko_source_repositories.source_uuid', ondelete='CASCADE'),
        index=True,
        nullable=False,
    )
    repository = db.relationship(
        'Repository', backref=db.backref('harvested_items'))

    # Identifier of the item in the repository.
    identifier = db.Column(db.String, nullable=False)

    # uuid of the associated iroko record, if any.
    record = db.Column(UUIDType, nullable=True)

    status = db.Column(db.Enum(HarvestedItemStatus))
    error_log = db.Column(db.String)

    #: Any other relevant data to be used in the future could be here.
    data = db.Column(JSONType)

    def __str__(self):
        """Return the identifier of the item."""
        return self.identifier
Exemple #10
0
class Event(db.Model, Timestamp):
    """Event with a JSON payload, a status and an optional user."""

    __tablename__ = 'event'

    id = db.Column(UUIDType, primary_key=True, default=uuid.uuid4)
    payload = db.Column(JSONType)

    status = db.Column(db.Enum(EventStatus), nullable=False)
    user_id = db.Column(
        db.Integer,
        db.ForeignKey(User.id),
        nullable=True,
    )

    user = db.relationship(User)

    @classmethod
    def get(cls, id: str = None, **kwargs):
        """Return the event whose id is ``id``, or ``None`` if not found.

        Extra keyword arguments are accepted and ignored.
        """
        matching = cls.query.filter(cls.id == id)
        return matching.one_or_none()

    def __repr__(self):
        """Return ``<id: created>``."""
        return f"<{self.id}: {self.created}>"
Exemple #11
0
class HarvestMonitoring(db.Model, Timestamp):
    """Harvesting monitoring model.

    One row per harvested identifier, with its scheme, the harvester that
    handled it and the harvest status.
    """

    __tablename__ = 'harvest_monitoring'

    id = db.Column(UUIDType, default=uuid.uuid4, primary_key=True)
    identifier = db.Column(db.String, nullable=False)
    scheme = db.Column(db.String)
    harvester = db.Column(db.String)
    status = db.Column(db.Enum(HarvestStatus), nullable=False)

    @classmethod
    def get(cls, id: str = None, **kwargs):
        """Get the harvest monitoring entry from the database.

        :param id: Identifier of the entry to look up.
        :param kwargs: Accepted for call compatibility and ignored.
        :returns: The matching entry, or ``None`` if there is none.
        """
        return cls.query.filter_by(id=id).one_or_none()

    @classmethod
    def isRecentlyAdded(cls, identifier: str, scheme: str, harvester: str,
                        **kwargs) -> bool:
        """Tell whether the same identifier was handled during the last week.

        Used to avoid duplicates: an entry matches when identifier, scheme
        and harvester are equal and it was updated within the last 7 days.

        :param kwargs: Accepted for call compatibility and ignored.
        :returns: ``True`` if such an entry exists, ``False`` otherwise.
        """
        # NOTE(review): naive local time; if the Timestamp mixin stores UTC
        # values the window is shifted by the UTC offset -- confirm.
        last_week = datetime.datetime.now() - datetime.timedelta(days=7)
        # The datetime is bound as-is (not as str) so that the comparison
        # against the DateTime column works on every backend.
        recent = cls.query.filter(
            cls.identifier == identifier,
            cls.scheme == scheme,
            cls.harvester == harvester,
            cls.updated > last_week,
        ).first()
        return recent is not None

    @classmethod
    def getStatsFromLastWeek(cls):
        """Count the entries updated during the last 7 days, per status.

        :returns: A list of ``(status, count)`` rows.
        """
        last_week = datetime.datetime.now() - datetime.timedelta(days=7)
        # Same as above: bind the datetime object, not its string form.
        return (
            db.session.query(cls.status, func.count('*'))
            .filter(cls.updated > last_week)
            .group_by(cls.status)
            .all()
        )

    def __repr__(self):
        """Return ``<id: created : identifier>``."""
        return f"<{self.id}: {self.created} : {self.identifier}>"
Exemple #12
0
class Group(db.Model, Timestamp):
    """Typed group of identifiers, which can itself contain other groups."""

    __tablename__ = 'group'

    id = db.Column(UUIDType, primary_key=True, default=uuid.uuid4)
    type = db.Column(db.Enum(GroupType), nullable=False)

    # Read-only view through the Identifier2Group association table; the
    # lambda defers the table lookup until the mappers are configured.
    identifiers = db.relationship(
        Identifier,
        secondary=lambda: Identifier2Group.__table__,
        backref='groups',
        viewonly=True,
    )

    # Self-referential many-to-many through GroupM2M: this group is the
    # ``group_id`` side and the related groups are the ``subgroup_id`` side.
    groups = db.relationship(
        'Group',
        secondary=lambda: GroupM2M.__table__,
        primaryjoin=lambda: (Group.id == GroupM2M.group_id),
        secondaryjoin=lambda: (Group.id == GroupM2M.subgroup_id),
    )

    def __repr__(self):
        """Return ``<id: type-name>``."""
        return f"<{self.id}: {self.type.name}>"
Exemple #13
0
class ReanaWorkflow(db.Model):
    """Model defining a REANA workflow.

    Links a workflow (``workflow_id``) to the record it was created for and
    to the user who owns it, and keeps its status, definition and logs.
    """

    __tablename__ = 'reana_workflows'

    id = db.Column(UUIDType,
                   primary_key=True,
                   default=uuid.uuid4,
                   nullable=False)
    rec_uuid = db.Column(UUIDType,
                         db.ForeignKey(RecordMetadata.id),
                         nullable=False)

    user_id = db.Column(db.Integer, db.ForeignKey(User.id), nullable=False)
    workflow_id = db.Column(UUIDType, unique=True, nullable=False)

    name = db.Column(db.String(100), unique=False, nullable=False)
    workflow_name = db.Column(db.String(100), unique=False, nullable=False)
    workflow_name_run = db.Column(db.String(100), unique=False, nullable=False)

    status = db.Column(db.Enum('created',
                               'queued',
                               'running',
                               'stopped',
                               'failed',
                               'finished',
                               'deleted',
                               name='status'),
                       unique=False,
                       nullable=False)

    # the following fields represent the creation part of a workflow
    workflow_json = db.Column(json_type, default=lambda: dict(), nullable=True)

    # logging after the workflow runs
    logs = db.Column(json_type, default=lambda: dict(), nullable=True)

    created = db.Column(db.DateTime, server_default=db.func.now())
    # ``onupdate`` makes every UPDATE set the column to now(). The previous
    # ``server_onupdate`` only declares that the server produces a value and
    # generates nothing by itself, so the column was never refreshed.
    updated = db.Column(db.DateTime,
                        server_default=db.func.now(),
                        onupdate=db.func.now())

    user = db.relationship('User')
    # Workflows are deleted together with their record.
    record = db.relationship('RecordMetadata',
                             backref=db.backref('reana_workflows',
                                                cascade='all, delete-orphan'))

    @classmethod
    def get_user_workflows(cls, user_id):
        """Get user workflows.

        :param user_id: Id of the user owning the workflows.
        :returns: List of the workflows of that user (possibly empty).
        """
        return cls.query.filter_by(user_id=user_id).all()

    @classmethod
    def get_deposit_workflows(cls, depid):
        """Get deposit workflows.

        :param depid: Value matched against ``rec_uuid``.
        :returns: List of the workflows of that record (possibly empty).
        """
        return cls.query.filter_by(rec_uuid=depid).all()

    @classmethod
    def get_workflow_by_id(cls, workflow_id):
        """Get workflow by id.

        :param workflow_id: Value matched against ``workflow_id``.
        :returns: The matching workflow, or ``None`` if there is none.
        """
        return cls.query.filter_by(workflow_id=workflow_id).one_or_none()

    def serialize(self):
        """Serialize schema model.

        :returns: The ``data`` of the ``reana_workflow_serializer`` dump.
        """
        return reana_workflow_serializer.dump(self).data
Exemple #14
0
class ImporterImportLog(db.Model):
    """Store the history of the importer tasks."""

    __tablename__ = "importer_import_log"

    id = db.Column(db.Integer, primary_key=True)

    celery_task_id = db.Column(db.String)

    #: The agent that initiated the task.
    agent = db.Column(Enum(ImporterAgent), nullable=False)

    #: The current status of the task.
    status = db.Column(
        Enum(ImporterTaskStatus),
        default=ImporterTaskStatus.RUNNING,
        nullable=False,
    )

    #: The provider of the data. Unconstrained for extensibility purposes.
    provider = db.Column(db.String, nullable=False)

    #: The format of the source data.
    source_type = db.Column(db.String, nullable=False)

    #: The chosen importation mode.
    mode = db.Column(db.Enum(ImporterMode), nullable=False)

    #: The original name of the imported file.
    original_filename = db.Column(db.String, nullable=False)

    #: Task start time.
    start_time = db.Column(
        db.DateTime,
        default=lambda: datetime.now(),
        nullable=False,
    )

    #: Task end time (if not currently running).
    end_time = db.Column(db.DateTime, nullable=True)

    #: Message in case of an error.
    message = db.Column(db.String, nullable=True)

    #: Number of entries in source file.
    entries_count = db.Column(db.Integer, nullable=True)

    ignore_missing_rules = db.Column(db.Boolean)

    @classmethod
    def create(cls, data):
        """Build a task log from ``data``, commit it and return it."""
        new_log = cls(**data)
        db.session.add(new_log)
        db.session.commit()
        return new_log

    def is_running(self):
        """Tell whether the status of the task is RUNNING."""
        return self.status == ImporterTaskStatus.RUNNING

    def is_cancelled(self):
        """Tell whether the status of the task is CANCELLED."""
        return self.status == ImporterTaskStatus.CANCELLED

    def finalize(self):
        """Mark the import as succeeded if it is still running."""
        if not self.is_running():
            return
        self.set_succeeded()

    def set_succeeded(self):
        """Mark a running task as succeeded, stamp its end time and commit."""
        assert self.is_running()
        self.status = ImporterTaskStatus.SUCCEEDED
        self.end_time = datetime.now()
        db.session.commit()

    def set_failed(self, exception):
        """Mark a running task as failed and commit.

        The formatted ``exception`` is stored as the message.
        """
        assert self.is_running()
        self.status = ImporterTaskStatus.FAILED
        self.end_time = datetime.now()
        self.message = _format_exception(exception)
        db.session.commit()

    def set_cancelled(self):
        """Mark the task as cancelled, stamp its end time and commit."""
        self.status = ImporterTaskStatus.CANCELLED
        self.end_time = datetime.now()
        db.session.commit()

    def set_entries_count(self, entries):
        """Store the number of ``entries`` and commit."""
        self.entries_count = len(entries)
        db.session.commit()
Exemple #15
0
class Task(db.Model, Timestamp):
    """Task database model.

    A task belongs to a Flow and carries a name, a JSON payload, the list of
    tasks that precede it and its current status and message.
    """

    __tablename__ = 'flows_task'

    id = db.Column(
        UUIDType,
        primary_key=True,
        default=uuid.uuid4,
    )
    """Task identifier."""

    previous = db.Column(
        db.JSON().with_variant(
            postgresql.JSONB(none_as_null=True),
            'postgresql',
        ).with_variant(
            JSONType(),
            'sqlite',
        ).with_variant(
            JSONType(),
            'mysql',
        ),
        default=lambda: list(),
        nullable=True,
    )
    """List of tasks that need to run before this one, if any.

    This is mainly used by visuals to create the flow diagram.
    """

    flow_id = db.Column(
        UUIDType,
        db.ForeignKey(Flow.id, onupdate="CASCADE", ondelete="CASCADE"),
        nullable=False,
    )
    """Task flow instance."""

    flow = db.relationship(Flow, backref='tasks')
    """Relationship to the Flow."""

    name = db.Column(db.String, nullable=False)
    """Task name."""

    payload = db.Column(
        db.JSON().with_variant(
            postgresql.JSONB(none_as_null=True),
            'postgresql',
        ).with_variant(
            JSONType(),
            'sqlite',
        ).with_variant(
            JSONType(),
            'mysql',
        ),
        default=lambda: dict(),
        nullable=True,
    )
    """Task payload in JSON format, typically args and kwargs."""

    status = db.Column(
        db.Enum(Status),
        nullable=False,
        default=Status.PENDING,
    )
    """Status of the task, i.e. pending, success, failure."""

    message = db.Column(db.String, nullable=False, default='')
    """Task status message."""

    @classmethod
    def create(cls, name, flow_id, id_=None, payload=None, previous=None):
        """Create a new Task.

        The task is added to the session inside a nested transaction
        (savepoint); the outer transaction is not committed here.

        :param name: Task name.
        :param flow_id: Id of the Flow the task belongs to.
        :param id_: Task id; a new uuid4 is generated when falsy.
        :param payload: Task payload; defaults to an empty dict.
        :param previous: Tasks to run before this one; defaults to an
            empty list.
        :returns: The created task.
        :raises SQLAlchemyError: Re-raised after logging if creation fails.
        """
        try:
            with db.session.begin_nested():
                obj = cls(
                    id=id_ or uuid.uuid4(),
                    flow_id=flow_id,
                    name=name,
                    payload=payload or {},
                    previous=previous or [],
                )
                db.session.add(obj)
            # These messages used to say "Flow" although a Task is created.
            logger.info('Created new Task %s', obj)
        except SQLAlchemyError:
            logger.exception(
                'Failed to create Task with %s, %s, %s, %s',
                id_,
                flow_id,
                name,
                payload,
            )
            raise
        return obj

    @classmethod
    def get(cls, id_):
        """Get a task object from the DB.

        :param id_: Primary key of the task.
        :returns: The task, or ``None`` if it does not exist.
        """
        return cls.query.get(id_)

    def to_dict(self):
        """Task dictionary representation.

        Ids and status are converted to strings and the timestamps to
        ISO 8601 strings.
        """
        return {
            'id': str(self.id),
            'flow_id': str(self.flow_id),
            'created': self.created.isoformat(),
            'updated': self.updated.isoformat(),
            'name': self.name,
            'payload': self.payload,
            'status': str(self.status),
            'message': self.message,
            'previous': self.previous,
        }
Exemple #16
0
class Relationship(db.Model, Timestamp):
    """Relationship between two identifiers.

    A (source, target, relation) triple is unique.
    """

    __tablename__ = 'relationship'
    __table_args__ = (
        UniqueConstraint('source_id', 'target_id', 'relation',
                         name='uq_relationship_source_target_relation'),
        Index('ix_relationship_source', 'source_id'),
        Index('ix_relationship_target', 'target_id'),
        Index('ix_relationship_relation', 'relation'),
    )

    id = db.Column(UUIDType, default=uuid.uuid4, primary_key=True)
    source_id = db.Column(
        UUIDType,
        db.ForeignKey(Identifier.id,
                      onupdate='CASCADE', ondelete='CASCADE',
                      name='fk_relationship_source'),
        nullable=False
    )
    target_id = db.Column(
        UUIDType,
        db.ForeignKey(Identifier.id,
                      onupdate='CASCADE', ondelete='CASCADE',
                      name='fk_relationship_target'),
        nullable=False
    )
    relation = db.Column(db.Enum(Relation))

    # ``foreign_keys`` is required because two columns point at Identifier.
    source = db.relationship(Identifier, foreign_keys=[source_id],
                             backref='sources')
    target = db.relationship(Identifier, foreign_keys=[target_id],
                             backref='targets')

    @classmethod
    def get(cls, source, target, relation, **kwargs):
        """Get the relationship from the database.

        :param source: Source identifier; only its ``id`` is used.
        :param target: Target identifier; only its ``id`` is used.
        :param relation: Relation to match.
        :param kwargs: Accepted for call compatibility and ignored.
        :returns: The matching relationship, or ``None`` if there is none.
        """
        return cls.query.filter_by(
            source_id=source.id, target_id=target.id,
            relation=relation).one_or_none()

    def fetch_or_create_id(self):
        """Fetch the relationship from the database or give it a new id.

        Source and target are resolved first through their own
        ``fetch_or_create_id``.

        :returns: ``self`` if it already has an id or had to be given a new
            one, otherwise the stored relationship with the same source,
            target and relation.
        """
        self.source = self.source.fetch_or_create_id()
        self.target = self.target.fetch_or_create_id()

        if self.id:
            return self

        stored = self.get(self.source, self.target, self.relation)
        if stored is not None:
            # Hand back the persisted row instead of rebinding ``self``.
            return stored

        self.id = uuid.uuid4()
        return self

    @property
    def identity_group(self):
        """Get the relationship's identity group.

        Every access runs a query for the Identity group relationship that
        joins the identity groups of source and target with this relation.

        :returns: The group relationship, or ``None`` if there is none.
        """
        # TODO: See if we can avoid this
        # NOTE(review): presumably imported here to avoid a circular import
        # -- confirm.
        from ..graph.models import GroupRelationship, GroupType
        return GroupRelationship.query.filter_by(
            source=self.source.identity_group,
            target=self.target.identity_group,
            relation=self.relation,
            type=GroupType.Identity).one_or_none()

    @property
    def data(self):
        """Get the relationship's identity group metadata.

        :returns: The ``json`` of the identity group's data, or ``None``
            when there is no identity group or it has no data.
        """
        # ``identity_group`` queries the database on every access, so it is
        # read once here instead of up to three times.
        group = self.identity_group
        if group and group.data:
            return group.data.json
        return None

    def __repr__(self):
        """String representation of the relationship."""
        return (
            f'<{self.source.value} {self.relation.name} {self.target.value}>'
        )