class Source(db.Model):
    """Primary source of data, e.g. a journal.

    Holds the basic information of the source, its relationship with the
    taxonomy, and the information of the source as a repository of documents.
    """

    __tablename__ = 'iroko_sources'

    id = db.Column(db.Integer, primary_key=True)
    uuid = db.Column(UUIDType, default=uuid.uuid4)
    name = db.Column(db.String, nullable=False, unique=True)
    source_type = db.Column(db.Enum(SourceType))
    source_status = db.Column(db.Enum(SourceStatus))

    # TODO: decide on this. Even though it is duplicated, it seems convenient
    # to keep the relationships with the terms here (and to handle them in the
    # apps accordingly). They are stored in the tables for ease of use, but
    # even if duplicated, this looks easier when it comes to "editing" a
    # Source.
    data = db.Column(JSONType)
    """The data of the Source, dependent on the source type, including the
    relationships with Terms"""

    # term_sources = db.relationship("Term_sources", back_populates="sources")

    def __str__(self):
        """Return the name of the source."""
        return self.name
class Repository(db.Model): """Repository is the information of the Source related to its condition of a repository, the harvest data, etc...is here""" __tablename__ = 'iroko_source_repositories' # id = db.Column( db.Integer, primary_key=True) # source_id = db.Column(db.Integer, db.ForeignKey(Source.id, # name='fk_iroko_source_repository_source_id')) # """ID of Source for this inclusion.""" # # source = db.relationship("Source", backref=db.backref("repository",cascade="all, # delete-orphan", lazy='dynamic')) source_uuid = db.Column(UUIDType, primary_key=True) harvest_type = db.Column(db.Enum(HarvestType)) harvest_endpoint = db.Column(db.String) last_harvest_run = db.Column(db.DateTime, nullable=True) identifier = db.Column(db.String) status = db.Column(db.Enum(HarvestedItemStatus)) error_log = db.Column(db.String) data = db.Column(JSONType) """Any relevant data, dependent of the harvest_type,
class GroupRelationship(db.Model, Timestamp):
    """Typed relation from a source group to a target group."""

    __tablename__ = 'grouprelationship'
    __table_args__ = (
        # A given (source, target, relation) triple may exist only once.
        UniqueConstraint(
            'source_id',
            'target_id',
            'relation',
            name='uq_grouprelationship_source_target_relation',
        ),
        # TODO: Change to "index=True"
        Index('ix_grouprelationship_source', 'source_id'),
        Index('ix_grouprelationship_target', 'target_id'),
        Index('ix_grouprelationship_relation', 'relation'),
    )

    id = db.Column(UUIDType, default=uuid.uuid4, primary_key=True)
    type = db.Column(db.Enum(GroupType), nullable=False)
    relation = db.Column(db.Enum(Relation), nullable=False)
    source_id = db.Column(
        UUIDType,
        db.ForeignKey(Group.id, ondelete='CASCADE', onupdate='CASCADE'),
        nullable=False,
    )
    target_id = db.Column(
        UUIDType,
        db.ForeignKey(Group.id, ondelete='CASCADE', onupdate='CASCADE'),
        nullable=False,
    )

    # DB relationships
    # Both foreign keys point at Group, so each relationship names the column
    # it follows explicitly.
    source = db.relationship(
        Group, foreign_keys=[source_id], backref='sources')
    target = db.relationship(
        Group, foreign_keys=[target_id], backref='targets')

    # Self-referential many-to-many through GroupRelationshipM2M; the lambdas
    # defer resolution of the association model until mapper configuration.
    relationships = db.relationship(
        'GroupRelationship',
        secondary=lambda: GroupRelationshipM2M.__table__,
        primaryjoin=lambda: (
            GroupRelationship.id == GroupRelationshipM2M.relationship_id
        ),
        secondaryjoin=lambda: (
            GroupRelationship.id == GroupRelationshipM2M.subrelationship_id
        ),
    )

    # TODO:
    # We don't store 'deleted' as in the relation as most likely don't need
    # that as 'ground truth' in precomputed groups anyway

    def __repr__(self):
        """Return ``<source relation-name target>`` for this relationship."""
        return '<{} {} {}>'.format(
            self.source, self.relation.name, self.target)
class ObjectEvent(db.Model, Timestamp):
    """Association of an event with an Identifier or a Relationship."""

    __tablename__ = 'objectevent'
    __table_args__ = (
        PrimaryKeyConstraint(
            'event_id',
            'object_uuid',
            'payload_type',
            'payload_index',
            name='pk_objectevent',
        ),
    )

    event_id = db.Column(UUIDType, db.ForeignKey(Event.id), nullable=False)
    object_uuid = db.Column(UUIDType, nullable=False)
    payload_type = db.Column(db.Enum(PayloadType), nullable=False)
    payload_index = db.Column(db.Integer, nullable=False)

    event = db.relationship(Event, backref='object_events')

    @property
    def object(self) -> Union[Identifier, Relationship]:
        """Look up the Identifier or Relationship that ``object_uuid`` names.

        ``payload_type`` selects the model: ``PayloadType.Identifier`` maps to
        ``Identifier``; every other value maps to ``Relationship``.
        """
        is_identifier = self.payload_type == PayloadType.Identifier
        model = Identifier if is_identifier else Relationship
        return model.query.get(self.object_uuid)

    def __repr__(self):
        """Return ``<event_id: object_uuid>``."""
        return '<{}: {}>'.format(self.event_id, self.object_uuid)
class Event(db.Model, Timestamp):
    """Event model."""

    __tablename__ = 'event'

    id = db.Column(UUIDType, default=uuid.uuid4, primary_key=True)
    payload = db.Column(JSONType)
    status = db.Column(db.Enum(EventStatus), nullable=False)
    user_id = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)

    user = db.relationship(User)

    @classmethod
    def get(cls, id: str = None, **kwargs):
        """Get the event with the given ``id`` from the database.

        :param id: Primary key of the event to look up.
        :param kwargs: Accepted for call compatibility; not used.
        :returns: The matching event, or ``None`` when there is none.
        """
        return cls.query.filter_by(id=id).one_or_none()

    @classmethod
    def getStatsFromLastWeek(cls):
        """Count the events updated during the last 7 days, per status.

        :returns: A list of ``(status, count)`` rows.
        """
        # NOTE(review): ``updated`` presumably comes from the Timestamp mixin;
        # confirm whether it stores UTC, since the cutoff below is local time.
        cutoff = datetime.datetime.now() - datetime.timedelta(days=7)
        # Bind the datetime itself rather than ``str(cutoff)``: the string
        # form drops the fractional part when microseconds are 0 and makes the
        # filter depend on backend-specific string/timestamp coercion.
        return (
            db.session.query(cls.status, func.count('*'))
            .filter(cls.updated > cutoff)
            .group_by(cls.status)
            .all()
        )

    def __repr__(self):
        """String representation of the event."""
        return f"<{self.id}: {self.created}>"
class CDSRun(db.Model):
    """Record of a single CDS run: start date, runtime, status and message."""

    __tablename__ = "cds_runs"
    __table_args__ = (
        db.Index("ix_cds_runs_status_date", "status", "date"),
    )

    task_id = db.Column(UUIDType, primary_key=True)
    date = db.Column(db.DateTime)
    runtime = db.Column(db.Interval)
    status = db.Column(db.Enum(CDSRunStatus, name="enum_cds_run_status"))
    message = db.Column(db.UnicodeText, default="")

    @classmethod
    def get_last_successful_run(cls):
        """Return the most recent run with status FINISHED, or ``None``."""
        finished_runs = cls.query.filter_by(status=CDSRunStatus.FINISHED)
        newest_first = finished_runs.order_by(cls.date.desc())
        return newest_first.first()

    @classmethod
    def new_run(cls):
        """Add a RUNNING run dated now to the session and return its task id.

        The session is not committed here.
        """
        task_id = uuid.uuid4()
        started_run = CDSRun(
            date=datetime.now(),
            status=CDSRunStatus.RUNNING,
            task_id=task_id,
        )
        db.session.add(started_run)
        return task_id

    @classmethod
    def update_status(cls, task_id, status, message=None):
        """Set status and runtime (and optionally message) of a run.

        The run is looked up with ``.one()``, so exactly one row must match
        ``task_id``. A falsy ``message`` leaves the stored message untouched.
        The session is not committed here.
        """
        run = cls.query.filter_by(task_id=task_id).one()
        if message:
            run.message = message
        run.status = status
        # Runtime is the time elapsed since the recorded start date.
        run.runtime = datetime.now() - run.date
class GitWebhookSubscriber(db.Model):
    """Records subscribed to the git repository events."""

    __tablename__ = 'git_subscriber'
    # One-element tuple: a record can subscribe to a given webhook only once.
    __table_args__ = (
        db.UniqueConstraint(
            'record_id',
            'webhook_id',
            name='uq_git_webhook_subscriber_unique_constraint',
        ),
    )

    id = db.Column(db.Integer, primary_key=True)
    type = db.Column(
        db.Enum('notify', 'download', name='git_event_type'),
        nullable=False,
    )
    status = db.Column(
        db.Enum('active', 'deleted', name='git_webhook_status'),
        nullable=False,
        default='active',
    )

    record_id = db.Column(
        UUIDType, db.ForeignKey(RecordMetadata.id), nullable=False)
    record = db.relationship(
        RecordMetadata,
        backref=db.backref("webhooks", cascade="all, delete-orphan"),
    )

    webhook_id = db.Column(
        db.Integer, db.ForeignKey(GitWebhook.id), nullable=False)
    webhook = db.relationship(
        GitWebhook,
        backref=db.backref("subscribers", cascade="all, delete-orphan"),
    )

    user_id = db.Column(db.Integer, db.ForeignKey(User.id), nullable=False)
    user = db.relationship(User)

    @property
    def repo(self):
        """Return the ``repo`` of the webhook this subscriber is attached to."""
        hook = self.webhook
        return hook.repo
class Notification(db.Model):
    """A notification addressed to a user."""

    __tablename__ = 'iroko_notification'

    id = db.Column(db.Integer, primary_key=True)
    classification = db.Column(db.Enum(NotificationType))
    description = db.Column(db.String)
    emiter = db.Column(db.String)
    viewed = db.Column(db.Boolean, default=False)

    receiver_id = db.Column(
        db.Integer,
        db.ForeignKey(User.id, name='fk_iroko_notifications_user_id'),
    )
    receiver = db.relationship(
        User,
        backref=db.backref('notifications', cascade='all, delete-orphan'),
    )

    # any data related to the notification
    data = db.Column(JSONType)
class HarvestedItem(db.Model):
    """An item harvested from a repository."""

    __tablename__ = 'iroko_harvest_items'
    __table_args__ = (
        # An identifier may appear only once per repository.
        db.UniqueConstraint(
            'source_uuid', 'identifier', name='identifier_in_repository'),
    )

    id = db.Column(db.Integer, primary_key=True)

    source_uuid = db.Column(
        UUIDType,
        db.ForeignKey(
            'iroko_source_repositories.source_uuid', ondelete='CASCADE'),
        nullable=False,
        index=True,
    )
    repository = db.relationship(
        "Repository", backref=db.backref('harvested_items'))

    # Identifier of the item in the repository.
    identifier = db.Column(db.String, nullable=False)

    # UUID of the associated iroko record.
    record = db.Column(UUIDType, nullable=True)

    status = db.Column(db.Enum(HarvestedItemStatus))
    error_log = db.Column(db.String)

    data = db.Column(JSONType)
    """Any other relevant data to be used in the future could be here."""

    def __str__(self):
        """Return the identifier of the item."""
        return self.identifier
class Event(db.Model, Timestamp):
    """Event with a JSON payload, a status and an optional user."""

    __tablename__ = 'event'

    id = db.Column(UUIDType, default=uuid.uuid4, primary_key=True)
    payload = db.Column(JSONType)
    status = db.Column(db.Enum(EventStatus), nullable=False)
    user_id = db.Column(db.Integer, db.ForeignKey(User.id), nullable=True)

    user = db.relationship(User)

    @classmethod
    def get(cls, id: str = None, **kwargs):
        """Fetch the event whose primary key is ``id``.

        Extra keyword arguments are accepted but not used. Returns ``None``
        when no event matches.
        """
        matching = cls.query.filter_by(id=id)
        return matching.one_or_none()

    def __repr__(self):
        """Return ``<id: created>``."""
        return '<{}: {}>'.format(self.id, self.created)
class HarvestMonitoring(db.Model, Timestamp):
    """Harvesting monitoring model."""

    __tablename__ = 'harvest_monitoring'

    id = db.Column(UUIDType, default=uuid.uuid4, primary_key=True)
    identifier = db.Column(db.String, nullable=False)
    scheme = db.Column(db.String)
    harvester = db.Column(db.String)
    status = db.Column(db.Enum(HarvestStatus), nullable=False)

    @classmethod
    def get(cls, id: str = None, **kwargs):
        """Get the harvest monitoring entry with the given ``id``.

        :param id: Primary key of the entry to look up.
        :param kwargs: Accepted for call compatibility; not used.
        :returns: The matching entry, or ``None`` when there is none.
        """
        return cls.query.filter_by(id=id).one_or_none()

    @classmethod
    def isRecentlyAdded(cls, identifier: str, scheme: str, harvester: str,
                        **kwargs) -> bool:
        """Check whether the same identifier was handled during the last week.

        Used to avoid duplicates: an entry counts as recent when it matches
        ``identifier``, ``scheme`` and ``harvester`` and was updated within
        the last 7 days.

        :param identifier: Identifier value to look for.
        :param scheme: Scheme of the identifier.
        :param harvester: Name of the harvester.
        :param kwargs: Accepted for call compatibility; not used.
        :returns: ``True`` if such an entry exists, ``False`` otherwise.
        """
        # NOTE(review): ``updated`` presumably comes from the Timestamp mixin;
        # confirm whether it stores UTC, since the cutoff below is local time.
        cutoff = datetime.datetime.now() - datetime.timedelta(days=7)
        # Bind the datetime itself rather than ``str(cutoff)`` so the
        # comparison does not depend on string formatting of the timestamp.
        recent = cls.query.filter(
            cls.identifier == identifier,
            cls.scheme == scheme,
            cls.harvester == harvester,
            cls.updated > cutoff,
        ).first()
        return recent is not None

    @classmethod
    def getStatsFromLastWeek(cls):
        """Count the entries updated during the last 7 days, per status.

        :returns: A list of ``(status, count)`` rows.
        """
        cutoff = datetime.datetime.now() - datetime.timedelta(days=7)
        return (
            db.session.query(cls.status, func.count('*'))
            .filter(cls.updated > cutoff)
            .group_by(cls.status)
            .all()
        )

    def __repr__(self):
        """String representation of the harvest monitoring entry."""
        return f"<{self.id}: {self.created} : {self.identifier}>"
class Group(db.Model, Timestamp):
    """Typed group of identifiers that may also contain other groups."""

    __tablename__ = 'group'

    id = db.Column(UUIDType, default=uuid.uuid4, primary_key=True)
    type = db.Column(db.Enum(GroupType), nullable=False)

    # Read-only view of the member identifiers, joined through
    # Identifier2Group.
    identifiers = db.relationship(
        Identifier,
        secondary=lambda: Identifier2Group.__table__,
        backref='groups',
        viewonly=True,
    )

    # Self-referential many-to-many through GroupM2M (group -> subgroup).
    groups = db.relationship(
        'Group',
        secondary=lambda: GroupM2M.__table__,
        primaryjoin=lambda: (Group.id == GroupM2M.group_id),
        secondaryjoin=lambda: (Group.id == GroupM2M.subgroup_id),
    )

    def __repr__(self):
        """Return ``<id: type-name>``."""
        return '<{}: {}>'.format(self.id, self.type.name)
class ReanaWorkflow(db.Model):
    """Database model of a REANA workflow attached to a record and a user."""

    __tablename__ = 'reana_workflows'

    id = db.Column(
        UUIDType, primary_key=True, default=uuid.uuid4, nullable=False)
    rec_uuid = db.Column(
        UUIDType, db.ForeignKey(RecordMetadata.id), nullable=False)
    user_id = db.Column(db.Integer, db.ForeignKey(User.id), nullable=False)

    workflow_id = db.Column(UUIDType, unique=True, nullable=False)
    name = db.Column(db.String(100), unique=False, nullable=False)
    workflow_name = db.Column(db.String(100), unique=False, nullable=False)
    workflow_name_run = db.Column(
        db.String(100), unique=False, nullable=False)
    status = db.Column(
        db.Enum(
            'created',
            'queued',
            'running',
            'stopped',
            'failed',
            'finished',
            'deleted',
            name='status',
        ),
        unique=False,
        nullable=False,
    )

    # the following fields represent the creation part of a workflow
    workflow_json = db.Column(json_type, default=lambda: {}, nullable=True)

    # logging after the workflow runs
    logs = db.Column(json_type, default=lambda: {}, nullable=True)

    created = db.Column(db.DateTime, server_default=db.func.now())
    updated = db.Column(
        db.DateTime,
        server_default=db.func.now(),
        server_onupdate=db.func.now(),
    )

    user = db.relationship('User')
    record = db.relationship(
        'RecordMetadata',
        backref=db.backref('reana_workflows', cascade='all, delete-orphan'),
    )

    @classmethod
    def get_user_workflows(cls, user_id):
        """Return all workflows whose ``user_id`` matches."""
        return cls.query.filter_by(user_id=user_id).all()

    @classmethod
    def get_deposit_workflows(cls, depid):
        """Return all workflows whose ``rec_uuid`` equals ``depid``."""
        return cls.query.filter_by(rec_uuid=depid).all()

    @classmethod
    def get_workflow_by_id(cls, workflow_id):
        """Return the workflow with this ``workflow_id``, or ``None``."""
        matching = cls.query.filter_by(workflow_id=workflow_id)
        return matching.one_or_none()

    def serialize(self):
        """Dump this workflow through ``reana_workflow_serializer``."""
        dumped = reana_workflow_serializer.dump(self)
        return dumped.data
class ImporterImportLog(db.Model):
    """History of importer tasks.

    NOTE(review): the original docstring described this as the "ldap
    synchronization task history"; the columns describe a file import, so
    that wording looks copied from elsewhere - confirm.
    """

    __tablename__ = "importer_import_log"

    id = db.Column(db.Integer, primary_key=True)

    celery_task_id = db.Column(db.String)

    agent = db.Column(Enum(ImporterAgent), nullable=False)
    """The agent that initiated the task."""

    status = db.Column(
        Enum(ImporterTaskStatus),
        nullable=False,
        default=ImporterTaskStatus.RUNNING,
    )
    """The current status of the task."""

    provider = db.Column(db.String, nullable=False)
    """The provider of the data. Unconstrained for extensibility purposes."""

    source_type = db.Column(db.String, nullable=False)
    """The format of the source data."""

    mode = db.Column(db.Enum(ImporterMode), nullable=False)
    """The chosen importation mode."""

    original_filename = db.Column(db.String, nullable=False)
    """The original name of the imported file."""

    start_time = db.Column(
        db.DateTime, nullable=False, default=lambda: datetime.now())
    """Task start time."""

    end_time = db.Column(db.DateTime, nullable=True)
    """Task end time (if not currently running)."""

    message = db.Column(db.String, nullable=True)
    """Message in case of an error."""

    entries_count = db.Column(db.Integer, nullable=True)
    """Number of entries in source file."""

    ignore_missing_rules = db.Column(db.Boolean)

    @classmethod
    def create(cls, data):
        """Build a log from the ``data`` mapping, persist and return it.

        ``data`` is expanded into keyword arguments of the model constructor;
        the session is committed before returning.
        """
        new_log = cls(**data)
        db.session.add(new_log)
        db.session.commit()
        return new_log

    def is_running(self):
        """Tell whether the status is RUNNING."""
        return self.status == ImporterTaskStatus.RUNNING

    def is_cancelled(self):
        """Tell whether the status is CANCELLED."""
        return self.status == ImporterTaskStatus.CANCELLED

    def finalize(self):
        """Mark the import as succeeded, but only if it is still running."""
        if not self.is_running():
            return
        self.set_succeeded()

    def set_succeeded(self):
        """Set status SUCCEEDED, stamp the end time and commit.

        Asserts that the task is currently running.
        """
        assert self.is_running()
        self.status = ImporterTaskStatus.SUCCEEDED
        self.end_time = datetime.now()
        db.session.commit()

    def set_failed(self, exception):
        """Set status FAILED, store the formatted exception and commit.

        Asserts that the task is currently running.
        """
        assert self.is_running()
        self.status = ImporterTaskStatus.FAILED
        self.end_time = datetime.now()
        self.message = _format_exception(exception)
        db.session.commit()

    def set_cancelled(self):
        """Set status CANCELLED, stamp the end time and commit."""
        self.status = ImporterTaskStatus.CANCELLED
        self.end_time = datetime.now()
        db.session.commit()

    def set_entries_count(self, entries):
        """Store ``len(entries)`` as the entries count and commit."""
        total = len(entries)
        self.entries_count = total
        db.session.commit()
class Task(db.Model, Timestamp):
    """Task database model."""

    __tablename__ = 'flows_task'

    id = db.Column(
        UUIDType,
        primary_key=True,
        default=uuid.uuid4,
    )
    """Task identifier."""

    previous = db.Column(
        db.JSON().with_variant(
            postgresql.JSONB(none_as_null=True),
            'postgresql',
        ).with_variant(
            JSONType(),
            'sqlite',
        ).with_variant(
            JSONType(),
            'mysql',
        ),
        default=lambda: list(),
        nullable=True,
    )
    """List of tasks that need to run before this one, if any.

    This is mainly used by visuals to create the flow diagram.
    """

    flow_id = db.Column(
        UUIDType,
        db.ForeignKey(Flow.id, onupdate="CASCADE", ondelete="CASCADE"),
        nullable=False,
    )
    """Task flow instance."""

    flow = db.relationship(Flow, backref='tasks')
    """Relationship to the Flow."""

    name = db.Column(db.String, nullable=False)
    """Task name."""

    payload = db.Column(
        db.JSON().with_variant(
            postgresql.JSONB(none_as_null=True),
            'postgresql',
        ).with_variant(
            JSONType(),
            'sqlite',
        ).with_variant(
            JSONType(),
            'mysql',
        ),
        default=lambda: dict(),
        nullable=True,
    )
    """Task payload in JSON format, typically args and kwargs."""

    status = db.Column(
        db.Enum(Status),
        nullable=False,
        default=Status.PENDING,
    )
    """Status of the task, i.e. pending, success, failure."""

    message = db.Column(db.String, nullable=False, default='')
    """Task status message."""

    @classmethod
    def create(cls, name, flow_id, id_=None, payload=None, previous=None):
        """Create a new Task and add it to the session.

        The insert runs inside a nested transaction (``begin_nested``); the
        outer session is not committed here.

        :param name: Task name.
        :param flow_id: Identifier of the flow the task belongs to.
        :param id_: Task identifier; a random UUID is generated when falsy.
        :param payload: Task payload; defaults to an empty dict when falsy.
        :param previous: Tasks that must run first; defaults to an empty list
            when falsy.
        :returns: The newly created task.
        :raises SQLAlchemyError: Re-raised after logging when creation fails.
        """
        try:
            with db.session.begin_nested():
                obj = cls(
                    id=id_ or uuid.uuid4(),
                    flow_id=flow_id,
                    name=name,
                    payload=payload or {},
                    previous=previous or [],
                )
                db.session.add(obj)
            # The messages name the Task: this method creates tasks, and the
            # previous "Flow" wording made the logs misleading.
            logger.info('Created new Task %s', obj)
        except SQLAlchemyError:
            logger.exception(
                'Failed to create Task with %s, %s, %s, %s',
                id_,
                flow_id,
                name,
                payload,
            )
            raise
        return obj

    @classmethod
    def get(cls, id_):
        """Get a task object from the DB by primary key.

        :param id_: Task identifier.
        :returns: The task, or ``None`` when it does not exist.
        """
        return cls.query.get(id_)

    def to_dict(self):
        """Task dictionary representation.

        Identifiers and status are rendered with ``str``; ``created`` and
        ``updated`` as ISO 8601 strings.
        """
        return {
            'id': str(self.id),
            'flow_id': str(self.flow_id),
            'created': self.created.isoformat(),
            'updated': self.updated.isoformat(),
            'name': self.name,
            'payload': self.payload,
            'status': str(self.status),
            'message': self.message,
            'previous': self.previous,
        }
class Relationship(db.Model, Timestamp):
    """Relationship between two identifiers."""

    __tablename__ = 'relationship'
    __table_args__ = (
        UniqueConstraint('source_id', 'target_id', 'relation',
                         name='uq_relationship_source_target_relation'),
        Index('ix_relationship_source', 'source_id'),
        Index('ix_relationship_target', 'target_id'),
        Index('ix_relationship_relation', 'relation'),
    )

    id = db.Column(UUIDType, default=uuid.uuid4, primary_key=True)
    source_id = db.Column(
        UUIDType,
        db.ForeignKey(Identifier.id, onupdate='CASCADE', ondelete='CASCADE',
                      name='fk_relationship_source'),
        nullable=False
    )
    target_id = db.Column(
        UUIDType,
        db.ForeignKey(Identifier.id, onupdate='CASCADE', ondelete='CASCADE',
                      name='fk_relationship_target'),
        nullable=False
    )
    relation = db.Column(db.Enum(Relation))

    source = db.relationship(Identifier, foreign_keys=[source_id],
                             backref='sources')
    target = db.relationship(Identifier, foreign_keys=[target_id],
                             backref='targets')

    @classmethod
    def get(cls, source, target, relation, **kwargs):
        """Get the relationship from the database.

        :param source: Source identifier; its ``id`` is matched.
        :param target: Target identifier; its ``id`` is matched.
        :param relation: Relation between source and target.
        :param kwargs: Accepted for call compatibility; not used.
        :returns: The matching relationship, or ``None`` when there is none.
        """
        return cls.query.filter_by(
            source_id=source.id, target_id=target.id,
            relation=relation).one_or_none()

    def fetch_or_create_id(self):
        """Fetch from the database or create an id for the relationship.

        Source and target are resolved first through their own
        ``fetch_or_create_id``. If this object already has an id it is
        returned unchanged. Otherwise the stored relationship with the same
        source, target and relation is returned when one exists; failing
        that, a new UUID is assigned to this object.

        :returns: This object, or the already stored equivalent.
        """
        self.source = self.source.fetch_or_create_id()
        self.target = self.target.fetch_or_create_id()
        if self.id:
            return self
        existing = self.get(self.source, self.target, self.relation)
        if existing:
            return existing
        self.id = uuid.uuid4()
        return self

    @property
    def identity_group(self):
        """Get the relationship's identity group.

        Each access runs a query for the ``GroupRelationship`` of type
        ``GroupType.Identity`` that links the identity groups of the source
        and the target with the same relation.

        :returns: The group relationship, or ``None`` when there is none.
        """
        # TODO: See if we can avoid this
        # NOTE(review): local import, presumably to dodge a circular import.
        from ..graph.models import GroupRelationship, GroupType
        return GroupRelationship.query.filter_by(
            source=self.source.identity_group,
            target=self.target.identity_group,
            relation=self.relation,
            type=GroupType.Identity).one_or_none()

    @property
    def data(self):
        """Get the relationship's identity group metadata.

        :returns: The ``json`` of the identity group's data, or ``None`` when
            there is no identity group or it has no data.
        """
        # ``identity_group`` queries the database on every access, so it is
        # looked up once and reused.
        group = self.identity_group
        if group and group.data:
            return group.data.json
        return None

    def __repr__(self):
        """String representation of the relationship."""
        return (
            f'<{self.source.value} {self.relation.name} {self.target.value}>'
        )