class NonInteractivePipelineRun(PipelineRun):
    """Pipeline run that is tied to a job through ``job_uuid``.

    Declared with ``__tablename__ = None``, i.e. it is mapped with
    single table inheritance on top of ``PipelineRun`` (see the notes
    below).
    """

    # Inheritance notes, see
    # https://docs.sqlalchemy.org/en/14/orm/inheritance.html
    #
    # SQLAlchemy supports 3 kinds of inheritance: joined table, single
    # table and concrete.
    #
    # Concrete is, essentially, not recommended unless you have a
    # reason to use it. It will also lead to FK issues if the base
    # table is abstract.
    #
    # "ORM-enabled UPDATEs and DELETEs do not handle joined table
    # inheritance automatically." This means, for example, that
    # updating a NonInteractivePipelineRun would not allow updating the
    # columns that belong to the InteractiveRun, so the
    # update_status_db function from the utils module would not work
    # when updating the status of a non interactive run.
    # https://docs.sqlalchemy.org/en/14/orm/session_basics.html#update-and-delete-with-arbitrary-where-clause
    #
    # Single table inheritance is the inheritance of choice, mostly
    # because of the drawbacks of joined table inheritance. Setting the
    # tablename to None results in single table inheritance, setting it
    # to a string results in joined table inheritance.
    # Note that single table inheritance will NOT create a new table
    # for each "child" of the inheritance.
    __tablename__ = None

    # TODO: verify why the job_uuid should be part of the
    # primary key
    job_uuid = db.Column(
        db.String(36),
        db.ForeignKey("jobs.uuid", ondelete="CASCADE"),
        index=True,
    )

    # Batch of non interactive runs of a job this run belongs to. The
    # first time a job runs will produce batch 1, then batch 2, etc.
    job_run_index = db.Column(
        db.Integer,
        server_default=text("0"),
        nullable=False,
    )

    # Identifies the pipeline run within the job and maintains a
    # consistent ordering.
    job_run_pipeline_run_index = db.Column(db.Integer)

    # The pipeline run number across all job runs of a job.
    pipeline_run_index = db.Column(db.Integer)

    # Parameters with which it was run, so that the history is kept.
    parameters = db.Column(
        JSONB,
        # The server default keeps migrated entries that did not have
        # this column valid.
        server_default="{}",
        nullable=False,
    )

    # Related to inheriting from PipelineRun.
    __mapper_args__ = {
        "polymorphic_identity": "NonInteractivePipelineRun",
    }
class InteractiveRunImageMapping(BaseModel):
    """Mapping between an interactive run and the environment images it uses.

    Used to understand if an image can be removed from the docker
    environment if it's not used by a run which is PENDING or STARTED.
    """

    __tablename__ = "interactive_run_image_mapping"

    # A run can reference a given environment, and a given docker
    # image, only once.
    __table_args__ = (
        UniqueConstraint("run_uuid", "orchest_environment_uuid"),
        UniqueConstraint("run_uuid", "docker_img_id"),
    )

    # The three columns below together form the primary key.
    run_uuid = db.Column(
        db.ForeignKey(InteractiveRun.run_uuid, ondelete="CASCADE"),
        primary_key=True,
        nullable=False,
        unique=False,
        index=True,
    )
    orchest_environment_uuid = db.Column(
        db.String(36),
        primary_key=True,
        nullable=False,
        unique=False,
    )
    docker_img_id = db.Column(
        db.String(),
        primary_key=True,
        nullable=False,
        unique=False,
    )

    def __repr__(self):
        """Return a debug representation listing the three key columns."""
        return (
            f"<InteractiveRunImageMapping: {self.run_uuid} | "
            f"{self.orchest_environment_uuid} | "
            f"{self.docker_img_id}>"
        )
class Project(BaseModel):
    """ORM model mapped to the ``projects`` table."""

    __tablename__ = "projects"

    uuid = db.Column(db.String(36), nullable=False, primary_key=True)

    # Wrapped in deferred(), so the column is not loaded together with
    # the rest of the row.
    env_variables = deferred(
        db.Column(JSONB, server_default="{}", nullable=False)
    )

    # Note that all relationships are lazy=select.
    pipelines = db.relationship(
        "Pipeline",
        lazy="select",
        cascade="all, delete",
        passive_deletes=True,
    )
    environment_builds = db.relationship(
        "EnvironmentBuild",
        lazy="select",
        cascade="all, delete",
        passive_deletes=True,
    )
    interactive_sessions = db.relationship(
        "InteractiveSession",
        lazy="select",
        cascade="all, delete",
        passive_deletes=True,
    )
    jobs = db.relationship(
        "Job",
        lazy="select",
        cascade="all, delete",
        passive_deletes=True,
    )
    pipeline_runs = db.relationship(
        "PipelineRun",
        lazy="select",
        cascade="all, delete",
        passive_deletes=True,
    )
class PipelineRun(db.Model):
    """ORM model mapped to the ``pipelineruns`` table."""

    __tablename__ = "pipelineruns"

    uuid = db.Column(
        db.String(255),
        primary_key=True,
        nullable=False,
        unique=True,
    )
    id = db.Column(db.Integer(), unique=False)

    # References experiments.uuid.
    experiment = db.Column(db.ForeignKey("experiments.uuid"))
    parameter_json = db.Column(db.JSON, nullable=False)
class Pipeline(BaseModel):
    """ORM model mapped to the ``pipelines`` table.

    The primary key is composite: ``(project_uuid, uuid)``.
    """

    __tablename__ = "pipelines"

    project_uuid = db.Column(
        db.String(36),
        db.ForeignKey("projects.uuid", ondelete="CASCADE"),
        primary_key=True,
    )
    uuid = db.Column(db.String(36), nullable=False, primary_key=True)

    # Wrapped in deferred(), so the column is not loaded together with
    # the rest of the row.
    env_variables = deferred(
        db.Column(JSONB, server_default="{}", nullable=False)
    )

    # Note that all relationships are lazy=select.
    interactive_sessions = db.relationship(
        "InteractiveSession",
        lazy="select",
        cascade="all, delete",
        passive_deletes=True,
    )
    jobs = db.relationship(
        "Job",
        lazy="select",
        cascade="all, delete",
        passive_deletes=True,
    )
    pipeline_runs = db.relationship(
        "PipelineRun",
        lazy="select",
        cascade="all, delete",
        passive_deletes=True,
    )
class InteractiveSession(BaseModel):
    """ORM model mapped to the ``interactive_sessions`` table.

    Rows are keyed by ``pipeline_uuid``.
    """

    __tablename__ = "interactive_sessions"

    pipeline_uuid = db.Column(
        db.String(36),
        primary_key=True,
    )
    status = db.Column(
        db.String(10),
        primary_key=False,
    )

    # Used to connect to Jupyter notebook server.
    jupyter_server_ip = db.Column(
        db.String(15),
        unique=True,
        nullable=True,
    )  # IPv4

    # Used to connect to Jupyter notebook server.
    notebook_server_info = db.Column(
        db.JSON,
        unique=True,
        nullable=True,
    )

    # Docker container IDs. Used internally to identify the resources of
    # a specific session.
    container_ids = db.Column(
        db.JSON,
        unique=False,
        nullable=True,
    )

    def __repr__(self):
        """Return a debug representation naming this model and its key."""
        # The representation previously read "<Launch ...>", which is
        # the name of a different model; report this class instead.
        return f"<InteractiveSession {self.pipeline_uuid}>"
class PipelineRunPipelineStep(BaseModel):
    """Abstract base holding the columns shared by pipeline step models.

    ``__repr__`` reads ``run_uuid``, which is not declared here;
    subclasses are expected to provide it.
    """

    __abstract__ = True

    step_uuid = db.Column(db.String(36), primary_key=True)
    status = db.Column(db.String(15), nullable=True, unique=False)
    started_time = db.Column(db.DateTime, nullable=True, unique=False)
    finished_time = db.Column(db.DateTime, nullable=True, unique=False)

    def __repr__(self):
        """Return ``<ClassName: run_uuid.step_uuid>``."""
        cls_name = self.__class__.__name__
        return f"<{cls_name}: {self.run_uuid}.{self.step_uuid}>"
class Project(db.Model):
    """ORM model mapped to the ``project`` table."""

    __tablename__ = "project"

    # The (uuid, path) pair must be unique.
    __table_args__ = (UniqueConstraint("uuid", "path"),)

    uuid = db.Column(db.String(255), primary_key=True, nullable=False)
    path = db.Column(db.String(255), nullable=False)
class Experiment(BaseModel):
    """ORM model mapped to the ``experiments`` table.

    Bound to the ``persistent_db`` bind key.
    """

    __tablename__ = "experiments"
    __bind_key__ = "persistent_db"

    experiment_uuid = db.Column(db.String(36), primary_key=True)
    pipeline_uuid = db.Column(db.String(36), primary_key=False)
    total_number_of_pipeline_runs = db.Column(
        db.Integer,
        nullable=False,
        unique=False,
    )
    scheduled_start = db.Column(db.DateTime, nullable=False)

    # Starts at 0 for newly inserted rows (python-side default).
    completed_pipeline_runs = db.Column(
        db.Integer,
        default=0,
        unique=False,
    )

    # Loaded eagerly through a join.
    pipeline_runs = db.relationship("NonInteractiveRun", lazy="joined")

    def __repr__(self):
        """Return ``<Experiment: experiment_uuid>``."""
        return f"<Experiment: {self.experiment_uuid}>"
class NonInteractiveRun(PipelineRun):
    """ORM model mapped to the ``non_interactive_runs`` table.

    Bound to the ``persistent_db`` bind key. The primary key is
    composite: ``(experiment_uuid, run_uuid)``.
    """

    __tablename__ = "non_interactive_runs"
    __bind_key__ = "persistent_db"

    experiment_uuid = db.Column(
        db.String(36),
        db.ForeignKey("experiments.experiment_uuid"),
        primary_key=True,
    )
    run_uuid = db.Column(db.String(36), primary_key=True)

    # This run_id is used to identify the pipeline run within the
    # experiment and maintain a consistent ordering.
    pipeline_run_id = db.Column(
        db.Integer,
        nullable=False,
        unique=False,
    )
    started_time = db.Column(db.DateTime, nullable=True, unique=False)
    finished_time = db.Column(db.DateTime, nullable=True, unique=False)

    # Loaded eagerly through a join.
    pipeline_steps = db.relationship(
        "NonInteractiveRunPipelineStep",
        lazy="joined",
    )
class User(db.Model):
    """ORM model mapped to the ``users`` table."""

    __tablename__ = "users"

    # NOTE(review): uuid, username and password_hash are all flagged as
    # primary key columns, which makes the primary key composite --
    # confirm this is intended.
    uuid = db.Column(
        db.String(36),
        primary_key=True,
        # Required to be referenced as a foreign key by the Token
        # table, since postgres does not accept foreign keys
        # referencing non unique fields.
        unique=True,
    )
    username = db.Column(db.String(255), primary_key=True)
    password_hash = db.Column(db.String(255), primary_key=True)

    # Filled in by the database with the current UTC time.
    created = db.Column(
        db.DateTime,
        server_default=text("timezone('utc', now())"),
        nullable=False,
        unique=False,
    )
class PipelineRun(db.Model):
    """ORM model mapped to the ``pipelineruns`` table."""

    __tablename__ = "pipelineruns"

    uuid = db.Column(
        db.String(255),
        primary_key=True,
        nullable=False,
        unique=True,
    )
    id = db.Column(db.Integer(), unique=False)

    # References jobs.uuid; the foreign key is declared with
    # ON DELETE CASCADE.
    job = db.Column(db.ForeignKey("jobs.uuid", ondelete="CASCADE"))
    parameter_json = db.Column(db.JSON, nullable=False)
class PipelineRun(BaseModel):
    """Abstract base holding the columns shared by pipeline run models.

    ``__repr__`` reads ``run_uuid``, which is not declared here;
    subclasses are expected to provide it.
    """

    __abstract__ = True

    pipeline_uuid = db.Column(db.String(36), nullable=False, unique=False)
    status = db.Column(db.String(15), nullable=True, unique=False)

    def __repr__(self):
        """Return ``<ClassName: run_uuid>``."""
        cls_name = self.__class__.__name__
        return f"<{cls_name}: {self.run_uuid}>"
class Launch(BaseModel, db.Model):
    """ORM model mapped to the ``launches`` table, keyed by pipeline."""

    __tablename__ = "launches"

    pipeline_uuid = db.Column(db.String(36), primary_key=True)
    server_ip = db.Column(
        db.String(15),
        nullable=False,
        unique=True,
    )  # IPv4
    server_info = db.Column(db.JSON, nullable=False, unique=True)

    def __repr__(self):
        """Return ``<Launch pipeline_uuid>``."""
        return f"<Launch {self.pipeline_uuid}>"
class Pipeline(db.Model):
    """ORM model mapped to the ``pipeline`` table.

    The primary key is composite: ``(uuid, project_uuid)``.
    """

    __tablename__ = "pipeline"

    # The (uuid, project_uuid) pair must be unique.
    __table_args__ = (UniqueConstraint("uuid", "project_uuid"),)

    uuid = db.Column(db.String(255), primary_key=True, nullable=False)
    project_uuid = db.Column(
        db.ForeignKey("project.uuid"),
        primary_key=True,
    )
    path = db.Column(db.String(255), nullable=False)
class Pipeline(db.Model):
    """ORM model mapped to the ``pipelines`` table.

    The primary key is composite: ``(uuid, project_uuid)``.
    """

    __tablename__ = "pipelines"

    uuid = db.Column(db.String(255), primary_key=True, nullable=False)

    # The foreign key to projects.uuid is declared with
    # ON DELETE CASCADE.
    project_uuid = db.Column(
        db.ForeignKey("projects.uuid", ondelete="CASCADE"),
        primary_key=True,
    )
    path = db.Column(db.String(255), nullable=False)
class Run(BaseModel, db.Model):
    """ORM model mapped to the ``runs`` table."""

    __tablename__ = "runs"

    run_uuid = db.Column(db.String(36), primary_key=True)
    pipeline_uuid = db.Column(db.String(36), nullable=False, unique=False)
    status = db.Column(db.String(15), nullable=True, unique=False)

    # Loaded eagerly through a join.
    step_statuses = db.relationship("StepStatus", lazy="joined")

    def __repr__(self):
        """Return ``<Run run_uuid>``."""
        return f"<Run {self.run_uuid}>"
class Image(db.Model):
    """ORM model mapped to the ``images`` table, keyed by ``name``."""

    __tablename__ = "images"

    name = db.Column(
        db.String(255),
        primary_key=True,
        nullable=False,
        unique=True,
    )
    language = db.Column(db.String(255), nullable=False)

    # Python-side default: datetime.datetime.utcnow is passed as a
    # callable.
    created = db.Column(
        db.DateTime,
        default=datetime.datetime.utcnow,
        nullable=False,
    )

    def __repr__(self):
        """Return ``<Images name:language>``."""
        return f"<Images {self.name}:{self.language}>"
class NonInteractiveRunPipelineStep(PipelineRunPipelineStep):
    """ORM model mapped to ``non_interactive_run_pipeline_steps``.

    Bound to the ``persistent_db`` bind key. Adds ``experiment_uuid``
    and ``run_uuid`` as primary key columns on top of what
    ``PipelineRunPipelineStep`` declares.
    """

    __tablename__ = "non_interactive_run_pipeline_steps"
    __bind_key__ = "persistent_db"

    experiment_uuid = db.Column(
        db.String(36),
        db.ForeignKey("experiments.experiment_uuid"),
        primary_key=True,
    )
    run_uuid = db.Column(
        db.String(36),
        db.ForeignKey("non_interactive_runs.run_uuid"),
        primary_key=True,
    )
class DataSource(db.Model):
    """ORM model mapped to the ``datasources`` table, keyed by ``name``."""

    __tablename__ = "datasources"

    name = db.Column(
        db.String(255),
        primary_key=True,
        nullable=False,
        unique=True,
    )
    source_type = db.Column(db.String(100), nullable=False)
    connection_details = db.Column(db.JSON, nullable=False)

    # Python-side default: datetime.datetime.utcnow is passed as a
    # callable.
    created = db.Column(
        db.DateTime,
        default=datetime.datetime.utcnow,
        nullable=False,
    )

    def __repr__(self):
        """Return ``<DataSource name:source_type>``."""
        return f"<DataSource {self.name}:{self.source_type}>"
class Token(db.Model):
    """ORM model mapped to the ``tokens`` table.

    Rows are keyed by ``user``, a foreign key to ``users.uuid``.
    """

    __tablename__ = "tokens"

    token = db.Column(db.String(255))
    user = db.Column(
        db.String(36),
        db.ForeignKey("users.uuid"),
        primary_key=True,
    )

    # Python-side default: datetime.datetime.utcnow is passed as a
    # callable.
    created = db.Column(
        db.DateTime,
        default=datetime.datetime.utcnow,
        nullable=False,
        unique=False,
    )
class Project(db.Model):
    """ORM model mapped to the ``project`` table."""

    __tablename__ = "project"

    uuid = db.Column(db.String(255), primary_key=True, nullable=False)
    path = db.Column(db.String(255), nullable=False)

    # The (uuid, path) pair must be unique.
    __table_args__ = (UniqueConstraint("uuid", "path"),)

    # Loaded eagerly through a join. With passive_deletes=False the
    # delete cascade is handled by the ORM.
    experiments = db.relationship(
        "Experiment",
        lazy="joined",
        cascade="all, delete",
        passive_deletes=False,
    )
class DataSource(db.Model):
    """ORM model mapped to the ``datasources`` table, keyed by ``name``."""

    __tablename__ = "datasources"

    name = db.Column(
        db.String(255),
        primary_key=True,
        nullable=False,
        unique=True,
    )
    source_type = db.Column(db.String(100), nullable=False)
    connection_details = db.Column(db.JSON, nullable=False)

    # Filled in by the database with the current UTC time.
    created = db.Column(
        db.DateTime,
        server_default=text("timezone('utc', now())"),
        nullable=False,
    )

    def __repr__(self):
        """Return ``<DataSource name:source_type>``."""
        return f"<DataSource {self.name}:{self.source_type}>"
class StepStatus(BaseModel, db.Model):
    """ORM model mapped to the ``stepstatus`` table.

    The primary key is composite: ``(run_uuid, step_uuid)``.
    """

    __tablename__ = "stepstatus"

    run_uuid = db.Column(
        db.String(36),
        db.ForeignKey("runs.run_uuid"),
        primary_key=True,
    )
    step_uuid = db.Column(db.String(36), primary_key=True)
    status = db.Column(db.String(15), nullable=True, unique=False)
    started_time = db.Column(db.DateTime, nullable=True, unique=False)
    ended_time = db.Column(db.DateTime, nullable=True, unique=False)

    def __repr__(self):
        """Return ``<StepStatus run_uuid.step_uuid>``."""
        return f"<StepStatus {self.run_uuid}.{self.step_uuid}>"
class EnvironmentBuild(BaseModel):
    """State of environment builds.

    Stores the state of the build task of an environment, i.e. when an
    image needs to be built starting from a base image plus optional sh
    code. This is not related to keeping track of environments or
    images to decide if a project or pipeline can be run.
    """

    __tablename__ = "environment_builds"

    # Composite index over (project_uuid, environment_uuid).
    __table_args__ = (
        Index("uuid_proj_env_index", "project_uuid", "environment_uuid"),
    )

    # https://stackoverflow.com/questions/63164261/celery-task-id-max-length
    uuid = db.Column(db.String(36), nullable=False, primary_key=True)
    project_uuid = db.Column(
        db.String(36),
        db.ForeignKey("projects.uuid", ondelete="CASCADE"),
        primary_key=True,
        index=True,
    )
    environment_uuid = db.Column(db.String(36), index=True, nullable=False)
    project_path = db.Column(db.String(4096), index=True, nullable=False)
    requested_time = db.Column(db.DateTime, nullable=False, unique=False)
    started_time = db.Column(db.DateTime, nullable=True, unique=False)
    finished_time = db.Column(db.DateTime, nullable=True, unique=False)
    status = db.Column(db.String(15), nullable=True, unique=False)

    def __repr__(self):
        """Return ``<EnvironmentBuildTask: uuid>``."""
        return f"<EnvironmentBuildTask: {self.uuid}>"
class BackgroundTask(db.Model):
    """BackgroundTasks, models all tasks to be run in the background."""

    __tablename__ = "background_tasks"

    task_uuid = db.Column(
        db.String(36),
        primary_key=True,
        nullable=False,
        unique=True,
    )

    # See background_task_executor types.
    task_type = db.Column(db.String(50), nullable=True, unique=False)
    status = db.Column(db.String(15), nullable=False, unique=False)
    code = db.Column(db.String(15), nullable=True, unique=False)
    result = db.Column(db.String(), nullable=True, unique=False)

    def __repr__(self):
        """Return ``<BackgroundTask: task_uuid>``."""
        return f"<BackgroundTask: {self.task_uuid}>"
class InteractiveRun(PipelineRun):
    """ORM model mapped to the ``interactive_runs`` table."""

    __tablename__ = "interactive_runs"

    run_uuid = db.Column(db.String(36), primary_key=True)

    # Loaded eagerly through a join.
    pipeline_steps = db.relationship(
        "InteractiveRunPipelineStep",
        lazy="joined",
    )
class PipelineRunStep(BaseModel):
    """ORM model mapped to the ``pipeline_run_steps`` table.

    The primary key is composite: ``(run_uuid, step_uuid)``.
    """

    __tablename__ = "pipeline_run_steps"

    # The foreign key to pipeline_runs.uuid is declared with
    # ON DELETE CASCADE.
    run_uuid = db.Column(
        db.String(36),
        db.ForeignKey("pipeline_runs.uuid", ondelete="CASCADE"),
        primary_key=True,
    )
    step_uuid = db.Column(db.String(36), primary_key=True)
    status = db.Column(db.String(15), nullable=True, unique=False)
    started_time = db.Column(db.DateTime, nullable=True, unique=False)
    finished_time = db.Column(db.DateTime, nullable=True, unique=False)

    def __repr__(self):
        """Return ``<ClassName: run_uuid.step_uuid>``."""
        cls_name = self.__class__.__name__
        return f"<{cls_name}: {self.run_uuid}.{self.step_uuid}>"
class InteractiveRun(PipelineRun):
    """ORM model mapped to the ``interactive_runs`` table.

    Step rows and image mappings are loaded eagerly and deleted by the
    ORM (``passive_deletes=False``) together with the run.
    """

    __tablename__ = "interactive_runs"

    run_uuid = db.Column(db.String(36), primary_key=True)

    # https://docs.sqlalchemy.org/en/14/orm/cascades.html#using-foreign-key-on-delete-cascade-with-orm-relationships
    # In order to use ON DELETE foreign key cascades in conjunction
    # with relationship(), it's important to note first and foremost
    # that the relationship.cascade setting must still be configured
    # to match the desired "delete" or "set null" behavior.
    # Essentially, the specified behaviour in the FK column and the one
    # specified in the relationship must match.
    pipeline_steps = db.relationship(
        "InteractiveRunPipelineStep",
        lazy="joined",
        cascade="all, delete",
        # Do not rely on the db to delete.
        # TODO: can be set to true after we move away from sqlite
        passive_deletes=False,
    )
    image_mappings = db.relationship(
        "InteractiveRunImageMapping",
        lazy="joined",
        cascade="all, delete",
        passive_deletes=False,
    )
class Token(db.Model):
    """ORM model mapped to the ``tokens`` table.

    Rows are keyed by ``user``, a foreign key to ``users.uuid``.
    """

    __tablename__ = "tokens"

    token = db.Column(db.String(255))
    user = db.Column(
        db.String(36),
        db.ForeignKey("users.uuid"),
        primary_key=True,
    )

    # Filled in by the database with the current UTC time.
    created = db.Column(
        db.DateTime,
        server_default=text("timezone('utc', now())"),
        nullable=False,
        unique=False,
    )