Ejemplo n.º 1
0
class NonInteractivePipelineRun(PipelineRun):
    """Pipeline run that belongs to a job, mapped with single table
    inheritance on top of ``PipelineRun``.

    Why single table inheritance
    (https://docs.sqlalchemy.org/en/14/orm/inheritance.html):

    SQLAlchemy has 3 kinds of inheritance: joined table, single table
    and concrete.

    * Concrete is, essentially, not recommended unless you have a
      reason to use it. It will also lead to FK issues if the base
      table is abstract.
    * Joined table: "ORM-enabled UPDATEs and DELETEs do not handle
      joined table inheritance automatically." This means, for example,
      that updating a NonInteractivePipelineRun would not allow
      updating the columns that belong to the other table (the original
      note says "InteractiveRun"; presumably the parent run table is
      meant). As a consequence the update_status_db function from the
      utils module would not work when updating the status of a non
      interactive run.
      https://docs.sqlalchemy.org/en/14/orm/session_basics.html#update-and-delete-with-arbitrary-where-clause
    * Single table is the inheritance of choice, mostly because of the
      drawbacks of joined table inheritance. Note that single table
      inheritance will NOT create a new table for each "child" of the
      inheritance.
    """

    # None selects single table inheritance; a string here would select
    # joined table inheritance instead.
    __tablename__ = None

    # Related to inheriting from PipelineRun.
    __mapper_args__ = {"polymorphic_identity": "NonInteractivePipelineRun"}

    # TODO: verify why the job_uuid should be part of the
    # primary key
    job_uuid = db.Column(
        db.String(36),
        db.ForeignKey("jobs.uuid", ondelete="CASCADE"),
        index=True,
    )

    # Batch of non interactive runs of a job this run belongs to. The
    # first time a job runs will produce batch 1, then batch 2, etc.
    job_run_index = db.Column(db.Integer, nullable=False, server_default=text("0"))

    # Identifies the pipeline run within the job and maintains a
    # consistent ordering.
    job_run_pipeline_run_index = db.Column(db.Integer)

    # The pipeline run number across all job runs of a job.
    pipeline_run_index = db.Column(db.Integer)

    # Parameters with which it was run, so that the history is kept.
    # The server default keeps migrated entries that did not have this
    # column valid.
    parameters = db.Column(JSONB, nullable=False, server_default="{}")
Ejemplo n.º 2
0
class InteractiveRunImageMapping(BaseModel):
    """Mapping between an interactive run and the environment images
    it uses.

    The mapping is used to understand whether an image can be removed
    from the docker environment, i.e. when it is not used by a run
    which is PENDING or STARTED.
    """

    __tablename__ = "interactive_run_image_mapping"
    __table_args__ = (
        UniqueConstraint("run_uuid", "orchest_environment_uuid"),
        UniqueConstraint("run_uuid", "docker_img_id"),
    )

    # Rows are deleted together with the referenced interactive run
    # (ondelete="CASCADE").
    run_uuid = db.Column(
        db.ForeignKey(InteractiveRun.run_uuid, ondelete="CASCADE"),
        unique=False,
        nullable=False,
        index=True,
        primary_key=True,
    )
    orchest_environment_uuid = db.Column(
        db.String(36),
        unique=False,
        nullable=False,
        primary_key=True,
    )
    docker_img_id = db.Column(
        db.String(),
        unique=False,
        nullable=False,
        primary_key=True,
    )

    def __repr__(self):
        """Return a debug string listing the three key columns."""
        run = self.run_uuid
        env = self.orchest_environment_uuid
        img = self.docker_img_id
        return f"<InteractiveRunImageMapping: {run} | {env} | {img}>"
Ejemplo n.º 3
0
class Project(BaseModel):
    """Project row (table ``projects``) and the collections hanging off it.

    Every relationship below is declared with ``lazy="select"``,
    ``passive_deletes=True`` and ``cascade="all, delete"``.
    """

    __tablename__ = "projects"

    uuid = db.Column(db.String(36), primary_key=True, nullable=False)
    # Wrapped in deferred(), so the column is not loaded until accessed.
    env_variables = deferred(db.Column(JSONB, nullable=False, server_default="{}"))

    # Note that all relationships are lazy=select.
    pipelines = db.relationship(
        "Pipeline",
        lazy="select",
        passive_deletes=True,
        cascade="all, delete",
    )
    environment_builds = db.relationship(
        "EnvironmentBuild",
        lazy="select",
        passive_deletes=True,
        cascade="all, delete",
    )
    interactive_sessions = db.relationship(
        "InteractiveSession",
        lazy="select",
        passive_deletes=True,
        cascade="all, delete",
    )
    jobs = db.relationship(
        "Job",
        lazy="select",
        passive_deletes=True,
        cascade="all, delete",
    )
    pipeline_runs = db.relationship(
        "PipelineRun",
        lazy="select",
        passive_deletes=True,
        cascade="all, delete",
    )
Ejemplo n.º 4
0
class PipelineRun(db.Model):
    """Row of the ``pipelineruns`` table; references an experiment."""

    __tablename__ = 'pipelineruns'

    uuid = db.Column(
        db.String(255),
        unique=True,
        nullable=False,
        primary_key=True,
    )
    id = db.Column(db.Integer(), unique=False)
    # Foreign key to experiments.uuid.
    experiment = db.Column(db.ForeignKey("experiments.uuid"))
    parameter_json = db.Column(db.JSON, nullable=False)
Ejemplo n.º 5
0
class Pipeline(BaseModel):
    """Pipeline row (table ``pipelines``), keyed by project and pipeline uuid.

    Every relationship below is declared with ``lazy="select"``,
    ``passive_deletes=True`` and ``cascade="all, delete"``.
    """

    __tablename__ = "pipelines"

    # Part of the composite primary key; the foreign key is declared
    # with ondelete="CASCADE".
    project_uuid = db.Column(
        db.String(36),
        db.ForeignKey("projects.uuid", ondelete="CASCADE"),
        primary_key=True,
    )
    uuid = db.Column(db.String(36), primary_key=True, nullable=False)
    # Wrapped in deferred(), so the column is not loaded until accessed.
    env_variables = deferred(db.Column(JSONB, nullable=False, server_default="{}"))

    # Note that all relationships are lazy=select.
    interactive_sessions = db.relationship(
        "InteractiveSession",
        lazy="select",
        passive_deletes=True,
        cascade="all, delete",
    )
    jobs = db.relationship(
        "Job",
        lazy="select",
        passive_deletes=True,
        cascade="all, delete",
    )
    pipeline_runs = db.relationship(
        "PipelineRun",
        lazy="select",
        passive_deletes=True,
        cascade="all, delete",
    )
Ejemplo n.º 6
0
class InteractiveSession(BaseModel):
    """Interactive session of a pipeline (table ``interactive_sessions``).

    A row is keyed by ``pipeline_uuid`` and stores the session status,
    the information used to connect to the Jupyter notebook server and
    the Docker container IDs of the session.
    """

    __tablename__ = 'interactive_sessions'
    pipeline_uuid = db.Column(
        db.String(36),
        primary_key=True,
    )
    status = db.Column(
        db.String(10),
        primary_key=False,
    )
    # Used to connect to Jupyter notebook server.
    jupyter_server_ip = db.Column(
        db.String(15),
        unique=True,
        nullable=True,
    )  # IPv4
    # Used to connect to Jupyter notebook server.
    notebook_server_info = db.Column(
        db.JSON,
        unique=True,
        nullable=True,
    )
    # Docker container IDs. Used internally to identify the resources of
    # a specific session.
    container_ids = db.Column(
        db.JSON,
        unique=False,
        nullable=True,
    )

    def __repr__(self):
        """Return a debug string identifying the session by pipeline uuid."""
        # The label previously read "Launch", which mislabelled
        # instances of this class in logs and debugging output.
        return f'<InteractiveSession {self.pipeline_uuid}>'
Ejemplo n.º 7
0
class PipelineRunPipelineStep(BaseModel):
    """Abstract base holding the columns shared by pipeline run steps.

    ``__repr__`` reads ``self.run_uuid``, which is not declared here;
    concrete subclasses are expected to provide it.
    """

    __abstract__ = True

    step_uuid = db.Column(db.String(36), primary_key=True)
    status = db.Column(db.String(15), unique=False, nullable=True)
    started_time = db.Column(db.DateTime, unique=False, nullable=True)
    finished_time = db.Column(db.DateTime, unique=False, nullable=True)

    def __repr__(self):
        """Return ``<ClassName: run_uuid.step_uuid>``."""
        cls_name = self.__class__.__name__
        return f'<{cls_name}: {self.run_uuid}.{self.step_uuid}>'
Ejemplo n.º 8
0
class Project(db.Model):
    """Project row (table ``project``): a uuid and the project path."""

    __tablename__ = "project"
    # The (uuid, path) pair must be unique.
    __table_args__ = (UniqueConstraint("uuid", "path"),)

    uuid = db.Column(
        db.String(255),
        nullable=False,
        primary_key=True,
    )
    path = db.Column(
        db.String(255),
        nullable=False,
    )
Ejemplo n.º 9
0
class Experiment(BaseModel):
    """Experiment row (table ``experiments``, bind key ``persistent_db``)."""

    __tablename__ = 'experiments'
    __bind_key__ = 'persistent_db'

    experiment_uuid = db.Column(db.String(36), primary_key=True)
    pipeline_uuid = db.Column(db.String(36), primary_key=False)
    total_number_of_pipeline_runs = db.Column(
        db.Integer, unique=False, nullable=False
    )
    scheduled_start = db.Column(db.DateTime, nullable=False)
    # Python-side default of 0.
    completed_pipeline_runs = db.Column(db.Integer, unique=False, default=0)

    # Runs of this experiment, loaded eagerly (lazy='joined').
    pipeline_runs = db.relationship('NonInteractiveRun', lazy='joined')

    def __repr__(self):
        """Return a debug string identifying the experiment by uuid."""
        experiment_id = self.experiment_uuid
        return f'<Experiment: {experiment_id}>'
Ejemplo n.º 10
0
class NonInteractiveRun(PipelineRun):
    """Pipeline run belonging to an experiment.

    Stored in ``non_interactive_runs`` (bind key ``persistent_db``) with
    a composite primary key of ``experiment_uuid`` and ``run_uuid``.
    """

    __tablename__ = 'non_interactive_runs'
    __bind_key__ = 'persistent_db'

    experiment_uuid = db.Column(
        db.String(36),
        db.ForeignKey('experiments.experiment_uuid'),
        primary_key=True,
    )
    run_uuid = db.Column(db.String(36), primary_key=True)
    # This run_id is used to identify the pipeline run within the
    # experiment and maintain a consistent ordering.
    pipeline_run_id = db.Column(db.Integer, unique=False, nullable=False)
    started_time = db.Column(db.DateTime, unique=False, nullable=True)
    finished_time = db.Column(db.DateTime, unique=False, nullable=True)

    # Steps of this run, loaded eagerly (lazy='joined').
    pipeline_steps = db.relationship(
        'NonInteractiveRunPipelineStep',
        lazy='joined',
    )
Ejemplo n.º 11
0
class User(db.Model):
    """User row (table ``users``).

    NOTE(review): ``uuid``, ``username`` and ``password_hash`` are all
    flagged ``primary_key=True``, so the primary key is composite over
    the three columns -- confirm this is intended.
    """

    __tablename__ = "users"

    # unique=True is required for the column to be referenced as a
    # foreign key by the Token table, since postgres does not accept
    # foreign keys referencing non unique fields.
    uuid = db.Column(db.String(36), primary_key=True, unique=True)

    username = db.Column(db.String(255), primary_key=True)

    password_hash = db.Column(db.String(255), primary_key=True)

    # Creation timestamp, filled in by the database server in UTC.
    created = db.Column(
        db.DateTime,
        unique=False,
        nullable=False,
        server_default=text("timezone('utc', now())"),
    )
Ejemplo n.º 12
0
class PipelineRun(db.Model):
    """Row of the ``pipelineruns`` table; references a job."""

    __tablename__ = "pipelineruns"

    uuid = db.Column(
        db.String(255),
        unique=True,
        nullable=False,
        primary_key=True,
    )
    id = db.Column(db.Integer(), unique=False)
    # Foreign key to jobs.uuid, declared with ondelete="CASCADE".
    job = db.Column(
        db.ForeignKey("jobs.uuid", ondelete="CASCADE"),
    )
    parameter_json = db.Column(db.JSON, nullable=False)
Ejemplo n.º 13
0
class PipelineRun(BaseModel):
    """Abstract base with the columns shared by pipeline runs.

    ``__repr__`` reads ``self.run_uuid``, which is not declared here;
    concrete subclasses are expected to provide it.
    """

    __abstract__ = True

    pipeline_uuid = db.Column(
        db.String(36),
        unique=False,
        nullable=False,
    )
    status = db.Column(
        db.String(15),
        unique=False,
        nullable=True,
    )

    def __repr__(self):
        """Return ``<ClassName: run_uuid>``."""
        cls_name = self.__class__.__name__
        return f'<{cls_name}: {self.run_uuid}>'
Ejemplo n.º 14
0
class Launch(BaseModel, db.Model):
    """Launch row (table ``launches``), keyed by pipeline uuid."""

    __tablename__ = 'launches'

    pipeline_uuid = db.Column(db.String(36), primary_key=True)
    server_ip = db.Column(
        db.String(15),
        unique=True,
        nullable=False,
    )  # IPv4
    server_info = db.Column(
        db.JSON,
        unique=True,
        nullable=False,
    )

    def __repr__(self):
        """Return a debug string identifying the launch by pipeline uuid."""
        pipeline_id = self.pipeline_uuid
        return f'<Launch {pipeline_id}>'
Ejemplo n.º 15
0
class Pipeline(db.Model):
    """Pipeline row (table ``pipeline``), keyed by (uuid, project_uuid)."""

    __tablename__ = "pipeline"
    # The (uuid, project_uuid) pair must be unique.
    __table_args__ = (UniqueConstraint("uuid", "project_uuid"),)

    uuid = db.Column(
        db.String(255),
        nullable=False,
        primary_key=True,
    )
    # Foreign key to project.uuid; also part of the primary key.
    project_uuid = db.Column(
        db.ForeignKey("project.uuid"),
        primary_key=True,
    )
    path = db.Column(
        db.String(255),
        nullable=False,
    )
Ejemplo n.º 16
0
class Pipeline(db.Model):
    """Pipeline row (table ``pipelines``), keyed by (uuid, project_uuid)."""

    __tablename__ = "pipelines"

    uuid = db.Column(
        db.String(255),
        nullable=False,
        primary_key=True,
    )
    # Foreign key to projects.uuid (ondelete="CASCADE"); also part of
    # the primary key.
    project_uuid = db.Column(
        db.ForeignKey("projects.uuid", ondelete="CASCADE"),
        primary_key=True,
    )
    path = db.Column(
        db.String(255),
        nullable=False,
    )
Ejemplo n.º 17
0
class Run(BaseModel, db.Model):
    """Run row (table ``runs``) together with its step statuses."""

    __tablename__ = 'runs'

    run_uuid = db.Column(db.String(36), primary_key=True)
    pipeline_uuid = db.Column(
        db.String(36),
        unique=False,
        nullable=False,
    )
    status = db.Column(
        db.String(15),
        unique=False,
        nullable=True,
    )
    # Step statuses of this run, loaded eagerly (lazy='joined').
    step_statuses = db.relationship('StepStatus', lazy='joined')

    def __repr__(self):
        """Return a debug string identifying the run by uuid."""
        run_id = self.run_uuid
        return f'<Run {run_id}>'
Ejemplo n.º 18
0
class Image(db.Model):
    """Image row (table ``images``): name, language and creation time."""

    __tablename__ = 'images'

    name = db.Column(
        db.String(255),
        unique=True,
        nullable=False,
        primary_key=True,
    )
    language = db.Column(db.String(255), nullable=False)
    # The callable is passed uncalled, so it is evaluated per insert.
    created = db.Column(
        db.DateTime,
        nullable=False,
        default=datetime.datetime.utcnow,
    )

    def __repr__(self):
        """Return a debug string with the image name and language."""
        name, language = self.name, self.language
        return f'<Images {name}:{language}>'
Ejemplo n.º 19
0
class NonInteractiveRunPipelineStep(PipelineRunPipelineStep):
    """Step of a non interactive run.

    Stored in ``non_interactive_run_pipeline_steps`` (bind key
    ``persistent_db``); both columns declared here are part of the
    primary key and are foreign keys.
    """

    __tablename__ = "non_interactive_run_pipeline_steps"
    __bind_key__ = "persistent_db"

    experiment_uuid = db.Column(
        db.String(36),
        db.ForeignKey("experiments.experiment_uuid"),
        primary_key=True,
    )
    run_uuid = db.Column(
        db.String(36),
        db.ForeignKey("non_interactive_runs.run_uuid"),
        primary_key=True,
    )
Ejemplo n.º 20
0
class DataSource(db.Model):
    """Data source row (table ``datasources``), keyed by name."""

    __tablename__ = 'datasources'

    name = db.Column(
        db.String(255),
        unique=True,
        nullable=False,
        primary_key=True,
    )
    source_type = db.Column(db.String(100), nullable=False)
    connection_details = db.Column(db.JSON, nullable=False)
    # The callable is passed uncalled, so it is evaluated per insert.
    created = db.Column(
        db.DateTime,
        nullable=False,
        default=datetime.datetime.utcnow,
    )

    def __repr__(self):
        """Return a debug string with the source name and type."""
        name, kind = self.name, self.source_type
        return f'<DataSource {name}:{kind}>'
Ejemplo n.º 21
0
class Token(db.Model):
    """Token row (table ``tokens``), keyed by the owning user's uuid."""

    __tablename__ = "tokens"

    token = db.Column(db.String(255))

    # Primary key and foreign key to users.uuid.
    user = db.Column(
        db.String(36),
        db.ForeignKey("users.uuid"),
        primary_key=True,
    )

    # The callable is passed uncalled, so it is evaluated per insert.
    created = db.Column(
        db.DateTime,
        unique=False,
        nullable=False,
        default=datetime.datetime.utcnow,
    )
Ejemplo n.º 22
0
class Project(db.Model):
    """Project row (table ``project``) together with its experiments."""

    __tablename__ = "project"
    # The (uuid, path) pair must be unique.
    __table_args__ = (UniqueConstraint("uuid", "path"),)

    uuid = db.Column(
        db.String(255),
        nullable=False,
        primary_key=True,
    )
    path = db.Column(
        db.String(255),
        nullable=False,
    )

    # Loaded eagerly (lazy="joined"), with cascade="all, delete" and
    # passive_deletes=False.
    experiments = db.relationship(
        "Experiment",
        lazy="joined",
        passive_deletes=False,
        cascade="all, delete",
    )
Ejemplo n.º 23
0
class DataSource(db.Model):
    """Data source row (table ``datasources``), keyed by name."""

    __tablename__ = "datasources"

    name = db.Column(
        db.String(255),
        unique=True,
        nullable=False,
        primary_key=True,
    )
    source_type = db.Column(db.String(100), nullable=False)
    connection_details = db.Column(db.JSON, nullable=False)
    # Creation timestamp, filled in by the database server in UTC.
    created = db.Column(
        db.DateTime,
        nullable=False,
        server_default=text("timezone('utc', now())"),
    )

    def __repr__(self):
        """Return a debug string with the source name and type."""
        name, kind = self.name, self.source_type
        return f"<DataSource {name}:{kind}>"
Ejemplo n.º 24
0
class StepStatus(BaseModel, db.Model):
    """Status of one step of a run (table ``stepstatus``).

    The primary key is the pair (``run_uuid``, ``step_uuid``).
    """

    __tablename__ = 'stepstatus'

    run_uuid = db.Column(
        db.String(36),
        db.ForeignKey('runs.run_uuid'),
        primary_key=True,
    )
    step_uuid = db.Column(
        db.String(36),
        primary_key=True,
    )
    status = db.Column(
        db.String(15),
        unique=False,
        nullable=True,
    )
    started_time = db.Column(
        db.DateTime,
        unique=False,
        nullable=True,
    )
    ended_time = db.Column(
        db.DateTime,
        unique=False,
        nullable=True,
    )

    def __repr__(self):
        """Return ``<StepStatus run_uuid.step_uuid>``."""
        run_id, step_id = self.run_uuid, self.step_uuid
        return f'<StepStatus {run_id}.{step_id}>'
Ejemplo n.º 25
0
class EnvironmentBuild(BaseModel):
    """State of environment builds.

    This table stores the state of the build task of an environment,
    i.e. of building an image starting from a base image plus optional
    sh code. It is not related to keeping track of environments or
    images to decide if a project or pipeline can be run.
    """

    __tablename__ = "environment_builds"
    __table_args__ = (
        Index("uuid_proj_env_index", "project_uuid", "environment_uuid"),
    )

    # https://stackoverflow.com/questions/63164261/celery-task-id-max-length
    uuid = db.Column(db.String(36), primary_key=True, nullable=False)
    # Part of the primary key; the foreign key is declared with
    # ondelete="CASCADE".
    project_uuid = db.Column(
        db.String(36),
        db.ForeignKey("projects.uuid", ondelete="CASCADE"),
        primary_key=True,
        index=True,
    )
    environment_uuid = db.Column(
        db.String(36),
        nullable=False,
        index=True,
    )
    project_path = db.Column(
        db.String(4096),
        nullable=False,
        index=True,
    )
    requested_time = db.Column(
        db.DateTime,
        unique=False,
        nullable=False,
    )
    started_time = db.Column(
        db.DateTime,
        unique=False,
        nullable=True,
    )
    finished_time = db.Column(
        db.DateTime,
        unique=False,
        nullable=True,
    )
    status = db.Column(
        db.String(15),
        unique=False,
        nullable=True,
    )

    def __repr__(self):
        """Return a debug string identifying the build by uuid."""
        build_id = self.uuid
        return f"<EnvironmentBuildTask: {build_id}>"
Ejemplo n.º 26
0
class BackgroundTask(db.Model):
    """Model of a task to be run in the background (``background_tasks``)."""

    __tablename__ = "background_tasks"

    task_uuid = db.Column(
        db.String(36),
        primary_key=True,
        unique=True,
        nullable=False,
    )
    # see background_task_executor types
    task_type = db.Column(
        db.String(50),
        unique=False,
        nullable=True,
    )
    status = db.Column(
        db.String(15),
        unique=False,
        nullable=False,
    )
    code = db.Column(
        db.String(15),
        unique=False,
        nullable=True,
    )
    result = db.Column(
        db.String(),
        unique=False,
        nullable=True,
    )

    def __repr__(self):
        """Return a debug string identifying the task by uuid."""
        task_id = self.task_uuid
        return f"<BackgroundTask: {task_id}>"
Ejemplo n.º 27
0
class InteractiveRun(PipelineRun):
    """Interactive pipeline run (table ``interactive_runs``)."""

    __tablename__ = "interactive_runs"

    run_uuid = db.Column(
        db.String(36),
        primary_key=True,
    )

    # Steps of this run, loaded eagerly (lazy="joined").
    pipeline_steps = db.relationship(
        "InteractiveRunPipelineStep",
        lazy="joined",
    )
Ejemplo n.º 28
0
class PipelineRunStep(BaseModel):
    """Step of a pipeline run (table ``pipeline_run_steps``).

    The primary key is the pair (``run_uuid``, ``step_uuid``).
    """

    __tablename__ = "pipeline_run_steps"

    # Foreign key to pipeline_runs.uuid, declared with
    # ondelete="CASCADE".
    run_uuid = db.Column(
        db.String(36),
        db.ForeignKey("pipeline_runs.uuid", ondelete="CASCADE"),
        primary_key=True,
    )

    step_uuid = db.Column(
        db.String(36),
        primary_key=True,
    )
    status = db.Column(
        db.String(15),
        unique=False,
        nullable=True,
    )
    started_time = db.Column(
        db.DateTime,
        unique=False,
        nullable=True,
    )
    finished_time = db.Column(
        db.DateTime,
        unique=False,
        nullable=True,
    )

    def __repr__(self):
        """Return ``<ClassName: run_uuid.step_uuid>``."""
        cls_name = self.__class__.__name__
        return f"<{cls_name}: {self.run_uuid}.{self.step_uuid}>"
Ejemplo n.º 29
0
class InteractiveRun(PipelineRun):
    """Interactive pipeline run (table ``interactive_runs``).

    On cascades, see
    https://docs.sqlalchemy.org/en/14/orm/cascades.html#using-foreign-key-on-delete-cascade-with-orm-relationships

    In order to use ON DELETE foreign key cascades in conjunction with
    relationship(), it's important to note first and foremost that the
    relationship.cascade setting must still be configured to match the
    desired "delete" or "set null" behavior. Essentially, the behaviour
    specified in the FK column and the one specified in the
    relationship must match.
    """

    __tablename__ = "interactive_runs"

    run_uuid = db.Column(
        db.String(36),
        primary_key=True,
    )

    # passive_deletes=False: do not rely on the db to delete.
    # TODO: can be set to true after we move away from SQLite
    pipeline_steps = db.relationship(
        "InteractiveRunPipelineStep",
        lazy="joined",
        passive_deletes=False,
        cascade="all, delete",
    )
    # Configured the same way as pipeline_steps.
    image_mappings = db.relationship(
        "InteractiveRunImageMapping",
        lazy="joined",
        passive_deletes=False,
        cascade="all, delete",
    )
Ejemplo n.º 30
0
class Token(db.Model):
    """Token row (table ``tokens``), keyed by the owning user's uuid."""

    __tablename__ = "tokens"

    token = db.Column(db.String(255))

    # Primary key and foreign key to users.uuid.
    user = db.Column(
        db.String(36),
        db.ForeignKey("users.uuid"),
        primary_key=True,
    )

    # Creation timestamp, filled in by the database server in UTC.
    created = db.Column(
        db.DateTime,
        unique=False,
        nullable=False,
        server_default=text("timezone('utc', now())"),
    )