class SqlExperiment(Base):
    """
    DB model for an experiment. Rows are stored in the ``experiments`` table.
    """
    __tablename__ = 'experiments'

    # Integer, auto-incrementing ID; made the primary key by ``experiment_pk`` below.
    experiment_id = Column(Integer, autoincrement=True)
    # Experiment name: at most 256 characters, unique and non-null.
    name = Column(String(256), unique=True, nullable=False)
    # Artifact location: at most 256 characters, nullable.
    artifact_location = Column(String(256), nullable=True)
    # Lifecycle stage: at most 32 characters, defaults to ``LifecycleStage.ACTIVE``.
    lifecycle_stage = Column(String(32), default=LifecycleStage.ACTIVE)

    # The CHECK constraint restricts ``lifecycle_stage`` to the stages returned for
    # ``ViewType.ALL``; the primary key is declared here rather than on the column.
    __table_args__ = (CheckConstraint(lifecycle_stage.in_(
        LifecycleStage.view_type_to_stages(ViewType.ALL)), name='lifecycle_stage'),
        PrimaryKeyConstraint('experiment_id', name='experiment_pk'))

    def __repr__(self):
        """Return a short debug representation containing the ID and the name."""
        return '<SqlExperiment ({}, {})>'.format(self.experiment_id, self.name)

    def to_mlflow_entity(self):
        """
        Convert this row to an ``Experiment`` by delegating to ``_create_entity``.

        :return: The value returned by ``_create_entity(Experiment, self)``.
        """
        return _create_entity(Experiment, self)
class SqlExperiment(Base):
    """
    DB model for :py:class:`mlflow.entities.Experiment`. These are recorded in the
    ``experiments`` table.
    """
    __tablename__ = 'experiments'

    experiment_id = Column(Integer, autoincrement=True)
    """
    Experiment ID: `Integer` (auto-incrementing). *Primary Key* for ``experiments`` table.
    """
    name = Column(String(256), unique=True, nullable=False)
    """
    Experiment name: `String` (limit 256 characters). Defined as *Unique* and *Non null* in
    table schema.
    """
    artifact_location = Column(String(256), nullable=True)
    """
    Default artifact location for this experiment: `String` (limit 256 characters). Defined as
    *Nullable* in table schema.
    """
    lifecycle_stage = Column(String(32), default=LifecycleStage.ACTIVE)
    """
    Lifecycle Stage of experiment: `String` (limit 32 characters). Defaults to
    ``LifecycleStage.ACTIVE``; restricted by a check constraint to the stages returned by
    ``LifecycleStage.view_type_to_stages(ViewType.ALL)``.
    """

    __table_args__ = (
        CheckConstraint(
            lifecycle_stage.in_(LifecycleStage.view_type_to_stages(ViewType.ALL)),
            name='lifecycle_stage'),
        PrimaryKeyConstraint('experiment_id', name='experiment_pk')
    )

    def __repr__(self):
        """Return a short debug representation containing the ID and the name."""
        return '<SqlExperiment ({}, {})>'.format(self.experiment_id, self.name)

    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        :return: :py:class:`mlflow.entities.Experiment`.
        """
        return _create_entity(Experiment, self)
def _list_experiments(self, session, ids=None, names=None, view_type=ViewType.ACTIVE_ONLY,
                      eager=False):
    """
    Query ``SqlExperiment`` rows, filtered by lifecycle stage and optionally by ID and name.

    :param session: Session object on which the query is issued.
    :param ids: Optional collection of experiment IDs. Each one is converted with ``int()``
                before filtering. No ID filter is applied when ``None`` or empty.
    :param names: Optional collection of experiment names. No name filter is applied when
                  ``None`` or empty.
    :param view_type: View type mapped, via ``LifecycleStage.view_type_to_stages``, to the
                      lifecycle stages that are included.
    :param eager: If ``True``, eagerly loads each experiment's tags. If ``False``, these tags
                  are not eagerly loaded and will be loaded if/when their corresponding
                  object properties are accessed from a resulting ``SqlExperiment`` object.
    :return: The result of ``.all()`` on the filtered ``SqlExperiment`` query.
    """
    stages = LifecycleStage.view_type_to_stages(view_type)
    conditions = [SqlExperiment.lifecycle_stage.in_(stages)]
    # Truthiness already covers both ``None`` and empty collections.
    if ids:
        conditions.append(SqlExperiment.experiment_id.in_([int(eid) for eid in ids]))
    if names:
        conditions.append(SqlExperiment.name.in_(names))
    query_options = self._get_eager_experiment_query_options() if eager else []
    return session \
        .query(SqlExperiment) \
        .options(*query_options) \
        .filter(*conditions) \
        .all()
def _get_experiment(self, session, experiment_id, view_type, eager=False):
    """
    Fetch the ``SqlExperiment`` with the given ID whose lifecycle stage is allowed by
    ``view_type``.

    :param session: Session object on which the query is issued.
    :param experiment_id: ID to look up. A falsy value selects
                          ``SqlAlchemyStore.DEFAULT_EXPERIMENT_ID`` instead.
    :param view_type: View type mapped, via ``LifecycleStage.view_type_to_stages``, to the
                      lifecycle stages that are accepted.
    :param eager: If ``True``, eagerly loads the experiment's tags. If ``False``, these tags
                  are not eagerly loaded and will be loaded if/when their corresponding
                  object properties are accessed from the resulting ``SqlExperiment`` object.
    :return: The matching ``SqlExperiment``.
    :raises MlflowException: With ``RESOURCE_DOES_NOT_EXIST`` when no row matches.
    """
    if not experiment_id:
        experiment_id = SqlAlchemyStore.DEFAULT_EXPERIMENT_ID
    allowed_stages = LifecycleStage.view_type_to_stages(view_type)

    load_options = []
    if eager:
        load_options = self._get_eager_experiment_query_options()

    lookup = session.query(SqlExperiment).options(*load_options)
    found = lookup.filter(
        SqlExperiment.experiment_id == experiment_id,
        SqlExperiment.lifecycle_stage.in_(allowed_stages),
    ).one_or_none()

    if found is not None:
        return found
    raise MlflowException(
        "No Experiment with id={} exists".format(experiment_id),
        RESOURCE_DOES_NOT_EXIST)
def _search_runs(self, experiment_ids, filter_string, run_view_type, max_results, order_by,
                 page_token):
    """
    Search runs in the given experiments, then sort and paginate the results in Python.

    :param experiment_ids: Experiment IDs whose runs are considered.
    :param filter_string: Search filter, parsed with ``SearchUtils.parse_search_filter``.
    :param run_view_type: View type mapped to the run lifecycle stages that are included.
    :param max_results: Page size; must not exceed ``SEARCH_MAX_RESULTS_THRESHOLD``.
    :param order_by: Ordering specification handed to ``SearchUtils.sort``.
    :param page_token: Pagination token handed to ``SearchUtils.paginate``.
    :return: Tuple ``(runs, next_page_token)`` as produced by ``SearchUtils.paginate``.
    :raises MlflowException: With ``INVALID_PARAMETER_VALUE`` when ``max_results`` is above
                             the threshold.
    """
    # TODO: push search query into backend database layer
    if max_results > SEARCH_MAX_RESULTS_THRESHOLD:
        message = (
            "Invalid value for request parameter max_results. It must be at "
            "most {}, but got value {}".format(SEARCH_MAX_RESULTS_THRESHOLD, max_results))
        raise MlflowException(message, INVALID_PARAMETER_VALUE)

    allowed_stages = set(LifecycleStage.view_type_to_stages(run_view_type))

    with self.ManagedSessionMaker() as session:
        run_query = session.query(SqlRun)
        parsed_filters = SearchUtils.parse_search_filter(filter_string)
        for clause in _get_sqlalchemy_filter_clauses(parsed_filters, session):
            run_query = run_query.join(clause)

        # Summary metrics, params, and tags are read by ``run.to_mlflow_entity()``; loading
        # them eagerly avoids the extra per-attribute queries a lazy loading model would
        # issue at access time.
        run_query = run_query.distinct().options(*self._get_eager_run_query_options())
        run_query = run_query.filter(
            SqlRun.experiment_id.in_(experiment_ids),
            SqlRun.lifecycle_stage.in_(allowed_stages),
            *_get_attributes_filtering_clauses(parsed_filters))
        entities = [row.to_mlflow_entity() for row in run_query.all()]

    ordered = SearchUtils.sort(entities, order_by)
    page, next_page_token = SearchUtils.paginate(ordered, page_token, max_results)
    return page, next_page_token
class SqlRun(Base):
    """
    DB model for :py:class:`mlflow.entities.Run`. These are recorded in ``runs`` table.
    """
    __tablename__ = "runs"

    run_uuid = Column(String(32), nullable=False)
    """
    Run UUID: `String` (limit 32 characters). *Primary Key* for ``runs`` table.
    """
    name = Column(String(250))
    """
    Run name: `String` (limit 250 characters).
    """
    source_type = Column(String(20), default=SourceType.to_string(SourceType.LOCAL))
    """
    Source Type: `String` (limit 20 characters). Defaults to ``LOCAL``; restricted by a check
    constraint to the values in ``SourceTypes``.
    """
    source_name = Column(String(500))
    """
    Name of source recording the run: `String` (limit 500 characters).
    """
    entry_point_name = Column(String(50))
    """
    Entry-point name that launched the run: `String` (limit 50 characters).
    """
    user_id = Column(String(256), nullable=True, default=None)
    """
    User ID: `String` (limit 256 characters). Defaults to ``null``.
    """
    status = Column(String(20), default=RunStatus.to_string(RunStatus.SCHEDULED))
    """
    Run Status: `String` (limit 20 characters). Defaults to ``SCHEDULED``; restricted by a
    check constraint to the values in ``RunStatusTypes``.
    """
    # The default must be a callable: a plain ``int(time.time())`` would be evaluated once,
    # when this module is imported, and every defaulted row would get that stale value.
    start_time = Column(BigInteger, default=lambda: int(time.time()))
    """
    Run start time: `BigInteger`. Defaults to the system time (whole seconds) at the moment
    the default is applied.
    """
    end_time = Column(BigInteger, nullable=True, default=None)
    """
    Run end time: `BigInteger`.
    """
    source_version = Column(String(50))
    """
    Source version: `String` (limit 50 characters).
    """
    lifecycle_stage = Column(String(20), default=LifecycleStage.ACTIVE)
    """
    Lifecycle Stage of run: `String` (limit 20 characters). Defaults to
    ``LifecycleStage.ACTIVE``; restricted by a check constraint to the stages returned by
    ``LifecycleStage.view_type_to_stages(ViewType.ALL)``.
    """
    artifact_uri = Column(String(200), default=None)
    """
    Default artifact location for this run: `String` (limit 200 characters).
    """
    experiment_id = Column(Integer, ForeignKey("experiments.experiment_id"))
    """
    Experiment ID to which this run belongs to: *Foreign Key* into ``experiments`` table.
    """
    experiment = relationship("SqlExperiment", backref=backref("runs", cascade="all"))
    """
    SQLAlchemy relationship (many:one) with
    :py:class:`mlflow.store.dbmodels.models.SqlExperiment`.
    """

    __table_args__ = (
        CheckConstraint(source_type.in_(SourceTypes), name="source_type"),
        CheckConstraint(status.in_(RunStatusTypes), name="status"),
        CheckConstraint(
            lifecycle_stage.in_(
                LifecycleStage.view_type_to_stages(ViewType.ALL)),
            name="runs_lifecycle_stage",
        ),
        PrimaryKeyConstraint("run_uuid", name="run_pk"),
    )

    @staticmethod
    def get_attribute_name(mlflow_attribute_name):
        """
        Resolves an MLflow attribute name to a `SqlRun` attribute name.

        :param mlflow_attribute_name: MLflow search attribute name.
        :return: The same name, unchanged.
        """
        # Currently, MLflow Search attributes defined in `SearchUtils.VALID_SEARCH_ATTRIBUTE_KEYS`
        # share the same names as their corresponding `SqlRun` attributes. Therefore, this function
        # returns the same attribute name
        return mlflow_attribute_name

    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        The run's ``latest_metrics``, ``params`` and ``tags`` are each converted with their
        own ``to_mlflow_entity()``.

        :return: :py:class:`mlflow.entities.Run`.
        """
        run_info = RunInfo(
            run_uuid=self.run_uuid,
            run_id=self.run_uuid,
            experiment_id=str(self.experiment_id),
            user_id=self.user_id,
            status=self.status,
            start_time=self.start_time,
            end_time=self.end_time,
            lifecycle_stage=self.lifecycle_stage,
            artifact_uri=self.artifact_uri,
        )

        run_data = RunData(
            metrics=[m.to_mlflow_entity() for m in self.latest_metrics],
            params=[p.to_mlflow_entity() for p in self.params],
            tags=[t.to_mlflow_entity() for t in self.tags],
        )

        return Run(run_info=run_info, run_data=run_data)
def _list_runs(self, experiment_id, run_view_type):
    """
    Return the runs of one experiment whose lifecycle stage is allowed by ``run_view_type``.

    The experiment itself is looked up with ``ViewType.ALL``, so its own lifecycle stage
    does not exclude it.

    :param experiment_id: ID of the experiment whose runs are listed.
    :param run_view_type: View type mapped to the run lifecycle stages that are kept.
    :return: List of the experiment's runs in an allowed lifecycle stage.
    """
    # NOTE(review): presumably ``.first()`` yields ``None`` when nothing matches, in which
    # case the ``.runs`` access below raises ``AttributeError`` -- confirm against
    # ``_list_experiments``.
    matches = self._list_experiments(ids=[experiment_id], view_type=ViewType.ALL)
    experiment = matches.first()
    allowed_stages = set(LifecycleStage.view_type_to_stages(run_view_type))
    return [
        candidate
        for candidate in experiment.runs
        if candidate.lifecycle_stage in allowed_stages
    ]
class SqlRun(Base):
    """
    DB model for :py:class:`mlflow.entities.Run`. These are recorded in ``runs`` table.
    """
    __tablename__ = 'runs'

    run_uuid = Column(String(32), nullable=False)
    """
    Run UUID: `String` (limit 32 characters). *Primary Key* for ``runs`` table.
    """
    name = Column(String(250))
    """
    Run name: `String` (limit 250 characters).
    """
    source_type = Column(String(20), default=SourceType.to_string(SourceType.LOCAL))
    """
    Source Type: `String` (limit 20 characters). Defaults to ``LOCAL``; restricted by a check
    constraint to the values in ``SourceTypes``.
    """
    source_name = Column(String(500))
    """
    Name of source recording the run: `String` (limit 500 characters).
    """
    entry_point_name = Column(String(50))
    """
    Entry-point name that launched the run: `String` (limit 50 characters).
    """
    user_id = Column(String(256), nullable=True, default=None)
    """
    User ID: `String` (limit 256 characters). Defaults to ``null``.
    """
    status = Column(String(20), default=RunStatus.to_string(RunStatus.SCHEDULED))
    """
    Run Status: `String` (limit 20 characters). Defaults to ``SCHEDULED``; restricted by a
    check constraint to the values in ``RunStatusTypes``.
    """
    # The default must be a callable: a plain ``int(time.time())`` would be evaluated once,
    # when this module is imported, and every defaulted row would get that stale value.
    start_time = Column(BigInteger, default=lambda: int(time.time()))
    """
    Run start time: `BigInteger`. Defaults to the system time (whole seconds) at the moment
    the default is applied.
    """
    end_time = Column(BigInteger, nullable=True, default=None)
    """
    Run end time: `BigInteger`.
    """
    source_version = Column(String(50))
    """
    Source version: `String` (limit 50 characters).
    """
    lifecycle_stage = Column(String(20), default=LifecycleStage.ACTIVE)
    """
    Lifecycle Stage of run: `String` (limit 20 characters). Defaults to
    ``LifecycleStage.ACTIVE``; restricted by a check constraint to the stages returned by
    ``LifecycleStage.view_type_to_stages(ViewType.ALL)``.
    """
    artifact_uri = Column(String(200), default=None)
    """
    Default artifact location for this run: `String` (limit 200 characters).
    """
    experiment_id = Column(Integer, ForeignKey('experiments.experiment_id'))
    """
    Experiment ID to which this run belongs to: *Foreign Key* into ``experiments`` table.
    """
    experiment = relationship('SqlExperiment', backref=backref('runs', cascade='all'))
    """
    SQLAlchemy relationship (many:one) with
    :py:class:`mlflow.store.dbmodels.models.SqlExperiment`.
    """

    __table_args__ = (
        CheckConstraint(source_type.in_(SourceTypes), name='source_type'),
        CheckConstraint(status.in_(RunStatusTypes), name='status'),
        CheckConstraint(lifecycle_stage.in_(LifecycleStage.view_type_to_stages(ViewType.ALL)),
                        name='runs_lifecycle_stage'),
        PrimaryKeyConstraint('run_uuid', name='run_pk')
    )

    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        Only one metric per key is kept: the one with the greatest
        ``(step, timestamp, value)`` tuple, with later entries winning ties.

        :return: :py:class:`mlflow.entities.Run`.
        """
        run_info = RunInfo(
            run_uuid=self.run_uuid,
            run_id=self.run_uuid,
            experiment_id=str(self.experiment_id),
            user_id=self.user_id,
            status=self.status,
            start_time=self.start_time,
            end_time=self.end_time,
            lifecycle_stage=self.lifecycle_stage,
            artifact_uri=self.artifact_uri)

        # only get latest recorded metrics per key
        all_metrics = [m.to_mlflow_entity() for m in self.metrics]
        metrics = {}
        for m in all_metrics:
            existing_metric = metrics.get(m.key)
            if (existing_metric is None) \
                    or ((m.step, m.timestamp, m.value) >=
                        (existing_metric.step, existing_metric.timestamp,
                         existing_metric.value)):
                metrics[m.key] = m

        run_data = RunData(
            metrics=list(metrics.values()),
            params=[p.to_mlflow_entity() for p in self.params],
            tags=[t.to_mlflow_entity() for t in self.tags])

        return Run(run_info=run_info, run_data=run_data)
class SqlRun(Base):
    """
    DB model for :py:class:`mlflow.entities.Run`. These are recorded in ``runs`` table.
    """
    __tablename__ = 'runs'

    run_uuid = Column(String(32), nullable=False)
    """
    Run UUID: `String` (limit 32 characters). *Primary Key* for ``runs`` table.
    """
    name = Column(String(250))
    """
    Run name: `String` (limit 250 characters).
    """
    source_type = Column(String(20), default=SourceType.to_string(SourceType.LOCAL))
    """
    Source Type: `String` (limit 20 characters). Defaults to ``LOCAL``; restricted by a check
    constraint to the values in ``SourceTypes``.
    """
    source_name = Column(String(500))
    """
    Name of source recording the run: `String` (limit 500 characters).
    """
    entry_point_name = Column(String(50))
    """
    Entry-point name that launched the run: `String` (limit 50 characters).
    """
    user_id = Column(String(256), nullable=True, default=None)
    """
    User ID: `String` (limit 256 characters). Defaults to ``null``.
    """
    status = Column(String(20), default=RunStatus.to_string(RunStatus.SCHEDULED))
    """
    Run Status: `String` (limit 20 characters). Defaults to ``SCHEDULED``; restricted by a
    check constraint to the values in ``RunStatusTypes``.
    """
    # The default must be a callable: a plain ``int(time.time())`` would be evaluated once,
    # when this module is imported, and every defaulted row would get that stale value.
    start_time = Column(BigInteger, default=lambda: int(time.time()))
    """
    Run start time: `BigInteger`. Defaults to the system time (whole seconds) at the moment
    the default is applied.
    """
    end_time = Column(BigInteger, nullable=True, default=None)
    """
    Run end time: `BigInteger`.
    """
    source_version = Column(String(50))
    """
    Source version: `String` (limit 50 characters).
    """
    lifecycle_stage = Column(String(20), default=LifecycleStage.ACTIVE)
    """
    Lifecycle Stage of run: `String` (limit 20 characters). Defaults to
    ``LifecycleStage.ACTIVE``; restricted by a check constraint to the stages returned by
    ``LifecycleStage.view_type_to_stages(ViewType.ALL)``.
    """
    artifact_uri = Column(String(200), default=None)
    """
    Default artifact location for this run: `String` (limit 200 characters).
    """
    experiment_id = Column(Integer, ForeignKey('experiments.experiment_id'))
    """
    Experiment ID to which this run belongs to: *Foreign Key* into ``experiments`` table.
    """
    experiment = relationship('SqlExperiment', backref=backref('runs', cascade='all'))
    """
    SQLAlchemy relationship (many:one) with
    :py:class:`mlflow.store.dbmodels.models.SqlExperiment`.
    """

    # NOTE(review): the lifecycle CHECK constraint is named 'lifecycle_stage', the same name
    # ``SqlExperiment`` uses on the ``experiments`` table. Some backends may require
    # constraint names to be unique across tables -- confirm; renaming needs a migration.
    __table_args__ = (
        CheckConstraint(source_type.in_(SourceTypes), name='source_type'),
        CheckConstraint(status.in_(RunStatusTypes), name='status'),
        CheckConstraint(lifecycle_stage.in_(LifecycleStage.view_type_to_stages(ViewType.ALL)),
                        name='lifecycle_stage'),
        PrimaryKeyConstraint('run_uuid', name='run_pk')
    )

    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        :return: :py:class:`mlflow.entities.Run`.
        """
        # run has diff parameter names in __init__ than in properties_ so we do this manually
        info = _create_entity(RunInfo, self)
        data = _create_entity(RunData, self)
        return Run(run_info=info, run_data=data)
RunStatus.to_string(RunStatus.SCHEDULED),
    RunStatus.to_string(RunStatus.FAILED),
    RunStatus.to_string(RunStatus.FINISHED),
    RunStatus.to_string(RunStatus.RUNNING),
    RunStatus.to_string(RunStatus.KILLED)
]

# Certain SQL backends (e.g., SQLite) do not preserve CHECK constraints during migrations.
# For these backends, CHECK constraints must be specified as table arguments. Here, we define
# the collection of CHECK constraints that should be preserved when performing the migration.
# The "status" constraint is excluded from this set because it is explicitly modified
# within the migration's `upgrade()` routine.
check_constraint_table_args = [
    CheckConstraint(SqlRun.source_type.in_(SourceTypes), name='source_type'),
    CheckConstraint(SqlRun.lifecycle_stage.in_(
        LifecycleStage.view_type_to_stages(ViewType.ALL)), name='runs_lifecycle_stage'),
]


def upgrade():
    """
    Change the ``status`` column of the ``runs`` table to a non-native ``Enum`` over
    ``new_run_statuses``.

    The change runs inside a batch operation that is given ``check_constraint_table_args``
    as table arguments.
    """
    with op.batch_alter_table(
            "runs", table_args=check_constraint_table_args) as batch_op:
        # Transform the "status" column to an `Enum` and define a new check constraint. Specify
        # `native_enum=False` to create a check constraint rather than a
        # database-backend-dependent enum (see https://docs.sqlalchemy.org/en/13/core/
        # type_basics.html#sqlalchemy.types.Enum.params.native_enum)
        batch_op.alter_column("status",
                              type_=Enum(*new_run_statuses, create_constraint=True,
                                         native_enum=False))