def create_run(self, experiment_id, user_id, start_time, tags):
    """Create a new run under an experiment and persist it.

    A fresh hex UUID is generated as the run id. The run's artifact URI is built by
    appending that id and ``SqlAlchemyStore.ARTIFACTS_FOLDER_NAME`` to the
    experiment's artifact location. The run is saved with status RUNNING,
    lifecycle stage ACTIVE, no end time, and empty name/source fields.

    :param experiment_id: Id of the experiment that will own the run. The
                          experiment is passed to ``_check_experiment_is_active``
                          before anything is created.
    :param user_id: User id recorded on the run.
    :param start_time: Start time recorded on the run, stored as given.
    :param tags: Iterable of objects exposing ``key`` and ``value`` attributes, or
                 ``None`` / empty for a run without tags. When the same key
                 appears more than once, the last value wins.
    :return: The value of ``to_mlflow_entity()`` for the saved ``SqlRun``.
    """
    with self.ManagedSessionMaker() as session:
        experiment = self.get_experiment(experiment_id)
        self._check_experiment_is_active(experiment)

        run_id = uuid.uuid4().hex
        artifact_location = append_to_uri_path(
            experiment.artifact_location,
            run_id,
            SqlAlchemyStore.ARTIFACTS_FOLDER_NAME,
        )
        run = SqlRun(
            name="",
            artifact_uri=artifact_location,
            run_uuid=run_id,
            experiment_id=experiment_id,
            source_type=SourceType.to_string(SourceType.UNKNOWN),
            source_name="",
            entry_point_name="",
            user_id=user_id,
            status=RunStatus.to_string(RunStatus.RUNNING),
            start_time=start_time,
            end_time=None,
            source_version="",
            lifecycle_stage=LifecycleStage.ACTIVE,
        )

        # Collapse repeated keys (last value wins) so that at most one SqlTag is
        # created per key. ``tags`` may be None, which means "no tags".
        tags_dict = {tag.key: tag.value for tag in tags or ()}
        run.tags = [SqlTag(key=key, value=value) for key, value in tags_dict.items()]
        self._save_to_db(objs=run, session=session)

        # Convert while the session is still open.
        return run.to_mlflow_entity()
def _get_orderby_clauses(order_by_list, session):
    """Translate ``order_by`` strings into SQL ordering clauses for a run search.

    The caller's orderings are emitted first, in the order given. The natural run
    ordering is always appended afterwards: start time descending, then run id to
    break ties.

    :param order_by_list: Iterable of order-by strings understood by
                          ``SearchUtils.parse_order_by_for_search_runs``; may be
                          ``None`` or empty.
    :param session: Session used to build one subquery per metric/tag/param key.
    :return: ``(clauses, ordering_joins)``: the list of ordering expressions and
             the list of subqueries that are meant to be joined into the main
             query so the sorted-on values are available.
    :raises MlflowException: with ``INVALID_PARAMETER_VALUE`` when a key is neither
                             an attribute, a metric, a tag nor a param.
    """
    clauses = []
    ordering_joins = []

    # Unlike filters, attribute orderings and joined-table orderings cannot be
    # handled in separate passes: every clause has to stay in the caller's order.
    for clause_id, order_by_clause in enumerate(order_by_list or (), start=1):
        key_type, key, ascending = SearchUtils.parse_order_by_for_search_runs(
            order_by_clause
        )

        if SearchUtils.is_attribute(key_type, "="):
            order_value = getattr(SqlRun, SqlRun.get_attribute_name(key))
        else:
            # "=" merely stands in for "any valid comparator" in these checks.
            if SearchUtils.is_metric(key_type, "="):
                entity = SqlLatestMetric
            elif SearchUtils.is_tag(key_type, "="):
                entity = SqlTag
            elif SearchUtils.is_param(key_type, "="):
                entity = SqlParam
            else:
                raise MlflowException(
                    "Invalid identifier type '%s'" % key_type,
                    error_code=INVALID_PARAMETER_VALUE,
                )

            # Build the subquery up front: it gets joined into the main request so
            # the value being sorted on exists when the ordering is applied.
            subquery = session.query(entity).filter(entity.key == key).subquery()
            ordering_joins.append(subquery)
            order_value = subquery.c.value

        # sqlite has no NULLS LAST, so rank by presence of the value first (and by
        # is_nan for metrics) and only then by the value itself. The subqueries are
        # created independently but end up in one query, hence a unique label per
        # CASE column to avoid ambiguity.
        if SearchUtils.is_metric(key_type, "="):
            missing_value_cases = [
                (subquery.c.is_nan.is_(True), 1),
                (order_value.is_(None), 1),
            ]
        else:
            # Only metrics carry an 'is_nan' field.
            missing_value_cases = [(order_value.is_(None), 1)]
        clauses.append(
            sql.case(missing_value_cases, else_=0).label("clause_%s" % clause_id)
        )

        clauses.append(order_value if ascending else order_value.desc())

    # Natural ordering, always last.
    clauses.append(SqlRun.start_time.desc())
    clauses.append(SqlRun.run_uuid)
    return clauses, ordering_joins
def _get_attributes_filtering_clauses(parsed): clauses = [] for sql_statement in parsed: key_type = sql_statement.get('type') key_name = sql_statement.get('key') value = sql_statement.get('value') comparator = sql_statement.get('comparator') if SearchUtils.is_attribute(key_type, comparator): # validity of the comparator is checked in SearchUtils.parse_search_filter() op = SearchUtils.filter_ops.get(comparator) if op: # key_name is guaranteed to be a valid searchable attribute of entities.RunInfo # by the call to parse_search_filter attribute_name = SqlRun.get_attribute_name(key_name) clauses.append(op(getattr(SqlRun, attribute_name), value)) return clauses
def _get_attributes_filtering_clauses(parsed): clauses = [] for sql_statement in parsed: key_type = sql_statement.get("type") key_name = sql_statement.get("key") value = sql_statement.get("value") comparator = sql_statement.get("comparator").upper() if SearchUtils.is_attribute(key_type, comparator): # key_name is guaranteed to be a valid searchable attribute of entities.RunInfo # by the call to parse_search_filter attribute = getattr(SqlRun, SqlRun.get_attribute_name(key_name)) if comparator in SearchUtils.CASE_INSENSITIVE_STRING_COMPARISON_OPERATORS: op = SearchUtils.get_sql_filter_ops(attribute, comparator) clauses.append(op(value)) elif comparator in SearchUtils.filter_ops: op = SearchUtils.filter_ops.get(comparator) clauses.append(op(attribute, value)) return clauses
def _get_orderby_clauses(order_by_list, session):
    """Translate ``order_by`` strings into SQL ordering clauses for a run search.

    The caller's orderings are emitted first, in the order given. The natural run
    ordering is appended afterwards: start time descending (skipped when the
    caller already orders by start time), then run id to break ties.

    :param order_by_list: Iterable of order-by strings understood by
                          ``SearchUtils.parse_order_by_for_search_runs``; may be
                          ``None`` or empty.
    :param session: Session used to build one subquery per metric/tag/param key.
    :return: ``(clauses, ordering_joins)``: the list of ordering expressions and
             the list of subqueries that are meant to be joined into the main
             query so the sorted-on values are available.
    :raises MlflowException: with ``INVALID_PARAMETER_VALUE`` when the same
                             ``(key_type, key)`` is requested twice, or when a key
                             is neither an attribute, a metric, a tag nor a param.
    """
    clauses = []
    ordering_joins = []
    observed_order_by_clauses = set()

    # Unlike filters, attribute orderings and joined-table orderings cannot be
    # handled in separate passes: every clause has to stay in the caller's order.
    for clause_id, order_by_clause in enumerate(order_by_list or (), start=1):
        key_type, key, ascending = SearchUtils.parse_order_by_for_search_runs(
            order_by_clause
        )

        # Reject duplicates before building any subquery or CASE expression for
        # them, and report them as invalid input like the identifier-type error
        # below.
        if (key_type, key) in observed_order_by_clauses:
            raise MlflowException(
                "`order_by` contains duplicate fields: {}".format(order_by_list),
                error_code=INVALID_PARAMETER_VALUE,
            )
        observed_order_by_clauses.add((key_type, key))

        if SearchUtils.is_string_attribute(
            key_type, key, "="
        ) or SearchUtils.is_numeric_attribute(key_type, key, "="):
            order_value = getattr(SqlRun, SqlRun.get_attribute_name(key))
        else:
            # "=" merely stands in for "any valid comparator" in these checks.
            if SearchUtils.is_metric(key_type, "="):
                entity = SqlLatestMetric
            elif SearchUtils.is_tag(key_type, "="):
                entity = SqlTag
            elif SearchUtils.is_param(key_type, "="):
                entity = SqlParam
            else:
                raise MlflowException(
                    "Invalid identifier type '%s'" % key_type,
                    error_code=INVALID_PARAMETER_VALUE,
                )

            # Build the subquery up front: it gets joined into the main request so
            # the value being sorted on exists when the ordering is applied.
            subquery = session.query(entity).filter(entity.key == key).subquery()
            ordering_joins.append(subquery)
            order_value = subquery.c.value

        # sqlite has no NULLS LAST, so rank by presence of the value first (and by
        # is_nan for metrics) and only then by the value itself. The subqueries are
        # created independently but end up in one query, hence a unique label per
        # CASE column to avoid ambiguity.
        if SearchUtils.is_metric(key_type, "="):
            missing_value_cases = [
                # "IS" would be the preferred operator here, but because of how
                # sqlalchemy translates it for MSSQL, "=" is used instead. The two
                # are functionally identical as long as the is_nan column is not
                # nullable; revisit this if that precondition ever changes.
                (subquery.c.is_nan == sqlalchemy.true(), 1),
                (order_value.is_(None), 1),
            ]
        else:
            # Only metrics carry an 'is_nan' field.
            missing_value_cases = [(order_value.is_(None), 1)]
        clauses.append(
            sql.case(missing_value_cases, else_=0).label("clause_%s" % clause_id)
        )

        clauses.append(order_value if ascending else order_value.desc())

    # Natural ordering, always last. Do not order by start time a second time
    # when the caller already asked for it.
    start_time_key = (SearchUtils._ATTRIBUTE_IDENTIFIER, SqlRun.start_time.key)
    if start_time_key not in observed_order_by_clauses:
        clauses.append(SqlRun.start_time.desc())
    clauses.append(SqlRun.run_uuid)
    return clauses, ordering_joins