def parse_filter_for_model_registry(cls, filter_string):
    """Parse a model-registry search filter into a list of comparisons.

    Args:
        filter_string: SQL-like filter, e.g. ``name='myModelName'``. Any
            falsy value (``None`` or ``''``) means "no filter".

    Returns:
        list: ``[]`` for a falsy filter; otherwise one entry per
        ``Comparison`` token of the parsed statement, each produced by
        ``cls._get_comparison_for_model_registry``.

    Raises:
        MlflowException: with ``INVALID_PARAMETER_VALUE`` when sqlparse
            fails, the filter yields zero or several statements, or a
            token is rejected by ``cls._invalid_statement_token``.
    """
    # `not filter_string` already covers the empty string.
    if not filter_string:
        return []

    # Built with implicit concatenation so that no line-continuation
    # backslash or indentation leaks into the user-facing message.
    expected = ('Expected search filter with single comparison operator. '
                "e.g. name='myModelName'")
    try:
        parsed = sqlparse.parse(filter_string)
    except Exception:
        raise MlflowException(
            "Error while parsing filter '%s'. %s" % (filter_string, expected),
            error_code=INVALID_PARAMETER_VALUE)

    if len(parsed) == 0 or not isinstance(parsed[0], Statement):
        raise MlflowException(
            "Invalid filter '%s'. Could not be parsed. %s" %
            (filter_string, expected),
            error_code=INVALID_PARAMETER_VALUE)
    if len(parsed) > 1:
        raise MlflowException(
            "Search filter '%s' contains multiple expressions. "
            '%s ' % (filter_string, expected),
            error_code=INVALID_PARAMETER_VALUE)

    statement = parsed[0]
    invalids = [
        token for token in statement.tokens
        if cls._invalid_statement_token(token)
    ]
    if invalids:
        invalid_clauses = ', '.join("'%s'" % token for token in invalids)
        raise MlflowException(
            'Invalid clause(s) in filter string: %s. '
            '%s' % (invalid_clauses, expected),
            error_code=INVALID_PARAMETER_VALUE)

    return [
        cls._get_comparison_for_model_registry(si)
        for si in statement.tokens
        if isinstance(si, Comparison)
    ]
def parse_runs_uri(run_uri):
    """Split a ``runs:/`` URI into its run ID and artifact path.

    Args:
        run_uri: URI of the form
            ``runs:/<run_id>/run-relative/path/to/artifact``.

    Returns:
        tuple: ``(run_id, artifact_path)``; ``artifact_path`` is ``None``
        when nothing (or only a trailing slash) follows the run ID.

    Raises:
        MlflowException: if the scheme is not ``runs``, the path does not
            start with ``/``, or the run ID segment is empty.
    """

    def _malformed():
        # One place builds the message shared by all three failure modes.
        return MlflowException(
            'Not a proper runs:/ URI: %s. ' % run_uri +
            'Runs URIs must be of the form ' +
            "'runs:/<run_id>/run-relative/path/to/artifact'")

    components = urllib.parse.urlparse(run_uri)
    if components.scheme != 'runs':
        raise _malformed()

    # TODO: support the hostname (components.netloc) later.
    uri_path = components.path
    if len(uri_path) <= 1 or not uri_path.startswith('/'):
        raise _malformed()

    # Drop the leading slash; the first segment is the run ID and whatever
    # follows the first separator is the artifact path.
    run_id, _, remainder = uri_path[1:].partition('/')
    if not run_id:
        raise _malformed()
    return run_id, (remainder or None)
def _get_value(cls, identifier_type, token): if identifier_type == cls._METRIC_IDENTIFIER: if token.ttype not in cls.NUMERIC_VALUE_TYPES: raise MlflowException( 'Expected numeric value type for metric. ' 'Found {}'.format(token.value), error_code=INVALID_PARAMETER_VALUE) return token.value elif identifier_type == cls._PARAM_IDENTIFIER or identifier_type == cls._TAG_IDENTIFIER: # noqa: E501 if token.ttype in cls.STRING_VALUE_TYPES or isinstance( token, Identifier): return cls._strip_quotes(token.value, expect_quoted_value=True) raise MlflowException( 'Expected a quoted string value for ' "{identifier_type} (e.g. 'my-value'). Got value " '{value}'.format(identifier_type=identifier_type, value=token.value), error_code=INVALID_PARAMETER_VALUE) elif identifier_type == cls._ATTRIBUTE_IDENTIFIER: if token.ttype in cls.STRING_VALUE_TYPES or isinstance( token, Identifier): return cls._strip_quotes(token.value, expect_quoted_value=True) else: raise MlflowException( 'Expected a quoted string value for attributes. ' 'Got value {value}'.format(value=token.value), error_code=INVALID_PARAMETER_VALUE) else: # Expected to be either "param" or "metric". raise MlflowException( 'Invalid identifier type. Expected one of ' '{}.'.format([cls._METRIC_IDENTIFIER, cls._PARAM_IDENTIFIER]))
def parse_order_by(cls, order_by):
    """Parse one ``order_by`` clause into ``(type, key, is_ascending)``.

    Args:
        order_by: clause made of an identifier optionally followed by
            ``asc`` or ``desc`` (case-insensitive).

    Returns:
        tuple: identifier type, identifier key, and ``True`` for ascending
        order (the default) or ``False`` when the clause ends in ``desc``.

    Raises:
        MlflowException: with ``INVALID_PARAMETER_VALUE`` when sqlparse
            fails or the clause is not exactly one identifier token.
    """
    try:
        statements = sqlparse.parse(order_by)
    except Exception:
        raise MlflowException("Error on parsing order_by clause '%s'" %
                              order_by,
                              error_code=INVALID_PARAMETER_VALUE)

    def _unparseable():
        return MlflowException(
            "Invalid order_by clause '%s'. Could not be parsed." % order_by,
            error_code=INVALID_PARAMETER_VALUE)

    if not (len(statements) == 1 and isinstance(statements[0], Statement)):
        raise _unparseable()
    statement = statements[0]
    if not (len(statement.tokens) == 1
            and isinstance(statement[0], Identifier)):
        raise _unparseable()

    column = statement.tokens[0].value
    lowered = column.lower()
    ascending = True
    # Strip a trailing direction keyword, remembering the direction.
    if lowered.endswith(' desc'):
        ascending = False
        column = column[:-len(' desc')]
    elif lowered.endswith(' asc'):
        column = column[:-len(' asc')]

    resolved = cls._get_identifier(column.strip(),
                                   cls.VALID_ORDER_BY_ATTRIBUTE_KEYS)
    return (resolved['type'], resolved['key'], ascending)
def _validate_metric_name(name):
    """Check that ``name`` is a valid metric name.

    Args:
        name: candidate metric name.

    Raises:
        MlflowException: with ``INVALID_PARAMETER_VALUE`` when ``name`` is
            not a string, does not match ``_VALID_PARAM_AND_METRIC_NAMES``,
            or is flagged by ``path_not_unique``.
    """
    # Guard non-string input (e.g. None) explicitly: the regex match would
    # otherwise surface a raw TypeError instead of a validation error.
    if not isinstance(name, str) or \
            not _VALID_PARAM_AND_METRIC_NAMES.match(name):
        raise MlflowException(
            "Invalid metric name: '%s'. %s" % (name, _BAD_CHARACTERS_MESSAGE),
            INVALID_PARAMETER_VALUE)
    if path_not_unique(name):
        raise MlflowException(
            "Invalid metric name: '%s'. %s" % (name, bad_path_message(name)),
            INVALID_PARAMETER_VALUE)
def _validate_experiment_name(experiment_name):
    """Ensure ``experiment_name`` is a non-empty string.

    Args:
        experiment_name: candidate experiment name.

    Raises:
        MlflowException: with ``INVALID_PARAMETER_VALUE`` when the name is
            ``''`` or ``None``, or when ``is_string_type`` rejects it.
    """
    is_missing = experiment_name == '' or experiment_name is None
    if is_missing:
        raise MlflowException("Invalid experiment name: '%s'" %
                              experiment_name,
                              error_code=INVALID_PARAMETER_VALUE)

    if is_string_type(experiment_name):
        return
    raise MlflowException(
        'Invalid experiment name: %s. Expects a string.' % experiment_name,
        error_code=INVALID_PARAMETER_VALUE)
def matches_view_type(cls, view_type, lifecycle_stage):
    """Tell whether ``lifecycle_stage`` is visible under ``view_type``.

    Args:
        view_type: one of ``ViewType.ALL``, ``ViewType.ACTIVE_ONLY`` or
            ``ViewType.DELETED_ONLY``.
        lifecycle_stage: stage to test; must satisfy ``cls.is_valid``.

    Returns:
        bool: ``True`` for ``ViewType.ALL``; otherwise whether the stage
        equals the stage selected by the view type.

    Raises:
        MlflowException: for an invalid lifecycle stage or an unknown
            view type.
    """
    if not cls.is_valid(lifecycle_stage):
        raise MlflowException("Invalid lifecycle stage '%s'" %
                              str(lifecycle_stage))

    if view_type == ViewType.ALL:
        return True

    # Each restricted view accepts exactly one lifecycle stage.
    stage_for_view = (
        (ViewType.ACTIVE_ONLY, LifecycleStage.ACTIVE),
        (ViewType.DELETED_ONLY, LifecycleStage.DELETED),
    )
    for candidate_view, required_stage in stage_for_view:
        if view_type == candidate_view:
            return lifecycle_stage == required_stage

    raise MlflowException("Invalid view type '%s'" % str(view_type))
def _validate_run_id_exp_id(exp_id, run_id):
    """Check that ``exp_id`` and ``run_id`` are well formed and exist.

    Args:
        exp_id: experiment ID; must match ``_EXPERIMENT_ID_REGEX``.
        run_id: run ID; must match ``_RUN_ID_REGEX``.

    Raises:
        MlflowException: with ``INVALID_PARAMETER_VALUE`` when either ID
            fails its regex, or with ``RESOURCE_DOES_NOT_EXIST`` when
            ``<_ROOT_DIR>/<exp_id>/<run_id>`` is not a directory.
    """
    if _RUN_ID_REGEX.match(run_id) is None:
        raise MlflowException("Invalid run ID: '%s'" % run_id,
                              error_code=INVALID_PARAMETER_VALUE)
    if _EXPERIMENT_ID_REGEX.match(exp_id) is None:
        # Report the offending experiment ID (previously printed run_id).
        raise MlflowException("Invalid exp ID: '%s'" % exp_id,
                              error_code=INVALID_PARAMETER_VALUE)
    if not os.path.isdir(os.path.join(_ROOT_DIR, exp_id, run_id)):
        raise MlflowException(
            'RunId: {} doesnot exist under expID: {}'.format(run_id, exp_id),
            error_code=RESOURCE_DOES_NOT_EXIST)
def _get_identifier(cls, identifier, valid_attributes): try: entity_type, key = identifier.split('.', 1) except ValueError: raise MlflowException( "Invalid identifier '%s'. Columns should be specified as " "'attribute.<key>', 'metric.<key>', 'tag.<key>', or " "'param.'." % identifier, error_code=INVALID_PARAMETER_VALUE) identifier = cls._valid_entity_type(entity_type) key = cls._trim_backticks(cls._strip_quotes(key)) if identifier == cls._ATTRIBUTE_IDENTIFIER and key not in valid_attributes: # noqa: E501 raise MlflowException( "Invalid attribute key '{}' specified. Valid keys " " are '{}'".format(key, valid_attributes)) return {'type': identifier, 'key': key}
def _validate_batch_log_api_req(json_req):
    """Reject batched-logging requests larger than the allowed size.

    Args:
        json_req: request payload; only its ``len()`` is inspected.

    Raises:
        MlflowException: with ``INVALID_PARAMETER_VALUE`` when the length
            exceeds ``MAX_BATCH_LOG_REQUEST_SIZE``.
    """
    request_size = len(json_req)
    if request_size <= MAX_BATCH_LOG_REQUEST_SIZE:
        return

    template = (
        'Batched logging API requests must be at most {limit} bytes, got '
        'a request of size {size}.')
    raise MlflowException(
        template.format(limit=MAX_BATCH_LOG_REQUEST_SIZE, size=request_size),
        error_code=INVALID_PARAMETER_VALUE)
def set_project(experiment_id):
    """Set the given experiment as the active experiment.

    The ID is exported through the ``_EXPERIMENT_ID_ENV_VAR`` environment
    variable and stored in the module-level ``_active_experiment_id``.
    No experiment is created here: when the lookup yields nothing, an
    informational message is printed and the ID is still recorded.

    Args:
        experiment_id (str): id of the experiment to activate.

    Raises:
        MlflowException: if the experiment is in the deleted lifecycle
            stage.
    """
    global _active_experiment_id

    os.environ[_EXPERIMENT_ID_ENV_VAR] = experiment_id
    client = MlflowClient()
    experiment = client.get_experiment(experiment_id)
    # Test the looked-up experiment rather than the ID that was passed in;
    # otherwise a missing experiment falls through to the attribute access
    # below and fails with AttributeError on None.
    if experiment is None:
        print(f"INFO: '{experiment_id}' does not exist.")
    elif experiment.lifecycle_stage == LifecycleStage.DELETED:
        raise MlflowException(
            "Cannot set a deleted experiment '%s' as the active experiment."
            ' You can restore the experiment, or permanently delete the '
            ' experiment to create a new one.' % experiment.name)
    _active_experiment_id = experiment_id
def log_table(key, table, step=None):
    """Log a pandas DataFrame or an existing CSV file as a table artifact.

    A DataFrame is first written (without its index) to ``temp_table.csv``
    in the system temporary directory; a string is used as the CSV path
    as given. The artifact is attached to the run returned by
    ``_get_or_start_run``.

    Args:
        key (str): name of the table.
        table (pd.DataFrame, str): a DataFrame, or the path to a CSV file.
        step (None, optional): integer indicating the step at which the
            artifact was generated.

    Raises:
        MlflowException: if ``table`` is neither a string nor a DataFrame.
    """
    if isinstance(table, pd.DataFrame):
        csv_path = os.path.join(gettempdir(), 'temp_table.csv')
        table.to_csv(csv_path, index=False)
    elif isinstance(table, str):
        csv_path = table
    else:
        raise MlflowException(
            'table must be a pandas.DataFrame or a string to a csv file')

    active_run = _get_or_start_run()
    run_id = active_run.info.run_id
    experiment_id = active_run.info.experiment_id
    MlflowClient().log_artifact_lite(run_id,
                                     experiment_id,
                                     key,
                                     csv_path,
                                     artifact_type='table',
                                     step=step)
def _resolve_experiment_id(experiment_name=None, experiment_id=None): """Resolve experiment. Verifies either one or other is specified - cannot be both selected. If ``experiment_name`` is provided and does not exist, an experiment of that name is created and its id is returned. :param experiment_name: Name of experiment under which to launch the run. :param experiment_id: ID of experiment under which to launch the run. :return: str """ if experiment_name and experiment_id: raise MlflowException( "Specify only one of 'experiment_name' or 'experiment_id'.") if experiment_id: return str(experiment_id) if experiment_name: client = MlflowClient() exp = client.get_experiment_by_name(experiment_name) if exp: return exp.experiment_id else: print( "INFO: '{}' does not exist. Creating a new experiment".format( experiment_name)) return client.create_experiment(experiment_name) return _get_experiment_id()
def _does_run_match_clause(cls, run, sed): key_type = sed.get('type') key = sed.get('key') value = sed.get('value') comparator = sed.get('comparator') if cls.is_metric(key_type, comparator): lhs = run.data.metrics.get(key, None) value = float(value) elif cls.is_param(key_type, comparator): lhs = run.data.params.get(key, None) elif cls.is_tag(key_type, comparator): lhs = run.data.tags.get(key, None) elif cls.is_attribute(key_type, comparator): lhs = getattr(run.info, key) else: raise MlflowException("Invalid search expression type '%s'" % key_type, error_code=INVALID_PARAMETER_VALUE) if lhs is None: return False if comparator in cls.filter_ops.keys(): return cls.filter_ops.get(comparator)(lhs, value) else: return False
def refresh_token():
    """Exchange the stored refresh token for a new access token.

    Posts to ``<SEGMIND_API_URL>/auth/refresh-token`` with the value of
    the ``_REFRESH_TOKEN`` environment variable as a bearer token and, on
    HTTP 200, stores the returned ``access_token`` in the ``_ACCESS_TOKEN``
    environment variable.

    Raises:
        MlflowException: if the response status is not 200 (using the
            server-provided ``message`` when the body carries one), or if
            a 200 response carries no ``access_token``.
    """
    headers = {'authorization': f'Bearer {os.environ.get(_REFRESH_TOKEN)}'}
    response = requests.post(f'{SEGMIND_API_URL}/auth/refresh-token',
                             headers=headers)

    # Parse the body once; an error page may not be JSON at all, which
    # must not mask the real failure with a decoding error.
    try:
        payload = response.json()
    except ValueError:
        payload = None

    if response.status_code != 200:
        if isinstance(payload, dict) and 'message' in payload:
            raise MlflowException(payload['message'])
        raise MlflowException(
            'Token refresh failed with status code %s. Response body: %r' %
            (response.status_code, response.text))

    if not isinstance(payload, dict) or 'access_token' not in payload:
        raise MlflowException(
            'Token refresh response did not contain an access token. '
            'Response body: %r' % response.text)
    os.environ[_ACCESS_TOKEN] = payload['access_token']
def is_tag(cls, key_type, comparator):
    """Tell whether ``key_type`` is the tag identifier.

    Args:
        key_type: identifier type of the clause.
        comparator: comparator used by the clause.

    Returns:
        bool: ``True`` when ``key_type`` equals ``cls._TAG_IDENTIFIER``
        and the comparator is allowed, ``False`` for any other key type.

    Raises:
        MlflowException: with ``INVALID_PARAMETER_VALUE`` when the key is
            a tag but the comparator is not in
            ``cls.VALID_TAG_COMPARATORS``.
    """
    if key_type != cls._TAG_IDENTIFIER:
        return False
    if comparator not in cls.VALID_TAG_COMPARATORS:
        # Closing quote restored and error code added so the failure is
        # reported the same way is_param reports it.
        raise MlflowException(
            "Invalid comparator '%s' "
            "not one of '%s'" % (comparator, cls.VALID_TAG_COMPARATORS),
            error_code=INVALID_PARAMETER_VALUE)
    return True
def _validate_batch_limit(entity_name, limit, length): if length > limit: error_msg = ( 'A batch logging request can contain at most {limit} {name}. ' 'Got {count} {name}. Please split up {name} across multiple ' 'requests and try again.').format(name=entity_name, count=length, limit=limit) raise MlflowException(error_msg, error_code=INVALID_PARAMETER_VALUE)
def is_attribute(cls, key_type, comparator):
    """Tell whether ``key_type`` is the attribute identifier.

    Args:
        key_type: identifier type of the clause.
        comparator: comparator used by the clause.

    Returns:
        bool: ``True`` when ``key_type`` equals
        ``cls._ATTRIBUTE_IDENTIFIER`` and the comparator is allowed,
        ``False`` for any other key type.

    Raises:
        MlflowException: with ``INVALID_PARAMETER_VALUE`` when the key is
            an attribute but the comparator is not in
            ``cls.VALID_STRING_ATTRIBUTE_COMPARATORS``.
    """
    if key_type != cls._ATTRIBUTE_IDENTIFIER:
        return False
    if comparator not in cls.VALID_STRING_ATTRIBUTE_COMPARATORS:
        # Closing quote restored and error code added so the failure is
        # reported as a caller error rather than an internal one.
        raise MlflowException(
            "Invalid comparator '{}' not one of "
            "'{}'".format(comparator,
                          cls.VALID_STRING_ATTRIBUTE_COMPARATORS),
            error_code=INVALID_PARAMETER_VALUE)
    return True
def parse_search_filter(cls, filter_string):
    """Parse a run-search filter string into a list of comparisons.

    Args:
        filter_string: SQL-like filter; a falsy value means "no filter".

    Returns:
        list: ``[]`` for a falsy filter, otherwise the result of
        ``SearchUtils._process_statement`` on the single parsed statement.

    Raises:
        MlflowException: with ``INVALID_PARAMETER_VALUE`` when sqlparse
            fails or the filter yields zero or several statements.
    """
    if not filter_string:
        return []

    try:
        statements = sqlparse.parse(filter_string)
    except Exception:
        raise MlflowException("Error on parsing filter '%s'" % filter_string,
                              error_code=INVALID_PARAMETER_VALUE)

    nothing_parsed = len(statements) == 0 or \
        not isinstance(statements[0], Statement)
    if nothing_parsed:
        raise MlflowException("Invalid filter '%s'. Could not be parsed." %
                              filter_string,
                              error_code=INVALID_PARAMETER_VALUE)
    if len(statements) > 1:
        raise MlflowException(
            "Search filter contained multiple expression '%s'. "
            'Provide AND-ed expression list.' % filter_string,
            error_code=INVALID_PARAMETER_VALUE)

    only_statement = statements[0]
    return SearchUtils._process_statement(only_statement)
def is_param(cls, key_type, comparator):
    """Tell whether ``key_type`` is the param identifier.

    Args:
        key_type: identifier type of the clause.
        comparator: comparator used by the clause.

    Returns:
        bool: ``True`` when ``key_type`` equals ``cls._PARAM_IDENTIFIER``
        and the comparator is allowed, ``False`` for any other key type.

    Raises:
        MlflowException: with ``INVALID_PARAMETER_VALUE`` when the key is
            a param but the comparator is not in
            ``cls.VALID_PARAM_COMPARATORS``.
    """
    if key_type != cls._PARAM_IDENTIFIER:
        return False
    if comparator in cls.VALID_PARAM_COMPARATORS:
        return True
    raise MlflowException(
        "Invalid comparator '%s' "
        "not one of '%s'" % (comparator, cls.VALID_PARAM_COMPARATORS),
        error_code=INVALID_PARAMETER_VALUE)
def _process_statement(cls, statement): # check validity invalids = list(filter(cls._invalid_statement_token, statement.tokens)) if len(invalids) > 0: invalid_clauses = ', '.join("'%s'" % token for token in invalids) raise MlflowException('Invalid clause(s) in filter string: %s' % invalid_clauses, error_code=INVALID_PARAMETER_VALUE) return [ cls._get_comparison(si) for si in statement.tokens if isinstance(si, Comparison) ]
def _validate_metric(key, value, timestamp, step):
    """Check that a metric's key, value, timestamp and step are valid.

    Args:
        key: metric name, checked by ``_validate_metric_name``.
        value: metric value; must be a ``numbers.Number``.
        timestamp: must be a non-negative ``numbers.Number``.
        step: must be a ``numbers.Number``.

    Raises:
        MlflowException: with ``INVALID_PARAMETER_VALUE`` for the first
            check that fails (name, value, timestamp, then step).
    """

    def _is_number(candidate):
        return isinstance(candidate, numbers.Number)

    _validate_metric_name(key)

    if not _is_number(value):
        raise MlflowException(
            "Got invalid value %s for metric '%s' (timestamp=%s). Please "
            'specify value as a valid double (64-bit floating point)' %
            (value, key, timestamp), INVALID_PARAMETER_VALUE)

    if not _is_number(timestamp) or timestamp < 0:
        raise MlflowException(
            "Got invalid timestamp %s for metric '%s' (value=%s). Timestamp "
            'must be a nonnegative long (64-bit integer) ' %
            (timestamp, key, value), INVALID_PARAMETER_VALUE)

    if not _is_number(step):
        raise MlflowException(
            "Got invalid step %s for metric '%s' (value=%s). Step must be a "
            'valid long (64-bit integer).' % (step, key, value),
            INVALID_PARAMETER_VALUE)
def verify_rest_response(response, endpoint):
    """Return ``response`` if it succeeded, otherwise raise.

    Args:
        response: HTTP response exposing ``status_code`` and ``text``.
        endpoint: endpoint name used in the error message.

    Returns:
        The same ``response`` object when its status code is 200.

    Raises:
        RestException: for a non-200 response whose body
            ``_can_parse_as_json`` accepts; built from the decoded body.
        MlflowException: for any other non-200 response.
    """
    if response.status_code == 200:
        return response

    if _can_parse_as_json(response.text):
        raise RestException(json.loads(response.text))

    base_msg = 'API request to endpoint %s failed with error code ' \
               '%s != 200' % (endpoint, response.status_code)
    raise MlflowException("%s. Response body: '%s'" %
                          (base_msg, response.text))
def parse_start_offset_from_page_token(cls, page_token):
    """Decode a pagination token into a start offset.

    Note: the page_token is expected to be a base64-encoded JSON that
    looks like ``{ "offset": xxx }``. However, this format is not stable,
    so it should not be relied upon outside of this method.

    Args:
        page_token: opaque token from a previous page, or a falsy value
            for the first page.

    Returns:
        int: ``0`` for a falsy token, otherwise the decoded offset.

    Raises:
        MlflowException: with ``INVALID_PARAMETER_VALUE`` when the token
            cannot be base64-decoded, is not JSON, is not a JSON object,
            has a missing/falsy ``offset``, or the offset cannot be
            converted to an int.
    """
    if not page_token:
        return 0

    try:
        decoded_token = base64.b64decode(page_token)
    except (TypeError, base64.binascii.Error):
        raise MlflowException('Invalid page token, could not base64-decode',
                              error_code=INVALID_PARAMETER_VALUE)

    try:
        parsed_token = json.loads(decoded_token)
    except ValueError:
        raise MlflowException('Invalid page token, decoded value=%s' %
                              decoded_token,
                              error_code=INVALID_PARAMETER_VALUE)

    # The token is client-supplied: valid JSON that is not an object
    # (e.g. a list or a number) has no `.get` and must be rejected as a
    # bad token rather than crash with AttributeError.
    if not isinstance(parsed_token, dict):
        raise MlflowException('Invalid page token, parsed value=%s' %
                              (parsed_token, ),
                              error_code=INVALID_PARAMETER_VALUE)

    offset_str = parsed_token.get('offset')
    if not offset_str:
        raise MlflowException('Invalid page token, parsed value=%s' %
                              parsed_token,
                              error_code=INVALID_PARAMETER_VALUE)

    try:
        return int(offset_str)
    except (TypeError, ValueError):
        # TypeError covers non-scalar offsets such as lists or objects.
        raise MlflowException('Invalid page token, not stringable %s' %
                              (offset_str, ),
                              error_code=INVALID_PARAMETER_VALUE)
def __init__(self, host, username=None, password=None, token=None, ignore_tls_verification=False): if not host: raise MlflowException( 'host is a required parameter for MlflowHostCreds') self.host = host self.username = username self.password = password self.token = token self.ignore_tls_verification = ignore_tls_verification
def _validate_comparison(cls, tokens):
    """Validate the whitespace-stripped tokens of one comparison clause.

    A valid clause is exactly ``<identifier> <comparison operator>
    <value>``, where the value is a token whose type is in
    ``cls.STRING_VALUE_TYPES`` / ``cls.NUMERIC_VALUE_TYPES`` or is an
    ``Identifier`` instance.

    Args:
        tokens: list of the clause's non-whitespace tokens.

    Raises:
        MlflowException: with ``INVALID_PARAMETER_VALUE`` when the clause
            does not have that shape.
    """
    base_error_string = 'Invalid comparison clause'
    if len(tokens) != 3:
        raise MlflowException(
            '{}. Expected 3 tokens found {}'.format(base_error_string,
                                                    len(tokens)),
            error_code=INVALID_PARAMETER_VALUE)

    key_token, operator_token, value_token = tokens[0], tokens[1], tokens[2]

    if not isinstance(key_token, Identifier):
        raise MlflowException(
            "{}. Expected 'Identifier' found '{}'".format(
                base_error_string, str(key_token)),
            error_code=INVALID_PARAMETER_VALUE)

    # The two checks below previously combined their conditions as
    # `not isinstance(x, Token) and ...`; since every parsed token is a
    # Token that was always False, so neither check could ever fire.
    if (not isinstance(operator_token, Token)
            or operator_token.ttype != TokenType.Operator.Comparison):
        raise MlflowException(
            "{}. Expected comparison found '{}'".format(
                base_error_string, str(operator_token)),
            error_code=INVALID_PARAMETER_VALUE)

    allowed_value_types = cls.STRING_VALUE_TYPES.union(
        cls.NUMERIC_VALUE_TYPES)
    is_value_token = isinstance(value_token, Token) and (
        value_token.ttype in allowed_value_types
        or isinstance(value_token, Identifier))
    if not is_value_token:
        raise MlflowException(
            "{}. Expected value token found '{}'".format(
                base_error_string, str(value_token)),
            error_code=INVALID_PARAMETER_VALUE)
def _get_comparison_for_model_registry(cls, comparison): stripped_comparison = [ token for token in comparison.tokens if not token.is_whitespace ] cls._validate_comparison(stripped_comparison) key = stripped_comparison[0].value if key not in cls.VALID_SEARCH_KEYS_FOR_MODEL_REGISTRY: raise MlflowException( "Invalid attribute key '{}' specified. Valid keys " " are '{}'".format(key, cls.VALID_SEARCH_KEYS_FOR_MODEL_REGISTRY)) value_token = stripped_comparison[2] if value_token.ttype not in cls.STRING_VALUE_TYPES: raise MlflowException( 'Expected a quoted string value for attributes. ' 'Got value {value}'.format(value=value_token.value), error_code=INVALID_PARAMETER_VALUE) comp = { 'key': key, 'comparator': stripped_comparison[1].value, 'value': cls._strip_quotes(value_token.value, expect_quoted_value=True) } return comp
def _strip_quotes(cls, value, expect_quoted_value=False): """Remove quotes for input string. Values of type strings are expected to have quotes. Keys containing special characters are also expected to be enclose in quotes. """ if cls._is_quoted(value, "'") or cls._is_quoted(value, '"'): return cls._trim_ends(value) elif expect_quoted_value: raise MlflowException( 'Parameter value is either not quoted or unidentified quote ' 'types used for string value %s. Use either single or double ' 'quotes.' % value, error_code=INVALID_PARAMETER_VALUE) else: return value
def get_artifact_repository(self, artifact_uri):
    """Instantiate the artifact repository registered for a URI's scheme.

    Args:
        artifact_uri: the artifact URI. Its scheme (per
            ``get_uri_scheme``) selects the registered implementation,
            and the URI itself is passed to that implementation.

    Returns:
        The object obtained by calling the registered repository entry
        with ``artifact_uri``.

    Raises:
        MlflowException: if no repository is registered for the scheme.
    """
    scheme = get_uri_scheme(artifact_uri)
    repository_factory = self._registry.get(scheme)
    if repository_factory is not None:
        return repository_factory(artifact_uri)

    known_schemes = list(self._registry.keys())
    raise MlflowException(
        'Could not find a registered artifact repository for: {}. '
        'Currently registered schemes are: {}'.format(artifact_uri,
                                                      known_schemes))
def _valid_entity_type(cls, entity_type): entity_type = cls._trim_backticks(entity_type) if entity_type not in cls._VALID_IDENTIFIERS: raise MlflowException("Invalid entity type '%s'. " 'Valid values are %s' % (entity_type, cls._IDENTIFIERS), error_code=INVALID_PARAMETER_VALUE) if entity_type in cls._ALTERNATE_PARAM_IDENTIFIERS: return cls._PARAM_IDENTIFIER elif entity_type in cls._ALTERNATE_METRIC_IDENTIFIERS: return cls._METRIC_IDENTIFIER elif entity_type in cls._ALTERNATE_TAG_IDENTIFIERS: return cls._TAG_IDENTIFIER elif entity_type in cls._ALTERNATE_ATTRIBUTE_IDENTIFIERS: return cls._ATTRIBUTE_IDENTIFIER else: # one of ("metric", "parameter", "tag", or "attribute") since it a # valid type return entity_type