def get_version_condition(self, name, versions, version_column, time_column):
    """Build the version part of the ``where`` clause of a sql statement.

    The returned fragment is either empty or starts with ``' and '`` so that
    it can be appended directly to an already existing condition.

    Arguments:
        name {str} -- not used
        versions {None, str, tuple or list of str} -- the version(s) to
            condition on: ``None`` adds no restriction, a string selects
            exactly this version, a tuple ``(start, end)`` selects the time
            range spanned by the two versions (a bound that is ``None`` is
            left open), a list selects any of the listed versions
        version_column {str} -- version column name
        time_column {str} -- time column name

    Raises:
        TypeError -- if versions is neither None nor a str, tuple or list

    Returns:
        str -- the condition for the versions ('' if nothing is restricted)
    """

    def quote(value):
        # Double embedded single quotes so that the value always stays one
        # single SQL string literal.
        return "'" + str(value).replace("'", "''") + "'"

    if versions is None:
        return ''
    if isinstance(versions, str):
        return ' and ' + version_column + ' = ' + quote(versions)
    if isinstance(versions, tuple):
        bounds = []
        # A bound of None means "open on this side", mirroring the way
        # _is_in_versions interprets version tuples.
        if versions[0] is not None:
            bounds.append(
                quote(_time_from_version(versions[0])) + ' <= ' + time_column)
        if versions[1] is not None:
            bounds.append(
                time_column + '<= ' + quote(_time_from_version(versions[1])))
        if not bounds:
            return ''
        return ' and ' + ' and '.join(bounds)
    if isinstance(versions, list):
        # Strip the outer quotes again and join with "','" so that an empty
        # list still yields the literal ('') as before.
        inner = "','".join(quote(v)[1:-1] for v in versions)
        return ' and ' + version_column + " in ('" + inner + "')"
    # Anything else used to produce a dangling ' and ', i.e. invalid SQL.
    raise TypeError(
        'versions must be None, a str, a tuple or a list, got '
        + type(versions).__name__)
def _is_in_versions(self, version, versions): """ Check whether the version exists between the dates of the two versions Args: version (str): the version to check whether it is here versions (list of str): a list of possible two versions, takes the date of the first version as a start date and the second as the end date Returns: bool -- returns true if the version is in between the two dates """ if version is None: return True if versions is None: return True v = versions if isinstance(v, list): return version in v if isinstance(v, tuple): if v[0] is None: start_time = datetime.datetime(1980, 1, 1, 0, 0) else: start_time = _time_from_version(v[0]) if v[1] is None: end_time = datetime.datetime(2300, 1, 1, 0, 0) else: end_time = _time_from_version(v[1]) time = _time_from_version(version) return (time >= start_time) and (time <= end_time) return version == v
def replace(self, obj):
    """Overwrite an existing object without incrementing its version.

    For every row of the ``versions`` table matching the object's name and
    version, the stored file is rewritten through ``self._save_function``.
    Afterwards the object's rows in ``modification_info`` are deleted and
    re-created from the modification info contained in the object, and the
    changes are committed.

    Arguments:
        obj {RepoObject} -- repo object to be overwritten
    """
    repo_info = obj['repo_info']
    name = repo_info[RepoInfoKey.NAME.value]
    version = str(repo_info[RepoInfoKey.VERSION.value])
    logger.info('Replacing ' + name + ', version ' + version)

    # NOTE(review): '?' placeholders assume self._conn is a sqlite3
    # connection (qmark paramstyle) -- confirm against the constructor.
    # Bound parameters keep names/versions containing quotes from breaking
    # or altering the statements.
    with closing(self._conn.cursor()) as cursor:
        cursor.execute(
            'select path, file from versions where name = ? and version = ?',
            (name, version))
        # Materialise the rows first: the cursor is reused below.
        for row in cursor.fetchall():
            self._save_function(
                self._main_dir + '/' + str(row[0]) + '/' + str(row[1]), obj)

        # delete all modification infos
        cursor.execute(
            'delete from modification_info where name = ? and version = ?',
            (name, version))
        modification_key = repo_objects.RepoInfoKey.MODIFICATION_INFO.value
        if modification_key in repo_info:
            for modifier, modifier_version in repo_info[
                    modification_key].items():
                modifier_time = _time_from_version(modifier_version)
                cursor.execute(
                    'insert into modification_info (name, version, modifier, '
                    'modifier_version, modifier_uuid_time) '
                    'VALUES (?, ?, ?, ?, ?)',
                    (name, version, modifier, str(modifier_version),
                     str(modifier_time)))
    self._conn.commit()
def get_measure_history(ml_repo, measure_names):
    """Collect the stored history of one or more measures.

    For each measure all versions between ``RepoStore.FIRST_VERSION`` and
    ``RepoStore.LAST_VERSION`` are fetched from the repo and reduced to a
    small dictionary per version. The entries are kept in the order in
    which ``ml_repo.get`` returns them.

    Args:
        ml_repo (MLRepo): the ml repo
        measure_names (str, list(str)): string or list of strings of measure
            names (including full path)

    Returns:
        dict: maps each measure name to a list of dicts with the keys
            'model_version', 'data_version', 'train_data_version', 'value'
            and 'datetime' (the time derived from the model version) plus
            'model_label' if the label checker returns a label for the
            model version.
    """
    checker = _LabelChecker(ml_repo)
    names = [measure_names] if isinstance(measure_names, str) else measure_names

    history = {}
    for measure_name in names:
        # Name of the data set the measure was evaluated on.
        eval_data = NamingConventions.EvalData(
            NamingConventions.Measure(measure_name))
        data = str(NamingConventions.Data(eval_data))

        measures = ml_repo.get(
            measure_name,
            version=(RepoStore.FIRST_VERSION, RepoStore.LAST_VERSION))
        if not isinstance(measures, list):
            measures = [measures]

        model_name = str(
            NamingConventions.CalibratedModel(
                NamingConventions.Measure(measure_name)))
        train_data = ml_repo.get_names(MLObjectType.TRAINING_DATA)[0]

        entries = []
        for measure in measures:
            modifications = measure.repo_info[RepoInfoKey.MODIFICATION_INFO]
            entry = {
                'model_version': modifications[model_name],
                'data_version': modifications[data],
                'train_data_version': modifications[train_data],
                'value': measure.value,
                'datetime': _time_from_version(modifications[model_name]),
            }
            model_label = checker.get_label(
                model_name, modifications[model_name])
            if model_label is not None:
                entry['model_label'] = model_label
            entries.append(entry)
        history[measure_name] = entries
    return history
def _add(self, obj):
    """Add an object to the storage.

    Registers the object's name in the ``mapping`` table (if it is not
    there yet), inserts its version into ``versions``, records its
    modification info in ``modification_info``, commits, and finally writes
    the object to disk through ``self._save_function``.

    Any exception raised on the way is logged and the open transaction is
    rolled back; the exception is NOT propagated to the caller.

    Arguments:
        obj {RepoObject} -- repository object
    """
    # NOTE(review): '?' placeholders assume self._conn is a sqlite3
    # connection (qmark paramstyle) -- confirm against the constructor.
    with self._conn:
        with closing(self._conn.cursor()) as cursor:
            try:
                repo_info = obj['repo_info']
                raw_version = repo_info[
                    repo_objects.RepoInfoKey.VERSION.value]
                uid_time = _time_from_version(raw_version)
                # Stored as text, matching how replace() looks versions up.
                version = str(raw_version)
                name = repo_info[repo_objects.RepoInfoKey.NAME.value]
                category = repo_info[
                    repo_objects.RepoInfoKey.CATEGORY.value]
                if isinstance(category, repo.MLObjectType):
                    category = category.name

                # region write mapping
                cursor.execute(
                    'select * from mapping where name = ?', (name,))
                if cursor.fetchone() is None:
                    cursor.execute(
                        'insert into mapping (name, category) VALUES (?, ?)',
                        (name, category))
                # endregion

                # region write file info
                file_sub_dir = category + '/' + name + '/'
                os.makedirs(self._main_dir + '/' + file_sub_dir,
                            exist_ok=True)
                filename = version
                cursor.execute(
                    'insert into versions (name, version, path, file, '
                    'uuid_time) VALUES (?, ?, ?, ?, ?)',
                    (name, version, file_sub_dir, filename, str(uid_time)))
                # endregion

                # region write modification info
                modification_key = \
                    repo_objects.RepoInfoKey.MODIFICATION_INFO.value
                if modification_key in repo_info:
                    for modifier, modifier_version in repo_info[
                            modification_key].items():
                        modifier_time = _time_from_version(modifier_version)
                        cursor.execute(
                            'insert into modification_info (name, version, '
                            'modifier, modifier_version, '
                            'modifier_uuid_time) VALUES (?, ?, ?, ?, ?)',
                            (name, version, modifier, str(modifier_version),
                             str(modifier_time)))
                # endregion
                self._conn.commit()

                # region write file
                logger.debug('Write object as json file with filename '
                             + filename)
                self._save_function(
                    self._main_dir + '/' + file_sub_dir + '/' + filename,
                    obj)
                # endregion
            except Exception as e:
                # Errors are logged and swallowed on purpose; callers do
                # not see the failure.
                logger.error('Error: ' + str(e) + ', rolling back changes.')
                self._conn.rollback()