def get_pipeline_filepath(fn, name, client, usets=True):
    """
    Builds the path of a pipeline file below the client's ``ml_data`` entry.

    The file name has the form ``p_<fn>_<name><timestamp>.py``.

    :param fn: first component of the file name
    :param name: second component of the file name
    :param client: client handed to ``ClientFs``
    :param usets: when truthy, the current Unix time in whole seconds is
        appended to the name; otherwise nothing is appended
    :return: the value ``ClientFs.get`` yields for ``['ml_data', <file name>]``
    """
    import time

    fs = ClientFs(client)
    if usets:
        suffix = str(int(time.time()))
    else:
        suffix = ''
    pipeline_fname = "p_{0}_{1}{2}.py".format(fn, name, suffix)
    return fs.get(['ml_data', pipeline_fname])
def __init__(self, client, name):
    """
    Resolves the project's paths through ``ClientFs`` and bootstraps it.

    :param client: client handed to ``ClientFs``
    :param name: project name; looked up as ``['projects', name]``
    """
    from helpers import ClientFs

    self.client = client
    self.name = name
    self.clfs = ClientFs(client)

    # Everything else is resolved relative to the project path.
    self.path = self.clfs.get(['projects', name])
    self.script = self.clfs.get([self.path, 'script.py'])

    self.features = []
    self._script_instance = None  # filled lazily elsewhere
    self.features_dir = self.clfs.get([self.path, 'features'])

    self.__bootstrap()
def load_ml_script(client, exp_name, *args, **kwargs) -> MlScript:
    """
    Loads the ml script stored for one experiment of a client.

    :param client: client handed to ``ClientFs``
    :param exp_name: experiment name, looked up below ``DIR_EXP``
    :param args: positional arguments forwarded to ``load_ml_script_abs``
    :param kwargs: keyword arguments forwarded to ``load_ml_script_abs``
    :return: MlScript
    """
    script_location = ClientFs(client).get([DIR_EXP, exp_name, 'script.py'])
    return load_ml_script_abs(script_location, *args, **kwargs)
def save_ml_script(client, name, script):
    """
    Saves the code of a script as ``script.py`` of the named experiment.

    :param client: client handed to ``ClientFs``
    :param name: experiment name, used below ``DIR_EXP``
    :param script: mapping whose ``'code'`` entry is what gets saved
    :return: None
    """
    fs = ClientFs(client)
    source_code = script['code']
    fs.save([DIR_EXP, name, 'script.py'], source_code)
def store_model(model, client, target):
    """
    Pickles a model to the path derived from its id and the target.

    :param model: mapping with a ``'model_id'`` entry; pickled as a whole
    :param client: client handed to ``ClientFs``
    :param target: target used to derive the model file path
    :return: whatever ``ClientFs.save_pickle`` returns
    """
    fs = ClientFs(client)
    destination = get_model_filepath(client, model['model_id'], target)
    # The path comes from get_model_filepath, so it is passed as absolute.
    return fs.save_pickle(destination, model, is_abs=True)
def load_model(client, model_id, target):
    """
    Loads the pickled model stored for a model id and target.

    :param client: client handed to ``ClientFs``
    :param model_id: id used to derive the model file path
    :param target: target used to derive the model file path
    :return: the object ``ClientFs.load_pickle`` returns
    """
    fs = ClientFs(client)
    source = get_model_filepath(client, model_id, target)
    # NOTE(review): store_model passes is_abs=True when saving to this same
    # path, while no such flag is passed here - confirm load_pickle's default.
    return fs.load_pickle(source)
def get_model_filepath(client, model_id, target):
    """
    Resolves the path of a model file below the client's ``models`` entry.

    :param client: client handed to ``ClientFs``
    :param model_id: id passed to ``get_model_filename``
    :param target: target passed to ``get_model_filename``
    :return: the value ``ClientFs.get`` yields for ``['models', <file name>]``
    """
    fs = ClientFs(client)
    return fs.get(['models', get_model_filename(model_id, target)])
def __init__(self, client):
    """
    Binds the instance to a client and its ``ClientFs``.

    :param client: client handed to ``ClientFs``
    """
    from helpers import ClientFs

    self.client = client
    self.client_fs = ClientFs(client)
    # No target / file selected yet.
    self.current_target = self.current_file = None
class ExperimentLogger(object):
    """
    Writes plain-text experiment reports into a client's ``logs`` entry.
    """

    def __init__(self, client):
        """
        :param client: client handed to ``ClientFs``
        """
        from helpers import ClientFs

        self.client = client
        self.client_fs = ClientFs(client)
        self.current_target = None
        self.current_file = None

    def get_log_file(self, target):
        """
        Builds a log file path named after the target and the current time.

        :param target: target embedded in the file name
        :return: the value ``ClientFs.get`` yields for ``['logs', <file name>]``
        """
        stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        experiment_log = "exp_{0}_{1}.log".format(target, stamp)
        return self.client_fs.get(["logs", experiment_log])

    def set_current_target(self, target):
        """
        Remembers the target and a freshly named log file for it.

        :param target: target to make current
        :return: None
        """
        self.current_target = target
        self.current_file = self.get_log_file(self.current_target)

    def log_report(self, data, target, filename=None):
        """
        Renders ``data`` as a text report and writes it to a log file.

        Optional keys missing from ``data`` are filled in with placeholder
        values on the passed dict itself. ``data['model']`` is required.

        :param data: report values; read keys are ``model``, ``best``,
            ``best_performance``, ``accuracy``, ``feature_importance``,
            ``results``, ``statistics``, ``scaling`` and ``columns``
        :param target: target used to name the log file when ``filename``
            is None
        :param filename: explicit path to write to; overrides the generated
            log file name
        :return: path of the file that was written
        :raises KeyError: if ``data`` has no ``'model'`` entry
        """
        log_file = self.get_log_file(target) if filename is None else filename

        # Fill in placeholders for everything the caller left out.
        data.setdefault('best_performance', 'na')
        data.setdefault('best', 'na')
        data.setdefault('accuracy', 'na')
        data.setdefault('results', [])
        data.setdefault('statistics', '')
        data.setdefault('columns', {'data': [], 'target': []})
        data.setdefault('feature_importance', '')
        data.setdefault('scaling', {'data': 'none', 'target': 'none'})

        # Values are passed through str() so numeric entries do not raise.
        performance = data['best_performance']
        if isinstance(performance, list):
            performance_text = "\n".join(str(item) for item in performance)
        else:
            performance_text = str(performance)

        # The report is assembled before the file is opened, so a missing
        # 'model' key does not leave a truncated file behind.
        parts = [
            str(data['model']) + '\n',
            "=========Best configuration==============\n",
            str(data['best']) + '\n',
            "\n==========Best model performance on test cases===========\n",
            performance_text,
            "\nAccuracy: " + str(data['accuracy']) + "\n",
            "\n Feature Importance: +" + str(data['feature_importance']),
            "\n==========Other configurations============\n",
            pd.DataFrame(data['results']).to_string(),
            "\n====================Stats===================\n",
            str(data['statistics']),
            "\n====================Scaling===================\n",
            "Data: {0} Target: {1}\n".format(
                data['scaling']['data'], data['scaling']['target']),
            "\n====================Columns===================\n",
            "Data: " + str(data['columns']['data']) + "\n",
            "Target: " + str(data['columns']['target']) + "\n",
            '\n\n\n',
        ]
        report = "".join(parts).replace('\n', '\r\n')

        # newline='' disables newline translation: the report already uses
        # '\r\n', which would otherwise become '\r\r\n' on Windows.
        with open(log_file, 'w', newline='') as f:
            f.write(report)
        return log_file
def set_user_client(self, client):
    """
    Switches the instance to another client and a matching ``ClientFs``.

    :param client: client handed to ``ClientFs``
    :return: None
    """
    from helpers import ClientFs

    self.client = client
    # Built from the same client that was just stored.
    client_fs = ClientFs(client)
    self.client_fs = client_fs
class Project(object):
    """
    A named client project: the generated script, its features package and
    copies of the model / pipeline files that belong to it.
    """

    def __init__(self, client, name):
        """
        Resolves the project's paths through ``ClientFs`` and bootstraps it.

        :param client: client handed to ``ClientFs``
        :param name: project name; looked up as ``['projects', name]``
        """
        from helpers import ClientFs

        self.client = client
        self.name = name
        self.clfs = ClientFs(client)
        project_path = self.clfs.get(['projects', name])
        self.path = project_path
        self.script = self.clfs.get([project_path, 'script.py'])
        self.features = []
        self._script_instance = None
        self.features_dir = self.clfs.get([self.path, 'features'])
        self.__bootstrap()

    def __bootstrap(self):
        """
        Creates the initial assets required for each project.

        Currently does nothing: copying the features package template is
        disabled (see the commented lines below).
        :return:
        """
        # features_template_path = get_template_file(['features', '__init__.py'])
        # features_init_file = os.path.join(self.features_dir, '__init__.py')
        # copyfile(features_template_path, features_init_file)

    def write_script(self, code):
        """
        Writes the project's script file, replacing any previous content.

        :param code: script source as text
        :return: None
        """
        # Written as UTF-8, like the feature modules: Python reads source
        # files as UTF-8 unless they declare another encoding.
        with open(self.script, 'w', encoding='utf-8') as f_script:
            f_script.write(code)

    def create_script(self, features, targets, data_flags, grouping, model_id,
                      generateFeatures):
        """
        Creates a script for the project and returns an instance of it.

        Also writes the features module and the features package
        ``__init__.py`` generated from ``features``.

        :param features: feature definitions; a list is treated as
            ``{'common': features}`` when generating the features package
        :param targets:
        :param data_flags:
        :param grouping:
        :param model_id:
        :param generateFeatures: passed to the generator as ``use_featuregen``
        :return: the script instance
        """
        script_src = generate_script({
            'features': features,
            'targets': targets,
            'data_flags': data_flags,
            'grouping': grouping,
            'client': self.client,
            'name': self.name,
            'model_id': model_id,
            'use_featuregen': generateFeatures
        })
        self.write_script(script_src)

        if isinstance(features, list):
            features = {'common': features}
        features_src = generate_features_module(features)
        features_package = generate_features_package_init(features)
        self.add_features_module(None, features_src)
        self._set_features_package_init(features_package)

        # The script is loaded with the project directory on the path.
        with add_path(self.path):
            instance = self.get_script_instance()
        return instance

    def get_script_instance(self):
        """
        Gets an instance of the script. It's created only once.

        :return: the cached script instance
        """
        if self._script_instance is not None:
            return self._script_instance
        script_instance = load_ml_script_abs(self.script)
        self._script_instance = script_instance
        return script_instance

    def get_model(self, target_col):
        """
        Looks up the model source for a target column.

        :param target_col: target column the model belongs to
        :return: tuple ``(model_source, model_filepath)``
        """
        script_instance = self.get_script_instance()
        model_id = script_instance.model_id
        model_filepath = get_model_filepath(self.client, model_id, target_col)
        model_source = get_local_model_source(model_filepath)
        return model_source, model_filepath

    def get_model_path(self, target):
        """
        Resolves the path of the target's model file inside the project.

        :param target: target the model belongs to
        :return: path resolved through ``ClientFs.get``
        """
        script = self.get_script_instance()
        model_id = script.model_id
        model_fname = get_model_filename(model_id, target)
        model_filepath = self.clfs.get([self.path, model_fname])
        return model_filepath

    def move_temp_assets(self):
        """
        Moves any temporary assets to the project directory.

        For every target of the script, the model file (when its source
        exists) and the latest ``tpot`` pipeline file (when there is one and
        it exists) are copied into the project path.
        :return:
        """
        import ntpath
        from shutil import copyfile
        import os
        from utils import file_exists

        script = self.get_script_instance()
        targets = script.get_targets()
        model_id = script.model_id
        client = self.client
        project_path = self.path

        for t in targets:
            target_col = t['column']
            model_filepath = get_model_filepath(client, model_id, target_col)
            model_filename = ntpath.basename(model_filepath)
            model_source = get_local_model_source(model_filepath)
            dest_model_path = os.path.join(project_path, model_filename)
            if model_source.exists():
                # We copy the model to our project dir
                model_source.copy_to(dest_model_path)

            # Pipeline
            pipeline_file = get_pipeline_latest('tpot', target_col, client)
            if pipeline_file is not None:
                pipeline_filename = ntpath.basename(pipeline_file)
                dest_pipeline_file = os.path.join(project_path,
                                                  pipeline_filename)
                if file_exists(pipeline_file):
                    copyfile(pipeline_file, dest_pipeline_file)

    def get_zip(self):
        """
        Zips up the project directory.

        :return: path of the zip file, resolved as
            ``[<project path>, '..', <name>.zip]``
        """
        from utils import zipdir
        import zipfile

        path = self.path
        name = self.name
        # We zip up the project
        project_zip = self.clfs.get([path, '..', name + '.zip'])
        with zipfile.ZipFile(project_zip, 'w', zipfile.ZIP_BZIP2) as zipf:
            zipdir(path, zipf)
        # We need to package these files:
        # script.py
        # model.pickl
        # pipeline.py for refitting
        return project_zip

    def _write_features_file(self, filename, source):
        """Writes ``source`` as UTF-8 to ``filename`` in the features dir."""
        features_file_path = os.path.join(self.features_dir, filename)
        with open(features_file_path, 'wb') as features_file:
            features_file.write(source.encode('utf-8'))

    def _set_features_package_init(self, source):
        """
        Writes the ``__init__.py`` of the features package.

        :param source: module source as text
        :return: self
        """
        self._write_features_file('__init__.py', source)
        return self

    def add_features_module(self, target, feature_mod_src):
        """
        Registers a features module and writes it to the features directory.

        :param target: target the module belongs to; also determines the
            module's file name via ``get_features_module_name``
        :param feature_mod_src: module source as text
        :return: self
        """
        fname = get_features_module_name(target)
        self.features.append({'target': target, 'src': feature_mod_src})
        self._write_features_file(fname + '.py', feature_mod_src)
        return self