Esempio n. 1
0
def get_pipeline_filepath(fn, name, client, usets=True):
    """
    Builds the path of a pipeline script in the client's ``ml_data`` location.

    The file name has the form ``p_<fn>_<name><ts>.py`` where ``<ts>`` is the
    current Unix time in whole seconds, or empty when ``usets`` is falsy.

    :param fn: first component of the file name
    :param name: second component of the file name
    :param client: client handed to ``ClientFs``
    :param usets: whether to append the timestamp to the file name
    :return: the result of ``ClientFs.get`` for ``['ml_data', <file name>]``
    """
    import time
    fs = ClientFs(client)
    if usets:
        stamp = str(int(time.time()))
    else:
        stamp = ''
    pipeline_fname = "p_{0}_{1}{2}.py".format(fn, name, stamp)
    return fs.get(['ml_data', pipeline_fname])
Esempio n. 2
0
 def __init__(self, client, name):
     """
     Sets up the project's paths and runs the bootstrap step.

     :param client: client handed to ``ClientFs``
     :param name: project name, used as the entry under ``projects``
     """
     from helpers import ClientFs
     self.client, self.name = client, name
     self.features = []
     self._script_instance = None  # filled in later, on first use
     self.clfs = ClientFs(client)
     # Every path below is resolved through the client's ClientFs.
     self.path = self.clfs.get(['projects', name])
     self.script = self.clfs.get([self.path, 'script.py'])
     self.features_dir = self.clfs.get([self.path, 'features'])
     self.__bootstrap()
Esempio n. 3
0
def load_ml_script(client, exp_name, *args, **kwargs) -> MlScript:
    """
    Loads the ``script.py`` of one of a client's experiments.

    :param client: client handed to ``ClientFs``
    :param exp_name: experiment name, looked up under ``DIR_EXP``
    :param args: extra positional arguments forwarded to ``load_ml_script_abs``
    :param kwargs: extra keyword arguments forwarded to ``load_ml_script_abs``
    :return: MlScript
    """
    location = ClientFs(client).get([DIR_EXP, exp_name, 'script.py'])
    return load_ml_script_abs(location, *args, **kwargs)
Esempio n. 4
0
def save_ml_script(client, name, script):
    """
    Saves ``script['code']`` as the ``script.py`` of the experiment ``name``.

    :param client: client handed to ``ClientFs``
    :param name: experiment name, used as the entry under ``DIR_EXP``
    :param script: mapping whose ``'code'`` entry is the content to save
    :return: None
    """
    fs = ClientFs(client)
    script_location = [DIR_EXP, name, 'script.py']
    fs.save(script_location, script['code'])
Esempio n. 5
0
def store_model(model, client, target):
    """
    Pickles ``model`` to the path derived from its ``'model_id'`` and ``target``.

    :param model: mapping with a ``'model_id'`` entry; pickled as a whole
    :param client: client handed to ``ClientFs``
    :param target: target used to build the model file path
    :return: whatever ``ClientFs.save_pickle`` returns
    """
    fs = ClientFs(client)
    destination = get_model_filepath(client, model['model_id'], target)
    # The path is passed with is_abs=True, as in the original call.
    return fs.save_pickle(destination, model, is_abs=True)
Esempio n. 6
0
def load_model(client, model_id, target):
    """
    Loads the pickled model stored for ``model_id`` and ``target``.

    :param client: client handed to ``ClientFs``
    :param model_id: id used to build the model file path
    :param target: target used to build the model file path
    :return: whatever ``ClientFs.load_pickle`` returns for that path
    """
    fs = ClientFs(client)
    return fs.load_pickle(get_model_filepath(client, model_id, target))
Esempio n. 7
0
def get_model_filepath(client, model_id, target):
    """
    Resolves the path of a model file inside the client's ``models`` location.

    :param client: client handed to ``ClientFs``
    :param model_id: id passed to ``get_model_filename``
    :param target: target passed to ``get_model_filename``
    :return: the result of ``ClientFs.get`` for ``['models', <file name>]``
    """
    fs = ClientFs(client)
    return fs.get(['models', get_model_filename(model_id, target)])
Esempio n. 8
0
 def __init__(self, client):
     """
     Stores the client and creates its ``ClientFs``.

     :param client: client handed to ``ClientFs``
     """
     from helpers import ClientFs
     # No target or file is selected until one is set explicitly.
     self.current_target = self.current_file = None
     self.client = client
     self.client_fs = ClientFs(client)
Esempio n. 9
0
class ExperimentLogger(object):
    """Writes plain-text experiment reports into a client's ``logs`` location."""

    def __init__(self, client):
        """
        :param client: client handed to ``ClientFs``
        """
        from helpers import ClientFs
        self.client = client
        self.client_fs = ClientFs(client)
        self.current_target = None
        self.current_file = None

    def get_log_file(self, target):
        """
        Builds a new, timestamped log file path for ``target``.

        The file name is ``exp_<target>_<YYYY-mm-dd_HH-MM-SS>.log`` using the
        current local time, so every call may yield a different path.

        :param target: target name embedded in the file name
        :return: the result of ``ClientFs.get`` for ``['logs', <file name>]``
        """
        experiment_log = "exp_{0}_{1}.log".format(
            target,
            datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
        log_file = self.client_fs.get(["logs", experiment_log])
        return log_file

    def set_current_target(self, target):
        """
        Remembers ``target`` and a freshly generated log file path for it.

        :param target: target name
        :return: None
        """
        self.current_target = target
        self.current_file = self.get_log_file(self.current_target)

    def log_report(self, data, target, filename=None):
        """
        Writes a text report for an experiment and returns the file path.

        Missing optional keys are filled in on ``data`` itself (the caller's
        dict is updated in place, as before). ``data['model']`` is required.

        :param data: report content; recognised keys are ``model``, ``best``,
            ``best_performance`` (a list is written one item per line),
            ``accuracy``, ``feature_importance``, ``results`` (rendered through
            ``pandas.DataFrame``), ``statistics``, ``scaling`` and ``columns``
            (the last two are mappings with ``data`` and ``target`` entries)
        :param target: target name, used only to generate the file name when
            ``filename`` is None
        :param filename: explicit path to write to instead of a generated one
        :return: path of the written log file
        :raises KeyError: if ``data`` has no ``'model'`` entry
        """
        log_file = self.get_log_file(target) if filename is None else filename

        defaults = (
            ('best_performance', 'na'),
            ('best', 'na'),
            ('accuracy', 'na'),
            ('results', []),
            ('statistics', ''),
            ('columns', {'data': [], 'target': []}),
            ('feature_importance', ''),
            ('scaling', {'data': 'none', 'target': 'none'}),
        )
        for key, default in defaults:
            data.setdefault(key, default)

        # Coerce to text so that numeric scores or non-string statistics do
        # not abort the report with a TypeError.
        best_performance = data['best_performance']
        if isinstance(best_performance, list):
            performance_text = "\n".join(
                str(item) for item in best_performance)
        else:
            performance_text = str(best_performance)

        # The report is assembled before the file is opened, so a failure here
        # (e.g. a missing 'model') does not leave an empty log file behind.
        report = "".join([
            str(data['model']) + '\n',
            "=========Best configuration==============\n",
            str(data['best']) + '\n',
            "\n==========Best model performance on test cases===========\n",
            performance_text,
            "\nAccuracy: " + str(data['accuracy']) + "\n",
            "\n Feature Importance: +" + str(data['feature_importance']),
            "\n==========Other configurations============\n",
            pd.DataFrame(data['results']).to_string(),
            "\n====================Stats===================\n",
            str(data['statistics']),
            "\n====================Scaling===================\n",
            "Data: {0}    Target: {1}\n".format(
                data['scaling']['data'], data['scaling']['target']),
            "\n====================Columns===================\n",
            "Data: " + str(data['columns']['data']) + "\n",
            "Target: " + str(data['columns']['target']) + "\n",
            '\n\n\n',
        ])
        report = report.replace('\n', '\r\n')

        # newline='' disables newline translation: the text already carries
        # explicit CRLF endings, which would otherwise become '\r\r\n' on
        # platforms whose line separator is CRLF.
        with open(log_file, 'w', newline='') as f:
            f.write(report)
        return log_file
Esempio n. 10
0
 def set_user_client(self, client):
     """
     Binds this object to ``client`` and creates a new ``ClientFs`` for it.

     :param client: client handed to ``ClientFs``
     :return: None
     """
     from helpers import ClientFs
     self.client = client
     # Any previously stored ClientFs is replaced.
     self.client_fs = ClientFs(self.client)
Esempio n. 11
0
class Project(object):
    """
    A named project of a client: a generated ``script.py``, a ``features``
    package, and helpers for collecting model and pipeline files.
    """

    def __init__(self, client, name):
        """
        Sets up the project's paths and runs the bootstrap step.

        :param client: client handed to ``ClientFs``
        :param name: project name, used as the entry under ``projects``
        """
        from helpers import ClientFs
        self.client = client
        self.name = name
        self.clfs = ClientFs(client)
        project_path = self.clfs.get(['projects', name])
        self.path = project_path
        self.script = self.clfs.get([project_path, 'script.py'])
        # One {'target': ..., 'src': ...} entry per added features module.
        self.features = []
        self._script_instance = None
        self.features_dir = self.clfs.get([self.path, 'features'])
        self.__bootstrap()

    def __bootstrap(self):
        """
        Creates the initial assets required for each project.

        Currently a no-op: the template copy below is disabled.
        :return:
        """
        # features_template_path = get_template_file(['features', '__init__.py'])
        # features_init_file = os.path.join(self.features_dir, '__init__.py')
        # copyfile(features_template_path, features_init_file)

    def write_script(self, code):
        """
        Writes ``code`` to the project's script file, replacing its content.

        :param code: Python source text
        :return: None
        """
        # Written as UTF-8 explicitly: that is Python's default source
        # encoding and matches how the features modules are written, whereas
        # the locale default can corrupt non-ASCII source on some platforms.
        with open(self.script, 'w', encoding='utf-8') as f_script:
            f_script.write(code)

    def create_script(self, features, targets, data_flags, grouping, model_id,
                      generateFeatures):
        """
        Creates a script for the project and returns an instance of it.

        Also writes the features module and the features package
        ``__init__.py``. A plain list of features is treated as
        ``{'common': features}`` for the features files.

        :param features: list of features, or a dict of feature lists
        :param targets:
        :param data_flags:
        :param grouping:
        :param model_id:
        :param generateFeatures: passed to the generator as ``use_featuregen``
        :return: the script instance from ``get_script_instance``
        """
        script_src = generate_script({
            'features': features,
            'targets': targets,
            'data_flags': data_flags,
            'grouping': grouping,
            'client': self.client,
            'name': self.name,
            'model_id': model_id,
            'use_featuregen': generateFeatures
        })
        self.write_script(script_src)
        if isinstance(features, list):
            features = {'common': features}
        features_src = generate_features_module(features)
        features_package = generate_features_package_init(features)
        self.add_features_module(None, features_src)
        self._set_features_package_init(features_package)
        # The script is loaded with the project directory added via add_path.
        with add_path(self.path):
            instance = self.get_script_instance()
        return instance

    def get_script_instance(self):
        """
        Gets an instance of the script. It's created only once.

        The loaded instance is cached on the project, so a script rewritten
        afterwards is not reloaded by this method.
        :return: the script instance from ``load_ml_script_abs``
        """
        if self._script_instance is not None:
            return self._script_instance
        script_instance = load_ml_script_abs(self.script)
        self._script_instance = script_instance
        return script_instance

    def get_model(self, target_col):
        """
        Gets the model source for a target column from the client's models.

        :param target_col: target column the model was stored for
        :return: tuple ``(model_source, model_filepath)``
        """
        script_instance = self.get_script_instance()
        model_id = script_instance.model_id
        model_filepath = get_model_filepath(self.client, model_id, target_col)
        model_source = get_local_model_source(model_filepath)
        return model_source, model_filepath

    def get_model_path(self, target):
        """
        Gets the path of the model file for ``target`` inside the project.

        Unlike ``get_model``, the path is resolved against the project path
        rather than through ``get_model_filepath``.

        :param target: target the model file name is built for
        :return: the result of ``ClientFs.get`` for the project path and the
            model file name
        """
        script = self.get_script_instance()
        model_id = script.model_id
        model_fname = get_model_filename(model_id, target)
        model_filepath = self.clfs.get([self.path, model_fname])
        return model_filepath

    def move_temp_assets(self):
        """
        Moves any temporary assets to the project directory

        For every target of the script, the model file and the latest 'tpot'
        pipeline file are copied (not removed) when they exist.
        :return: None
        """
        import ntpath
        from shutil import copyfile
        import os
        from utils import file_exists
        script = self.get_script_instance()
        targets = script.get_targets()
        model_id = script.model_id
        client = self.client
        project_path = self.path
        for t in targets:
            target_col = t['column']
            model_filepath = get_model_filepath(client, model_id, target_col)
            model_filename = ntpath.basename(model_filepath)
            model_source = get_local_model_source(model_filepath)
            dest_model_path = os.path.join(project_path, model_filename)
            if model_source.exists():
                # We copy the model to our project dir
                model_source.copy_to(dest_model_path)
            # Pipeline
            pipeline_file = get_pipeline_latest('tpot', target_col, client)
            if pipeline_file is not None:
                pipeline_filename = ntpath.basename(pipeline_file)
                dest_pipeline_file = os.path.join(project_path,
                                                  pipeline_filename)
                if file_exists(pipeline_file):
                    copyfile(pipeline_file, dest_pipeline_file)

    def get_zip(self):
        """
        Zips the project directory and returns the archive path.

        The archive is named ``<name>.zip`` and is resolved one level above
        the project path; it uses BZIP2 compression.
        :return: path of the created zip file
        """
        from utils import zipdir
        import zipfile
        path = self.path
        name = self.name
        # We zip up the project
        project_zip = self.clfs.get([path, '..', name + '.zip'])
        with zipfile.ZipFile(project_zip, 'w', zipfile.ZIP_BZIP2) as zipf:
            zipdir(path, zipf)
        # We need to package these files:
        # script.py
        # model.pickl
        # pipeline.py for refitting
        return project_zip

    def _set_features_package_init(self, source):
        """
        Writes ``source`` as the ``__init__.py`` of the features directory.

        :param source: Python source text, written UTF-8 encoded
        :return: self
        """
        features_file_path = os.path.join(self.features_dir, '__init__.py')
        with open(features_file_path, 'wb') as features_file:
            features_file.write(source.encode('utf-8'))
        return self

    def add_features_module(self, target, feature_mod_src):
        """
        Records a features module and writes it into the features directory.

        The file name is ``<module name>.py`` where the module name comes
        from ``get_features_module_name(target)``.

        :param target: target the module belongs to; ``create_script``
            passes None
        :param feature_mod_src: Python source text, written UTF-8 encoded
        :return: self
        """
        fname = get_features_module_name(target)
        self.features.append({'target': target, 'src': feature_mod_src})
        features_file_path = os.path.join(self.features_dir, fname + '.py')
        with open(features_file_path, 'wb') as features_file:
            features_file.write(feature_mod_src.encode('utf-8'))
        return self