예제 #1
0
def get_automl(project_name):
    """
    Retrieve information about an AutoML instance.

    :param str project_name:  A string indicating the project_name of the automl instance to retrieve.
    :returns: A dictionary containing the project_name, leader model, and leaderboard. The ``"leader"`` entry is
        ``None`` when the leaderboard does not contain any model.
    """
    automl_json = h2o.api("GET /99/AutoML/%s" % project_name)
    project_name = automl_json["project_name"]
    leaderboard_models = automl_json['leaderboard']['models'] or []

    # The first leaderboard entry is used as the leader; without any entry there is no model to fetch.
    leader_id = next((entry["name"] for entry in leaderboard_models), None)
    leader = h2o.get_model(leader_id) if leader_id is not None else None

    # Intentionally mask the progress bar here since showing multiple progress bars is confusing to users.
    # Whether or not the conversion fails, the user's original progress setting is restored afterwards.
    is_progress = H2OJob.__PROGRESS_BAR__
    h2o.no_progress()
    try:
        # Parse leaderboard H2OTwoDimTable & return as an H2OFrame
        leaderboard_table = automl_json["leaderboard_table"]
        leaderboard = h2o.H2OFrame(
            leaderboard_table.cell_values,
            column_names=leaderboard_table.col_header)
    finally:
        if is_progress is True:
            h2o.show_progress()

    # NOTE(review): presumably drops the leading index column of the table -- confirm against H2OFrame slicing.
    leaderboard = leaderboard[1:]
    return {'project_name': project_name, "leader": leader, "leaderboard": leaderboard}
예제 #2
0
파일: autoh2o.py 프로젝트: StevenLOL/h2o-3
def get_automl(project_name):
    """
    Retrieve information about an AutoML instance.

    :param str project_name:  A string indicating the project_name of the automl instance to retrieve.
    :returns: A dictionary containing the project_name, leader model, and leaderboard. The ``"leader"`` entry is
        ``None`` when the leaderboard does not contain any model.
    """
    automl_json = h2o.api("GET /99/AutoML/%s" % project_name)
    project_name = automl_json["project_name"]
    leaderboard_models = automl_json['leaderboard']['models'] or []

    # The first leaderboard entry is used as the leader; without any entry there is no model to fetch.
    leader_id = next((entry["name"] for entry in leaderboard_models), None)
    leader = h2o.get_model(leader_id) if leader_id is not None else None

    # Intentionally mask the progress bar here since showing multiple progress bars is confusing to users.
    # Whether or not the conversion fails, the user's original progress setting is restored afterwards.
    is_progress = H2OJob.__PROGRESS_BAR__
    h2o.no_progress()
    try:
        # Parse leaderboard H2OTwoDimTable & return as an H2OFrame
        leaderboard_table = automl_json["leaderboard_table"]
        leaderboard = h2o.H2OFrame(
            leaderboard_table.cell_values,
            column_names=leaderboard_table.col_header)
    finally:
        if is_progress is True:
            h2o.show_progress()

    # NOTE(review): presumably drops the leading index column of the table -- confirm against H2OFrame slicing.
    leaderboard = leaderboard[1:]
    return {'project_name': project_name, "leader": leader, "leaderboard": leaderboard}
예제 #3
0
    def _fetch(self):
        """
        Refresh the leader id and the leaderboard of this AutoML run from the backend.

        Issues ``GET /99/AutoML/<project_name>``, stores the name of the first leaderboard model in
        ``self._leader_id`` (``None`` when the leaderboard has no models) and stores the leaderboard table,
        converted to an H2OFrame, in ``self._leaderboard``.

        :returns: True if a leader model id is available, False otherwise.
        """
        res = h2o.api("GET /99/AutoML/" + self.project_name)
        model_ids = [key["name"] for key in res['leaderboard']['models']]
        self._leader_id = model_ids[0] if model_ids else None

        # Intentionally mask the progress bar here since showing multiple progress bars is confusing to users.
        # Whether or not the conversion fails, the user's original progress setting is restored afterwards.
        is_progress = H2OJob.__PROGRESS_BAR__
        h2o.no_progress()
        try:
            # Parse leaderboard H2OTwoDimTable & return as an H2OFrame
            table = res["leaderboard_table"]
            leaderboard = h2o.H2OFrame(table.cell_values, column_names=table.col_header)
        finally:
            if is_progress is True:
                h2o.show_progress()

        # NOTE(review): presumably drops the leading index column of the table -- confirm.
        self._leaderboard = leaderboard[1:]
        return self._leader_id is not None
예제 #4
0
파일: autoh2o.py 프로젝트: spennihana/h2o-3
    def _fetch(self):
        """
        Refresh the leader id and the leaderboard of this AutoML run from the backend.

        Issues ``GET /99/AutoML/<automl key>``, stores the name of the first leaderboard model in
        ``self._leader_id`` (``None`` when the leaderboard has no models) and stores the leaderboard table,
        converted to an H2OFrame, in ``self._leaderboard``.

        :returns: True if a leader model id is available, False otherwise.
        """
        res = h2o.api("GET /99/AutoML/" + self._automl_key)
        model_ids = [key["name"] for key in res['leaderboard']['models']]
        self._leader_id = model_ids[0] if model_ids else None

        # Intentionally mask the progress bar here since showing multiple progress bars is confusing to users.
        # Whether or not the conversion fails, the user's original progress setting is restored afterwards.
        is_progress = H2OJob.__PROGRESS_BAR__
        h2o.no_progress()
        try:
            # Parse leaderboard H2OTwoDimTable & return as an H2OFrame
            table = res["leaderboard_table"]
            leaderboard = h2o.H2OFrame(table.cell_values, column_names=table.col_header)
        finally:
            if is_progress is True:
                h2o.show_progress()

        # NOTE(review): presumably drops the leading index column of the table -- confirm.
        self._leaderboard = leaderboard[1:]
        return self._leader_id is not None
예제 #5
0
파일: h2o.py 프로젝트: vivlet/mlflow
def _load_model(path, init=False):
    """
    Load the H2O model stored in the directory ``path``.

    The directory must contain an ``h2o.yaml`` file whose ``model_file`` entry names the model file
    relative to the directory.

    :param path: Directory holding ``h2o.yaml`` and the model file.
    :param init: When true, call ``h2o.init`` first (with the optional ``init`` mapping from the yaml
        file as keyword arguments) and switch off progress bars.
    :returns: The result of ``h2o.load_model`` for the model file.
    """
    model_dir = os.path.abspath(path)
    settings_path = os.path.join(model_dir, "h2o.yaml")
    with open(settings_path) as settings_file:
        settings = yaml.safe_load(settings_file.read())

    if init:
        init_kwargs = settings["init"] if "init" in settings else {}
        h2o.init(**init_kwargs)
        h2o.no_progress()

    model_file = os.path.join(model_dir, settings['model_file'])
    return h2o.load_model(model_file)
예제 #6
0
 def __init__(self, song_df):
     """
     Initialise H2O and prepare the song data and the classifier.

     :param song_df: pandas-style frame of songs; it is sorted by ``song_id`` and must contain the
         ``Song``, ``Artist`` and ``Album`` columns, which are dropped before the upload to H2O.
     """
     h2o.init()
     h2o.no_progress()

     ordered_songs = song_df.sort_values('song_id')
     self.song_df = ordered_songs

     # Only the remaining (non-descriptive) columns are sent to H2O.
     descriptive_columns = ['Song', 'Artist', 'Album']
     self.song_df_h2o = h2o.H2OFrame(ordered_songs.drop(descriptive_columns, axis=1))

     # Predictor column names.
     self.X = [
         'Genre',
         'mode',
         'tempo',
         'acousticness',
         'danceability',
         'energy',
         'instrumentalness',
         'liveness',
         'loudness',
         'speechiness',
         'valence',
     ]
     self.classifier = H2ORandomForestEstimator(ntrees=200, min_rows=3)
예제 #7
0
 def __init__(self,
              ip: str = '',
              port: str = '',
              settings_file_name: str = 'settings.ini'):
     """
     Read the settings file and connect to an H2O server.

     Every option of the ``MAIN`` section of the settings file becomes an attribute of the instance;
     the ``login`` and ``password`` attributes are then used as credentials for ``h2o.connect``.

     :param ip: Address passed to ``h2o.connect``.
     :param port: Port passed to ``h2o.connect``.
     :param settings_file_name: Path of the ini file to read (UTF-8).
     """
     parser = configparser.ConfigParser()
     parser.read(settings_file_name, encoding='utf-8')
     self.config = parser

     main_section = parser['MAIN']
     for option in main_section:
         setattr(self, option, main_section[option])

     credentials = (self.login, self.password)
     h2o.connect(ip=ip, port=port, auth=credentials, verbose=False)
     h2o.no_progress()
예제 #8
0
def h2ono_progress():
    """
    Python API test: h2o.no_progress()

    Runs the test body with stdout redirected into a buffer after progress bars were switched off and
    checks that nothing was written to that buffer. stdout is restored in every case.
    """
    try:
        captured = StringIO()
        sys.stdout = captured  # everything printed from here on lands in the buffer
        h2o.no_progress()
        run_test()
        # with progress bars disabled the run must not have printed anything at all
        printed = captured.getvalue()
        assert not printed, "Nothing should have been printed, instead got " + captured.getvalue()
    finally:
        sys.stdout = sys.__stdout__  # restore old stdout
예제 #9
0
def h2ono_progress():
    """
    Python API test: h2o.no_progress()

    Runs the test body with stdout redirected into a buffer after progress bars were switched off and
    checks that nothing was written to that buffer.

    stdout is restored in a ``finally`` clause, so a failure of the test body or of the assertion is
    reported as-is instead of being replaced by a secondary error raised while handling it.
    """
    s = StringIO()
    try:
        sys.stdout = s  # redirect output
        h2o.no_progress()
        run_test()
    finally:
        sys.stdout = sys.__stdout__  # restore old stdout

    # With progress bars disabled the run must not have printed anything.
    output = s.getvalue()
    assert output == "", "Nothing should have been printed, instead got " + output
예제 #10
0
def load(
    tag: t.Union[str, Tag],
    init_params: t.Optional[t.Dict[str, t.Any]] = None,
    model_store: "ModelStore" = Provide[BentoMLContainer.model_store],
) -> h2o.model.model_base.ModelBase:
    """
    Load an H2O model, identified by its tag, from the BentoML local modelstore.

    Args:
        tag (:code:`Union[str, Tag]`):
            Tag of a saved model in BentoML local modelstore.
        init_params (:code:`Dict[str, Union[str, Any]]`, `optional`, defaults to `None`):
            Keyword arguments forwarded to :code:`h2o.init`.
        model_store (:mod:`~bentoml._internal.models.store.ModelStore`, default to :mod:`BentoMLContainer.model_store`):
            BentoML modelstore, provided by DI Container.

    Returns:
        :obj:`h2o.model.model_base.ModelBase`: the model returned by :code:`h2o.load_model` for the stored path.

    Raises:
        BentoMLException: if the stored model was saved by a different module.

    Examples:

    .. code-block:: python

        import bentoml

        model = bentoml.h2o.load(tag, init_params=dict(port=54323))
    """  # noqa

    h2o.init(**(init_params or {}))

    model = model_store.get(tag)
    accepted_modules = (MODULE_NAME, __name__)
    if model.info.module not in accepted_modules:
        raise BentoMLException(
            f"Model {tag} was saved with module {model.info.module}, failed loading with {MODULE_NAME}."
        )

    saved_path = model.path_of(SAVE_NAMESPACE)
    h2o.no_progress()
    return h2o.load_model(saved_path)
예제 #11
0
파일: h2o.py 프로젝트: wwjiang007/mlflow
def _load_model(path, init=False):
    """
    Load the H2O model stored in the directory ``path``.

    The directory must contain an ``h2o.yaml`` file whose ``model_file`` entry names the model file.
    ``h2o.upload_model`` is used when the installed H2O provides it; otherwise a warning is issued and
    ``h2o.load_model`` is used instead.

    :param path: Directory holding ``h2o.yaml`` and the model file.
    :param init: When true, call ``h2o.init`` first (with the optional ``init`` mapping from the yaml
        file as keyword arguments) and switch off progress bars.
    :returns: The loaded model.
    """
    import h2o

    model_dir = os.path.abspath(path)
    with open(os.path.join(model_dir, "h2o.yaml")) as settings_file:
        settings = yaml.safe_load(settings_file.read())

    if init:
        init_kwargs = settings["init"] if "init" in settings else {}
        h2o.init(**init_kwargs)
        h2o.no_progress()

    model_file = os.path.join(model_dir, settings["model_file"])

    # Preferred path: upload_model, when this H2O version has it.
    if hasattr(h2o, "upload_model"):
        return h2o.upload_model(model_file)

    warnings.warn(
        "If your cluster is remote, H2O may not load the model correctly. "
        "Please upgrade H2O version to a newer version")
    return h2o.load_model(model_file)
예제 #12
0
 def _h2o_init(h2o_init_params):
     """
     Switch off progress bars and initialise H2O unless a cluster is already available.

     :param h2o_init_params: Keyword arguments for ``init``; ``None`` means no arguments.
     """
     no_progress()
     if cluster() is not None:
         # a cluster is already there, nothing to initialise
         return
     init_kwargs = {} if h2o_init_params is None else h2o_init_params
     init(**init_kwargs)
예제 #13
0
def starth2o(h2oserver):
    """
    Initialise H2O against ``h2oserver`` with a clean state.

    :param h2oserver: Value passed as ``ip`` to ``h2o.init``.
    """
    memory_limit = "28G"  # upper bound handed to h2o.init as max_mem_size
    h2o.init(ip=h2oserver, max_mem_size=memory_limit)
    h2o.no_progress()
    # clean slate, in case cluster was already running
    h2o.remove_all()
import seaborn as sns

import h2o
from h2o.estimators.glrm import H2OGeneralizedLowRankEstimator
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.random_forest import H2ORandomForestEstimator
from h2o.grid.grid_search import H2OGridSearch
from h2o.estimators.xgboost import H2OXGBoostEstimator
from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator

from h2o.estimators.glm import H2OGeneralizedLinearEstimator  # import GLM models
from h2o.grid.grid_search import H2OGridSearch

#import xgboost as xgb
# Module-level setup: initialise H2O with a 6G memory limit and silence its progress bars.
h2o.init(max_mem_size='6G')  # give h2o as much memory as possible
h2o.no_progress()  # turn off h2o progress bars

# Definitions
# Format floats shown by pandas with three decimal places.
pd.set_option('display.float_format', lambda x: '%.3f' % x)
#%matplotlib inline
#njobs = 4


def get_type_lists(frame, rejects):
    """Creates lists of numeric and categorical variables.
    :param frame: The frame from which to determine types.
    :param rejects: Variable names not to be included in returned lists.
    :return: Tuple of lists for numeric and categorical variables in the frame.
    """

    nums, cats = [], []
예제 #15
0
def makeDlModel(subOpt=None,
                xCol=None,
                yCol=None,
                inpData=None,
                modelKey=None):
    """
    Train an H2O AutoML model, or reload a previously saved one, and report where it is stored.

    A new model is trained when ``subOpt['isOverWrite']`` is truthy or when no saved model file
    matching ``modelKey`` exists in ``globalVar['outPath']``; otherwise the saved file that sorts
    last by name is imported.

    :param subOpt: Options mapping; ``isInit`` and ``isOverWrite`` are read, and ``isInit`` is set to
        True once H2O has been initialised by this call.
    :param xCol: List of predictor column names.
    :param yCol: Name of the response column.
    :param inpData: Data indexable by a list of column names (``inpData[xCol + [yCol]]``).
    :param modelKey: Key embedded in the model file name.
    :returns: A dict with the keys ``msg``, ``dlModel``, ``saveModel`` and ``isExist``, or ``None``
        when any exception occurred (the exception is logged, not raised).
    """
    log.info('[START] {}'.format('makeDlModel'))

    result = None

    try:
        pathTemplate = '{}/{}-{}-{}-{}-{}-{}.model'

        # Saved model files for this key, in reverse lexicographic order of their names.
        savedPattern = pathTemplate.format(
            globalVar['outPath'], serviceName, modelKey, 'final', 'h2o', 'act', '*')
        savedPaths = sorted(glob.glob(savedPattern), reverse=True)

        selectedCols = xCol.copy()
        selectedCols.append(yCol)
        modelData = inpData[selectedCols]

        # H2O is initialised at most once per options mapping.
        if not subOpt['isInit']:
            h2o.init()
            h2o.no_progress()
            subOpt['isInit'] = True

        if not subOpt['isOverWrite'] and savedPaths:
            # A saved model exists and overwriting was not requested: reuse it.
            modelPath = savedPaths[0]
            log.info('[CHECK] saveModel : {}'.format(modelPath))
            bestModel = h2o.import_mojo(modelPath)
        else:
            # No trained model available (or overwrite requested): train a new one.
            # 7:3 split into training and validation data.
            trainPart, validPart = train_test_split(modelData, test_size=0.3)

            autoMl = H2OAutoML(
                max_models=20,
                max_runtime_secs=99999,
                balance_classes=True,
                seed=123,
            )
            autoMl.train(
                x=xCol,
                y=yCol,
                training_frame=h2o.H2OFrame(trainPart),
                validation_frame=h2o.H2OFrame(validPart),
            )
            bestModel = autoMl.get_best_model()

            # Save the trained model under a name carrying today's date.
            modelPath = pathTemplate.format(
                globalVar['outPath'], serviceName, modelKey, 'final', 'h2o', 'act',
                datetime.now().strftime('%Y%m%d'))
            log.info('[CHECK] saveModel : {}'.format(modelPath))

            modelDir, modelFile = os.path.split(modelPath)
            os.makedirs(modelDir, exist_ok=True)
            bestModel.save_mojo(path=modelDir, filename=modelFile, force=True)

        result = {
            'msg': 'succ',
            'dlModel': bestModel,
            'saveModel': modelPath,
            'isExist': os.path.exists(modelPath),
        }

    except Exception as e:
        log.error('Exception : {}'.format(e))

    finally:
        # Always executed before the try/except construct is left.
        log.info('[END] {}'.format('makeDlModel'))

    return result
예제 #16
0
    def __init__(self, training_frame=None, X=None, model=None,
                 N=None, discretize=None, quantiles=None, seed=None,
                 print_=None, top_n=None, intercept=None):
        """
        Store the explainer settings on the instance and switch off h2o progress bars.

        ``training_frame``, ``X`` and ``model`` are mandatory. Every other argument left at ``None``
        is replaced by its default: ``N`` 10000, ``discretize`` None, ``quantiles`` 4, ``seed`` 12345,
        ``print_`` True, ``top_n`` 5, ``intercept`` True.

        :raises ValueError: if ``training_frame``, ``X`` or ``model`` is ``None``.
        """

        # mandatory arguments, checked in declaration order

        if training_frame is None:
            raise ValueError('Parameter training_frame must be defined.')
        self.training_frame = training_frame

        if X is None:
            raise ValueError('Parameter X must be defined.')
        self.X = X

        if model is None:
            raise ValueError('Parameter model must be defined.')
        self.model = model

        # optional arguments: fall back to the default when nothing was given

        self.N = 10000 if N is None else N
        self.discretize = discretize  # default is None, so the argument can be stored as given
        self.quantiles = 4 if quantiles is None else quantiles
        self.seed = 12345 if seed is None else seed
        self.print_ = True if print_ is None else print_
        self.top_n = 5 if top_n is None else top_n
        self.intercept = True if intercept is None else intercept

        # internal storage, empty until filled by other methods

        self.reason_code_values = None
        self.lime_r2 = None
        self.lime = None
        self.lime_pred = None
        self.bins_dict = {}

        # do not show h2o progress bars
        h2o.no_progress()
예제 #17
0
    def __init__(self,
                 training_frame=None,
                 X=None,
                 model=None,
                 N=None,
                 discretize=None,
                 quantiles=None,
                 seed=None,
                 print_=None,
                 top_n=None,
                 intercept=None):
        """
        Store the explainer settings on the instance and switch off h2o progress bars.

        ``training_frame``, ``X`` and ``model`` are mandatory. Every other argument left at ``None``
        is replaced by its default: ``N`` 10000, ``discretize`` None, ``quantiles`` 4, ``seed`` 12345,
        ``print_`` True, ``top_n`` 5, ``intercept`` True.

        :raises ValueError: if ``training_frame``, ``X`` or ``model`` is ``None``.
        """

        # mandatory

        if training_frame is None:
            raise ValueError('Parameter training_frame must be defined.')
        self.training_frame = training_frame

        if X is None:
            raise ValueError('Parameter X must be defined.')
        self.X = X

        if model is None:
            raise ValueError('Parameter model must be defined.')
        self.model = model

        # defaults: (attribute name, value passed in, value used when nothing was passed)

        optional_settings = (
            ('N', N, 10000),
            ('discretize', discretize, None),
            ('quantiles', quantiles, 4),
            ('seed', seed, 12345),
            ('print_', print_, True),
            ('top_n', top_n, 5),
            ('intercept', intercept, True),
        )
        for attribute, given, fallback in optional_settings:
            setattr(self, attribute, fallback if given is None else given)

        # internal storage

        for attribute in ('reason_code_values', 'lime_r2', 'lime', 'lime_pred'):
            setattr(self, attribute, None)
        self.bins_dict = {}

        h2o.no_progress()  # do not show h2o progress bars
# Grid search over GBM hyper-parameters: every combination of the option lists below is trained.
ntrees_opt = [5, 10, 15]
max_depth_opt = [2, 3, 4]
learn_rate_opt = [0.1, 0.2]
hyper_parameters = {"ntrees": ntrees_opt, "max_depth":max_depth_opt, "learn_rate":learn_rate_opt}
from h2o.grid.grid_search import H2OGridSearch
gs = H2OGridSearch(H2OGradientBoostingEstimator(distribution="multinomial"), hyper_params=hyper_parameters)
# All columns but the last are predictors; the last column is the response.
gs.train(x=range(0,iris_df.ncol-1), y=iris_df.ncol-1, training_frame=iris_df, nfold=10)
# print() call form: valid on Python 3 and, with a single argument, prints the same on Python 2.
print(gs.sort_by('logloss', increasing=True))

# Pipeline
from h2o.transforms.preprocessing import H2OScaler
from sklearn.pipeline import Pipeline

# Turn off h2o progress bars
h2o.__PROGRESS_BAR__=False
h2o.no_progress()

# build transformation pipeline using sklearn's Pipeline and H2O transforms
pipeline = Pipeline([("standardize", H2OScaler()),
                 ("pca", H2OPCA(k=2)),
                 ("gbm", H2OGradientBoostingEstimator(distribution="multinomial"))])
# The first four columns are the features, column 4 is the target.
pipeline.fit(iris_df[:4],iris_df[4])

# Random CV using H2O and Scikit-learn
from sklearn.grid_search import RandomizedSearchCV
from h2o.cross_validation import H2OKFold
from h2o.model.regression import h2o_r2_score
from sklearn.metrics.scorer import make_scorer
params = {"standardize__center":    [True, False],             # Parameters to test
          "standardize__scale":     [True, False],
          "pca__k":                 [2,3],
예제 #19
0
Created on Feb 15, 2016

@author: molina
'''

from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.frame import H2OFrame
import numpy
import re

from mb.modelbase import numpytoordereddict
from ..mlutils.statistics import logpoissonpmf

import h2o
# Module-level side effect: H2O is initialised and its progress bars are disabled when this module is imported.
h2o.init()
h2o.no_progress()


class GBMPDN:
    """Holds a data set, its dimensions and the boosting configuration."""

    def __init__(self,
                 data,
                 features,
                 families="poisson",
                 max_depth=10,
                 iterations=1):
        """
        Store ``data`` together with its first two shape entries and the model configuration.

        :param data: Object exposing a ``shape`` with at least two entries.
        :param features: Accepted but not used by this constructor.
        :param families: Accepted but not used by this constructor.
        :param max_depth: Stored under ``config["max_depth"]``.
        :param iterations: Stored under ``config["iterations"]``.
        """
        self.data = data

        # nD / nF are the first and second entry of the data's shape
        n_first, n_second = data.shape[0], data.shape[1]
        self.nD = n_first
        self.nF = n_second

        self.config = dict(max_depth=max_depth, iterations=iterations)
def user_identification():
    """
    Benchmark several classification strategies for an increasing number of users (classes).

    Configuration is read from ``h2o_different_nb_cls.cfg`` (data paths, folds, user counts,
    random-forest settings) and ``h2o.cfg`` (H2O threads, memory, seed). For every user count in
    ``range(min_users, max_users + 1, step)`` and every fold, a random subset of users is drawn,
    the train/test frames are restricted to those users and ``fold_prediction_result`` is called.
    Per-fold metrics and timings are averaged per user count, logged, and plotted into a
    time-stamped ``H2O_...`` output folder (one figure per classification type plus ``all``).

    The function takes no arguments and returns nothing; its results are the log output and the
    files written to the output folder.
    """
    cfg = get_config('h2o_different_nb_cls.cfg')
    h2o_cfg = get_config('h2o.cfg')

    h2o.init(nthreads=h2o_cfg.getint('h2o', 'nthreads'),
             max_mem_size=h2o_cfg.get('h2o', 'max_mem'))
    h2o.no_progress()
    h2o_seed = h2o_cfg.getint('h2o', 'seed')

    logger.info('intrusion_detection_synthetic')
    folder = cfg.get('data', 'path')
    # Results go below output_path when configured, otherwise next to the input data.
    if cfg.has_option('data', 'output_path'):
        output_folder = cfg.get('data', 'output_path')
        out_folder = path.join(output_folder, "H2O_" + timestamp())
    else:
        out_folder = path.join(folder, "H2O_" + timestamp())
    ensure_dir(out_folder)

    # Keep a copy of the configuration used for this run alongside the results.
    with open(path.join(out_folder, 'config.cfg'), 'w') as f:
        cfg.write(f)
    np.random.seed(cfg.getint('misc', 'random_seed'))

    # ignored columns, by name or prefix
    if cfg.has_option('data', 'ignored_columns'):
        ignored_columns = cfg.get('data', 'ignored_columns').split(',')
    else:
        ignored_columns = []
    if cfg.has_option('data', 'ignore_columns_starting_with'):
        ignore_columns_starting_with = cfg.get(
            'data', 'ignore_columns_starting_with').split(',')
    else:
        ignore_columns_starting_with = []
    # Filled below but not read anywhere else in this function.
    ignored_columns_reason = dict()

    n_folds = cfg.getint('data', 'n_folds')
    min_users = cfg.getint('data', 'min_users')
    max_users = cfg.getint('data', 'max_users')
    step = cfg.getint('data', 'step')
    train_frame = cfg.get('data', 'train')
    test_frame = cfg.get('data', 'test')

    logger.info('Out folder: %s' % out_folder)
    cluster_dir = path.join(out_folder, 'clusters')
    ensure_dir(cluster_dir)

    # print and check features on train set
    train_df = pd.read_csv(path.join(folder, train_frame))
    test_df = pd.read_csv(path.join(folder, test_frame))
    important_features = [
        'pc', 'http_count', 'session_length', 'session_end_hour',
        'http_avg_duration', 'email_count', 'session_start_minute',
        'session_start_hour', 'user'
    ]
    # Restrict both frames to the important features only when all of them are present in the train set.
    if set(important_features).issubset(set(train_df.columns)):
        print('OK')
        train_df = train_df[important_features]
        test_df = test_df[important_features]
    column_types = get_h2o_column_types(list(train_df))
    unique_users = np.unique(train_df['user'].unique())
    logger.info('total unique users: %d' % unique_users.shape[0])

    for c in column_types:
        if ignore_columns_starting_with and c.startswith(
                tuple(ignore_columns_starting_with)):
            ignored_columns.append(c)
            ignored_columns_reason[c] = 'ignored by prefix'

    # 'user' is the target and 'is_anomaly' is excluded as well, so neither is used as a feature.
    ignored_columns.append('user')
    ignored_columns.append('is_anomaly')
    columns_to_keep = [
        i for i in list(train_df.columns) if i not in ignored_columns
    ]

    # End of configuration
    # Preparation of the variables
    classification_types = [
        "meta_binary_tree_classifier", "random", "huffman", "balanced-tree",
        "meta-binary-tree-encoding", "standard-classifier"
    ]
    # Template: one empty result list per classification type (deep-copied for each use).
    standard_dictionary = {}
    for i in classification_types:
        standard_dictionary[i] = []
    metrics_dict = {}
    for metric in METRICS:
        metrics_dict[metric] = deepcopy(standard_dictionary)
    training_time_dict = deepcopy(standard_dictionary)
    prediction_time_dict = deepcopy(standard_dictionary)
    number_of_users = [i for i in range(min_users, max_users + 1, step)]
    logger.info("number_of_users = " + str(number_of_users))
    for n_users in number_of_users:
        logger.info("____________________________________")
        logger.info("DATA FOR %d CLASSES" % n_users)
        # Per-fold results for the current number of users.
        n_metrics_dict = {}
        for metric in METRICS:
            n_metrics_dict[metric] = deepcopy(standard_dictionary)
        n_train_time = deepcopy(standard_dictionary)
        n_predict_time = deepcopy(standard_dictionary)
        rf = RandomForest(seed=h2o_seed,
                          ntrees=cfg.getint('random_forest', 'ntrees'),
                          max_depth=cfg.getint('random_forest', 'max_depth'),
                          categorical_encoding=cfg.get('random_forest',
                                                       'categorical_encoding'),
                          nbins_cats=cfg.getint('random_forest', 'nbins_cats'),
                          histogram_type=cfg.get('random_forest',
                                                 'histogram_type'))

        for i in range(n_folds):
            logger.info("++++++++++++++++++++++++++++++++++")
            logger.info("Fold %d" % (i + 1))
            # Draw n_users distinct users and keep only their rows in both frames.
            fold_users = np.random.choice(unique_users, n_users, replace=False)
            x_train_fold = train_df.loc[train_df['user'].isin(
                fold_users)].reset_index(drop=True)
            x_test_fold = test_df.loc[test_df['user'].isin(
                fold_users)].reset_index(drop=True)
            y_train = x_train_fold['user']
            y_test = x_test_fold['user']
            x_train_fold = x_train_fold[columns_to_keep]
            x_test_fold = x_test_fold[columns_to_keep]
            # Redraw the users until the test fold is not empty.
            # NOTE(review): this loop has no retry limit; it would not end if no draw yields test rows -- confirm
            # that the data guarantees this cannot happen.
            while len(y_test) == 0:
                fold_users = np.random.choice(unique_users,
                                              n_users,
                                              replace=False)
                x_train_fold = train_df.loc[train_df['user'].isin(
                    fold_users)].reset_index(drop=True)
                x_test_fold = test_df.loc[test_df['user'].isin(
                    fold_users)].reset_index(drop=True)
                y_train = x_train_fold['user']
                y_test = x_test_fold['user']
                x_train_fold = x_train_fold[columns_to_keep]
                x_test_fold = x_test_fold[columns_to_keep]

            temp_metrics_dict, train, test = fold_prediction_result(
                x_train_fold, y_train, x_test_fold, y_test,
                classification_types, rf)
            for classification in classification_types:
                for metric in METRICS:
                    n_metrics_dict[metric][classification].append(
                        temp_metrics_dict[metric][classification])
                n_train_time[classification].append(train[classification])
                n_predict_time[classification].append(test[classification])
            # Remove all objects from the H2O cluster before the next fold.
            h2o.remove_all()
        # Average the fold results for this number of users and append them to the overall series.
        for classification in classification_types:
            logger.info("***___***___***___***___***___")
            logger.info("Average data for %s" % classification)
            for metric in METRICS:
                avg = np.average(n_metrics_dict[metric][classification])
                metrics_dict[metric][classification].append(avg)
                logger.info("Average %s for %s for %d users: %2.2f" %
                            (metric, classification, n_users, avg))
            avg = np.average(n_train_time[classification])
            training_time_dict[classification].append(avg)
            logger.info(
                "Average training time for %s for %d users : %d min %d s " %
                (classification, n_users, avg // 60, avg % 60))
            avg = np.average(n_predict_time[classification])
            prediction_time_dict[classification].append(avg)
            logger.info(
                "Average prediction time for %s for %d users : %d min %d s " %
                (classification, n_users, avg // 60, avg % 60))
    for metric in METRICS:
        logger.info("%s = %s" % (metric, metrics_dict[metric]))
    logger.info("training_time = " + str(training_time_dict))
    logger.info("prediction_time = " + str(prediction_time_dict))
    # Timings are plotted together with the metrics below.
    metrics_dict["training time (seconds)"] = training_time_dict
    metrics_dict["prediction time (seconds)"] = prediction_time_dict

    # Plot generation
    # One figure per classification type, with one subplot per metric, saved under the type's name.
    for classification in classification_types:
        fig = plt.figure(figsize=(15, 30))
        for i, score in enumerate(metrics_dict.keys()):
            ax = fig.add_subplot(len(set(metrics_dict)) // 2 + 1, 2, i + 1)
            ax.plot(number_of_users, metrics_dict[score][classification])
            plt.title("Classification %s for %s" % (score, classification))
            plt.xlabel("Number of classes")
            plt.ylabel(score)
        plt.savefig(path.join(out_folder, classification))
        plt.close(fig)

    # Combined figure "all": one subplot per metric with one line per classification type.
    fig = plt.figure(figsize=(15, 30))
    for i, score in enumerate(metrics_dict.keys()):
        ax = fig.add_subplot(len(set(metrics_dict)) // 2 + 1, 2, i + 1)
        for classification in classification_types:
            ax.plot(number_of_users, metrics_dict[score][classification])
        plt.title("Classification %s" % score)
        plt.legend(classification_types, loc="upper right")
        plt.xlabel("Number of classes")
        plt.ylabel(score)
    plt.savefig(path.join(out_folder, "all"))
    plt.close(fig)