Example #1
0
def test_ParamOpt():
    """Smoke-test ParamOpt.fit on a decision tree over the iris dataset.

    Runs the identical optimization twice, varying only ``n_jobs`` (serial
    vs. two workers), and prints the resulting train-set score each time.
    """
    from sklearn.datasets import load_iris
    data = load_iris()
    y = data.target
    x = data.data

    # Original code duplicated the whole setup for n_jobs=1 and n_jobs=2;
    # a loop keeps the two runs guaranteed-identical except for n_jobs.
    for n_jobs in (1, 2):
        clf_pack = ClassifierPack(['skDecisionTreeClf'])
        dtree = clf_pack['skDecisionTreeClf']

        opt = ParamOpt(cv=10, n_iter=10, n_jobs=n_jobs)
        dtree = opt.fit(dtree, x, y)

        train_score = dtree.score(x, y)
        print(train_score)
Example #2
0
def test_pickle_clf_pack():
    """Fit a single-classifier pack, pickle it, reload it, and verify that
    the reloaded pack still produces a score_pack."""
    pkl_path = './test_pickle_clf_pack.pkl'

    pack = ClassifierPack(['skMLPClf'])
    pack.fit(train_Xs, train_Ys)

    print(pack.score_pack(train_Xs, train_Ys))
    pack.dump(pkl_path)

    # Round-trip through disk and score again with the reloaded pack.
    pack = pack.load(pkl_path)
    print(pack.score_pack(train_Xs, train_Ys))
Example #3
0
def test_HyperOpt_space_with_data():
    """Optimize skMLPClf's HyperOpt search space serially and then in
    parallel, passing the titanic data pack to fit_fn via feed_kwargs."""
    data_pack = DatasetPackLoader().load_dataset('titanic')
    clf_name = 'skMLPClf'
    clf_pack = ClassifierPack()

    train_set = data_pack['train']

    space = clf_pack[clf_name].HyperOpt_space
    opt = HyperOpt()

    # Serial run first, then the parallel code path over the same space;
    # each call builds its own DummyDatasetPack from the data pack.
    opt.fit_serial(fit_fn, space, 10, feed_args=(),
                   feed_kwargs={'data_pack': data_pack.to_DummyDatasetPack()})
    pprint(opt.best_loss)
    pprint(opt.best_param)

    opt.fit_parallel(fit_fn, space, 10, feed_args=(),
                     feed_kwargs={'data_pack': data_pack.to_DummyDatasetPack()})
    pprint(opt.best_loss)
    pprint(opt.best_param)
def test_wrapperRandomizedSearchCV():
    """Randomized-search skMLPClf hyperparameters on the titanic train set
    and print the best cross-validation score.

    Continuous parameters are given as scipy.stats distributions (sampled
    by RandomizedSearchCV); discrete ones as plain candidate lists.
    """
    data_pack = DatasetPackLoader().load_dataset('titanic')
    train_set = data_pack['train']

    train_Xs, train_Ys = train_set.full_batch(['Xs', 'Ys'])

    clf_pack = ClassifierPack()
    clf = clf_pack['skMLPClf']

    from scipy import stats

    tuning_distributions = {
        'activation': ['relu'],
        'alpha': stats.expon(scale=0.001),
        'max_iter': [1000],
        'learning_rate': ['constant', 'invscaling', 'adaptive'],
    }

    param_distributions = tuning_distributions
    search = wrapperRandomizedSearchCV(clf, param_distributions, n_iter=10)
    search.fit(train_Xs, train_Ys)
    pprint(search.best_score_)
    # NOTE: the original ended with a redundant `pass` after real
    # statements; it was dead code and has been removed.
Example #5
0
def test_make_FoldingHardVote():
    """Exercise the FoldingHardVote ensemble wrapper: fit it on the train
    set, then run predict / predict_proba / predict_bincount / score."""
    pack = ClassifierPack(
        ['skMLPClf', "skBernoulli_NBClf", "skDecisionTreeClf"])
    vote_clf = pack.to_FoldingHardVote()
    vote_clf.fit(train_Xs, train_Ys)

    predict = vote_clf.predict(sample_Xs)
    print(f'predict {predict}')

    proba = vote_clf.predict_proba(sample_Xs)
    print(f'proba {proba}')

    predict_bincount = vote_clf.predict_bincount(sample_Xs)
    print(f'predict_bincount {predict_bincount}')

    score = vote_clf.score(sample_Xs, sample_Ys)
    print(f'score {score}')
Example #6
0
def test_wrapperclfpack_HyperOpt_parallel():
    """Run ClassifierPack.HyperOptSearch in parallel mode on titanic, dump
    the optimization attributes, then score on the held-out split."""
    data_pack = DatasetPackLoader().load_dataset('titanic')
    clf_name = 'skMLPClf'
    clf_pack = ClassifierPack()

    full_train = data_pack['train']
    full_train.shuffle()
    train_set, valid_set = full_train.split()
    train_Xs, train_Ys = train_set.full_batch(['Xs', 'Ys'])
    valid_Xs, valid_Ys = valid_set.full_batch(['Xs', 'Ys'])

    clf_pack.HyperOptSearch(train_Xs, train_Ys, n_iter=3, parallel=True)
    # Dump every HyperOpt result attribute exposed on the pack.
    for result in (clf_pack.optimize_result,
                   clf_pack.HyperOpt_best_loss,
                   clf_pack.HyperOpt_best_params,
                   clf_pack.HyperOpt_best_result,
                   clf_pack.HyperOpt_opt_info):
        pprint(result)

    pprint(clf_pack.score_pack(valid_Xs, valid_Ys))
Example #7
0
def test_ClassifierPack():
    """Basic ClassifierPack smoke test: fit three classifiers, then run
    predict, predict_proba, score and score_pack on the valid set."""
    pack = ClassifierPack(
        ['skMLPClf', "skBernoulli_NBClf", "skDecisionTreeClf"])
    pack.fit(train_Xs, train_Ys)

    head_Xs = valid_Xs[:2]
    print('predict', pack.predict(head_Xs))
    print('predict_proba', pack.predict_proba(head_Xs))

    print('test score', pack.score(valid_Xs, valid_Ys))

    print('score pack', pack.score_pack(valid_Xs, valid_Ys))
Example #8
0
def test_make_stackingCVClf():
    """Build a stacking-CV ensemble with skBernoulli_NBClf as the meta
    classifier, then smoke-test predict / predict_proba / score."""
    pack = ClassifierPack(
        ['skMLPClf', "skBernoulli_NBClf", "skDecisionTreeClf"])
    stacking = pack.to_stackingCVClf(pack.pack["skBernoulli_NBClf"])
    stacking.fit(train_Xs, train_Ys)

    head_Xs = valid_Xs[:4]

    predict = stacking.predict(head_Xs)
    print(f'predict {predict}')

    proba = stacking.predict_proba(head_Xs)
    print(f'proba {proba}')

    score = stacking.score(valid_Xs, valid_Ys)
    print(f'score {score}')
Example #9
0
def fit_clf(params):
    """HyperOpt objective: train skMLPClf built from *params* on a fresh
    shuffled train/valid split and return the negated validation score
    (negated because the optimizer minimizes)."""
    full_set = data_pack['train']
    full_set.shuffle()
    train_split, valid_split = full_set.split()
    train_Xs, train_Ys = train_split.full_batch(['Xs', 'Ys'])
    valid_Xs, valid_Ys = valid_split.full_batch(['Xs', 'Ys'])

    # Use the pack's skMLPClf only as a template: re-instantiate its class
    # with the sampled hyperparameters.
    template = ClassifierPack()['skMLPClf']
    clf = template.__class__(**params)
    clf.fit(train_Xs, train_Ys)

    return -clf.score(valid_Xs, valid_Ys)
Example #10
0
    def fn(params, feed_args, feed_kwargs):
        """Objective for HyperOpt: build skMLPClf from *params*, train it on
        a split of the data pack delivered via feed_kwargs, and return the
        negated validation score (the optimizer minimizes)."""
        full_set = feed_kwargs['data_pack']['train']

        train_split, valid_split = full_set.split()
        train_Xs, train_Ys = train_split.full_batch(['Xs', 'Ys'])
        valid_Xs, valid_Ys = valid_split.full_batch(['Xs', 'Ys'])

        # The pack's skMLPClf serves only as a template; re-instantiate its
        # class with the sampled hyperparameters.
        base = ClassifierPack()['skMLPClf']
        clf = base.__class__(**params)
        clf.fit(train_Xs, train_Ys)

        return -clf.score(valid_Xs, valid_Ys)
Example #11
0
    def train_models(self, cache=True, path='./models.pkl'):
        """Train one ClassifierPack per (p_type, label) pair and pickle the lot.

        For every p_type in ``self.p_types``, each column in that p_type is
        treated in turn as the label; the remaining ``self.x_cols`` (minus all
        of the p_type's own columns) are the features. Results are stored as
        ``{str(p_type): {y_col: ClassifierPack}}`` and dumped to *path*.

        :param cache: if True and *path* exists, return the pickled result
            instead of retraining.
        :param path: pickle file used for both the cache check and the dump.
        :return: dict of trained classifier packs keyed by p_type string,
            then by label column.
        :raises ValueError: if a pack produces an empty train score.
        """
        p_types = self.p_types

        # Cache hit: skip training entirely and return the pickled packs.
        if os.path.exists(path) and cache:
            print('models cache found, use cache')
            clfs = load_pickle(path)
            return clfs

        print('train_model')

        full_df = self.full_set.to_DataFrame()
        print(full_df.info())
        print('load data')

        p_types_str = [str(val) for val in p_types]
        pprint(p_types)

        clf_dict = {}
        for p_type, p_type_str in list(zip(p_types, p_types_str)):
            print(f'train type : {p_type_str }')

            # Fresh copy per p_type: every column of this p_type is removed
            # from the feature set before training.
            x_cols = list(self.x_cols)

            for y_col in p_type:
                x_cols.remove(y_col)

            clfs = {}
            for y_col in p_type:
                print(f'train label : {y_col}')
                print(x_cols, y_col)

                # Build a shuffled train/test split for this single label.
                x_df = full_df[x_cols]
                y_df = full_df[[y_col]]
                dataset = BaseDataset(x=x_df, y=y_df)
                dataset.shuffle()
                train_set, test_set = dataset.split()
                train_xs, train_ys = train_set.full_batch(out_type='df')
                test_xs, test_ys = test_set.full_batch(out_type='df')
                # print(train_xs.info())

                # Candidate classifiers kept here as a toggle list; it is
                # overridden below to train only XGBoostClf.
                class_pack_names = [
                    'skMLPClf',
                    # 'skRandomForestClf',
                    # 'skExtraTreesClf',
                    # 'skAdaBoostClf',
                    # 'skGradientBoostingClf',
                    # 'skLinear_SVCClf',
                    # 'skBaggingClf',
                    #
                    'XGBoostClf',
                    # 'LightGBMClf',
                    # 'skRidgeCVClf',
                ]
                clf_name = 'XGBoostClf'
                class_pack_names = [clf_name]
                clf = ClassifierPack(class_pack_names)

                # opt = ParamOpt(cv=3, n_jobs=6, n_iter=10)
                # clf.pack[clf_name] = opt.fit(clf[clf_name], train_xs, train_ys)
                clf.fit(train_xs, train_ys)

                # An empty score dict means no classifier trained for this
                # label — fail loudly rather than pickling a broken pack.
                train_score = clf.score(train_xs, train_ys)
                test_score = clf.score(test_xs, test_ys)
                if len(train_score) == 0:
                    raise ValueError(f'{y_col} in {p_type} fail')
                pprint(train_score)
                pprint(test_score)
                score_pack = clf.score_pack(test_xs, test_ys)
                pprint(score_pack)
                print(clf.feature_importance)
                pprint(f'score train = {train_score},\n test = {test_score}')

                # Sanity-check a single prediction against the first test row.
                predict = clf.predict(train_xs[:1])[clf_name]
                print(f'predict = {predict}, test_ys= {test_ys[:1]}')

                clfs[y_col] = clf

            clf_dict[p_type_str] = clfs
            # exit()

        dump_pickle(clf_dict, path)

        return clf_dict
Example #12
0
def test_param_search():
    """param_search smoke test: search, score, then round-trip the found
    parameters through save/load, refit and score again."""
    pack = ClassifierPack(['skMLPClf'])
    pack.param_search(train_Xs, train_Ys)
    print(pack.score(train_Xs, train_Ys))
    print(pack.score(valid_Xs, valid_Ys))

    # Persist the searched params, reload them and confirm a refit model
    # still scores on both splits.
    params_path = pack.save_params()
    pack.load_params(params_path)
    pack.fit(train_Xs, train_Ys)
    print(pack.score(train_Xs, train_Ys))
    print(pack.score(valid_Xs, valid_Ys))
Example #13
0
def test_wrapper_pack_grid_search():
    """Grid-search a three-classifier pack, pickle it, reload it, and check
    that scoring and optimize_result survive the round trip."""
    pkl_path = './test_wrapper_pack_grid_search.pkl'
    pack = ClassifierPack(
        ['skMLPClf', "skBernoulli_NBClf", "skDecisionTreeClf"])
    pack.fit(train_Xs, train_Ys)
    pack.gridSearchCV(train_Xs, train_Ys)
    print(pack.score_pack(train_Xs, train_Ys))
    pack.dump(pkl_path)

    # Reload into a fresh pack and score on both splits.
    pack = ClassifierPack().load(pkl_path)
    print(pack.score_pack(train_Xs, train_Ys))
    print(pack.score_pack(valid_Xs, valid_Ys))

    print(pack.score(valid_Xs, valid_Ys))
    print(pack.optimize_result)
Example #14
0
def test_HyperOpt_parallel():
    """Exercise HyperOpt's FreeTrials handling end to end.

    Runs serial fits into fresh, concatenated, deep-copied and partially
    copied trials objects, then finishes with a larger serial run and a
    100-iteration parallel run. Prints best param/loss and trial counts
    after each stage.
    """
    clf_pack = ClassifierPack()
    clf = clf_pack['skMLPClf']
    space = clf.HyperOpt_space

    opt = HyperOpt()

    # Stage 1: two independent serial runs, each starting from empty trials.
    trials1 = opt.fit_serial(fit_clf, space, 1, trials=FreeTrials())
    pprint(opt.best_param)
    pprint(opt.best_loss)
    pprint(len(trials1))

    trials2 = opt.fit_serial(fit_clf, space, 2, trials=FreeTrials())
    pprint(opt.best_param)
    pprint(opt.best_loss)
    pprint(len(trials2))

    # Stage 2: concatenate the two histories and continue optimizing.
    trials3 = trials1.concat(trials2)
    pprint(len(trials3))

    trials3 = opt.fit_serial(fit_clf, space, 1, trials=trials3)
    pprint(opt.best_param)
    pprint(opt.best_loss)
    pprint(len(trials3))

    # Stage 3: two deep copies of trials3 evolve independently, then their
    # results are concatenated and optimized further.
    trials4 = opt.fit_serial(fit_clf, space, 2, trials=FreeTrials.deepcopy(trials3))
    pprint(opt.best_param)
    pprint(opt.best_loss)
    pprint(len(trials4))

    trials5 = opt.fit_serial(fit_clf, space, 3, trials=FreeTrials.deepcopy(trials3))
    pprint(opt.best_param)
    pprint(opt.best_loss)
    pprint(len(trials5))

    trials6 = trials4.concat(trials5)
    pprint(len(trials6))

    trials6 = opt.fit_serial(fit_clf, space, 2, trials=trials6)
    pprint(opt.best_param)
    pprint(opt.best_loss)
    pprint(len(trials6))

    # Stage 4: resume from a partial deep copy of the first five trials.
    # (The original also assigned `trials6[:5]` here, which was immediately
    # overwritten — dead code, removed.)
    partial = FreeTrials.partial_deepcopy(trials6, 0, 5)
    pprint(partial)
    pprint(len(partial))

    partial = opt.fit_serial(fit_clf, space, 4, trials=partial)
    pprint(opt.best_param)
    pprint(opt.best_loss)

    # Stage 5: a larger serial run, then the parallel code path.
    partial = opt.fit_serial(fit_clf, space, 30, trials=FreeTrials())
    pprint(opt.best_param)
    pprint(opt.best_loss)

    n = 100
    partial = opt.fit_parallel(fit_clf, space, n, trials=FreeTrials())
    pprint(opt.best_loss)
    pprint(len(opt.trials))