def RepeatRealDatasetsDifferentRegressorsTest():
    """Call ``make_ensemble`` once for every entry of ``dataset_list``.

    Each loader in ``dataset_list`` is invoked to obtain a dataset, which is
    passed to ``make_ensemble`` together with
    ``"auto_repeat/auto_<name>.mat"`` (presumably the output path -- confirm
    against ``make_ensemble``) and ``plotting=False``.

    NOTE(review): this name is defined a second time further down the file;
    the later definition replaces this one when the module is loaded.
    """
    # ``items()`` exists on both Python 2 and Python 3 mappings, whereas
    # ``iteritems()`` raises AttributeError on a Python 3 dict.
    for name, func in dataset_list.items():
        print(name)
        dataset = func()
        make_ensemble(dataset,
                      "auto_repeat/auto_" + name + ".mat",
                      plotting=False)
def RealDatasetsDifferentRegressorsLargeTest():
    """Call ``make_ensemble`` with the ``'auto_large'`` ensemble on every dataset.

    Each loader in ``dataset_list`` is invoked to obtain a dataset, which is
    passed to ``make_ensemble`` together with
    ``"auto_large/auto_<name>.mat"`` (presumably the output path -- confirm
    against ``make_ensemble``), ``plotting=False``,
    ``ensemble_type='auto_large'`` and ``scale_data=True``.

    NOTE(review): this name is defined a second time further down the file;
    the later definition replaces this one when the module is loaded.
    """
    # ``items()`` exists on both Python 2 and Python 3 mappings, whereas
    # ``iteritems()`` raises AttributeError on a Python 3 dict.
    for name, func in dataset_list.items():
        print(name)
        dataset = func()
        make_ensemble(dataset,
                      "auto_large/auto_" + name + ".mat",
                      plotting=False,
                      ensemble_type='auto_large',
                      scale_data=True)
def RealDatasetsManualEnsembleTest():
    """Fit an ``'auto'`` ensemble on each dataset and save its predictions.

    For every ``(name, loader)`` pair in ``dataset_list``:

    1. Load the dataset and print its name and ``data.shape``.
    2. Build an ``EnsembleRegressor(type='auto', verbose=True)``.
    3. Skip ``'blog_feedback'`` and any dataset with fewer than
       ``train_size + 500`` targets, where ``train_size`` is
       ``200 * ensemble.regressor_count``.
    4. Split into train/test with ``random_state=0``, fit the ensemble with
       200 samples per regressor and no overlap, and predict on both splits.
    5. Save the predictions, targets, regressor labels and summary values
       to ``ManualEnsembleDatasets/<name>.mat``.

    NOTE(review): this name is defined a second time further down the file;
    the later definition replaces this one when the module is loaded.
    """
    # ``items()`` exists on both Python 2 and Python 3 mappings, whereas
    # ``iteritems()`` raises AttributeError on a Python 3 dict.
    for name, func in dataset_list.items():
        print(name + ":", end="")
        dataset = func()
        print(" X.shape = " + str(dataset.data.shape))
        ensemble = EnsembleRegressor(type='auto', verbose=True)  # alternative: 'auto_large'

        # Compare by value: ``is`` tests object identity and only matches a
        # string literal when the interpreter happens to intern both strings.
        if name == 'blog_feedback':
            # Skipped. An earlier configuration used 2810 for
            # samples_per_regressor, overlap and train_size on this dataset.
            continue

        samples_per_regressor = 200
        overlap = 0
        train_size = samples_per_regressor * ensemble.regressor_count

        # Skip datasets too small to leave 500 samples outside the training
        # split.
        if len(dataset.target) < train_size + 500:
            continue

        Xtrain, X, ytrain, y = model_selection.train_test_split(
            dataset.data,
            dataset.target,
            random_state=0,
            train_size=train_size)

        ensemble.fit(Xtrain,
                     ytrain,
                     samples_per_regressor=samples_per_regressor,
                     regressor_overlap=overlap)
        Ztrain = ensemble.predict(Xtrain)
        Z = ensemble.predict(X)

        sio.savemat(
            path.join('ManualEnsembleDatasets', name + '.mat'), {
                'names': ensemble.regressor_labels,
                'Z': Z,
                'y': y,
                'Ztrain': Ztrain,
                'ytrain': ytrain,
                # NOTE(review): both entries below store train_size, not the
                # samples_per_regressor / overlap values passed to fit().
                # Left as-is because consumers of the .mat files may rely on
                # it -- confirm whether this is intended.
                'samples_per_regressor': train_size,
                'regressor_samples_overlap': train_size,
                'Ey': np.mean(y),
                'Ey2': np.mean(y**2),
                'Description': ('Different Regressors (%s)' % name)
            })
def RealDatasetsLargeMLPEnsembleTest():
    """Fit an ``'mlp_large'`` ensemble on each large dataset and save results.

    For every ``(name, loader)`` pair in ``dataset_list``:

    1. Load the dataset and print its name.
    2. Skip datasets with fewer than 5500 targets, fewer than 5 columns in
       ``data``, or named ``'affairs'``.
    3. Split into train/test with ``random_state=0`` using a training size
       of 10000 for ``'blog_feedback'`` and 500 otherwise.
    4. Fit ``EnsembleRegressor(type='mlp_large', verbose=True)`` with
       ``samples_per_regressor`` and ``regressor_overlap`` both equal to the
       training size, then predict on both splits.
    5. Save the predictions, targets, regressor labels and summary values
       to ``ManualEnsembleDatasets/<name>_10mlp.mat``.

    NOTE(review): this name is defined a second time further down the file;
    the later definition replaces this one when the module is loaded.
    """
    # ``items()`` exists on both Python 2 and Python 3 mappings, whereas
    # ``iteritems()`` raises AttributeError on a Python 3 dict.
    for name, func in dataset_list.items():
        print(name)
        dataset = func()

        if len(dataset.target) < 5500:  # ignore datasets with fewer than 5500 samples
            continue
        if dataset.data.shape[1] < 5:  # ignore datasets with fewer than 5 covariates
            continue

        # Compare by value: ``is`` tests object identity and only matches a
        # string literal when the interpreter happens to intern both strings.
        if name == 'affairs':
            # Skipped before splitting so no work is thrown away. A one-hot
            # encoding of the targets (np_utils.to_categorical) was once
            # considered for this dataset.
            continue

        train_size = 10000 if name == 'blog_feedback' else 500

        Xtrain, X, ytrain, y = model_selection.train_test_split(
            dataset.data,
            dataset.target,
            random_state=0,
            train_size=train_size)

        ensemble = EnsembleRegressor(type='mlp_large', verbose=True)
        ensemble.fit(Xtrain,
                     ytrain,
                     samples_per_regressor=train_size,
                     regressor_overlap=train_size)
        Ztrain = ensemble.predict(Xtrain)
        Z = ensemble.predict(X)

        sio.savemat(
            path.join('ManualEnsembleDatasets', name + '_10mlp.mat'), {
                'names': ensemble.regressor_labels,
                'Z': Z,
                'y': y,
                'Ztrain': Ztrain,
                'ytrain': ytrain,
                'samples_per_regressor': train_size,
                'regressor_samples_overlap': train_size,
                'Ey': np.mean(y),
                'Ey2': np.mean(y**2),
                'Description': ('Different Regressors (%s)' % name)
            })
def RealDatasetsManualEnsembleTest():
    """Fit an ``'auto'`` ensemble on each dataset and save its predictions.

    For every ``(name, loader)`` pair in ``dataset_list``:

    1. Load the dataset and print its name and ``data.shape``.
    2. Build an ``EnsembleRegressor(type='auto', verbose=True)``.
    3. Skip ``'blog_feedback'`` and any dataset with fewer than
       ``train_size + 500`` targets, where ``train_size`` is
       ``200 * ensemble.regressor_count``.
    4. Split into train/test with ``random_state=0``, fit the ensemble with
       200 samples per regressor and no overlap, and predict on both splits.
    5. Save the predictions, targets, regressor labels and summary values
       to ``ManualEnsembleDatasets/<name>.mat``.

    NOTE(review): an earlier definition of this name exists higher up in the
    file; this later one is the definition that takes effect.
    """
    # ``items()`` exists on both Python 2 and Python 3 mappings, whereas
    # ``iteritems()`` raises AttributeError on a Python 3 dict.
    for name, func in dataset_list.items():
        print(name + ":", end="")
        dataset = func()
        print(" X.shape = " + str(dataset.data.shape))
        ensemble = EnsembleRegressor(type='auto', verbose=True)  # alternative: 'auto_large'

        # Compare by value: ``is`` tests object identity and only matches a
        # string literal when the interpreter happens to intern both strings.
        if name == 'blog_feedback':
            # Skipped. An earlier configuration used 2810 for
            # samples_per_regressor, overlap and train_size on this dataset.
            continue

        samples_per_regressor = 200
        overlap = 0
        train_size = samples_per_regressor * ensemble.regressor_count

        # Skip datasets too small to leave 500 samples outside the training
        # split.
        if len(dataset.target) < train_size + 500:
            continue

        # NOTE(review): presumably ``sklearn.cross_validation``, which newer
        # scikit-learn releases replace with ``sklearn.model_selection`` --
        # confirm against the file's imports before switching.
        Xtrain, X, ytrain, y = cross_validation.train_test_split(
            dataset.data, dataset.target, random_state=0, train_size=train_size)

        ensemble.fit(Xtrain, ytrain,
                     samples_per_regressor=samples_per_regressor,
                     regressor_overlap=overlap)
        Ztrain = ensemble.predict(Xtrain)
        Z = ensemble.predict(X)

        sio.savemat(path.join('ManualEnsembleDatasets', name + '.mat'), {
            'names': ensemble.regressor_labels,
            'Z': Z, 'y': y,
            'Ztrain': Ztrain, 'ytrain': ytrain,
            # NOTE(review): both entries below store train_size, not the
            # samples_per_regressor / overlap values passed to fit(). Left
            # as-is because consumers of the .mat files may rely on it --
            # confirm whether this is intended.
            'samples_per_regressor': train_size,
            'regressor_samples_overlap': train_size,
            'Ey': np.mean(y),
            'Ey2': np.mean(y ** 2),
            'Description': ('Different Regressors (%s)' % name)
        })
def RealDatasetsLargeMLPEnsembleTest():
    """Fit an ``'mlp_large'`` ensemble on each large dataset and save results.

    For every ``(name, loader)`` pair in ``dataset_list``:

    1. Load the dataset and print its name.
    2. Skip datasets with fewer than 5500 targets, fewer than 5 columns in
       ``data``, or named ``'affairs'``.
    3. Split into train/test with ``random_state=0`` using a training size
       of 10000 for ``'blog_feedback'`` and 500 otherwise.
    4. Fit ``EnsembleRegressor(type='mlp_large', verbose=True)`` with
       ``samples_per_regressor`` and ``regressor_overlap`` both equal to the
       training size, then predict on both splits.
    5. Save the predictions, targets, regressor labels and summary values
       to ``ManualEnsembleDatasets/<name>_10mlp.mat``.

    NOTE(review): an earlier definition of this name exists higher up in the
    file; this later one is the definition that takes effect.
    """
    # ``items()`` exists on both Python 2 and Python 3 mappings, whereas
    # ``iteritems()`` raises AttributeError on a Python 3 dict.
    for name, func in dataset_list.items():
        print(name)
        dataset = func()

        if len(dataset.target) < 5500:  # ignore datasets with fewer than 5500 samples
            continue
        if dataset.data.shape[1] < 5:  # ignore datasets with fewer than 5 covariates
            continue

        # Compare by value: ``is`` tests object identity and only matches a
        # string literal when the interpreter happens to intern both strings.
        if name == 'affairs':
            # Skipped before splitting so no work is thrown away. A one-hot
            # encoding of the targets (np_utils.to_categorical) was once
            # considered for this dataset.
            continue

        train_size = 10000 if name == 'blog_feedback' else 500

        # NOTE(review): presumably ``sklearn.cross_validation``, which newer
        # scikit-learn releases replace with ``sklearn.model_selection`` --
        # confirm against the file's imports before switching.
        Xtrain, X, ytrain, y = cross_validation.train_test_split(
            dataset.data, dataset.target, random_state=0, train_size=train_size)

        ensemble = EnsembleRegressor(type='mlp_large', verbose=True)
        ensemble.fit(Xtrain, ytrain,
                     samples_per_regressor=train_size,
                     regressor_overlap=train_size)
        Ztrain = ensemble.predict(Xtrain)
        Z = ensemble.predict(X)

        sio.savemat(path.join('ManualEnsembleDatasets', name + '_10mlp.mat'), {
            'names': ensemble.regressor_labels,
            'Z': Z, 'y': y,
            'Ztrain': Ztrain, 'ytrain': ytrain,
            'samples_per_regressor': train_size,
            'regressor_samples_overlap': train_size,
            'Ey': np.mean(y),
            'Ey2': np.mean(y ** 2),
            'Description': ('Different Regressors (%s)' % name)
        })
def RealDatasetsDifferentRegressorsLargeTest():
    """Call ``make_ensemble`` with the ``'auto_large'`` ensemble on every dataset.

    Each loader in ``dataset_list`` is invoked to obtain a dataset, which is
    passed to ``make_ensemble`` together with
    ``"auto_large/auto_<name>.mat"`` (presumably the output path -- confirm
    against ``make_ensemble``), ``plotting=False``,
    ``ensemble_type='auto_large'`` and ``scale_data=True``.

    NOTE(review): an earlier definition of this name exists higher up in the
    file; this later one is the definition that takes effect.
    """
    # ``items()`` exists on both Python 2 and Python 3 mappings, whereas
    # ``iteritems()`` raises AttributeError on a Python 3 dict.
    for name, func in dataset_list.items():
        print(name)
        dataset = func()
        make_ensemble(dataset, "auto_large/auto_" + name + ".mat", plotting=False,
                      ensemble_type='auto_large', scale_data=True)
def RepeatRealDatasetsDifferentRegressorsTest():
    """Call ``make_ensemble`` once for every entry of ``dataset_list``.

    Each loader in ``dataset_list`` is invoked to obtain a dataset, which is
    passed to ``make_ensemble`` together with
    ``"auto_repeat/auto_<name>.mat"`` (presumably the output path -- confirm
    against ``make_ensemble``) and ``plotting=False``.

    NOTE(review): an earlier definition of this name exists higher up in the
    file; this later one is the definition that takes effect.
    """
    # ``items()`` exists on both Python 2 and Python 3 mappings, whereas
    # ``iteritems()`` raises AttributeError on a Python 3 dict.
    for name, func in dataset_list.items():
        print(name)
        dataset = func()
        make_ensemble(dataset, "auto_repeat/auto_" + name + ".mat", plotting=False)