Exemple #1
0
def test_iris_ensemble_iterative_regression() -> None:
    """Train an ELM ensemble on Iris in five partial fits and check accuracy.

    The input-to-node stage is a ``FeatureUnion`` of a ``tanh`` and a
    ``bounded_relu`` ``InputToNode`` (10 hidden units each); the readout is
    an ``IncrementalRegression``. Five samples are held out for testing and
    the training set is fed to ``partial_fit`` in five equal index chunks.
    The test passes when at least 4 of the 5 held-out samples are
    classified correctly.
    """
    print('\ntest_iris_ensemble_iterative_regression():')
    X_train, X_test, y_train, y_test = train_test_split(
        X_iris, y_iris, test_size=5, random_state=42)

    # two differently activated hidden layers, concatenated feature-wise
    hidden_layers = FeatureUnion([
        ('tanh', InputToNode(hidden_layer_size=10, random_state=42,
                             input_activation='tanh')),
        ('bounded_relu', InputToNode(hidden_layer_size=10, random_state=42,
                                     input_activation='bounded_relu'))])
    cls = ELMClassifier(input_to_node=hidden_layers,
                        regressor=IncrementalRegression(alpha=.01),
                        random_state=42)

    # np.split requires the number of training samples to be divisible by 5
    train_indices = np.arange(0, X_train.shape[0])
    for batch in np.split(train_indices, 5):
        cls.partial_fit(X_train[batch, :], y_train[batch],
                        classes=np.arange(3, dtype=int))
    y_predicted = cls.predict(X_test)

    for idx, _ in enumerate(y_test):
        print('predicted: {0} \ttrue: {1}'
              .format(y_predicted[idx], y_test[idx]))

    print('score: {0}'.format(cls.score(X_test, y_test)))
    print('proba: {0}'.format(cls.predict_proba(X_test)))
    print('log_proba: {0}'.format(cls.predict_log_proba(X_test)))
    assert cls.score(X_test, y_test) >= 4./5.
Exemple #2
0
def elm_coates(directory):
    """Fit ELM classifiers whose input weights are read from ``.npy`` files.

    MNIST is loaded through ``get_mnist``, the labels are encoded with a
    ``LabelEncoder`` (which is pickled into ``directory``), the features are
    divided by 255 and the data is split at ``train_size``
    (NOTE(review): ``train_size`` is not defined in this function; it is
    presumably a module-level constant).

    For every file in ``directory`` matching ``*pca*+kmeans*_matrix.npy`` an
    ``ELMClassifier`` with ``PredefinedWeightsInputToNode`` is fitted and
    evaluated, unless the result CSV, the pickled estimator and the
    prediction archive for that matrix all exist already. Per matrix the
    function writes:

    * ``est_coates-<name>.pickle`` -- the fitted estimator,
    * one row in ``elm_coates.csv`` -- parameters, timings and accuracy,
    * ``est_coates-<name>-predicted.npz`` -- test data and predictions.

    Parameters
    ----------
    directory : str
        Working directory holding the weight matrices; all outputs are
        written here as well.

    Raises
    ------
    SystemExit
        With exit code 1 if the label encoder or an estimator cannot be
        pickled.
    """
    self_name = 'elm_coates'
    logger = new_logger(self_name, directory=directory)
    X, y = get_mnist(directory)
    logger.info('Loaded MNIST successfully with {0} records'.format(
        X.shape[0]))

    label_encoder = LabelEncoder().fit(y)
    y_encoded = label_encoder.transform(y)

    filepath_label_encoder = os.path.join(
        directory, 'label_encoder_{0}.pickle'.format(self_name))

    # save label_encoder
    try:
        with open(filepath_label_encoder, 'wb') as f:
            pickle.dump(label_encoder, f)
    except Exception as e:
        logger.error('Unexpected error: {0}'.format(e))
        # SystemExit is what exit(1) raises, but it does not depend on the
        # interactive helper injected by the ``site`` module.
        raise SystemExit(1)

    # scale X so X in [0, 1]
    X /= 255.

    X_train, X_test, y_train, y_test = (X[:train_size, ...], X[train_size:],
                                        y_encoded[:train_size],
                                        y_encoded[train_size:])

    csv_filepath = os.path.join(directory, '{0}.csv'.format(self_name))

    # read input matrices from files
    list_filepaths = []
    for filepath in glob.glob(
            os.path.join(directory, '*pca*+kmeans*_matrix.npy')):
        logger.info('matrix file found: {0}'.format(filepath))
        list_filepaths.append(filepath)
        filename = os.path.splitext(os.path.basename(filepath))[0]

        est_filepath = os.path.join(directory,
                                    'est_coates-{0}.pickle'.format(filename))
        pred_filpath = os.path.join(
            directory, 'est_coates-{0}-predicted.npz'.format(filename))

        # nothing to do if every output for this matrix exists already
        if (os.path.isfile(csv_filepath)
                and os.path.isfile(est_filepath)
                and os.path.isfile(pred_filpath)):
            continue

        # setup estimator
        estimator = ELMClassifier(
            input_to_node=PredefinedWeightsInputToNode(
                predefined_input_weights=np.load(filepath),
                input_scaling=1.0,
                bias_scaling=0.0,
                input_activation='relu',
                random_state=42),
            chunk_size=1000)
        logger.info('Estimator params: {0}'.format(
            estimator.get_params().keys()))

        # !run
        time_start = time.time()
        estimator.fit(X_train, y_train)
        time_fitted = time.time()
        y_pred = estimator.predict(X_test)
        time_predicted = time.time()
        # !run

        # results
        dict_results = estimator.get_params()
        dict_results.update({
            'filename': filename,
            'fit_time': time_fitted - time_start,
            'score_time': time_predicted - time_fitted,
            'score': accuracy_score(y_test, y_pred)
        })

        # drop entries that hold objects / weight matrices rather than
        # scalar settings. The parameter is named ``input_to_node`` (see the
        # constructor call above), so the keys must use that spelling; the
        # ``None`` default keeps a missing key from aborting the run after
        # the expensive fit.
        for key in ('input_to_node__predefined_input_weights',
                    'input_to_node',
                    'regressor'):
            dict_results.pop(key, None)

        logger.info('fitted time {1}, score on test set: {0}'.format(
            dict_results['score'], dict_results['fit_time']))

        # save estimator
        try:
            with open(est_filepath, 'wb') as f:
                pickle.dump(estimator, f)
        except Exception as e:
            logger.error('Unexpected error: {0}'.format(e))
            raise SystemExit(1)

        # save results; the header is only written when the file is new
        try:
            write_header = not os.path.isfile(csv_filepath)
            with open(csv_filepath, 'a') as f:
                if write_header:
                    f.write(','.join(dict_results.keys()))
                    f.write('\n')
                f.write(','.join(
                    [str(item) for item in dict_results.values()]))
                f.write('\n')
        except PermissionError as e:
            logger.error('Missing privileges: {0}'.format(e))

        # save prediction
        np.savez_compressed(pred_filpath,
                            X_test=X_test,
                            y_test=label_encoder.inverse_transform(y_test),
                            y_pred=label_encoder.inverse_transform(y_pred))

    if not list_filepaths:
        logger.warning('no input weights matrices found')
        return
Exemple #3
0
def elm_hidden_layer_size(directory):
    """Sweep the ELM hidden layer size on MNIST, with and without PCA.

    MNIST is loaded through ``get_mnist``, labels are encoded with a
    ``LabelEncoder``, the features are divided by 255 and the data is split
    at ``train_size`` (NOTE(review): ``train_size`` is not defined in this
    function; it is presumably a module-level constant).

    Two experiments are run with the same ``ELMClassifier`` instance:

    * *basic* -- hidden layer sizes ``784 * fan_out`` on the scaled input;
      one row per size goes to ``elm_hidden_layer_size_basic.csv`` and the
      estimator is pickled to ``elmc_hls<size>_basic.pickle``.
    * *pca* -- for 50 and 100 PCA components, hidden layer sizes
      ``100 * fan_out`` followed by ``784 * fan_out``; rows go to
      ``elm_hidden_layer_size_pca.csv`` and the estimator is pickled to
      ``elmc_hls<size>_pca<n_components>.pickle``.

    ``MemoryError`` and ``PermissionError`` raised inside an experiment are
    logged and end that experiment only.

    Parameters
    ----------
    directory : str
        Directory passed to ``get_mnist`` and ``new_logger``; all result
        files are written here.
    """
    self_name = 'elm_hidden_layer_size'
    logger = new_logger(self_name, directory=directory)
    X, y = get_mnist(directory)
    logger.info('Loaded MNIST successfully with {0} records'.format(
        X.shape[0]))

    # integer-encode the labels
    label_encoder = LabelEncoder().fit(y)
    y_encoded = label_encoder.transform(y)

    # scale features in place
    X /= 255.

    # leading train_size records are used for training, the rest for testing
    X_train, X_test = X[:train_size, :], X[train_size:, :]
    y_train, y_test = y_encoded[:train_size], y_encoded[train_size:]

    # fan-out from paper
    fan_out = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 15, 20])

    # parameter grids; both experiments share the same settings
    param_grid_basic = {
        'hidden_layer_size': 0,
        'input_scaling': 1.,
        'bias_scaling': 0.,
        'activation': 'relu',
        'chunk_size': 1000,
        'alpha': 1e-5,
        'random_state': 42
    }
    param_grid_pca = dict(param_grid_basic)

    estimator = ELMClassifier()

    # ---- experiment 1: raw (scaled) input ---------------------------------
    try:
        csv_filepath = os.path.join(directory,
                                    '{0}_basic.csv'.format(self_name))

        # estimator defaults overridden by the grid
        param_dict_job = {**estimator.get_params(), **param_grid_basic}

        # result row template: parameters plus placeholder measurements
        results_dict_job = dict(param_dict_job,
                                time_fit=0, time_pred=0, score=0)

        with open(csv_filepath, 'w') as f:
            csv.DictWriter(
                f, fieldnames=results_dict_job.keys()).writeheader()

        for hls in 784 * fan_out:
            param_dict_job['hidden_layer_size'] = hls
            estimator.set_params(**param_dict_job)

            # timed fit / predict
            t_begin = time.time()
            estimator.fit(X_train, y_train)
            t_fitted = time.time()
            y_pred = estimator.predict(X_test)
            t_predicted = time.time()

            results_dict_job.update(estimator.get_params())
            results_dict_job.update(
                time_fit=t_fitted - t_begin,
                time_pred=t_predicted - t_fitted,
                score=accuracy_score(y_test, y_pred))

            logger.info('hidden_layer_size: {0}, score: {1}'.format(
                hls, results_dict_job['score']))

            with open(csv_filepath, 'a') as f:
                csv.DictWriter(
                    f, fieldnames=results_dict_job.keys()
                ).writerow(results_dict_job)

            # drop the stored hidden layer state before pickling
            del estimator.input_to_node._hidden_layer_state

            pickle_path = os.path.join(
                directory, 'elmc_hls{0}_basic.pickle'.format(hls))
            with open(pickle_path, 'wb') as f:
                pickle.dump(estimator, f)
    except MemoryError as e:
        logger.error('Memory error: {0}'.format(e))
    except PermissionError as e:
        logger.error('Missing privileges: {0}'.format(e))

    # ---- experiment 2: PCA-reduced input ----------------------------------
    try:
        csv_filepath = os.path.join(directory, '{0}_pca.csv'.format(self_name))

        # project train and test data for each number of components
        list_dict_pca = []
        for n_components in (50, 100):
            pca = PCA(n_components=n_components).fit(X_train)
            list_dict_pca.append({
                'n_components': n_components,
                'X_train': pca.transform(X_train),
                'X_test': pca.transform(X_test)
            })
        logger.info('Preprocessing successful!')

        # current estimator parameters overridden by the grid
        param_dict_job = {**estimator.get_params(), **param_grid_pca}

        # result row template: parameters plus placeholder measurements
        results_dict_job = dict(param_dict_job,
                                time_fit=0, time_pred=0, score=0,
                                pca_n_components=0)

        with open(csv_filepath, 'w') as f:
            csv.DictWriter(
                f, fieldnames=results_dict_job.keys()).writeheader()

        hidden_layer_sizes = np.concatenate(
            (100 * fan_out, 784 * fan_out), axis=0)

        for dict_pca in list_dict_pca:
            results_dict_job['pca_n_components'] = dict_pca['n_components']
            for hls in hidden_layer_sizes:
                param_dict_job['hidden_layer_size'] = hls
                estimator.set_params(**param_dict_job)

                # timed fit / predict
                t_begin = time.time()
                estimator.fit(dict_pca['X_train'], y_train)
                t_fitted = time.time()
                y_pred = estimator.predict(dict_pca['X_test'])
                t_predicted = time.time()

                results_dict_job.update(estimator.get_params())
                results_dict_job.update(
                    time_fit=t_fitted - t_begin,
                    time_pred=t_predicted - t_fitted,
                    score=accuracy_score(y_test, y_pred))

                logger.info(
                    'n_components: {2}, hidden_layer_size: {0}, score:'
                    ' {1}'.format(hls, results_dict_job['score'],
                                  results_dict_job['pca_n_components']))

                with open(csv_filepath, 'a') as f:
                    csv.DictWriter(
                        f, fieldnames=results_dict_job.keys()
                    ).writerow(results_dict_job)

                pickle_path = os.path.join(
                    directory, 'elmc_hls{0}_pca{1}.pickle'.format(
                        hls, results_dict_job['pca_n_components']))
                with open(pickle_path, 'wb') as f:
                    pickle.dump(estimator, f)
    except MemoryError as e:
        logger.error('Memory error: {0}'.format(e))
    except PermissionError as e:
        logger.error('Missing privileges: {0}'.format(e))
Exemple #4
0
def elm_pca(directory):
    """Evaluate an ELM classifier on MNIST for several PCA dimensionalities.

    MNIST is loaded through ``get_mnist`` and divided by 255. The first
    ``train_size`` records (NOTE(review): ``train_size`` is not defined in
    this function; it is presumably a module-level constant) are split with
    ``train_test_split`` into 50000 training samples and a held-out
    remainder. For each number of PCA components in
    ``[10, 20, 50, 100, 200, 500, 784]`` the data is projected, an
    ``ELMClassifier`` with a ``Ridge`` regressor is fitted and scored, and
    one row (parameters, timings, accuracy, component count) is appended to
    ``elm_pca_basic.csv`` in ``directory``.

    Any exception raised while writing the CSV or running the sweep is
    logged and ends the sweep; nothing is re-raised.

    Parameters
    ----------
    directory : str
        Directory passed to ``get_mnist`` and ``new_logger``; the result
        CSV is written here.
    """
    self_name = 'elm_pca'
    logger = new_logger(self_name, directory=directory)
    X, y = get_mnist(directory)
    logger.info('Loaded MNIST successfully with {0} records'.format(
        X.shape[0]))

    # scale features in place
    X /= 255.

    # hold out part of the leading train_size records for evaluation
    X_train, X_test, y_train, y_test = train_test_split(
        X[:train_size], y[:train_size], train_size=50000, random_state=42)

    # fixed estimator settings for the whole sweep
    param_grid_basic = {
        'hidden_layer_size': 2000,
        'input_scaling': 1.,
        'bias_scaling': 0.,
        'input_activation': 'relu',
        'alpha': 1e-5,
        'random_state': 42
    }

    estimator = ELMClassifier(regressor=Ridge())

    filepath = os.path.join(directory, '{0}_basic.csv'.format(self_name))

    # estimator defaults overridden by the grid
    param_dict_job = {**estimator.get_params(), **param_grid_basic}

    # result row template: parameters plus placeholder measurements
    results_dict_job = dict(param_dict_job,
                            time_fit=0, time_pred=0, score=0,
                            pca_n_components=0)

    try:
        # start a fresh CSV containing only the header
        with open(filepath, 'w') as f:
            csv.DictWriter(
                f, fieldnames=results_dict_job.keys()).writeheader()

        for pca_n_components in [10, 20, 50, 100, 200, 500, 784]:
            results_dict_job['pca_n_components'] = pca_n_components
            estimator.set_params(**param_dict_job)

            # project train and test data onto the principal components
            pca = PCA(n_components=pca_n_components).fit(X_train)
            X_train_pca = pca.transform(X_train)
            X_test_pca = pca.transform(X_test)

            # timed fit / predict
            t_begin = time.time()
            estimator.fit(X_train_pca, y_train)
            t_fitted = time.time()
            y_pred = estimator.predict(X_test_pca)
            t_predicted = time.time()

            results_dict_job.update(
                time_fit=t_fitted - t_begin,
                time_pred=t_predicted - t_fitted,
                score=accuracy_score(y_test, y_pred))

            logger.info('pca.n_components_: {0}, score: {1}'.format(
                pca_n_components, results_dict_job['score']))

            with open(filepath, 'a') as f:
                csv.DictWriter(
                    f, fieldnames=results_dict_job.keys()
                ).writerow(results_dict_job)
    except MemoryError as e:
        logger.error('Memory error: {0}'.format(e))
    except PermissionError as e:
        logger.error('Missing privileges: {0}'.format(e))
    except Exception as e:
        logger.error('Unexpected exception: {0}'.format(e))