def test_iris_ensemble_iterative_regression() -> None:
    print('\ntest_iris_ensemble_iterative_regression():')
    X_train, X_test, y_train, y_test = train_test_split(
        X_iris, y_iris, test_size=5, random_state=42)
    cls = ELMClassifier(
        input_to_node=FeatureUnion([
            ('tanh', InputToNode(hidden_layer_size=10, random_state=42,
                                 input_activation='tanh')),
            ('bounded_relu', InputToNode(hidden_layer_size=10,
                                         random_state=42,
                                         input_activation='bounded_relu'))]),
        regressor=IncrementalRegression(alpha=.01),
        random_state=42)

    # Train incrementally on five equally sized chunks of the training set.
    for samples in np.split(np.arange(0, X_train.shape[0]), 5):
        cls.partial_fit(X_train[samples, :], y_train[samples],
                        classes=np.arange(3, dtype=int))

    y_predicted = cls.predict(X_test)
    for record in range(len(y_test)):
        print('predicted: {0} \ttrue: {1}'.format(y_predicted[record],
                                                  y_test[record]))
    print('score: {0}'.format(cls.score(X_test, y_test)))
    print('proba: {0}'.format(cls.predict_proba(X_test)))
    print('log_proba: {0}'.format(cls.predict_log_proba(X_test)))
    assert cls.score(X_test, y_test) >= 4. / 5.

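# A minimal sketch (hypothetical helper, not part of the test suite): the
# FeatureUnion above concatenates the hidden-layer states of the two
# InputToNode transformers, so the ELM effectively sees 10 + 10 = 20 nodes.
def sketch_feature_union_width() -> None:
    union = FeatureUnion([
        ('tanh', InputToNode(hidden_layer_size=10, random_state=42,
                             input_activation='tanh')),
        ('bounded_relu', InputToNode(hidden_layer_size=10, random_state=42,
                                     input_activation='bounded_relu'))])
    hidden_state = union.fit_transform(X_iris)
    assert hidden_state.shape[1] == 20
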
def test_elm_classifier_partial_fit() -> None:
    print('\ntest_elm_classifier_partial_fit():')
    X, y = load_digits(return_X_y=True)
    elm = ELMClassifier(hidden_layer_size=50)
    for k in range(10):
        elm.partial_fit(X[k:k + 1, :], y[k:k + 1], classes=np.arange(10))
    print(elm.__sizeof__())
    print(elm.hidden_layer_state)

    # Ridge does not implement partial_fit, so incremental training with it
    # must raise.
    elm = ELMClassifier(hidden_layer_size=50, regressor=Ridge())
    with pytest.raises(BaseException):
        for k in range(10):
            elm.partial_fit(X[k:k + 1, :], y[k:k + 1],
                            classes=np.arange(10))

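# A minimal sketch of why the Ridge variant above fails (assumption: the
# IncrementalRegression import path is pyrcn.linear_model): incremental ELM
# training requires a regressor that itself exposes partial_fit.
def sketch_partial_fit_support() -> None:
    from sklearn.linear_model import Ridge
    from pyrcn.linear_model import IncrementalRegression

    # IncrementalRegression supports chunk-wise updates; Ridge does not.
    assert hasattr(IncrementalRegression(), 'partial_fit')
    assert not hasattr(Ridge(), 'partial_fit')
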
def elm_bip(directory):
    self_name = 'elm_bip'
    logger = new_logger(self_name, directory=directory)
    X, y = get_mnist(directory)
    logger.info('Loaded MNIST successfully with {0} records'.format(
        X.shape[0]))
    label_encoder = LabelEncoder().fit(y)
    y_encoded = label_encoder.transform(y)

    # preprocessing
    X /= 255.
    pca = PCA(n_components=50).fit(X)
    X_preprocessed = pca.transform(X)
    logger.info('{0} features remaining after preprocessing.'.format(
        X_preprocessed.shape[1]))

    # prepare parameter grid
    param_grid = [{
        'hidden_layer_size': [500, 1000, 2000, 4000],
        'activation': ['tanh'],
        'alpha': [1e-5],
        'random_state': [42]
    }]

    # setup estimator
    estimator = ELMClassifier(input_to_node=BatchIntrinsicPlasticity(),
                              regressor=Ridge())

    # setup grid search
    cv = GridSearchCV(
        estimator=estimator,
        param_grid=param_grid,
        scoring='accuracy',
        n_jobs=1,
        verbose=2,
        refit=False,
        cv=[(np.arange(0, train_size), np.arange(train_size, 70000))])

    # run!
    cv.fit(X, y_encoded)
    logger.info('best parameters: {0} (score: {1})'.format(
        cv.best_params_, cv.best_score_))

    # refine results
    cv_results = cv.cv_results_
    del cv_results['params']

    # save results
    try:
        with open(os.path.join(directory,
                               '{0}.csv'.format(self_name)), 'w') as f:
            f.write(','.join(cv_results.keys()) + '\n')
            for row in list(map(list, zip(*cv_results.values()))):
                f.write(','.join(map(str, row)) + '\n')
    except PermissionError as e:
        print('Missing privileges: {0}'.format(e))

def elm_coates_stacked(directory):
    self_name = 'elm_coates_stacked'
    logger = new_logger(self_name, directory=directory)
    X, y = get_mnist(directory)
    logger.info('Loaded MNIST successfully with {0} records'.format(
        X.shape[0]))
    label_encoder = LabelEncoder().fit(y)
    y_encoded = label_encoder.transform(y)

    # scale X so X in [0, 1]
    X /= 255.

    # setup parameter grid
    param_grid = {
        'chunk_size': [10000],
        'input_scaling': np.logspace(start=-3, stop=1, base=10, num=3),
        'bias_scaling': [0.],  # np.logspace(start=-3, stop=1, base=10, num=6),
        'input_activation': ['relu'],
        'alpha': [1e-5],
        'random_state': [42]
    }

    # read input matrices from files
    list_filepaths = []
    predefined_input_weights = np.empty((784, 0))
    for filepath in glob.glob(os.path.join(directory,
                                           '*kmeans1*matrix.npy')):
        logger.info('matrix file found: {0}'.format(filepath))
        list_filepaths.append(filepath)
        predefined_input_weights = np.append(
            predefined_input_weights, np.load(filepath), axis=1)

    # setup estimator
    estimator = ELMClassifier(
        PredefinedWeightsInputToNode(
            predefined_input_weights=predefined_input_weights),
        IncrementalRegression())
    logger.info('Estimator params: {0}'.format(
        estimator.get_params().keys()))

    # setup grid search
    cv = GridSearchCV(
        estimator=estimator,
        param_grid=param_grid,
        scoring='accuracy',
        n_jobs=1,
        verbose=1,
        cv=[(np.arange(0, train_size), np.arange(train_size, 70000))])

    # run!
    cv.fit(X, y_encoded)
    cv_best_params = cv.best_params_
    del cv_best_params['input_to_nodes__predefined_input_weights']

    # refine best params
    logger.info('best parameters: {0} (score: {1})'.format(
        cv_best_params, cv.best_score_))

    # refine results
    cv_results = cv.cv_results_
    del cv_results['params']
    del cv_results['param_input_to_nodes__predefined_input_weights']

    # save results
    try:
        with open(os.path.join(directory,
                               '{0}.csv'.format(self_name)), 'w') as f:
            f.write(','.join(cv_results.keys()) + '\n')
            for row in list(map(list, zip(*cv_results.values()))):
                f.write(','.join(map(str, row)) + '\n')
    except PermissionError as e:
        print('Missing privileges: {0}'.format(e))

    if not list_filepaths:
        logger.warning('no input weights matrices found')
        return

def elm_coates(directory):
    self_name = 'elm_coates'
    logger = new_logger(self_name, directory=directory)
    X, y = get_mnist(directory)
    logger.info('Loaded MNIST successfully with {0} records'.format(
        X.shape[0]))
    label_encoder = LabelEncoder().fit(y)
    y_encoded = label_encoder.transform(y)
    filepath_label_encoder = os.path.join(
        directory, 'label_encoder_{0}.pickle'.format(self_name))

    # save label_encoder
    try:
        with open(filepath_label_encoder, 'wb') as f:
            pickle.dump(label_encoder, f)
    except Exception as e:
        logger.error('Unexpected error: {0}'.format(e))
        exit(1)

    # scale X so X in [0, 1]
    X /= 255.

    X_train, X_test, y_train, y_test = (X[:train_size, ...], X[train_size:],
                                        y_encoded[:train_size],
                                        y_encoded[train_size:])
    csv_filepath = os.path.join(directory, '{0}.csv'.format(self_name))

    # read input matrices from files
    list_filepaths = []
    for filepath in glob.glob(
            os.path.join(directory, '*pca*+kmeans*_matrix.npy')):
        logger.info('matrix file found: {0}'.format(filepath))
        list_filepaths.append(filepath)
        filename = os.path.splitext(os.path.basename(filepath))[0]
        est_filepath = os.path.join(
            directory, 'est_coates-{0}.pickle'.format(filename))
        pred_filepath = os.path.join(
            directory, 'est_coates-{0}-predicted.npz'.format(filename))

        # only if files do not exist yet
        if (not os.path.isfile(csv_filepath)
                or not os.path.isfile(est_filepath)
                or not os.path.isfile(pred_filepath)):
            # setup estimator
            estimator = ELMClassifier(
                input_to_node=PredefinedWeightsInputToNode(
                    predefined_input_weights=np.load(filepath),
                    input_scaling=1.0,
                    bias_scaling=0.0,
                    input_activation='relu',
                    random_state=42),
                chunk_size=1000)
            logger.info('Estimator params: {0}'.format(
                estimator.get_params().keys()))

            # !run
            time_start = time.time()
            estimator.fit(X_train, y_train)
            time_fitted = time.time()
            y_pred = estimator.predict(X_test)
            time_predicted = time.time()
            # !run

            # results
            dict_results = estimator.get_params()
            dict_results.update({
                'filename': filename,
                'fit_time': time_fitted - time_start,
                'score_time': time_predicted - time_fitted,
                'score': accuracy_score(y_test, y_pred)
            })

            # drop data
            dict_results.pop('input_to_nodes__predefined_input_weights')
            dict_results.pop('input_to_nodes')
            dict_results.pop('regressor')
            logger.info('fitted time {1}, score on test set: {0}'.format(
                dict_results['score'], dict_results['fit_time']))

            # save estimator
            try:
                with open(est_filepath, 'wb') as f:
                    pickle.dump(estimator, f)
            except Exception as e:
                logger.error('Unexpected error: {0}'.format(e))
                exit(1)

            # save results
            try:
                if not os.path.isfile(csv_filepath):
                    with open(csv_filepath, 'a') as f:
                        f.write(','.join(dict_results.keys()))
                        f.write('\n')
                        f.write(','.join(
                            [str(item) for item in dict_results.values()]))
                        f.write('\n')
                else:
                    with open(csv_filepath, 'a') as f:
                        f.write(','.join(
                            [str(item) for item in dict_results.values()]))
                        f.write('\n')
            except PermissionError as e:
                print('Missing privileges: {0}'.format(e))

            # save prediction
            np.savez_compressed(
                pred_filepath,
                X_test=X_test,
                y_test=label_encoder.inverse_transform(y_test),
                y_pred=label_encoder.inverse_transform(y_pred))

    if not list_filepaths:
        logger.warning('no input weights matrices found')
        return

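# A minimal follow-up sketch (the `filename` argument is a placeholder for
# one of the matrix-file stems used above; os, pickle and np are assumed to
# be imported at module level): reload an estimator and the predictions that
# elm_coates() saves, and recompute the test accuracy from the stored arrays.
def sketch_reload_coates_results(directory, filename):
    est_filepath = os.path.join(
        directory, 'est_coates-{0}.pickle'.format(filename))
    pred_filepath = os.path.join(
        directory, 'est_coates-{0}-predicted.npz'.format(filename))
    with open(est_filepath, 'rb') as f:
        estimator = pickle.load(f)
    # allow_pickle=True in case the label arrays were stored as objects
    data = np.load(pred_filepath, allow_pickle=True)
    accuracy = np.mean(data['y_test'] == data['y_pred'])
    return estimator, accuracy
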
def elm_hidden_layer_size(directory):
    self_name = 'elm_hidden_layer_size'
    logger = new_logger(self_name, directory=directory)
    X, y = get_mnist(directory)
    logger.info('Loaded MNIST successfully with {0} records'.format(
        X.shape[0]))

    # encode y
    label_encoder = LabelEncoder().fit(y)
    y_encoded = label_encoder.transform(y)

    # scale X
    X /= 255.

    # split train test
    X_train, X_test, y_train, y_test = (X[:train_size, :], X[train_size:, :],
                                        y_encoded[:train_size],
                                        y_encoded[train_size:])

    # fan-out from paper
    fan_out = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 15, 20]

    # prepare parameter grids
    param_grid_basic = {
        'hidden_layer_size': 0,
        'input_scaling': 1.,
        'bias_scaling': 0.,
        'activation': 'relu',
        'chunk_size': 1000,
        'alpha': 1e-5,
        'random_state': 42
    }
    param_grid_pca = {
        'hidden_layer_size': 0,
        'input_scaling': 1.,
        'bias_scaling': 0.,
        'activation': 'relu',
        'chunk_size': 1000,
        'alpha': 1e-5,
        'random_state': 42
    }

    # setup estimator
    estimator = ELMClassifier()

    # basic
    try:
        # initialize filepath
        csv_filepath = os.path.join(directory,
                                    '{0}_basic.csv'.format(self_name))

        # initialize param dict
        param_dict_job = estimator.get_params().copy()
        param_dict_job.update(param_grid_basic)

        # initialize results dict
        results_dict_job = param_dict_job.copy()
        # add dummy results
        results_dict_job.update({'time_fit': 0, 'time_pred': 0, 'score': 0})

        # write header
        with open(csv_filepath, 'w') as f:
            writer = csv.DictWriter(f, fieldnames=results_dict_job.keys())
            writer.writeheader()

        for hls in 784 * np.array(fan_out):
            param_dict_job.update({'hidden_layer_size': hls})
            estimator.set_params(**param_dict_job)

            # run!
            time_start = time.time()
            estimator.fit(X_train, y_train)
            time_fit = time.time()
            y_pred = estimator.predict(X_test)
            time_pred = time.time()
            # run end!

            results_dict_job.update(estimator.get_params())
            results_dict_job.update({
                'time_fit': time_fit - time_start,
                'time_pred': time_pred - time_fit,
                'score': accuracy_score(y_test, y_pred)
            })
            logger.info('hidden_layer_size: {0}, score: {1}'.format(
                hls, results_dict_job['score']))

            with open(csv_filepath, 'a') as f:
                writer = csv.DictWriter(f,
                                        fieldnames=results_dict_job.keys())
                writer.writerow(results_dict_job)

            del estimator.input_to_node._hidden_layer_state
            with open(
                    os.path.join(directory,
                                 'elmc_hls{0}_basic.pickle'.format(hls)),
                    'wb') as f:
                pickle.dump(estimator, f)
    except MemoryError as e:
        logger.error('Memory error: {0}'.format(e))
    except PermissionError as e:
        logger.error('Missing privileges: {0}'.format(e))

    # preprocessing pca
    try:
        # initialize filepath
        csv_filepath = os.path.join(directory,
                                    '{0}_pca.csv'.format(self_name))

        # preprocessing
        pca50 = PCA(n_components=50).fit(X_train)
        X_train_pca50, X_test_pca50 = (pca50.transform(X_train),
                                       pca50.transform(X_test))
        pca100 = PCA(n_components=100).fit(X_train)
        X_train_pca100, X_test_pca100 = (pca100.transform(X_train),
                                         pca100.transform(X_test))
        list_dict_pca = [{
            'n_components': 50,
            'X_train': X_train_pca50,
            'X_test': X_test_pca50
        }, {
            'n_components': 100,
            'X_train': X_train_pca100,
            'X_test': X_test_pca100
        }]
        logger.info('Preprocessing successful!')

        # initialize param dict
        param_dict_job = estimator.get_params().copy()
        param_dict_job.update(param_grid_pca)

        # initialize results dict
        results_dict_job = param_dict_job.copy()
        # add dummy results
        results_dict_job.update({
            'time_fit': 0,
            'time_pred': 0,
            'score': 0,
            'pca_n_components': 0
        })

        # write header
        with open(csv_filepath, 'w') as f:
            writer = csv.DictWriter(f, fieldnames=results_dict_job.keys())
            writer.writeheader()

        for dict_pca in list_dict_pca:
            results_dict_job.update(
                {'pca_n_components': dict_pca['n_components']})
            for hls in np.concatenate(
                    (100 * np.array(fan_out), 784 * np.array(fan_out)),
                    axis=0):
                param_dict_job.update({'hidden_layer_size': hls})
                estimator.set_params(**param_dict_job)

                # run!
                time_start = time.time()
                estimator.fit(dict_pca['X_train'], y_train)
                time_fit = time.time()
                y_pred = estimator.predict(dict_pca['X_test'])
                time_pred = time.time()
                # run end!

                results_dict_job.update(estimator.get_params())
                results_dict_job.update({
                    'time_fit': time_fit - time_start,
                    'time_pred': time_pred - time_fit,
                    'score': accuracy_score(y_test, y_pred)
                })
                logger.info(
                    'n_components: {2}, hidden_layer_size: {0}, score:'
                    ' {1}'.format(hls, results_dict_job['score'],
                                  results_dict_job['pca_n_components']))

                with open(csv_filepath, 'a') as f:
                    writer = csv.DictWriter(
                        f, fieldnames=results_dict_job.keys())
                    writer.writerow(results_dict_job)

                with open(
                        os.path.join(
                            directory,
                            'elmc_hls{0}_pca{1}.pickle'.format(
                                hls, results_dict_job['pca_n_components'])),
                        'wb') as f:
                    pickle.dump(estimator, f)
    except MemoryError as e:
        logger.error('Memory error: {0}'.format(e))
    except PermissionError as e:
        logger.error('Missing privileges: {0}'.format(e))

def elm_preprocessed(directory):
    self_name = 'elm_preprocessed'
    logger = new_logger(self_name, directory=directory)
    X, y = get_mnist(directory)
    logger.info('Loaded MNIST successfully with {0} records'.format(
        X.shape[0]))
    label_encoder = LabelEncoder().fit(y)
    y_encoded = label_encoder.transform(y)

    # preprocessing
    X /= 255.
    pca = PCA(n_components=50).fit(X)
    X_preprocessed = pca.transform(X)
    logger.info('{0} features remaining after preprocessing.'.format(
        X_preprocessed.shape[1]))

    # train test split
    X_train, X_test, y_train, y_test = train_test_split(
        X_preprocessed, y_encoded, train_size=train_size, random_state=42)

    # prepare parameter grid
    param_grid = [{
        'hidden_layer_size': [500, 2000],
        'input_scaling': np.logspace(start=-3, stop=1, base=10, num=6),
        'bias_scaling': np.logspace(start=-3, stop=1, base=10, num=6),
        'input_activation': ['relu'],
        'alpha': [1e-5],
        'random_state': [42]
    }, {
        'hidden_layer_size': [2000],
        'input_scaling': np.logspace(start=-3, stop=1, base=10, num=6),
        'bias_scaling': np.logspace(start=-3, stop=1, base=10, num=6),
        'input_activation': ['tanh'],
        'alpha': [1e-5],
        'random_state': [42]
    }]

    # setup estimator
    estimator = ELMClassifier(regressor=Ridge())

    # setup grid search
    cv = GridSearchCV(
        estimator=estimator,
        param_grid=param_grid,
        scoring='accuracy',
        n_jobs=1,
        verbose=2,
        refit=False,
        cv=StratifiedShuffleSplit(n_splits=1, test_size=1 / 7,
                                  random_state=42))

    # run!
    cv.fit(X_train, y_train)
    logger.info('best parameters: {0} (score: {1})'.format(
        cv.best_params_, cv.best_score_))

    # refine results
    cv_results = cv.cv_results_
    del cv_results['params']

    # save results
    try:
        with open(os.path.join(directory, 'elm_preprocessed.csv'),
                  'w') as f:
            f.write(','.join(cv_results.keys()) + '\n')
            for row in list(map(list, zip(*cv_results.values()))):
                f.write(','.join(map(str, row)) + '\n')
    except PermissionError as e:
        print('Missing privileges: {0}'.format(e))

def elm_pca(directory):
    self_name = 'elm_pca'
    logger = new_logger(self_name, directory=directory)
    X, y = get_mnist(directory)
    logger.info('Loaded MNIST successfully with {0} records'.format(
        X.shape[0]))

    # scale X
    X /= 255.

    # split train test
    X_train, X_test, y_train, y_test = train_test_split(
        X[:train_size], y[:train_size], train_size=50000, random_state=42)

    # prepare parameter grid
    param_grid_basic = {
        'hidden_layer_size': 2000,
        'input_scaling': 1.,
        'bias_scaling': 0.,
        'input_activation': 'relu',
        'alpha': 1e-5,
        'random_state': 42
    }

    # setup estimator
    estimator = ELMClassifier(regressor=Ridge())

    # initialize filepath
    filepath = os.path.join(directory, '{0}_basic.csv'.format(self_name))

    # initialize param dict
    param_dict_job = estimator.get_params().copy()
    param_dict_job.update(param_grid_basic)

    # initialize results dict
    results_dict_job = param_dict_job.copy()
    # add dummy results
    results_dict_job.update({
        'time_fit': 0,
        'time_pred': 0,
        'score': 0,
        'pca_n_components': 0
    })

    # preprocessing pca
    try:
        # write header
        with open(filepath, 'w') as f:
            writer = csv.DictWriter(f, fieldnames=results_dict_job.keys())
            writer.writeheader()

        for pca_n_components in [10, 20, 50, 100, 200, 500, 784]:
            results_dict_job.update({'pca_n_components': pca_n_components})
            estimator.set_params(**param_dict_job)

            # preprocessing
            pca = PCA(n_components=pca_n_components).fit(X_train)
            X_train_pca, X_test_pca = (pca.transform(X_train),
                                       pca.transform(X_test))

            # run!
            time_start = time.time()
            estimator.fit(X_train_pca, y_train)
            time_fit = time.time()
            y_pred = estimator.predict(X_test_pca)
            time_pred = time.time()
            # run end!

            results_dict_job.update({
                'time_fit': time_fit - time_start,
                'time_pred': time_pred - time_fit,
                'score': accuracy_score(y_test, y_pred)
            })
            logger.info('pca.n_components_: {0}, score: {1}'.format(
                pca_n_components, results_dict_job['score']))

            with open(filepath, 'a') as f:
                writer = csv.DictWriter(f,
                                        fieldnames=results_dict_job.keys())
                writer.writerow(results_dict_job)
    except MemoryError as e:
        logger.error('Memory error: {0}'.format(e))
    except PermissionError as e:
        logger.error('Missing privileges: {0}'.format(e))
    except Exception as e:
        logger.error('Unexpected exception: {0}'.format(e))

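# A minimal follow-up sketch (assumes pandas is installed; it is not used
# elsewhere in this module): load the CSV written by elm_pca() and inspect
# how test accuracy and fitting time vary with the number of principal
# components.
def sketch_inspect_pca_results(directory):
    import pandas as pd

    df = pd.read_csv(os.path.join(directory, 'elm_pca_basic.csv'))
    print(df[['pca_n_components', 'score', 'time_fit']])
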
def elm_hyperparameters(directory):
    self_name = 'elm_hyperparameters'
    logger = new_logger(self_name, directory=directory)
    X, y = get_mnist(directory)
    logger.info('Loaded MNIST successfully with {0} records'.format(
        X.shape[0]))

    X = X / 255.
    label_encoder = LabelEncoder().fit(y)
    y_encoded = label_encoder.transform(y)
    X_train, _, y_train, _ = (X[:train_size, :], X[train_size:, :],
                              y_encoded[:train_size],
                              y_encoded[train_size:])

    # step 1: optimize input_scaling and bias_scaling
    param_grid = {
        'hidden_layer_size': [2000],
        'input_scaling': np.logspace(start=-2, stop=2, base=10, num=7),
        'bias_scaling': np.logspace(start=-2, stop=2, base=10, num=7),
        'input_activation': ['tanh'],
        'alpha': [1e-5],
        'random_state': [42]
    }
    estimator = ELMClassifier(regressor=Ridge())
    cv = GridSearchCV(estimator, param_grid, cv=5, n_jobs=-1,
                      scoring='accuracy')
    cv.fit(X_train, y_train)
    logger.info('best parameters: {0} (score: {1})'.format(
        cv.best_params_, cv.best_score_))

    cv_results = cv.cv_results_
    del cv_results['params']
    with open(os.path.join(directory,
                           '{0}_scaling.csv'.format(self_name)), 'w') as f:
        f.write(','.join(cv_results.keys()) + '\n')
        for row in list(map(list, zip(*cv_results.values()))):
            f.write(','.join(map(str, row)) + '\n')

    # step 2: optimize hidden_layer_size and input_activation
    param_grid = {
        'hidden_layer_size': [500, 1000, 2000, 4000],
        'input_scaling': [cv.best_params_['input_scaling']],
        'bias_scaling': [cv.best_params_['bias_scaling']],
        'input_activation': ['tanh', 'relu', 'bounded_relu', 'logistic',
                             'identity'],
        'alpha': [1e-5],
        'random_state': [42]
    }
    cv = GridSearchCV(estimator, param_grid, cv=5, n_jobs=-1,
                      scoring='accuracy')
    cv.fit(X_train, y_train)
    logger.info('best parameters: {0} (score: {1})'.format(
        cv.best_params_, cv.best_score_))

    cv_results = cv.cv_results_
    del cv_results['params']
    with open(os.path.join(directory,
                           '{0}_size.csv'.format(self_name)), 'w') as f:
        f.write(','.join(cv_results.keys()) + '\n')
        for row in list(map(list, zip(*cv_results.values()))):
            f.write(','.join(map(str, row)) + '\n')

    # step 3: optimize alpha
    param_grid = {
        'hidden_layer_size': [cv.best_params_['hidden_layer_size']],
        'input_scaling': [cv.best_params_['input_scaling']],
        'bias_scaling': [cv.best_params_['bias_scaling']],
        'input_activation': [cv.best_params_['input_activation']],
        'alpha': [.00001, .001, .1],
        'random_state': [42]
    }
    cv = GridSearchCV(estimator, param_grid, cv=5, n_jobs=1,
                      scoring='accuracy')
    cv.fit(X_train, y_train)
    logger.info('best parameters: {0} (score: {1})'.format(
        cv.best_params_, cv.best_score_))

    cv_results = cv.cv_results_
    del cv_results['params']
    with open(os.path.join(directory,
                           '{0}_alpha.csv'.format(self_name)), 'w') as f:
        f.write(','.join(cv_results.keys()) + '\n')
        for row in list(map(list, zip(*cv_results.values()))):
            f.write(','.join(map(str, row)) + '\n')

def test_elm_classifier_fit() -> None:
    print('\ntest_elm_classifier_fit():')
    X, y = load_digits(return_X_y=True)
    elm = ELMClassifier(hidden_layer_size=50)
    elm.fit(X, y)

def test_elm_get_params() -> None:
    print('\ntest_elm_get_params():')
    elm = ELMClassifier()
    elm_params = elm.get_params()
    print(elm_params)

def test_elm_classifier_no_valid_params() -> None:
    X, y = load_digits(return_X_y=True)
    with pytest.raises(TypeError):
        ELMClassifier(input_to_node=ELMRegressor()).fit(X, y)
    with pytest.raises(TypeError):
        ELMClassifier(regressor=InputToNode()).fit(X, y)

def test_elm_classifier_not_fitted() -> None:
    X, y = load_digits(return_X_y=True)
    with pytest.raises(NotFittedError):
        ELMClassifier(hidden_layer_size=50, verbose=True).predict(X)

initially_fixed_params = {'bias_scaling': 0.0,
                          'alpha': 1e-5,
                          'random_state': 42}

step1_params = {'input_scaling': loguniform(1e-5, 1e1)}
kwargs1 = {'random_state': 42, 'verbose': 1, 'n_jobs': -1, 'n_iter': 50,
           'scoring': 'accuracy'}
step2_params = {'bias_scaling': np.linspace(0.0, 1.6, 16)}
kwargs2 = {'verbose': 1, 'n_jobs': -1, 'scoring': 'accuracy'}

elm = ELMClassifier(regressor=Ridge(), **initially_fixed_params)

# The searches are defined similarly to the steps of a
# sklearn.pipeline.Pipeline:
searches = [('step1', RandomizedSearchCV, step1_params, kwargs1),
            ('step2', GridSearchCV, step2_params, kwargs2)]

# # Perform the sequential search

# In[ ]:

sequential_search = SequentialSearchCV(elm,
                                       searches=searches).fit(X_train,
                                                              y_train)

# # Extract the final results
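
# A minimal sketch of how the results could be inspected afterwards.
# Assumption (hedged): SequentialSearchCV collects per-step results in
# dicts such as `all_best_params_` and `all_best_score_`, keyed by the step
# names defined above; check the installed pyrcn version if these attribute
# names differ.

# In[ ]:

print(sequential_search.all_best_params_)
print(sequential_search.all_best_score_)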