def single_hidden_layer_train(X, Y, epoch=150, lr=0.1, hiddenlayer_neurons=3, activation='sigmoid'):
    """Train a network with one hidden layer using full-batch updates.

    X and Y are reshaped to 2-D ``(n_samples, -1)`` arrays. Weights and
    biases are drawn with ``np.random.uniform`` (default range [0, 1)), and
    two ``mms`` scalers with feature range (-1, 1) are fitted on X and Y and
    handed to ``NN_1hid`` together with the parameters.

    :param X: Input samples; the first axis indexes samples.
    :param Y: Target samples; the first axis indexes samples.
    :param epoch: Number of forward/backward passes over the whole data set.
    :param lr: Learning rate forwarded to ``NN_1hid.backward``.
    :param hiddenlayer_neurons: Number of units in the hidden layer.
    :param activation: Activation name stored in the network dictionary.
    :return: The trained ``NN_1hid`` instance.
    :raises IOError: If X and Y hold a different number of samples.
    """
    X = np.reshape(X, (len(X), -1))
    Y = np.reshape(Y, (len(Y), -1))
    inputlayer_neurons = X.shape[1]  # number of features in data set
    output_neurons = Y.shape[1]
    if X.shape[0] != Y.shape[0]:
        raise IOError(
            'The number of input samples ({}) is not equal to output samples ({})'
            .format(X.shape[0], Y.shape[0]))

    wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
    bh = np.random.uniform(size=(1, hiddenlayer_neurons))
    wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
    bout = np.random.uniform(size=(1, output_neurons))

    ipscaler = mms(feature_range=(-1, 1))
    ipscaler.fit(X)
    opscaler = mms(feature_range=(-1, 1))
    opscaler.fit(Y)

    net = dict(wh=wh, bh=bh, wout=wout, bout=bout, activation=activation,
               scaler=(ipscaler, opscaler))
    NNtask = NN_1hid(net)
    for _ in range(epoch):
        NNtask.forward(X)
        NNtask.backward(Y, lr)
    # One more forward pass so the instance reflects the final weights.
    NNtask.forward(X)
    # Return the trained instance; the original returned the NN_1hid class
    # itself, which threw away everything learned above.
    return NNtask
def validate_neural_network(self):
    """
    Was used to test the model.

    Fits the regressor on the rows whose year is 2016, then scores the rows
    whose year is 2017 in consecutive distance bins, printing R^2, MAE and
    MAPE for every bin. ``self.data``, ``self.train`` and ``self.test`` are
    deleted along the way; ``self.model``, the two transformers and
    ``self.distances`` are left on the instance.

    NOTE: the loop calls ``input()`` after printing the mean prediction, so
    every bin waits for the user to press Enter.
    """
    from sklearn import metrics
    from sklearn.preprocessing import MinMaxScaler as mms

    self.train = self.data[self.data['year'] == 2016]
    self.test = self.data[self.data['year'] == 2017]
    del self.data

    # Fit both scalers on the training year only.
    train_features = self.train[self.features]
    self.X_transformer = mms().fit(train_features)
    train_target = self.train['traveltime'].values.reshape(-1, 1)
    self.Y_transformer = mms().fit(train_target)
    scaled_features = self.X_transformer.transform(self.train[self.features])
    scaled_target = self.Y_transformer.transform(train_target)
    del self.train
    self.model = self.rgr.fit(scaled_features, scaled_target)
    del scaled_features, scaled_target

    # The smallest distinct distance is dropped before building the bins.
    distances = sorted(self.test['distance'].unique())[1:]
    number_samples, r2, mae, mape = [], [], [], []
    for lower, upper in zip(distances, distances[1:]):
        in_bin = (self.test['distance'] >= lower) & (self.test['distance'] < upper)
        test = self.test[in_bin]
        actual = test['traveltime']
        number_samples.append(len(test))
        preds = self.model.predict(self.X_transformer.transform(test[self.features]))
        # Undo the target scaling and flatten back to one value per row.
        unscaled = self.Y_transformer.inverse_transform(preds.reshape(-1, 1))
        real_preds = np.array([row[0] for row in unscaled])
        print(real_preds.mean())
        input()
        r2_score = metrics.r2_score(actual, real_preds)
        MAE = metrics.mean_absolute_error(actual, real_preds)
        MAPE = ((abs(actual - real_preds) / actual) * 100).mean()
        r2.append(r2_score)
        mae.append(MAE)
        mape.append(MAPE)
        print(r2_score, MAE, MAPE)

    self.distances = distances[:-1]
    del self.test
    # As before, these fail if the loop above never ran.
    del test
    del preds
def normalizeColumnsUsingMinMax(self, df, columnNames):
    """
    Normalize the data in the given columns using min-max scaling.

    :param df: Dataframe to process; the selected columns are overwritten
    :param columnNames: Names of columns to normalize
    :return: The same dataframe object that was passed in
    """
    # NOTE(review): this only works if ``mms`` is a function returning the
    # scaled values (e.g. sklearn's ``minmax_scale``). If it is the
    # ``MinMaxScaler`` class, the call below builds an unfitted scaler
    # instead of scaled data - confirm against the file's imports.
    selected = df[columnNames]
    df[columnNames] = mms(selected)
    return df
def build_neural_network(self):
    """
    Fit ``self.rgr`` on a random subset of ``self.data``.

    Each row is kept when a uniform random draw is below 0.5. Features and
    the ``traveltime`` target are min-max scaled with scalers stored as
    ``self.X_transformer`` / ``self.Y_transformer``; the fitted regressor is
    stored as ``self.model``. ``self.data`` and ``self.train`` are deleted
    before returning.
    """
    import numpy as np
    from sklearn.preprocessing import MinMaxScaler as mms

    keep_rows = np.random.rand(len(self.data)) < 0.5
    self.train = self.data[keep_rows]
    del keep_rows
    del self.data

    self.X_transformer = mms().fit(self.train[self.features])
    target = self.train['traveltime'].values.reshape(-1, 1)
    self.Y_transformer = mms().fit(target)

    scaled_inputs = self.X_transformer.transform(self.train[self.features])
    scaled_target = self.Y_transformer.transform(target)
    self.model = self.rgr.fit(scaled_inputs, scaled_target)
    print('Built')

    del scaled_inputs, scaled_target, target
    del self.train
def build_neural_network(self):
    """
    Fit ``self.rgr`` on a random subset of ``self.data``.

    On the last iteration of this (in the notebooks), the min-max scaler was
    replaced with a standard scaler for both X and Y.

    Each row is kept when a uniform random draw is below 0.5. The scalers
    end up in ``self.X_transformer`` / ``self.Y_transformer`` and the fitted
    regressor in ``self.model``; ``self.data`` and ``self.train`` are
    deleted before returning.
    """
    import numpy as np
    from sklearn.preprocessing import MinMaxScaler as mms

    sample_mask = np.random.rand(len(self.data)) < 0.5
    self.train = self.data[sample_mask]
    del sample_mask
    del self.data

    feature_frame = self.train[self.features]
    self.X_transformer = mms().fit(feature_frame)
    travel_times = self.train['traveltime'].values.reshape(-1, 1)
    self.Y_transformer = mms().fit(travel_times)

    self.model = self.rgr.fit(
        self.X_transformer.transform(self.train[self.features]),
        self.Y_transformer.transform(travel_times),
    )
    print('Built')

    del feature_frame, travel_times
    del self.train
def timeseries_scaling(X, is_training_data=True, list_of_transformers=None):
    """Scale a 3-D time-series array in two passes.

    Pass 1 fits a fresh ``mms`` scaler on every sample ``X[i]`` and replaces
    the sample with its scaled version. Pass 2 walks over axis 1 and applies
    one ``ss`` transformer per index to ``X_new[:, i]``: the transformer is
    fitted here when ``is_training_data`` is true, otherwise it is taken
    from ``list_of_transformers``.

    X.shape = (nsamples, timesteps, features)
    Only two argument combinations are supported: ``is_training_data=True``
    with ``list_of_transformers=None``, or ``is_training_data=False`` with a
    list returned by an earlier training call.

    Returns the scaled array and the list of per-index transformers.
    """
    per_sample = np.zeros_like(X)
    for sample_idx in range(X.shape[0]):
        per_sample[sample_idx] = mms().fit_transform(X[sample_idx])

    scaled = per_sample.copy()
    if is_training_data:
        # Training always starts from a fresh list, whatever was passed in.
        list_of_transformers = []

    for step in range(X.shape[1]):
        step_values = per_sample[:, step]
        if is_training_data:
            transformer = ss()
            transformer.fit(step_values)
            list_of_transformers.append(transformer)
        else:
            transformer = list_of_transformers[step]
        scaled[:, step] = transformer.transform(step_values)

    return scaled, list_of_transformers
def perform_iteration(current_gen_spectra, current_gen_conc, desired_spectra,
                      n_parents, n_offspring, mutation_rate, mutation_rate_2):
    """
    Perform one iteration of the GA algorithm.

    Inputs:
    - current_gen_spectra: The spectra of the current generation (batch).
      A 2D array with one row per sample in the generation and one column
      per spectrum datapoint.
    - current_gen_conc: The concentrations of the current generation
      (batch). A 2D array with one row per sample and one column per
      dimension, for example 3 columns when mixing red, blue and green dyes.
    - desired_spectra: The desired spectra. A 1D array with one entry per
      datapoint in the spectra.
    - n_parents: Integer which determines how many parents to create from
      the current generation.
    - n_offspring: Integer which determines how many offspring to create
      from the current generation.
    - mutation_rate: Float in the range 0-1 which determines how often a
      mutation occurs.
    - mutation_rate_2: Float in the range 0-1 which determines how often a
      mutation occurs.

    Outputs (exactly as returned by ``GA_algorithm``):
    - next_gen_conc: The concentrations of the next generation to be tested.
      A 2D array with n_offspring rows and one column per dimension.
    - median_fitness, max_fitness: the second and third values returned by
      ``GA_algorithm``.
    """
    # The module-level ``seed`` is re-applied on every call.
    np.random.seed(seed)

    # Scale on the transpose so every sample's spectrum is min-max scaled on
    # its own, then flip back to samples-by-datapoints.
    transposed = current_gen_spectra.T
    spectra_scaler = mms().fit(transposed)
    current_gen_spectra = spectra_scaler.transform(transposed).T
    desired_spectra = prepare_desired_spectra(desired_spectra)

    # Perform the genetic algorithm to determine the next generation.
    offspring_conc, fitness_median, fitness_max = GA_algorithm(
        current_gen_spectra, current_gen_conc, desired_spectra,
        n_parents, n_offspring, mutation_rate, mutation_rate_2)
    return offspring_conc, fitness_median, fitness_max
# Builds one min-max-scaled MLP travel-time model per consecutive stop pair
# of a single route and collects them (with their scalers) in ``models``.

# Imported before first use; the original only imported numpy inside the
# loop, after the line that already needed it.
import numpy

# Read the route definitions through a context manager so the handle is
# closed again.
with open('/home/student/dbanalysis/dbanalysis/resources/trimmed_routes.json',
          'r') as routes_file:
    routes = json.loads(routes_file.read())
route = routes['15'][1]
models = []
features = ['day', 'month', 'hour', 'weekend', 'vappr']

# Pairs (route[i], route[i + 1]) starting at index 1; the first stop of the
# route is never used as an origin.
for i in range(1, len(route) - 1):
    stopA = str(route[i])
    stopB = str(route[i + 1])
    print('Building for', stopA, 'to', stopB)
    df = stop_tools.stop_data(stopA, stopB)
    df['traveltime'] = df['actualtime_arr_to'] - df['actualtime_arr_from']
    df['weekend'] = df['day'] > 4
    print(df['traveltime'].mean())

    # Target and features get their own scaler so predictions can be
    # inverse-transformed later.
    Y = numpy.array([t for t in df['traveltime']]).reshape(-1, 1)
    transformer2 = mms().fit(Y)
    Y = transformer2.transform(Y)
    transformer1 = mms().fit(df[features])
    X = transformer1.transform(df[features])

    model = mlp(hidden_layer_sizes=(40, 40, 40)).fit(X, Y)
    models.append({
        'transformer': transformer1,
        'transformer2': transformer2,
        'model': model
    })
    # Free the per-pair data before loading the next pair.
    del df
    del X
    del Y

with open('/data/chained_models_neural.bin', 'wb') as handle:
    import pickle
# Wrap the collected columns in data frames and dump them to disk.
dh_data_x = pd.DataFrame(data=x_data_cols)
dh_data_y = pd.DataFrame(data=y_data_cols)
grand_set = pd.DataFrame(data=data_total)
grand_set.to_csv("grand_set", sep=',')
dh_data_x.to_csv("dh_data_x", sep='\t', index=False, index_label=False)
dh_data_y.to_csv("dh_data_y", sep='\t', index_label=False, index=False)
# Splitting & preprocessing data: 33 % held out, fixed random_state.
X_train, X_test, y_train, y_test = tts(dh_data_x, dh_data_y, test_size=0.33, random_state=101)
# The scaler is fitted on the training split only and then applied to both
# splits; column names and row indices are carried over.
scaler = mms()
scaler.fit(X_train)
X_train_scaled = pd.DataFrame(data=scaler.transform(X_train), columns=X_train.columns, index=X_train.index)
X_test_scaled = pd.DataFrame(data=scaler.transform(X_test), columns=X_test.columns, index=X_test.index)
# Creating feature columns
feature_cols = [
    tf.feature_column.numeric_column('Inverse_X'),
    tf.feature_column.numeric_column('Inverse_Y')
# NOTE(review): torch is imported twice below.
import torch
from collections import OrderedDict
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
import pytorch_lightning as pl
import matplotlib.pyplot as plt
# NOTE(review): ``pd`` is used here but no pandas import is visible in this
# chunk - confirm it is imported elsewhere.
data = pd.read_csv("./dataset/creditcard.csv")
# Remove the Time and Class columns in place.
data.drop(["Time", "Class"], axis=1, inplace=True)
cuda = True if torch.cuda.is_available() else False
from sklearn.preprocessing import MinMaxScaler as mms
# Scale every remaining column to the range (-1, 1), overwriting the frame.
num_scaler = mms(feature_range=(-1, 1))
columns = data.columns.tolist()
data[columns] = num_scaler.fit_transform(data[columns])
data_np = data.values
class TabularDataModule(pl.LightningDataModule):
    # Lightning data module holding the tabular data with its loader settings.
    def __init__(self, data, batch_size: int = 32, num_workers: int = 3):
        super().__init__()
        self.data = data
        self.batch_size = batch_size
        self.num_workers = num_workers
        # Size of the second axis of the data.
        self.dims = self.data.shape[1]
    def prepare_data(self, ):
#Self Organizing Map import pandas as pd import numpy as np import matplotlib.pyplot as plt #Importing dataset dataset = pd.read_csv('Credit_Card_Applications.csv') X = dataset.iloc[:, :-1].values y = dataset.iloc[:, -1].values #Feature Scaling from sklearn.preprocessing import MinMaxScaler as mms sc = mms(feature_range=(0, 1)) X = sc.fit_transform(X) #Training an SOM from minisom import MiniSom as ms som = ms(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5) som.random_weights_init(X) som.train_random(data=X, num_iteration=100) #Visualizing the SOM results from pylab import bone, pcolor, colorbar, plot, show bone() pcolor(som.distance_map().T) colorbar() markers = ['o', 's'] colors = ['r', 'g'] for i, x in enumerate(X): w = som.winner(x) plot(w[0] + 0.5, w[1] + 0.5, markers[y[i]], markeredgecolor=colors[y[i]], markerfacecolor='None',
def powertransform_func(x):  # x is 2d array
    """Fit a fresh ``pt`` transformer on ``x`` and return the transformed data."""
    return pt().fit_transform(x)


def timeseries_powertransformation(X):  # X is np.array, 3D
    """Apply ``powertransform_func`` to every ``X[i]`` in a process pool.

    Returns the per-sample results stacked back into one array.
    """
    # zip over a single generator yields 1-tuples, which is the argument
    # shape pool.starmap expects for a one-argument function.
    X_2D = [*zip(X[i] for i in range(X.shape[0]))]
    with mp.Pool() as pool:
        X_new = pool.starmap(powertransform_func, X_2D)
    return np.asarray(X_new)


# Per-sample pipeline: imputation, power transform, detrending, normalization.
imputer_per_sample = make_pipeline(ft(timeseries_imputation, validate=False))
preprocessor_per_sample = make_pipeline(imputer_per_sample, ft(timeseries_powertransformation, validate=False), ft(timeseries_detrending, validate=False), ft(timeseries_normalization, validate=False))
# Per-timestep pipeline: power transform followed by min-max scaling.
preprocessor_per_timestep = make_pipeline(pt(), mms())
# The string below holds disabled code and is still open at the end of this chunk.
""" ## Run this commented part only once, so you are able to save the pickled files. Then comment it out. # Read in all data in a single file all_input, labels, ids = convert_json_data_to_nparray(path_to_data, file_name, selected_features) all_input_test, labels_test, ids_test = convert_json_data_to_nparray(path_to_data, file_name_test, selected_features) # Change X and y to numpy.array in the correct shape. X = np.array(all_input) y = np.array([labels]).T print("The shape of X is (sample_size x time_steps x feature_num) = {}.".format(X.shape)) print("the shape of y is (sample_size x 1) = {}, because it is a binary classification.".format(y.shape))
# Assemble one feature row per test record, in the same column order for
# every row.
for i in range(len(offerid_te)):
    features_test.append([
        day_of_week_te[i], hour_of_day_te[i], minute_of_hour_te[i],
        second_of_minute_te[i], abs_time_te[i], siteid_te[i], offerid_te[i],
        category_te[i], merchant_te[i], countrycode_te[i], browserid_te[i],
        devid_te[i]
    ])
features_test = np.asarray(features_test)
# ``np.float`` was only an alias of the builtin ``float`` and was removed in
# NumPy 1.24; the builtin yields the same float64 dtype.
features_train = features_train.astype(float)
features_test = features_test.astype(float)
from sklearn.preprocessing import MinMaxScaler as mms
scalar = mms()
features_train = scalar.fit_transform(features_train)
# Reuse the min/max learned from the training data. Re-fitting on the test
# set (as before) maps the two sets onto different scales.
features_test = scalar.transform(features_test)
print(features_train)
print(features_test)


def random_forest(f_train, l_train, f_test):
    from sklearn.ensemble import RandomForestClassifier
    #from sklearn.grid_search import GridSearchCV
    #param={'criterion' : ('gini','entropy'),'min_samples_split':[2,5,10,15,20,25,30],'n_estimators':[100]}
    #svr=RandomForestClassifier()
    #clf=GridSearchCV(svr,param)
    clf = RandomForestClassifier()
    import time
    start_time = time.time()
def scaler(a: pd.DataFrame):
    """Fit a fresh ``mms`` scaler on ``a`` and return ``a`` transformed by it."""
    fitted = mms()
    fitted.fit(a)
    scaled = fitted.transform(a)
    return scaled
temp[2+team_index] = team_info[team] #"cheating features" if team == 'towerKills': temp[4+team_index] = team_info[team] if team == 'inhibitorKills': temp[6+team_index] = team_info[team] if team == 'winner' and team_info[team]: winner_team = team_index teams['data'].append(temp) teams['label'].append(winner_team) kf = KFold(len(teams['data']), n_folds=10) X = np.array(teams['data']) Y = np.array(teams['label']) mimas = mms() i = 0 max_acc = 0 max_k = 0 k = 8 acc_total = 0 for train, test in kf: X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test] guesses = [] # scaler = mimas.fit(X_train) # scaler_train = scaler.transform(X_train) # scaler_test = scaler.transform(X_test) i+=1 for x in range(len(X_test)): neighbors = knn(k, X_train, X_test) votes = vote(neighbors)
def prepare_desired_spectra(x_test):
    """Preprocess spectra.

    Scales ``x_test`` with a freshly fitted ``mms`` scaler, transposes the
    result, and flattens it into a single-row 2D array.
    """
    fitted = mms().fit(x_test)
    scaled_t = fitted.transform(x_test).T
    # Flatten to 1D, then rebuild as a (1, n) row.
    flat = scaled_t.reshape(1, -1)[0]
    as_row = flat.reshape(-1, 1).T
    return as_row
def class_efficiency(t_act, t_pred):
    """Return a cross-tabulation of actual versus predicted classes.

    NOTE(review): the parameters are never read. The table is built from the
    module-level ``training[1]``, ``pred_cat`` and ``n_obs`` instead -
    confirm whether the arguments were meant to be used.
    """
    cols = ['t_act', 't_pred']
    df = pd.DataFrame(np.concatenate(
        [training[1], pred_cat.reshape([n_obs, 1])], axis=1),
                      columns=cols)
    ct = pd.crosstab(df.t_act, df.t_pred)
    return ct


# One-hot encode the targets.
ohe1 = ohe(handle_unknown='ignore')
ohe1 = ohe1.fit(training[1])
targ = ohe1.transform(training[1]).toarray()

# Min-max scale the scores and append the one-hot target columns.
X = scores_trunc.copy()
dim = len(X.T)  # number of score columns
mms1 = mms()
X = mms1.fit_transform(X)
train = np.concatenate([X, targ], axis=1)
n_cats = len(targ.T)  # number of one-hot target columns
n_obs = len(train)  # number of rows

df_train = pd.DataFrame(train)
# The target columns sit at positions dim .. dim + n_cats - 1. The original
# range ran up to n_obs (a row count) and only worked because zip stopped at
# the shorter list; with fewer rows than columns some targets kept their
# numeric names.
old_col = list(range(dim, dim + n_cats))
new_col = ["target" + str(i) for i in range(n_cats)]
df_train.rename(columns=dict(zip(old_col, new_col)), inplace=True)
#Compute mean vector per class
axl_corr[i], _ = pearsonr(malignant[:, i], mitf_cell_scores[tirosh_cell_type_labels == 0]) axl_corr[np.isnan(axl_corr)] = np.inf axl_program_gene_indices = np.argsort(axl_corr)[:100] axl_cell_scores = control(axl_program_gene_indices, tirosh_data_relative_expression) #mel = axl_cell_scores[np.logical_and(tirosh_labels == 81, tirosh_cell_type_labels == 0)] #plt.hist(mel) #plt.show() #mel = mitf_cell_scores[np.logical_and(tirosh_labels == 81, tirosh_cell_type_labels == 0)] #plt.hist(mel) #plt.show() mitf[:, 0] = np.clip( mms().fit_transform(mitf_cell_scores.reshape(-1, 1)).reshape(-1), 0, 1) axl[:, 0] = np.clip( mms().fit_transform(axl_cell_scores.reshape(-1, 1)).reshape(-1), 0, 1) #for tumor in [53, 81, 82, 79, 80, 59, 84, 78, 88, 71]: # m = np.mean(mitf_cell_scores[np.logical_and(tirosh_labels == tumor, tirosh_cell_type_labels == 0)]) # a = np.mean(axl_cell_scores[np.logical_and(tirosh_labels == tumor, tirosh_cell_type_labels == 0)]) # plt.scatter(m, a) # plt.annotate("Mel" + str(tumor), (m, a)) #plt.show() # #plt.subplot(211) #plt.hist(mitf_cell_scores) #plt.subplot(212) #plt.hist(axl_cell_scores) #plt.show()