import pandas as pd
from joblib import load

from preprocess import prep_data


def predict_from_csv(path_to_csv):
    df = pd.read_csv(path_to_csv)
    X, y = prep_data(df)
    reg = load("reg.joblib")
    predictions = reg.predict(X)
    return predictions
import pandas as pd
from joblib import load
from sklearn.metrics import mean_squared_error

from preprocess import prep_data


def predict_from_csv(path_to_csv):
    df = pd.read_csv(path_to_csv)
    X, y = prep_data(df)
    model = load("gbr.joblib")
    predictions = model.predict(X)
    # Returns the mean squared error on the CSV rather than the raw predictions.
    return mean_squared_error(y, predictions)
import pandas as pd
from joblib import load

from preprocess import prep_data


def predict_from_csv(path_to_csv):
    df = pd.read_csv(path_to_csv)
    X, y = prep_data(df)
    wrf = load("wrf.joblib")
    predictions = wrf.predict(X)
    return predictions
import pandas as pd
from joblib import load

from preprocess import prep_data


def predict_from_csv(path_to_csv):
    df = pd.read_csv(path_to_csv)
    X, y = prep_data(df)
    est = load("est.joblib")
    predictions = est.predict(X)
    return predictions
import os

from joblib import load

# prep_data and combine_classes are project helpers defined elsewhere in this module.


def predict(s):
    X = prep_data(s)
    # loading the 4 models
    EorI_model = load(os.path.join("models", "clf_is_Extrovert.joblib"))
    SorN_model = load(os.path.join("models", "clf_is_Sensing.joblib"))
    TorF_model = load(os.path.join("models", "clf_is_Thinking.joblib"))
    JorP_model = load(os.path.join("models", "clf_is_Judging.joblib"))
    # predicting
    EorI_pred = EorI_model.predict(X)
    SorN_pred = SorN_model.predict(X)
    TorF_pred = TorF_model.predict(X)
    JorP_pred = JorP_model.predict(X)
    # combining the predictions from the 4 models
    result = combine_classes(EorI_pred, SorN_pred, TorF_pred, JorP_pred)
    return result
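# combine_classes above is a project helper whose definition is not shown here.
# The function below is only a hypothetical sketch of what it might do, assuming
# each classifier outputs a binary array where 1 means the first letter of its
# pair: it joins the four predictions into 4-letter MBTI strings such as "ISTJ".
def combine_classes_sketch(EorI_pred, SorN_pred, TorF_pred, JorP_pred):
    letter_pairs = [("E", "I"), ("S", "N"), ("T", "F"), ("J", "P")]
    types = []
    for row in zip(EorI_pred, SorN_pred, TorF_pred, JorP_pred):
        types.append("".join(pair[0] if flag else pair[1]
                             for flag, pair in zip(row, letter_pairs)))
    return types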
import pandas as pd
from joblib import load

from preprocess import prep_data


def predict_from_csv(path_to_csv):
    df = pd.read_csv(path_to_csv)
    X, y = prep_data(df)
    # reg = load("reg.joblib")
    model = load("dtmodel.joblib")
    # ###### "Decision Tree" seems TOO perfect, as if the data were generated by this model!
    # ###### "Random Forest" also works well
    # model = load("rfmodel.joblib")
    # ###### The models below weren't considered good, e.g. predicting negative/extremely light/heavy weights
    # model = load("Scaledlassomodel.joblib")
    # model = load("scaledlrmodel.joblib")
    # model = load("scaledmlprmodel.joblib")
    # model = load("pcamodel.joblib")
    # model = load("mlprmodel.joblib")
    # model = load("lrmodel.joblib")
    # model = load("lassomodel.joblib")
    predictions = model.predict(X)
    return predictions
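# The commented-out alternatives above record a manual model comparison. Below is a
# minimal sketch of how such a comparison could be scripted; the model file names come
# from the comments above, while the 5-fold CV / MSE setup is an assumption, not code
# from this project.
import pandas as pd
from joblib import load
from sklearn.model_selection import KFold, cross_val_score

from preprocess import prep_data


def compare_saved_models(path_to_csv, model_files=("dtmodel.joblib", "rfmodel.joblib")):
    df = pd.read_csv(path_to_csv)
    X, y = prep_data(df)
    for model_file in model_files:
        model = load(model_file)  # cross_val_score refits a clone of it on each fold
        scores = cross_val_score(
            model, X, y,
            scoring="neg_mean_squared_error",
            cv=KFold(n_splits=5, shuffle=True, random_state=123),
        )
        # An implausibly small CV error (e.g. the decision tree above) can indicate overfitting.
        print(model_file, -scores.mean())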
import pandas as pd
from sklearn.ensemble import ExtraTreesRegressor
from joblib import dump

from preprocess import prep_data

df = pd.read_csv("fish_participant.csv")
X, y = prep_data(df)

ttr = ExtraTreesRegressor()
ttr.fit(X, y)
dump(ttr, "reg.joblib")
def compute_kernel(name, hyper_param):
    """This function computes the test and the training kernels.

    Inputs:
        name: Kernel name.
        hyper_param: Kernel hyper-parameters.
    Outputs:
        Training Kernel: n times n np.float32 matrix.
        Test Kernel: nt times n np.float32 matrix.
        ytrain: vector of training labels. n times 1 np.float32.
        ytest: vector of test labels. nt times 1 np.float32.
    """
    X, ytrain, Xtest, ytest = prep_data(dataset, False, noise_index)
    nt = Xtest.shape[0]
    n = X.shape[0]
    d = X.shape[1] + 0.0
    # Read precomputed CNTK kernel and form the kernel matrix
    if dataset == 'CIFAR10' and name == 'ntk':
        K = np.zeros((n, n), dtype=np.float32)
        KT = np.zeros((n, nt), dtype=np.float32)
        main_dir = user_dirs['cntk_dir'] + 'LFGaussian_CIFAR10_Myrtle_%d/' % (noise_index)
        m = 200
        count = 250
        for i in range(count):
            K[(m * i):(m * (i + 1)), :] = np.load(main_dir + 'train_ntk_%d.npy' % (i))
            KT[(m * i):(m * (i + 1)), :] = np.load(main_dir + 'test_ntk_%d.npy' % (i))
        KT = KT.T
        for i in range(n):
            K[:, i] = K[i, :]
    elif dataset == 'SYNTH' and name == 'ntk':
        n = hyper_param[0]
        K = np.load(user_dirs['synth_dir'] + 'NTK_TRAIN_%d.npy' % (noise_index))
        K = K[:n, :n]
        KT = np.load(user_dirs['synth_dir'] + 'NTK_TEST_%d.npy' % (noise_index))
        KT = KT[:, :n]
        ytrain = ytrain[:n, :]
    elif name == 'polynomial':
        print('Request to use degree %d polynomial kernel with intercept %f'
              % (hyper_param[0], hyper_param[1]), file=_file)
        p = hyper_param[0]
        intercept = hyper_param[1]
        intercept = intercept.astype(np.float32)
        K = (np.power(intercept + np.dot(X, X.T) / np.sqrt(d), p))
        KT = (np.power(intercept + np.dot(Xtest, X.T) / np.sqrt(d), p))
    elif name == 'rf':
        directory = user_dirs['rf_dir'] + 'RF_Kernel_noise_%d' % (noise_index)
        name = directory + '/RF_Kernel_Train_N_4200000.npy'
        K = np.load(name)
        name = directory + '/RF_Kernel_Test_N_4200000.npy'
        KT = np.load(name)
        K = K.astype(np.float32)
        KT = KT.astype(np.float32)
    elif name == 'ntk':  # ntk KRR
        layers = hyper_param[0]
        if layers < 3:
            # For two-layer networks, compute the kernel directly
            K = NTK2(X.T, X.T)
            KT = NTK2(Xtest.T, X.T)
        else:
            # For multilayer networks, read it from the disk
            K = np.load(user_dirs['ntk_dir'] + 'Train_NTK_%d_layers_%d_NFMNIST.npy'
                        % (noise_index, hyper_param[0]))
            KT = np.load(user_dirs['ntk_dir'] + 'Test_NTK_%d_layers_%d_NFMNIST.npy'
                         % (noise_index, hyper_param[0]))
    elif name == 'gp':  # ReLU RF KRR
        K = RFK2(X.T, X.T)
        KT = RFK2(Xtest.T, X.T)
    else:
        raise Exception('Non-valid Kernel')
    assert K.shape[0] == n and K.shape[1] == n
    assert K.dtype == np.float32
    assert KT.shape[0] == nt and KT.shape[1] == n
    assert KT.dtype == np.float32
    return (K, KT, ytrain, ytest)
from __future__ import print_function
import math
import os
import sys
import time

import numpy as np
from jax import random
from neural_tangents import stax

from preprocess import prep_data

noise_id = int(sys.argv[1])
num_layers = int(sys.argv[2])
dataset = 'NFMNIST'
X, Y, Xtest, Ytest = prep_data(dataset, False, noise_id)

if num_layers == 2:
    init_fn, apply_fn, kernel_fn = stax.serial(
        stax.Dense(512), stax.Relu(), stax.Dense(1))
elif num_layers == 3:
    init_fn, apply_fn, kernel_fn = stax.serial(
        stax.Dense(512), stax.Relu(), stax.Dense(512), stax.Relu(), stax.Dense(1))
elif num_layers == 4:
    init_fn, apply_fn, kernel_fn = stax.serial(
        stax.Dense(512), stax.Relu(), stax.Dense(512), stax.Relu(),
        stax.Dense(512), stax.Relu(), stax.Dense(1))
else:
    raise Exception('Invalid number of layers')
def _import_data(self, dataset, model, dim):
    self._X, self._Y, self._Xtest, self._Ytest = prep_data(
        dataset, model == 'CNN', dim)
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_validate
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error

from preprocess import prep_data

fish_data = pd.read_csv("fish_participant.csv")
print(fish_data.head())
print(fish_data.dtypes)

X, y = prep_data(fish_data)

decisiontree = DecisionTreeRegressor()
# Mean cross-validated (negative) MSE for the decision tree.
print(cross_validate(
    decisiontree,
    X,
    y,
    scoring="neg_mean_squared_error",
    cv=KFold(random_state=123, shuffle=True),
)["test_score"].mean())

decisiontree.fit(X, y)

fish_data_holdout = pd.read_csv("fish_holdout_demo.csv")
import pandas as pd
from sklearn.linear_model import LinearRegression
from joblib import dump

from preprocess import prep_data

dataset = pd.read_csv("fish_participant.csv")
X, y = prep_data(dataset)

regressor = LinearRegression()
regressor.fit(X, y)
dump(regressor, "reg.joblib")
# print(X, y)
import pandas as pd
from joblib import load
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures

from preprocess import prep_data


def predict_from_csv(path_to_csv):
    df = pd.read_csv(path_to_csv)
    X, y = prep_data(df)
    reg = load("reg.joblib")
    predictions = reg.predict(X)
    return predictions


if __name__ == "__main__":
    df = pd.read_csv("fish_holdout_demo.csv")
    X, ho_truth = prep_data(df)
    # Expand the features to degree-2 polynomial terms before predicting.
    pl = PolynomialFeatures(degree=2)
    X = pl.fit_transform(X)
    reg = load("reg_plr2.joblib")
    ho_predictions = reg.predict(X)
    print(ho_predictions)
    print(ho_truth)
    ho_mse = mean_squared_error(ho_truth, ho_predictions)
    print(ho_mse)
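# reg_plr2.joblib is loaded above, but the script that produced it is not shown.
# Below is a minimal sketch, assuming it was a plain LinearRegression fit on
# degree-2 polynomial features (matching the prediction path above); the estimator
# choice and the use of fish_participant.csv for training are assumptions.
import pandas as pd
from joblib import dump
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

from preprocess import prep_data

train_df = pd.read_csv("fish_participant.csv")
X_train, y_train = prep_data(train_df)
X_train_poly = PolynomialFeatures(degree=2).fit_transform(X_train)
plr2 = LinearRegression().fit(X_train_poly, y_train)
dump(plr2, "reg_plr2.joblib")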
row_id = int(sys.argv[2])
model_name = sys.argv[3]
exp_name = sys.argv[4]
job_id = int(sys.argv[5])

# The directory used to save the results
directory = './CNN_Kernels/%s_CIFAR10_%s_%d' % (exp_name, model_name, noise_index)
if not os.path.exists(directory):
    os.makedirs(directory)
files = os.listdir(directory)

fileName = directory + "/" + 'log_file_%d_%d.txt' % (row_id, job_id)
_file = open(fileName, 'w', buffering=1)

X, _, Xtest, _ = prep_data('CIFAR10', False, noise_index)
n = X.shape[0]
ntest = Xtest.shape[0]

W_std = 1.0
b_std = 0.0
# Number of rows generated at each job
m = 200

if model_name == 'Myrtle':
    init_fn, apply_fn, kernel_fn = stax.serial(
        stax.Conv(512, (3, 3), strides=(1, 1), W_std=W_std, b_std=b_std, padding='SAME'),
        stax.Relu(),
        stax.Conv(512, (3, 3), strides=(1, 1), W_std=W_std, b_std=b_std, padding='SAME'),
        stax.Relu(),
        stax.AvgPool((2, 2), strides=(2, 2), padding='VALID'),
        stax.Conv(512, (3, 3), strides=(1, 1), W_std=W_std, b_std=b_std, padding='SAME'),