Example #1
def predict_from_csv(path_to_csv):

    df = pd.read_csv(path_to_csv)
    X, y = prep_data(df)

    reg = load("reg.joblib")
    predictions = reg.predict(X)

    return predictions
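All of these predict_from_csv excerpts assume the same module-level imports, which the scraped snippets omit; a minimal sketch, with prep_data coming from the project's own preprocess module as in Example #7 (os and mean_squared_error are only needed by Examples #5 and #2):

import os

import pandas as pd
from joblib import load
from preprocess import prep_data
from sklearn.metrics import mean_squared_error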
Example #2
def predict_from_csv(path_to_csv):

    df = pd.read_csv(path_to_csv)
    X, y = prep_data(df)

    model = load("gbr.joblib")

    predictions = model.predict(X)

    # Unlike the other variants, this one returns the MSE against the
    # ground-truth labels rather than the predictions themselves.
    return mean_squared_error(y, predictions)
Example #3
def predict_from_csv(path_to_csv):

    df = pd.read_csv(path_to_csv)
    X, y = prep_data(df)

    wrf = load("wrf.joblib")

    predictions = wrf.predict(X)

    return predictions
Example #4
def predict_from_csv(path_to_csv):

    df = pd.read_csv(path_to_csv)
    X, y = prep_data(df)

    est = load("est.joblib")

    predictions = est.predict(X)

    return predictions
Example #5
def predict(s):

    X = prep_data(s)

    # loading the 4 models
    EorI_model = load(os.path.join("models", "clf_is_Extrovert.joblib"))
    SorN_model = load(os.path.join("models", "clf_is_Sensing.joblib"))
    TorF_model = load(os.path.join("models", "clf_is_Thinking.joblib"))
    JorP_model = load(os.path.join("models", "clf_is_Judging.joblib"))

    # predicting
    EorI_pred = EorI_model.predict(X)
    SorN_pred = SorN_model.predict(X)
    TorF_pred = TorF_model.predict(X)
    JorP_pred = JorP_model.predict(X)

    # combining the predictions from the 4 models
    result = combine_classes(EorI_pred, SorN_pred, TorF_pred, JorP_pred)

    return result
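combine_classes is not shown in the excerpt; a hypothetical sketch of what it might do, assuming each classifier emits a truthy prediction for the letter named in its joblib file:

def combine_classes(eori, sorn, torf, jorp):
    # Hypothetical helper: map each binary prediction onto its MBTI letter
    # and concatenate, e.g. (1, 0, 1, 0) -> "ENTP".
    letters = [
        "E" if eori[0] else "I",
        "S" if sorn[0] else "N",
        "T" if torf[0] else "F",
        "J" if jorp[0] else "P",
    ]
    return "".join(letters)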
Example #6
def predict_from_csv(path_to_csv):

    df = pd.read_csv(path_to_csv)
    X, y = prep_data(df)
    # reg = load("reg.joblib")

    model = load("dtmodel.joblib")
    # ###### "Decision Tree" seems TOO perfect, like the data was generated by this model!

    # ###### "Random Forest" acts great
    # model = load("rfmodel.joblib")

    # ###### The models below weren't considered good, e.g. they predicted
    # ###### negative or extremely light/heavy weights
    # model = load("Scaledlassomodel.joblib")
    # model = load("scaledlrmodel.joblib")
    # model = load("scaledmlprmodel.joblib")
    # model = load("pcamodel.joblib")
    # model = load("mlprmodel.joblib")
    # model = load("lrmodel.joblib")
    # model = load("lassomodel.joblib")

    predictions = model.predict(X)
    return predictions
Example #7
File: train.py  Project: nickszabo21/ML_HW
import pandas as pd
from sklearn.ensemble import ExtraTreesRegressor
from joblib import dump
from preprocess import prep_data

df = pd.read_csv("fish_participant.csv")

X, y = prep_data(df)

ttr = ExtraTreesRegressor()
ttr.fit(X, y)

dump(ttr, "reg.joblib")
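Note that the ExtraTreesRegressor is dumped to reg.joblib, the same file that Example #1 loads, so that snippet serves as this training script's prediction counterpart.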
Example #8
def compute_kernel(name, hyper_param):
    """This function computes the test and the training kernels.
	Inputs:
		name: Kernel name.
		hyper_param: Kernel hyper-parameters.
	Outputs:
		Training Kernel: n times n np.float32 matrix.
		Test Kernel: nt times n np.float32 matrix.
		ytrain: vector of training labels. n times 1 np.float32.
		ytest: vector of test labels. nt times 1 np.float32.
	"""
    X, ytrain, Xtest, ytest = prep_data(dataset, False, noise_index)
    nt = Xtest.shape[0]
    n = X.shape[0]
    d = float(X.shape[1])  # input dimension, used to scale the dot products
    # Read precomputed CNTK kernel and form the kernel matrix
    if dataset == 'CIFAR10' and name == 'ntk':
        K = np.zeros((n, n), dtype=np.float32)
        KT = np.zeros((n, nt), dtype=np.float32)
        main_dir = user_dirs['cntk_dir'] + 'LFGaussian_CIFAR10_Myrtle_%d/' % (
            noise_index)
        m = 200  # rows per precomputed chunk
        count = 250  # number of chunks on disk
        for i in range(count):
            K[(m * i):(m * (i + 1)), :] = np.load(main_dir +
                                                  'train_ntk_%d.npy' % (i))
            KT[(m * i):(m * (i + 1)), :] = np.load(main_dir +
                                                   'test_ntk_%d.npy' % (i))
        KT = KT.T
        # Enforce exact symmetry of the training kernel.
        for i in range(n):
            K[:, i] = K[i, :]
    elif dataset == 'SYNTH' and name == 'ntk':
        n = hyper_param[0]
        K = np.load(user_dirs['synth_dir'] + 'NTK_TRAIN_%d.npy' %
                    (noise_index))
        K = K[:n, :n]
        KT = np.load(user_dirs['synth_dir'] + 'NTK_TEST_%d.npy' %
                     (noise_index))
        KT = KT[:, :n]
        ytrain = ytrain[:n, :]
    elif name == 'polynomial':
        print('Request to use degree %d polynomial kernel with intercept %f' %
              (hyper_param[0], hyper_param[1]),
              file=_file)
        p = hyper_param[0]
        intercept = hyper_param[1]
        intercept = intercept.astype(np.float32)
        K = (np.power(intercept + np.dot(X, X.T) / np.sqrt(d), p))
        KT = (np.power(intercept + np.dot(Xtest, X.T) / np.sqrt(d), p))
    elif name == 'rf':
        directory = user_dirs['rf_dir'] + 'RF_Kernel_noise_%d' % (noise_index)
        # Use a separate variable for the file paths rather than shadowing
        # the kernel `name` argument.
        fname = directory + '/RF_Kernel_Train_N_4200000.npy'
        K = np.load(fname)
        fname = directory + '/RF_Kernel_Test_N_4200000.npy'
        KT = np.load(fname)

        K = K.astype(np.float32)
        KT = KT.astype(np.float32)
    elif name == 'ntk':
        # ntk KRR
        layers = hyper_param[0]
        if layers < 3:
            # For two-layers networks, compute the kernel directly
            K = NTK2(X.T, X.T)
            KT = NTK2(Xtest.T, X.T)
        else:
            # For multilayer networks, read it from the disk
            K = np.load(user_dirs['ntk_dir'] +
                        'Train_NTK_%d_layers_%d_NFMNIST.npy' %
                        (noise_index, hyper_param[0]))
            KT = np.load(user_dirs['ntk_dir'] +
                         'Test_NTK_%d_layers_%d_NFMNIST.npy' %
                         (noise_index, hyper_param[0]))
    elif name == 'gp':
        # ReLU RF KRR
        K = RFK2(X.T, X.T)
        KT = RFK2(Xtest.T, X.T)
    else:
        raise Exception('Non-valid Kernel')
    assert K.shape[0] == n and K.shape[1] == n
    assert K.dtype == np.float32
    assert KT.shape[0] == nt and KT.shape[1] == n
    assert KT.dtype == np.float32
    return (K, KT, ytrain, ytest)
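A minimal sketch of calling compute_kernel, assuming the script-level globals it reads (dataset, noise_index, user_dirs, _file) are already set; in the polynomial branch the hyper-parameters are unpacked as (degree, intercept), and the intercept must be a NumPy scalar because it is passed through .astype:

# Sketch: degree-2 polynomial kernel with intercept 1.0.
K, KT, ytrain, ytest = compute_kernel('polynomial', [2, np.float32(1.0)])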
Example #9
from __future__ import print_function
import math
import os
import sys
import time
from preprocess import prep_data
import numpy as np

from jax import random
from neural_tangents import stax

noise_id = int(sys.argv[1])  # np.int was removed from NumPy; use built-in int
num_layers = int(sys.argv[2])
dataset = 'NFMNIST'
X, Y, Xtest, Ytest = prep_data(dataset, False, noise_id)

if num_layers == 2:
    init_fn, apply_fn, kernel_fn = stax.serial(stax.Dense(512), stax.Relu(),
                                               stax.Dense(1))
elif num_layers == 3:
    init_fn, apply_fn, kernel_fn = stax.serial(stax.Dense(512), stax.Relu(),
                                               stax.Dense(512), stax.Relu(),
                                               stax.Dense(1))
elif num_layers == 4:
    init_fn, apply_fn, kernel_fn = stax.serial(stax.Dense(512), stax.Relu(),
                                               stax.Dense(512), stax.Relu(),
                                               stax.Dense(512), stax.Relu(),
                                               stax.Dense(1))
else:
    raise Exception('Non-valid Kernel')
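The excerpt ends after building the network; a sketch of how the kernel matrices might then be evaluated and saved, assuming the file-name pattern that the multilayer 'ntk' branch of compute_kernel in Example #8 reads back (in practice a job this size would be batched, e.g. with neural_tangents.batch):

# Sketch: evaluate the infinite-width NTK on train/test data and save it
# under the names the 'ntk' branch of compute_kernel expects.
K = np.array(kernel_fn(X, X, 'ntk'), dtype=np.float32)
KT = np.array(kernel_fn(Xtest, X, 'ntk'), dtype=np.float32)
np.save('Train_NTK_%d_layers_%d_NFMNIST.npy' % (noise_id, num_layers), K)
np.save('Test_NTK_%d_layers_%d_NFMNIST.npy' % (noise_id, num_layers), KT)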
Example #10
def _import_data(self, dataset, model, dim):
    self._X, self._Y, self._Xtest, self._Ytest = prep_data(
        dataset, model == 'CNN', dim)
Example #11
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_validate, KFold
from sklearn.metrics import mean_squared_error
from preprocess import prep_data
fish_data = pd.read_csv("fish_participant.csv")
print(fish_data.head())
print(fish_data.dtypes)

X, y = prep_data(fish_data)

decisiontree = DecisionTreeRegressor()

# cross_validate scores with negated MSE, so flip the sign to report MSE.
cv_mse = -cross_validate(
    decisiontree,
    X,
    y,
    scoring="neg_mean_squared_error",
    cv=KFold(random_state=123, shuffle=True),
)["test_score"].mean()
print(cv_mse)

decisiontree.fit(X, y)

fish_data_holdout = pd.read_csv("fish_holdout_demo.csv")
Example #12
import pandas as pd
from sklearn.linear_model import LinearRegression
from joblib import dump
from preprocess import prep_data

dataset = pd.read_csv("fish_participant.csv")

X, y = prep_data(dataset)

regressor = LinearRegression()
regressor.fit(X, y)

dump(regressor, "reg.joblib")

#print(X, y)
Example #13
import pandas as pd
from joblib import load
from preprocess import prep_data
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures


def predict_from_csv(path_to_csv):

    df = pd.read_csv(path_to_csv)
    X, y = prep_data(df)

    reg = load("reg.joblib")
    predictions = reg.predict(X)

    return predictions


if __name__ == "__main__":
    df = pd.read_csv("fish_holdout_demo.csv")
    X, ho_truth = prep_data(df)

    pl = PolynomialFeatures(degree=2)
    X = pl.fit_transform(X)

    reg = load("reg_plr2.joblib")
    ho_predictions = reg.predict(X)

    print(ho_predictions)
    print(ho_truth)

    ho_mse = mean_squared_error(ho_truth, ho_predictions)
    print(ho_mse)
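Since the MSE is in squared units, printing its square root gives an error in the same units as the target; a trivial extension:

    print(ho_mse ** 0.5)  # RMSE, in the same units as the target weights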
Example #14
row_id = int(sys.argv[2])  # onp.int (np.int) was removed from NumPy; use built-in int
model_name = sys.argv[3]
exp_name = sys.argv[4]
job_id = int(sys.argv[5])

# The directory used to save the results
directory = './CNN_Kernels/%s_CIFAR10_%s_%d' % (exp_name, model_name,
                                                noise_index)
if not os.path.exists(directory):
    os.makedirs(directory)
files = os.listdir(directory)

fileName = directory + "/" + 'log_file_%d_%d.txt' % (row_id, job_id)
_file = open(fileName, 'w', buffering=1)

X, _, Xtest, _ = prep_data('CIFAR10', False, noise_index)

n = X.shape[0]
ntest = Xtest.shape[0]
W_std = 1.0
b_std = 0.0
# Number of rows generated at each job
m = 200

if model_name == 'Myrtle':
    init_fn, apply_fn, kernel_fn = stax.serial(
        stax.Conv(512, (3, 3), strides=(1, 1), W_std=W_std, b_std=b_std,
                  padding='SAME'),
        stax.Relu(),
        stax.Conv(512, (3, 3), strides=(1, 1), W_std=W_std, b_std=b_std,
                  padding='SAME'),
        stax.Relu(),
        stax.AvgPool((2, 2), strides=(2, 2), padding='VALID'),
        stax.Conv(512, (3, 3), strides=(1, 1), W_std=W_std, b_std=b_std,
                  padding='SAME'),