Example #1
def get_consumptions(id_, solar, start, length):

    # Initialize data

    DFS = [data_original, data_forecast]
    load_ = get_data(id_, start, length, DFS[0])
    forecast_ = get_data(id_, start, length, DFS[1])

    return load_, forecast_
Example #2
def trainByCnn():
    x_train, y_train, x_val, y_val, embedding_mat = process_data.get_data(cnum=1000, test_size=0.8)

    model = cnn.get_model(x_train, y_train, embedding_mat)

    model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_data=[x_val, y_val])
    model.save('../ckpt/cnn.h5')
Example #3
def run():

    x_train, x_test, y_train, y_test = get_data()
    model = lin_reg(x_train, x_test, y_train, y_test)

    print("\nPHM Linear Regression")

    while True:
        user = input(
            "1. Train\n2. Normal_solve\n3. Predict\n4. Save\n5. Load\n6. Quit\n"
        )

        if user == '1':
            model.fit()
        elif user == '2':
            model.normal_fit()
        elif user == '3':
            model.predict()
        elif user == '4':
            model.save()
        elif user == '5':
            model.load()
        elif user == '6':
            break

        print("\n--------Linear Regression---------\n")
Example #4
def trainByLstm():
    x_train, y_train, x_val, y_val, embedding_mat = process_data.get_data(cnum=10000, test_size=0.2)

    model = lsmcrf.get_model(x_train, y_train, embedding_mat)

    model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_data=[x_val, y_val])

    crfmodel_Weights = model.get_weights()

    with open('../ckpt/crfmodel_Weights.pkl', 'wb') as outp:
        pickle.dump(crfmodel_Weights, outp)
Example #5
def get_data(file_arr):
    template, rate = process_data.get_data(file_arr)

    data = np.array(template[0])
    n = len(data[1])
    data = data.reshape(n, inputs)

    for idx in range(1, len(template)):
        sub_arr = np.array(template[idx])
        n = len(sub_arr[0])
        sub_arr = sub_arr.reshape(n, inputs)
        data = np.concatenate((data, sub_arr), axis=0)

    return data, rate
Example #6
def train(args):
    '''Function for training the model:
    sets the GPU configuration, loads the data, creates the savers and
    loaders, performs training, and writes summaries.'''

    ## Setting the GPU configuration - reverse in order of nvidia-smi
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    ## Keep TensorFlow from taking the whole GPU
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    ## loading data
    train_data = get_data(args.img_size,
                          args.dataset,
                          is_train=True,
                          debug=False)
    print('loaded data successfully...')
    ## model definition
    with tf.variable_scope('bc_gan'):
        model = Bicycle_GAN(args)
        print('Graph definition for model created...')
    ## Starting a session
    init = tf.global_variables_initializer()
    sess = tf.Session(config=config)
    sess.run(init)

    ## savers and loaders
    global_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                    scope='bc_gan')
    trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       scope='bc_gan')
    saver = tf.train.Saver(global_vars)
    loader = tf.train.Saver(global_vars)
    if args.pretrained_weights != "":
        loader.restore(sess, args.pretrained_weights)

    ## Summaries
    if not os.path.exists('./logs'):
        os.mkdir('./logs')
    logdir = os.path.join('./logs', 'bcgan')
    summary_writer = tf.summary.FileWriter(logdir, sess.graph)

    ## Training
    model.train(sess, train_data, saver, summary_writer)

    print "Model is trained ...."
def main():
    # step 1: get the data and define all the usual variables
    X, Y, d = get_data()
    # Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.03)
    X, Y = shuffle(X, Y)
    Xtrain, Ytrain = X[:-50], Y[:-50]
    Xtest, Ytest = X[-50:], Y[-50:]

    ann = ANN([500, 300])

    session = tf.InteractiveSession()
    ann.set_session(session)

    ann.fit(Xtrain, Ytrain, Xtest, Ytest, show_fig=True)

    print("Train accuracy:", ann.score(Xtrain, Ytrain))
    print("Test accuracy:", ann.score(Xtest, Ytest))
Example #8
def test(args):
    '''Function for testing the model:
    sets the GPU configuration, loads the test data, loads weights,
    performs testing, and writes summaries.'''

    ## Setting the GPU configuration
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    ## Keep TensorFlow from taking the whole GPU
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    ## loading data
    test_data = get_data(args.img_size,
                         args.dataset,
                         is_train=False,
                         debug=False)

    ## model definition
    with tf.variable_scope('bc_gan'):
        model = Bicycle_GAN(args)

    ## Starting a session
    init = tf.global_variables_initializer()
    sess = tf.Session(config=config)
    sess.run(init)

    ## savers and loaders
    global_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                    scope='bc_gan')
    trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       scope='bc_gan')
    loader = tf.train.Saver(global_vars)
    if args.pretrained_weights is not None:
        loader.restore(sess, args.pretrained_weights)

    ## results
    if not os.path.exists('./results'):
        os.mkdir('./results')
    write_dir = os.path.join('results')

    ## Testing
    model.test(sess, test_data, write_dir)

    print "Testing complete ...."
Example #9
def run():
    x_train, x_test, y_train, y_test = get_data()
    model = rf_model(x_train, x_test, y_train, y_test)

    print("\nPHM Random Forest Regression")

    while True:
        user = input("1. Train\n2. Predict\n3. Save\n4. Load\n5. Quit\n")

        if user == '1':
            model.train()
        elif user == '2':
            model.predict()
        elif user == '3':
            save(model)
        elif user == '4':
            load(model)
        elif user == '5':
            break

        print("\n-----------RF------------")
Example #10
def run():
    x_train, x_test, y_train, y_test = get_data(features='previous_capacity', caps=7)

    model = SVR_model(x_train, x_test, y_train, y_test)

    print("\nSVR")
    
    while True:
        user = input("1. Train\n2. Predict\n3. Save\n4. Load\n5. Quit\n")
        
        if user == '1':
            model.fit()
        elif user == '2':
            model.predict()
        elif user == '3':
            save(model)
        elif user == '4':
            load(model)
        elif user == '5':
            break

        print("\n-----------SVR------------")
Example #11
def run():
    x_train, x_test, y_train, y_test = get_data(features=['min_discharge_voltagem'])

    model = Poly_model(x_train, x_test, y_train, y_test)

    print("\nPoly_reg")
    
    while True:
        user = input("1. Train\n2. Predict\n3. Save\n4. Load\n5. Quit\n")
        
        if user == '1':
            model.fit()
        elif user == '2':
            model.predict()
        elif user == '3':
            save(model)
        elif user == '4':
            load(model)
        elif user == '5':
            break

        print("\n-----------Poly------------")
def cut_result_data(score):
    score = str(score).split(".")[1]
    data = process_data.get_data("data/test_result.csv")
    # greatest = []
    # for index,row in data.iterrows():
    #     if row[0] > row[1]:
    #         greatest.append(row[0])
    #     else:
    #         greatest.append(row[1])
    # data["Probability"] = greatest
    try:
        zero_score = get_column_accuracy(data,"0")
        one_score = get_column_accuracy(data,"1")

        print("Zero Column Score => \t",zero_score)
        print("One Column Score => \t",one_score)


        if zero_score > one_score:
            myData = {"id":data["result_id"].to_list(),\
                    "Probability":data["0"].to_list(),\
                    }
            myData = pd.DataFrame(myData)
        else:
            myData = {"id":data["result_id"].to_list(),\
                        "Probability":data["1"].to_list(),\
                    }
            myData = pd.DataFrame(myData)
    except Exception:
        myData = {"id":data["result_id"].to_list(),\
                    "Probability":data["0"].to_list(),\
                    }
        myData = pd.DataFrame(myData)


    print(myData)
    myData.to_csv("data/test_results_"+score+".csv",index=0)
    return
def main(argv):
    if len(argv) > 1:
        filename = argv[1]
    else:
        filename = 'a.csv'

    if os.path.exists(filename):
        basename, ext = filename.split('.')
        data = process_data.get_data(filename)

        predictor_pipeline = process_data.make_predictor_pipeline(
            do_one_hot=False)
        label_pipeline = process_data.make_label_pipeline()

        predictors_processed = predictor_pipeline.fit_transform(data)
        labels_processed = label_pipeline.fit_transform(data)

        display_data(predictors_processed, labels_processed, basename)

    else:
        print(filename + " doesn't exist.")

    return
Example #14
def get_data(file_arr):
    template, rate = process_data.get_data(file_arr)

    data = np.array(template[0])
    n = len(data[1])
    data = data.reshape(n, inputs)

    # Ch2
    #ch2_arr = np.array(ch2[0])
    #n2 = len(ch2_arr[1])
    #ch2_arr = ch2_arr.reshape(n2, inputs)

    for idx in range(1, len(template)):
        sub_arr = np.array(template[idx])
        n = len(sub_arr[0])
        sub_arr = sub_arr.reshape(n, inputs)
        data = np.concatenate((data, sub_arr), axis=0)

    #for idx in range(1, len(ch2)):
    #   sub_arr2 = np.array(ch2[idx])
    #  n2 = len(sub_arr2[1])
    # sub_arr2 = sub_arr2.reshape(n2, inputs)
    #ch2_arr=np.concatenate((ch2_arr, sub_arr2), axis=0)
    return data, rate
Example #15
    predicted_vol = decoder.predict(outputs)
    # print(np.max(predicted_vol),np.min(predicted_vol),np.mean(predicted_vol), np.median(predicted_vol))
    np.save('D:/Master-Thesis/water_collapse/code/Material_ENKF_full.npy',
            predicted_vol)
    return predicted_vol


if __name__ == "__main__":

    path, verify_rate, sequence_length, originalFile, destinationFile = variable_value(
    )
    #-------------------------------------#

    print("Data Preprocessing...")
    vol = Pcd.get_data(path)
    dataset, verify = Pcd.train_and_vertify(vol,
                                            verify_rate)  # predict and verify

    # scaler_data = MinMaxScaler()
    # scaler_vol = scaler_data.fit_transform(verify)

    # scaler_vol = verify

    print("Data Predicting...")

    print('dataset shape = ' + str(dataset.shape))
    print('verify shape = ' + str(verify.shape))

    predicted_vol = predict_vol(verify, verify.shape[0] - sequence_length,
                                sequence_length)
Example #16
def main( argv ):
    my_args = process_args(argv)
    #my_args is a dict containing all opts mapped to their args
    basename, ext = my_args['DataFileName'].split('.')
    data = process_data.get_data(my_args['DataFileName'], ext)
    train_data, test_data = sklearn.model_selection.train_test_split( data, test_size=.20 )

    # search for good fit and analysis
    label_pipeline = process_data.make_label_pipeline()
    # ravel() flattens the (N, 1) label frame into a 1-D array for the estimators
    actual_train_labels = label_pipeline.fit_transform(train_data).values.ravel()

    if my_args["ModelType"] == "tree":
        fit_pipeline = make_decision_tree_fit_pipeline()
        fit_params = make_decision_tree_params()
    elif my_args["ModelType"] == "svm":
        fit_pipeline = make_svm_fit_pipeline()
        fit_params = make_svm_params()
    else:
        print("pick --model type")
        sys.exit(1)

    if my_args["SplitterType"] == "k-fold":
        cv = sklearn.model_selection.KFold(n_splits=my_args["Folds"])
    elif my_args["SplitterType"] == "stratified":
        cv = sklearn.model_selection.StratifiedKFold(n_splits=my_args["Folds"])
    else:
        print("pick --splitter type")
        sys.exit(1)

    if my_args["SearchType"] == "grid":
        search_grid = sklearn.model_selection.GridSearchCV( fit_pipeline,
                                                            fit_params,
                                                            scoring="f1_micro",
                                                            n_jobs=-1,
                                                            cv=cv,
                                                            refit=True,
                                                            verbose=1 )
    elif my_args["SearchType"] == "random":
        search_grid = sklearn.model_selection.RandomizedSearchCV( fit_pipeline,
                                                                  fit_params,
                                                                  scoring="f1_micro",
                                                                  n_iter=my_args["Iterations"],
                                                                  n_jobs=-1,
                                                                  cv=cv,
                                                                  refit=True,
                                                                  verbose=1 )
    else:
        print("pick --search type")
        sys.exit(1)

    search_grid.fit(train_data, actual_train_labels)


    # examine best parameters
    print( "Best Score:", search_grid.best_score_ )
    print( "Best Params:", search_grid.best_params_ )

    print()
    print()
    print()

    scores = sklearn.model_selection.cross_val_score(search_grid.best_estimator_, train_data, actual_train_labels, scoring="f1", cv=cv, n_jobs=-1 )
    print( "CV:", scores.mean( ), scores.std( ) )


    print()
    print()
    print()

    predicted_train_labels = search_grid.best_estimator_.predict(train_data)

    print("actual training labels", actual_train_labels)
    print("predicted training labels", predicted_train_labels)
    print("Training Labels Correct:", calculateCorrectLabels(actual_train_labels, predicted_train_labels))

    actual_test_labels = label_pipeline.fit_transform(test_data).values.ravel()
    predicted_test_labels = search_grid.best_estimator_.predict(test_data)

    print("actual test labels", actual_test_labels)
    print("predicted test labels", predicted_test_labels)
    print("Test Labels Correct:", calculateCorrectLabels(actual_test_labels, predicted_test_labels))
        
    return
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras import metrics
from util import y2indicator
from process_data import get_data

import matplotlib.pyplot as plt

# NOTE: do NOT name your file keras.py because it will conflict
# with importing keras

# installation is easy! just the usual "sudo pip(3) install keras"

# get the data, same as Theano + Tensorflow examples
# no need to split now, the fit() function will do it
X, Y, d = get_data()

# get shapes (D is used as input_dim below)
N, D = X.shape
# by default Keras wants one-hot encoded labels
# there's another cost function we can use
# where we can just pass in the integer labels directly
# just like Tensorflow / Theano
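# (hedged aside, not from the original snippet: Keras also accepts integer labels
#  directly if the model is compiled with loss='sparse_categorical_crossentropy',
#  in which case the y2indicator() conversion below can be skipped)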
Y = y2indicator(Y)

# the model will be a sequence of layers
model = Sequential()

# ANN with layers [29 (D)] -> [500] -> [300] -> [2]
model.add(Dense(units=500, input_dim=D))
model.add(Activation('relu'))
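# The comment above describes a [D] -> [500] -> [300] -> [2] network, but the snippet stops
# after the first layer; the remaining layers are only sketched here (assumed, not from the source):
# model.add(Dense(units=300))
# model.add(Activation('relu'))
# model.add(Dense(units=2))
# model.add(Activation('softmax'))
# model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# model.fit(X, Y, epochs=10, batch_size=32, validation_split=0.2)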
import numpy as np
import matplotlib.pyplot as plt

from sklearn.utils import shuffle
from process_data import get_data


def y2indicator(y, K):
    N = len(y)
    ind = np.zeros((N, K))
    for i in range(N):
        ind[i, y[i]] = 1
    return ind
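# (equivalent vectorized sketch: for integer labels y, np.eye(K, dtype=int)[y]
#  builds the same one-hot indicator matrix in a single step)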


Xtrain, Ytrain, Xtest, Ytest, datatrain, datatest = get_data()
D = Xtrain.shape[1]
K = len(set(Ytrain) | set(Ytest))
M = 100  # num hidden units

# convert to indicator
Ytrain_ind = y2indicator(Ytrain, K)
Ytest_ind = y2indicator(Ytest, K)

# randomly initialize weights
W1 = np.random.randn(D, M)
b1 = np.zeros(M)
W2 = np.random.randn(M, K)
b2 = np.zeros(K)

Example #19
def getResults(ldf, rdf):
    ldf.insert(0, "Probability", "0")
    for (col_name, data) in ldf.items():
        if col_name == "Probability":
            for i in range(len(data)):
                print(rdf['0'][i])
                ldf.loc[i, col_name] = rdf['0'][i] * 100
        else:
            continue

    ldf = ldf[['playerID', 'Contestant', 'Probability']]
    return ldf


test_df = process_data.get_data("s40-test-updated.csv", "csv")
predictions_df = process_data.get_data("s40-predictions.csv", "csv")

results_df = getResults(test_df, predictions_df)
results_df.to_csv("s40-final-results.csv", index=False)

results = results_df.to_numpy()
# print("results np: ", results)
ids = []
contestants = []
prob = []
for i in range(len(results)):
    ids.append(results[i][0])
    contestants.append(results[i][1])
    prob.append(results[i][2])
Example #20
def main(in_csv, batch_size, eps, mn):
    x_train, x_test, y_train, y_test = pd.get_data(in_csv)

    train_obo(x_train, y_train, bs=batch_size, ep=eps, mod_name="models/" + mn)
Example #21
def run_sacssan(args):
    """
    Run SACSANN
    """
    if args.test_chromosomes:
        test_chromosomes = [
            int(i) for i in args.test_chromosomes[0].split(",")
        ]
    else:
        test_chromosomes = []

    if args.mode == "predict":
        if (args.intermediate_network_weights_path is None
                or args.smoothing_network_weights_path is None):
            raise ValueError(
                "Path to pre-trained weights needs to be specified in predict mode"
            )
        intermediate_classifier = pickle.load(
            open(args.intermediate_network_weights_path, "rb"))
        final_classifier = pickle.load(
            open(args.smoothing_network_weights_path, "rb"))
        intermediate_scaler = pickle.load(
            open(args.intermediate_scaler_path, "rb"))
        final_scaler = pickle.load(open(args.final_scaler_path, "rb"))
        _, chromosomes_lengths = process_data.format_test_data(
            args.features_path, test_chromosomes, scaler=intermediate_scaler)
        predict_compartments(
            intermediate_classifier,
            intermediate_scaler,
            final_scaler,
            final_classifier,
            test_chromosomes,
            chromosomes_lengths,
            args.features_path,
            args.output_folder,
        )

    else:
        train_chromosomes = [
            int(i) for i in args.train_chromosomes[0].split(",")
        ]

        chromosomes = train_chromosomes + test_chromosomes
        possible_chrs = process_data.get_chromosome_list(args.genome)

        for i in range(len(chromosomes)):
            if chromosomes[i] not in possible_chrs:
                logger.warning(
                    f"Invalid chromosome; "
                    f"possible chromosomes for the input genome are {possible_chrs}"
                )
                sys.exit()

        if not os.path.exists(args.output_folder):
            os.makedirs(args.output_folder)

        (
            X_train,
            y_train,
            X_test,
            y_test,
            A_indexes,
            B_indexes,
            scaler,
            testChrLen,
        ) = process_data.get_data(
            args.labels_path,
            args.features_path,
            train_chromosomes,
            test_chromosomes,
            scaling=True,
            balance=True,
            save_model=args.save_model,
            output_folder=args.output_folder,
        )
        train_and_predict_compartments(
            args.features_path,
            train_chromosomes,
            test_chromosomes,
            testChrLen,
            X_train,
            y_train,
            X_test,
            y_test,
            A_indexes,
            B_indexes,
            scaler,
            args.output_folder,
            args.save_model,
        )
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tabulate import tabulate
from sklearn.utils import shuffle
from process_data import get_data

Xtrain, Ytrain, Xtest, Ytest, datatrain, datatest = get_data(regression=True)

X = Xtrain
Y = Ytrain
# normalize, keep original to unscale later
Yorig = Y
Y = (Y - np.min(Y)) / (np.max(Y) - np.min(Y))
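# (to undo this later, the inverse of the min-max transform is
#  y_unscaled = y_scaled * (np.max(Yorig) - np.min(Yorig)) + np.min(Yorig))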

D = X.shape[1]
K = len(set(Ytrain) | set(Ytest))
M = 10  # num hidden units
# layer 1
W = np.random.randn(D, M) / np.sqrt(D)
b = np.zeros(M)
# layer 2
V = np.random.randn(M) / np.sqrt(M)
c = 0

Example #23
def draw_data():
    fig = plt.figure(1)
    ax = fig.add_subplot(111)
    data = list(filter(lambda v: v[0] == 81390, get_data()))

    ax.plot([v[1] for v in data], [v[2] for v in data])
    # estimate for the weight vector W, can be found by minimizing (y - WB).T * (y - WB)
    W = np.matmul(np.linalg.inv(np.matmul(X.T, X)), np.matmul(X.T, Y))
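    # (note: the same least-squares estimate can be computed without forming the explicit
    #  inverse, e.g. np.linalg.lstsq(X, Y, rcond=None)[0] or np.linalg.pinv(X) @ Y)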
    predictions = []
    # iterate over the test set
    for entry in range(len(X_)):
        # get predicition
        p = np.matmul(W.T, X_[entry])
        # threshold for binary classification
        if p >= 0.5:
            pred = 1
        elif p < 0.5:
            pred = 0
        predictions.append(pred)
    predictions = np.array(predictions).reshape(-1, 1)
    # compute MSE over the test set
    error = np.sum(np.power(predictions - test_labels, 2)) / len(predictions)
    return (W, error, predictions)


# fetch and clean the data
pre_data = process_data.get_data()
data = process_data.process_data(pre_data)
features = data[0]
labels = data[1]
train_features = np.array(features[:80])
train_labels = np.array(labels[:80]).reshape(80, 1)
test_features = np.array(features[80:])
test_labels = np.array(labels[80:]).reshape(20, 1)
lr_pred = linear_regression(train_features, train_labels,
                            test_features, test_labels)
Example #25
def run(N,
        T,
        D,
        pt,
        market,
        freq,
        seed,
        onlyprice=False,
        flat=False,
        real_data=-1):

    r = np.random.RandomState(seed)
    player_ids = r.choice(np.arange(126), N, replace=False)

    data_original = pd.read_csv(DATA, index_col='date', parse_dates=True)
    data_forcast = pd.read_csv(DATA_FORCAST,
                               index_col='date',
                               parse_dates=True)
    dfs_nosolar = [data_original, data_forcast]

    data_solar = pd.read_csv(DATA_SOLAR, index_col='date', parse_dates=True)
    data_solar_forcast = pd.read_csv(DATA_SOLAR_FORCAST,
                                     index_col='date',
                                     parse_dates=True)
    dfs_solar = [data_solar, data_solar_forcast]

    #    real_data = int(real_data)
    #    if real_data > 0:
    #        loads = get_data(real_data, D + 1, N, r)
    #    else:
    #        loads = None

    players = {}
    for n in range(N):
        has_solar = n <= (N // 2)
        DFS = dfs_solar if has_solar else dfs_nosolar
        if real_data > 0:
            load_ = get_data(n, real_data, D, DFS[0])
            forcast_ = get_data(n, real_data, D, DFS[1])
        else:
            load_ = None
            forcast_ = None
        val = random_player(T,
                            D,
                            pt,
                            r,
                            flat,
                            load=load_,
                            forcast=forcast_,
                            solar=has_solar)
        players[n] = val

    for p in range(N):
        players[p]['freq'] = freq

    CONFIG = {
        'ROUNDS': T * (D - 1) + 1,
        'SLICE': T,
        'RANDOM_STATE': r,
        'MARKET': market,
        'ONLYPRICE': onlyprice,
    }

    start = time.perf_counter()
    welfare, traded = core_loop(players, CONFIG)
    end = time.perf_counter() - start

    for k, pl in players.items():
        pl.pop('model', None)
        pl.pop('con', None)
        pl.pop('var', None)

    return (end, players, welfare, traded)
		D = len(self.gaussian)
		P = np.zeros((N,D))
		for i in self.labels:
			mean = self.gaussian[i]['mean']
			cov = self.gaussian[i]['cov']
			P[:,i] = mvn.logpdf(X,mean=mean,cov=cov) + np.log(self.prior[i])
		return np.argmax(P,axis=1)
	def score(self,X,Y):

		P = self.project(X)
		return np.mean(Y == P)


if __name__ == '__main__':

	X,Y = get_data()
	X,Y = shuffle(X,Y)
	N = len(Y)//2
	Xtrain = X[:N]
	Ytrain = Y[:N]
	Xtest = X[N:]
	Ytest = Y[N:]
	model = Facial_Rec()
	model.fit(Xtrain,Ytrain)
	print('Train accuracy: ',model.score(Xtrain,Ytrain))
	print('Test accuracy: ',model.score(Xtest,Ytest))
	print()
	alphabet = np.array([chr(i) for i in range(65,91)])
	idx = [22,7,8,18,11,4,17]
	delim = ''
	print(delim.join(alphabet[idx]))
Example #27
import sys

# sys.argv[1] = learning_rate
# sys.argv[2] = iterations
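# (a minimal sketch of reading them, assuming the script is run as
#  `python this_script.py <learning_rate> <iterations>`; the actual parsing code is not shown)
# learning_rate = float(sys.argv[1])
# iterations = int(sys.argv[2])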


def T_indicator(t, K):
    N = len(t)
    ind = np.zeros((N, K))
    for n in range(N):
        ind[n, t[n]] = 1
    return ind


# Get the data
X, t = get_data()
X, t = shuffle(X, t)
t = t.astype(np.int32)

#N = len(t)
D = X.shape[1]
M = 5
K = len(set(t))

X_train = X[:-100, :]
t_train = t[:-100]
T_train = T_indicator(t_train, K)

X_test = X[-100:, :]
t_test = t[-100:]
T_test = T_indicator(t_test, K)
def random_search():

    X, Y, data = get_data()
    X, Y = shuffle(X, Y)
    Ntrain = int(0.75 * len(X))
    Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
    Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

    # Make copies of the small data (because variance matters?)
    Xtrain = np.concatenate((Xtrain, Xtrain, Xtrain), 0)
    Ytrain = np.concatenate((Ytrain, Ytrain, Ytrain), 0)

    print('size Xtrain: ' + str(Xtrain.shape))
    print('size Ytrain: ' + str(Ytrain.shape))
    print('size Xtest: ' + str(Xtest.shape))
    print('size Ytest: ' + str(Ytest.shape))

    # starting hyperparameters
    M = 20  # hidden units
    nHidden = 2  # hidden layers
    log_lr = -4  # learning rate
    log_l2 = -2  # l2 regularization, since we always want it to be positive
    max_tries = 30

    # loop through all possible hyperparameter settings
    best_validation_rate = 0
    best_M = M
    best_nHidden = nHidden
    best_lr = log_lr
    best_l2 = log_l2
    validation_accuracies = []
    for _ in range(max_tries):
        print('on try: ' + str(_ + 1) + '/' + str(max_tries))
        model = ANN([M] * nHidden)
        # choose params randomly on log base 10 scale
        model.fit(Xtrain,
                  Ytrain,
                  learning_rate=10**log_lr,
                  reg=10**log_l2,
                  mu=0.99,
                  epochs=4000,
                  show_fig=True)
        validation_accuracy = model.score(Xtest, Ytest)
        train_accuracy = model.score(Xtrain, Ytrain)
        print(
            "validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s (layers), %s (log_lr), %s (log_l2)"
            % (validation_accuracy, train_accuracy, [M] * nHidden, log_lr,
               log_l2))
        # keep track of all
        validation_accuracies.append(validation_accuracy)

        # keep the best parameters, then make modifications to them
        if validation_accuracy > best_validation_rate:
            best_validation_rate = validation_accuracy
            best_M = M
            best_nHidden = nHidden
            best_lr = log_lr
            best_l2 = log_l2

        # select new hyperparams
        nHidden = best_nHidden + np.random.randint(
            -1, 2)  # -1, 0, or 1: remove, keep, or add a hidden layer
        nHidden = max(1, nHidden)
        M = best_M + np.random.randint(-1, 2) * 10
        M = max(10, M)
        log_lr = best_lr + np.random.randint(-1, 2)
        log_l2 = best_l2 + np.random.randint(-1, 2)

    # TODO: save these in mongodb, then read them and see if we beat it, in a new file run forward on best params
    print("Best validation_accuracy:", best_validation_rate)
    print("Mean validation_accuracy:", np.mean(validation_accuracies))
    print("Best settings:")
    print("Best M (hidden units):", best_M)
    print("Best nHidden (hidden layers):", best_nHidden)
    print("Best learning_rate:", best_lr)
    print("Best l2 regularization:", best_l2)
def main(argv):
    my_args = process_args(argv)

    basename, ext = my_args['DataFileName'].split('.')
    data = process_data.get_data(my_args['DataFileName'])

    # search for good fit and analysis
    label_pipeline = process_data.make_label_pipeline()
    # ravel() flattens the (N, 1) label frame into a 1-D array for the estimators
    actual_labels = label_pipeline.fit_transform(data).values.ravel()

    if my_args["ModelType"] == "tree":
        fit_pipeline = make_decision_tree_fit_pipeline()
        fit_params = make_decision_tree_params()
    elif my_args["ModelType"] == "svm":
        fit_pipeline = make_svm_fit_pipeline()
        fit_params = make_svm_params()
    elif my_args["ModelType"] == "bagging-tree":
        fit_pipeline = make_bagging_tree_fit_pipeline()
        fit_params = make_bagging_tree_params()
    elif my_args["ModelType"] == "adaboost-tree":
        fit_pipeline = make_adaboost_tree_fit_pipeline()
        fit_params = make_adaboost_tree_params()
    else:
        print("pick --model type")
        sys.exit(1)

    if my_args["SplitterType"] == "k-fold":
        cv = sklearn.model_selection.KFold(n_splits=my_args["Folds"])
    elif my_args["SplitterType"] == "stratified":
        cv = sklearn.model_selection.StratifiedKFold(n_splits=my_args["Folds"])
    else:
        print("pick --splitter type")
        sys.exit(1)

    if my_args["SearchType"] == "grid":
        search_grid = sklearn.model_selection.GridSearchCV(fit_pipeline,
                                                           fit_params,
                                                           scoring="f1_micro",
                                                           n_jobs=-1,
                                                           cv=cv,
                                                           refit=True,
                                                           verbose=1)
    elif my_args["SearchType"] == "random":
        search_grid = sklearn.model_selection.RandomizedSearchCV(
            fit_pipeline,
            fit_params,
            scoring="f1_micro",
            n_iter=my_args["Iterations"],
            n_jobs=-1,
            cv=cv,
            refit=True,
            verbose=1)
    else:
        print("pick --search type")
        sys.exit(1)

    search_grid.fit(data, actual_labels)

    # examine best parameters
    print("Best Score:", search_grid.best_score_)
    print("Best Params:", search_grid.best_params_)

    print()
    print()
    print()

    scores = sklearn.model_selection.cross_val_score(
        search_grid.best_estimator_,
        data,
        actual_labels,
        scoring="f1_micro",
        cv=cv,
        n_jobs=-1)
    print("CV:", scores.mean(), scores.std())

    print()
    print()
    print()

    predicted_labels = search_grid.best_estimator_.predict(data)

    cm = sklearn.metrics.confusion_matrix(actual_labels, predicted_labels)
    print("confusion_matrix:")
    print("TN: ", cm[0][0])
    print("FN: ", cm[0][1])
    print("FP: ", cm[1][0])
    print("TP: ", cm[1][1])

    f1_score = sklearn.metrics.f1_score(actual_labels,
                                        predicted_labels,
                                        average="micro")
    print()
    print(
        "Precision:",
        sklearn.metrics.precision_score(actual_labels,
                                        predicted_labels,
                                        average="micro"))
    print(
        "Recall:",
        sklearn.metrics.recall_score(actual_labels,
                                     predicted_labels,
                                     average="micro"))
    print("F1:", f1_score)

    test_data = process_data.get_data("data/test.csv")

    actual_test_labels = label_pipeline.fit_transform(test_data).values.ravel()
    predicted_test_labels = search_grid.best_estimator_.predict_proba(
        test_data)

    labels = pd.DataFrame(predicted_test_labels)

    ids = test_data.result_id.to_list()

    labels["result_id"] = ids

    labels.to_csv("data/test_result.csv", index=False)

    cut_data.cut_result_data(f1_score)

    return
import dash
import dash_html_components as html

import pandas as pd
import plotly.graph_objects as go
import process_data
from dash.dependencies import Input, Output
import plotly
import random

from collections import deque

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

df = process_data.get_data()
df = df[df.unit_number == 3]
# df=(df-df.mean())/df.std()
df = (df - df.min()) / (df.max() - df.min())

# time_deque = deque(df['time'].tolist())
# sensor_data_deque = deque(df['sensor_3'].tolist())

time_deque = deque(maxlen=150)
time_deque = time_deque + deque(
    list(range(1,
               len(df['sensor_3'].tolist()) + 1)))

print(time_deque)

full_sensor_data = deque(df['sensor_3'].tolist())
Example #31
import numpy as np
from process_data import get_data

# Data
X, T = get_data()

# Weights
M = 5
D = X.shape[1]
K = len(set(T))
W1 = np.random.randn(D, M)
b1 = np.zeros(M)
W2 = np.random.randn(M, K)
b2 = np.zeros(K)

def softmax(z):
    expZ = np.exp(z)
    return expZ / expZ.sum(axis=1, keepdims=True)
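# (a common numerically stable variant subtracts the row max before exponentiating:
#  expZ = np.exp(z - z.max(axis=1, keepdims=True)))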

def forward(X, W1, W2, b1, b2):
    Z = X.dot(W1) + b1
    A = np.tanh(Z)
    return softmax(A.dot(W2) + b2)

def classification_rate(P, T):
    return np.mean(P == T)

Y = forward(X, W1, W2, b1, b2)
P = np.argmax(Y, axis = 1)

print('Classification rate with random weights: {}'.format(classification_rate(P, T)))