def main(argv=None):
    print('load data')
    mnist = load_data()
    print('Finished!')
    print('Start training!')
    train(mnist)
def main(argv=None):
    print('Loading data')
    x = load_data()
    print('Finished!')
    print('Starting training')
    train(x)
import os
import pickle

from keras.callbacks import EarlyStopping, ModelCheckpoint


def train(root, res, neighbor, model_save_path, TEST_SPLIT):
    print(f'Training {neighbor} Neighbor CNN model with Resolution {res}')
    X_train, X_test, y_train, y_test = load_data(root, res, TEST_SPLIT, neighbor)

    # Define model callbacks.
    # Note: res.strip('.pkl') would remove any of the characters '.', 'p', 'k',
    # 'l' from both ends rather than the suffix, so use replace() instead.
    res_name = res.replace('.pkl', '')
    model_save_path = os.path.join(model_save_path, res_name,
                                   'Neighbor_%s.hdf5' % neighbor)
    checkpoint = ModelCheckpoint(model_save_path, monitor='val_loss',
                                 save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=10, verbose=1)

    model = CNN(neighbor)

    # Training
    history = model.fit(X_train, y_train,
                        validation_data=(X_test, y_test),
                        batch_size=256, epochs=500, shuffle=True,
                        callbacks=[checkpoint, earlystopper])

    # Save training history
    with open('../history/%s/Neighbor_%s.pkl' % (res_name, neighbor), 'wb') as f:
        pickle.dump(history.history, f)
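# A hypothetical invocation of the train() function above. The data path,
# resolution string, and neighbor counts are placeholder values chosen for
# illustration, not taken from the repo:
if __name__ == '__main__':
    for neighbor in (1, 2, 3):
        train(root='../data', res='32.pkl', neighbor=neighbor,
              model_save_path='../models', TEST_SPLIT=0.2)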
def main(argv=None):
    print('This is to build a simple RNN.')
    print('Loading data')
    x = load_data()
    print('Finished!')
    print('Starting training')
    train(x)
def main(argv=None):
    # Newlines are needed here: adjacent string literals concatenate with no
    # separator, so the original menu printed as one run-together line.
    print('This is to pretrain a shallow neural network using an autoencoder '
          'or a restricted Boltzmann machine.\n'
          'There are two options:\n'
          '1 -> pretrain_with_RBM\n'
          '2 -> pretrain_with_AE')
    print('Loading data')
    x = load_data()
    print('Finished!')
    print()
    option = 'pretrain_with_RBM'
    print('Starting training')
    train(x, option)
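# A minimal sketch of how train(x, option) might dispatch on the two option
# strings printed above; pretrain_with_RBM() and pretrain_with_AE() are
# assumed helpers, not confirmed from the repo:
def train(x, option):
    if option == 'pretrain_with_RBM':
        return pretrain_with_RBM(x)
    elif option == 'pretrain_with_AE':
        return pretrain_with_AE(x)
    raise ValueError('Unknown pretraining option: %s' % option)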
import pickle

from keras.callbacks import ModelCheckpoint
from keras.models import load_model
from sklearn.linear_model import LinearRegression


def main():
    X_train, X_test, y_train, y_test = load_data(path, res, args.test_ratio)
    X_train, X_test, y_train, y_test = Preprocessing_DNN(
        X_train, X_test, y_train, y_test)

    # Training
    if args.action == 'train':
        if args.model_use == 'Linear':
            model = LinearRegression()
            model.fit(X_train, y_train)
            with open(args.model_save_path + 'Linear_model.pik', 'wb') as f:
                pickle.dump(model, f)
        elif args.model_use == 'DNN':
            model = DNN()
            filepath = (args.model_save_path +
                        '/weights-improvement-{epoch:03d}-{loss:.3e}.hdf5')
            checkpoint = ModelCheckpoint(filepath, monitor='val_loss',
                                         save_best_only=False, period=1)
            history = model.fit(X_train, y_train,
                                validation_data=(X_test, y_test),
                                shuffle=True, callbacks=[checkpoint])
            # Save history
            with open(args.history_save_path + args.model_name + '.pkl', 'wb') as f:
                pickle.dump(history.history, f)

    # Testing
    elif args.action == 'test':
        if args.model_use == 'Linear':
            with open(args.load_model_path, 'rb') as f:
                model = pickle.load(f)
        elif args.model_use == 'DNN':
            model = load_model(args.load_model_path)
        predictions = model.predict(X_test)
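# The snippet above reads fields from an `args` namespace defined elsewhere
# (`path` and `res` are likewise assumed module-level globals). A minimal
# argparse sketch of the parser it appears to expect -- the option names come
# from the attributes used above, but the defaults are hypothetical:
import argparse

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--action', choices=['train', 'test'], default='train')
    parser.add_argument('--model_use', choices=['Linear', 'DNN'], default='DNN')
    parser.add_argument('--test_ratio', type=float, default=0.2)
    parser.add_argument('--model_save_path', default='./models/')
    parser.add_argument('--history_save_path', default='./history/')
    parser.add_argument('--model_name', default='DNN')
    parser.add_argument('--load_model_path', default='./models/Linear_model.pik')
    return parser.parse_args()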
def main(argv=None):
    x = load_data()
    retrain(x)
from numpy import *
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import pylab as P

plt.style.use('ggplot')

from Preprocessing import load_data

preproc_data, features_data, rcvcall_data = load_data(
    'train_2011_2012_2013.csv')

# Plot CSPL_RECEIVED_CALLS as a function of ASS_ASSIGNMENT
preproc_data1 = preproc_data.groupby(["ASS_ASSIGNMENT"])['CSPL_RECEIVED_CALLS'].sum()
plt.figure()
preproc_data1.plot(kind="bar", x="ASS_ASSIGNMENT", y="CSPL_RECEIVED_CALLS")
plt.legend()
plt.show()

# Plot CSPL_RECEIVED_CALLS as a function of HOUR
preproc_data2 = preproc_data.groupby(["HOUR"])['CSPL_RECEIVED_CALLS'].sum()
plt.figure()
preproc_data2.plot(kind="bar", x="HOUR", y="CSPL_RECEIVED_CALLS")
plt.legend()
plt.show()
def main(argv=None):
    x = load_data()
    evaluate(x)
import matplotlib.pyplot as plt
import numpy as np
from keras.utils import to_categorical
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle


def main():
    # Parse the raw data files first
    normal_file_raw = 'dataset/normalTrafficTraining.txt'
    anomaly_file_raw = 'dataset/anomalousTrafficTest.txt'
    normal_test_raw = 'dataset/normalTrafficTest.txt'

    normal_test_parse = 'dataset/normalRequestTest.txt'
    normal_file_parse = 'dataset/normalRequestTraining.txt'
    anomaly_file_parse = 'dataset/anomalousRequestTest.txt'

    # Parse the files to decode the URLs in the raw HTTP requests and
    # write them out in a proper format
    parse_file(normal_file_raw, normal_file_parse)
    parse_file(anomaly_file_raw, anomaly_file_parse)
    parse_file(normal_test_raw, normal_test_parse)

    # Convert each HTTP request into a string and append each of these
    # strings to a list.
    # NOTE: these paths must point at the parsed files written above.
    X_train = to_string('../input/normalRequestTraining.txt')
    X_test_bad = to_string('../input/anomalousRequestTest.txt')
    X_test_good = to_string('../input/normalRequestTest.txt')

    # Label the good requests and bad requests:
    # 0 --> good --> [1. 0.]
    # 1 --> bad  --> [0. 1.]
    y_train = [0] * len(X_train)
    y_bad = [1] * len(X_test_bad)
    y_good = [0] * len(X_test_good)

    # Put all the requests in the X and y lists
    y_unshuffled = y_bad + y_good + y_train
    X_unshuffled = X_test_bad + X_test_good + X_train

    # Shuffle the data
    X_shuffled, y_shuffled = shuffle(X_unshuffled, y_unshuffled)

    # Use categorical (one-hot) output
    y_shuffled = to_categorical(y_shuffled)

    # Set parameters:
    subset = None

    # Maximum length: longer requests get chopped, shorter ones get padded
    maxlen = 1000

    # Model params
    # Filters for conv layers
    nb_filter = 64
    # Number of units in the dense layer
    dense_outputs = 64
    # Conv layer kernel sizes
    filter_kernels = [7, 7]
    # Number of units in the final output layer (number of classes)
    cat_output = 2

    # Compile/fit params
    batch_size = 128
    nb_epoch = 20

    print('Loading data...')
    # Expect X to be a list of request strings and y the category indices
    (xt, yt), (x_test, y_test) = load_data(X_shuffled, y_shuffled)

    print('Creating vocab...')
    vocab, reverse_vocab, vocab_size, alphabet = create_vocab_set()

    print('Compile model...')
    model = create_model(filter_kernels, dense_outputs, maxlen, vocab_size,
                         nb_filter, cat_output)

    # Encode the data at character level
    xt = encode_data(xt, maxlen, vocab)
    x_test = encode_data(x_test, maxlen, vocab)

    print('Chars vocab: {}'.format(alphabet))
    print('Chars vocab size: {}'.format(vocab_size))
    print('X_train.shape: {}'.format(xt.shape))
    model.summary()

    print('Fit model...')
    # Number of epochs with no improvement after which training stops
    patience = 5
    history = fit_model(model, xt, yt, patience, batch_size, nb_epoch)

    print("Testing model...")
    score = test_model(x_test, y_test, batch_size)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    # Graphs and data visualisation

    # Training accuracy vs. validation accuracy
    # (plt.figsize = (10, 10) was a no-op; pass figsize to plt.figure instead)
    plt.figure(0, figsize=(10, 10))
    plt.plot(history.history['acc'], 'r')
    plt.plot(history.history['val_acc'], 'g')
    plt.xticks(np.arange(0, 20, 1.0))
    plt.xlabel("Num of Epochs")
    plt.ylabel("Accuracy")
    plt.title("Training Accuracy vs. Validation Accuracy")
    plt.legend(['train', 'validation'])

    # Training loss vs. validation loss, on a separate figure so the two
    # plots do not draw over each other
    plt.figure(1, figsize=(10, 10))
    plt.plot(history.history['loss'], 'r')
    plt.plot(history.history['val_loss'], 'g')
    plt.xticks(np.arange(0, 20, 1.0))
    plt.yticks(np.arange(0, 0.5, 0.1))
    plt.xlabel("Num of Epochs")
    plt.ylabel("Loss")
    plt.title("Training Loss vs. Validation Loss")
    plt.legend(['train', 'validation'])

    # Confusion matrix
    y_pred = model.predict(x_test)
    y_pred1 = (y_pred > 0.5)
    matrix = confusion_matrix(y_test.argmax(axis=1), y_pred1.argmax(axis=1))
    print(matrix)
    plt.matshow(matrix, cmap=plt.cm.gray)
    plt.show()

    # Row-normalised confusion matrix
    row_sum = matrix.sum(axis=1, keepdims=True)
    norm_conf = matrix / row_sum
    print(norm_conf)
    plt.matshow(norm_conf, cmap=plt.cm.gray)
    plt.show()
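# A minimal sketch of the character-level encoding step that encode_data()
# performs above. This helper is hypothetical (the real create_vocab_set()/
# encode_data() live elsewhere in the repo); it only illustrates the technique:
# each request string is truncated or zero-padded to maxlen and mapped to a
# one-hot matrix of shape (maxlen, vocab_size), the input the conv layers expect.
import numpy as np

def encode_data_sketch(requests, maxlen, vocab):
    # vocab is assumed to be a dict mapping each character to its index
    vocab_size = len(vocab)
    out = np.zeros((len(requests), maxlen, vocab_size), dtype=np.float32)
    for i, text in enumerate(requests):
        for j, ch in enumerate(text[:maxlen]):  # longer requests get chopped
            idx = vocab.get(ch)
            if idx is not None:                 # unknown chars stay all-zero
                out[i, j, idx] = 1.0
    return out                                  # shorter requests stay zero-padded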