def fit(self, epochs, batch_size, params): config = tf.ConfigProto() config.gpu_options.allow_growth = True #Do not assign whole gpu memory, just use it on the go config.allow_soft_placement = True #If an operation is not defined in the default device, let it execute in another. timestr = time.strftime("%Y%m%d-%H%M%S") random_seed = 123 np.random.seed(random_seed) tf.set_random_seed(random_seed) # Data Path CallFolder = '../../Raw_Data/' StoreFolder = 'Final_Results/' if not os.path.isdir(StoreFolder): os.makedirs(StoreFolder) StoreFolder_selfeval = 'Selfeval_Results/' if not os.path.isdir(StoreFolder_selfeval): os.makedirs(StoreFolder_selfeval) StoreFolder_all_labeled = 'All_labeled_data/' if not os.path.isdir(StoreFolder_all_labeled): os.makedirs(StoreFolder_all_labeled) StoreFolder_Model = 'Models/' if os.path.exists(StoreFolder_Model) and os.path.isdir( StoreFolder_Model): shutil.rmtree(StoreFolder_Model) if not os.path.isdir(StoreFolder_selfeval): os.makedirs(StoreFolder_selfeval) ######################################################### # Decide whether self-evaluation or final submission Val_split = 9.5 / 10 # You want to preprocess the data? preprocessing = True # Hyperparameters # epochs = 400 # batch_size = 128 learning_rate = 0.0002 # params = 800 activation = tf.nn.tanh # At which sample starts the prediction for the test data? sample_number = 30000 ######################################################### # LOAD AND SHUFFLE DATA! DataTrain = np.array( pd.read_hdf(CallFolder + "train_labeled.h5", "train")) X_train_labeled = DataTrain[:, 1:] features = X_train_labeled.shape[1] y_train_labeled = DataTrain[:, 0] classes = np.max(y_train_labeled) + 1 X_test = np.array( pd.read_hdf(CallFolder + "train_unlabeled.h5", "train")) # X_test = unlabeled data print('Unpreprocessed Data') print('X_train_labeled: ', X_train_labeled.shape, end=' || ') print('y_train: ', y_train_labeled.shape) print('X_test: ', X_test.shape, '\n') (X_train_labeled, y_train_labeled) = shuffle(X_train_labeled, y_train_labeled) X_train = np.concatenate((X_train_labeled, X_test), axis=0) np.save(os.path.join(StoreFolder_all_labeled, 'X_train.npy'), X_train) # STORE BEFORE PREPROCESSING, BUT AFTER SHUFFLING! ######################################################### if preprocessing == True: X_train_labeled, X_test = centering(X_train_labeled, X_test) X_train_labeled = normalize(X_train_labeled) X_test = normalize(X_test) samples = len(X_train_labeled) X_valid = X_train_labeled[int(Val_split * samples):samples, :] y_valid = y_train_labeled[int(Val_split * samples):samples] X_train = X_train_labeled[0:int(Val_split * samples), :] y_train = y_train_labeled[0:int(Val_split * samples)] print('Final Data') print('Shape of X_train:', X_train.shape) print('Shape of y_train:', y_train.shape) print('Shape of X_valid:', X_valid.shape) print('Shape of y_valid:', y_valid.shape, '\n') ################## # CREATE GRAPH g = tf.Graph() with g.as_default(): # build the graph NN.build_NN(features, classes, learning_rate, params, activation) ################## # TRAINING print() print('Training... ') with tf.Session(graph=g, config=config) as sess: [avg_loss_plot, valid_accuracy_plot, test_accuracy_plot] = train(path=StoreFolder_Model, sess=sess, epochs=epochs, random_seed=random_seed, batch_size=batch_size, training_set=(X_train, y_train), validation_set=(X_valid, y_valid), test_set=None) del g ################## # CREATE GRAPH g2 = tf.Graph() with g2.as_default(): # build the graph NN.build_NN(features, classes, learning_rate, params, activation) # Saver saver = tf.train.Saver() ################## # PREDICTION with tf.Session(graph=g2, config=config) as sess: epoch = np.argmax(valid_accuracy_plot) + 1 load(saver=saver, sess=sess, epoch=epoch, path=StoreFolder_Model) y_test_pred = predict(sess, X_test) ################## # CREATE NEW DATASET y_train = np.concatenate((y_train_labeled, y_test_pred), axis=0) np.save(os.path.join(StoreFolder_all_labeled, 'y_train.npy'), y_train)
X_train= DataTrain[:, 1:] y_train = DataTrain[:, 0] X_test = np.array(pd.read_hdf(CallFolder + "train_unlabeled.h5", "train")) # X_test = unlabeled data (X_train, y_train) = shuffle(X_train, y_train) y_train_onehot = keras.utils.to_categorical(y_train) print('First 3 labels: ', y_train[:3]) print('First 3 onehot labels:\n', y_train_onehot[:3]) X_train_all = np.concatenate((X_train, X_test), axis=0) np.save(os.path.join(StoreFolder_all_labeled, 'X_train.npy'), X_train_all) # STORE BEFORE PREPROCESSING, BUT AFTER SHUFFLING! ######################################################### # TRAIN DATA X_train, X_test = centering(X_train, X_test) # build model: model = KERAS.build(X_train, y_train_onehot, param, layers) # train model: trained_model, losses = KERAS.fit(model, X_train, y_train_onehot, epochs, batch_size) # predict labels: y_test_pred = KERAS.predict(trained_model, X_test) timestr = time.strftime("%Y%m%d-%H%M%S") PrintOutput(y_test_pred, sample_number, os.path.join(StoreFolder_selfeval, timestr + '_' + str(epochs) + '_' + str(param) + '_' + str(layers) + '_' + str(batch_size) + '_y_test.csv')) y_train = np.concatenate((y_train, y_test_pred), axis=0) np.save(os.path.join(StoreFolder_all_labeled, 'y_train.npy'), y_train) plt.figure(1)
def fit(self, epochs, batch_size, params): config = tf.ConfigProto() config.gpu_options.allow_growth = True #Do not assign whole gpu memory, just use it on the go config.allow_soft_placement = True #If an operation is not defined in the default device, let it execute in another. timestr = time.strftime("%Y%m%d-%H%M%S") random_seed = 123 np.random.seed(random_seed) tf.set_random_seed(random_seed) # Data Path CallFolder = '../../Raw_Data/' StoreFolder = 'Final_Results/' if not os.path.isdir(StoreFolder): os.makedirs(StoreFolder) StoreFolder_selfeval = 'Selfeval_Results/' if not os.path.isdir(StoreFolder_selfeval): os.makedirs(StoreFolder_selfeval) StoreFolder_Model = 'Models/' if os.path.exists(StoreFolder_Model) and os.path.isdir( StoreFolder_Model): shutil.rmtree(StoreFolder_Model) if not os.path.isdir(StoreFolder_selfeval): os.makedirs(StoreFolder_selfeval) ######################################################### # Decide whether self-evaluation or final submission final_submission = False Test_split = 9.5 / 10 Val_split = 9.5 / 10 # You want to preprocess the data? preprocessing = True # Hyperparameters # epochs = 120 # batch_size = 128 learning_rate = 0.0002 # params = 200 activation = tf.nn.relu # At which sample starts the prediction for the test data? sample_number = 30000 ######################################################### # LOAD AND SHUFFLE DATA! DataTrain = np.array( pd.read_hdf(CallFolder + "train_labeled.h5", "train")) X_train = DataTrain[:, 1:] features = X_train.shape[1] y_train = DataTrain[:, 0] classes = np.max(y_train) + 1 X_test = np.array(pd.read_hdf(CallFolder + "test.h5", "test")) print('Unpreprocessed Data') print('X_train_labeled: ', X_train.shape, end=' || ') print('y_train: ', y_train.shape) print('X_test: ', X_test.shape, '\n') (X_train, y_train) = shuffle(X_train, y_train) ######################################################### # FINAL DATA if final_submission == True: if preprocessing == True: X_train, X_test = centering(X_train, X_test) X_train = normalize(X_train) X_test = normalize(X_test) samples = len(X_train) X_valid = X_train[int(Val_split * samples):samples, :] y_valid = y_train[int(Val_split * samples):samples] X_train = X_train[0:int(Val_split * samples), :] y_train = y_train[0:int(Val_split * samples)] print('Final Data') print('Shape of X_train:', X_train.shape) print('Shape of y_train:', y_train.shape) print('Shape of X_valid:', X_valid.shape) print('Shape of y_valid:', y_valid.shape, '\n') ################## # CREATE GRAPH g = tf.Graph() with g.as_default(): # build the graph NN.build_NN(features, classes, learning_rate, params, activation) ################## # TRAINING print() print('Training... ') with tf.Session(graph=g, config=config) as sess: [avg_loss_plot, valid_accuracy_plot, test_accuracy_plot] = train(path=StoreFolder_Model, sess=sess, epochs=epochs, random_seed=random_seed, batch_size=batch_size, training_set=(X_train, y_train), validation_set=(X_valid, y_valid), test_set=None) np.save( os.path.join(StoreFolder, timestr + '_avg_loss_plot.npy'), avg_loss_plot) del g ################## # CREATE GRAPH g2 = tf.Graph() with g2.as_default(): # build the graph NN.build_NN(features, classes, learning_rate, params, activation) # Saver saver = tf.train.Saver() ################## # PREDICTION with tf.Session(graph=g2, config=config) as sess: epoch = np.argmax(valid_accuracy_plot) + 1 load(saver=saver, sess=sess, epoch=epoch, path=StoreFolder_Model) y_test_pred = predict(sess, X_test) PrintOutput( y_test_pred, sample_number, os.path.join( StoreFolder, timestr + '_' + str(epochs) + '_' + str(batch_size) + '_' + str(params) + '_y_test.csv')) ################################################################################################# ################################################################################################# # SELFEVALUATION else: samples = len(X_train) X_train_selfeval = X_train[0:int(Test_split * samples), :] y_train_selfeval = y_train[0:int(Test_split * samples)] X_test_selfeval = X_train[int(Test_split * samples):samples, :] y_test_selfeval = y_train[int(Test_split * samples):samples] print('Self-evaluation data') print('Shape of X_train:', X_train_selfeval.shape) print('Shape of y_train:', y_train_selfeval.shape) print('Shape of X_test:', X_test_selfeval.shape) print('Shape of y_test:', y_test_selfeval.shape) if preprocessing == True: X_train_selfeval, X_test_selfeval = centering( X_train_selfeval, X_test_selfeval) X_train_selfeval = normalize(X_train_selfeval) X_test_selfeval = normalize(X_test_selfeval) ################## # CREATE GRAPH TRAINING g = tf.Graph() with g.as_default(): # build the graph NN.build_NN(features, classes, learning_rate, params, activation) ################## # TRAINING print() print('Training... ') with tf.Session(graph=g, config=config) as sess: [avg_loss_plot, valid_accuracy_plot, test_accuracy_plot ] = train(path=StoreFolder_Model, sess=sess, epochs=epochs, random_seed=random_seed, batch_size=batch_size, training_set=(X_train_selfeval, y_train_selfeval), validation_set=None, test_set=(X_test_selfeval, y_test_selfeval)) np.save( os.path.join(StoreFolder_selfeval, timestr + '_avg_loss_plot.npy'), avg_loss_plot) np.save( os.path.join(StoreFolder_selfeval, timestr + '_test_accuracy_plot.npy'), test_accuracy_plot) ################## # POSTPROCESS # plt.figure(1) # plt.plot(range(1, len(avg_loss_plot) + 1), avg_loss_plot) # plt.title('Training loss') # plt.xlabel('Epoch') # plt.ylabel('Average Training Loss') # if final_submission == True: # plt.savefig(os.path.join(StoreFolder, timestr + '_' + str(epochs) + '_' + str(batch_size) + '_' + str(params) + '_TrainLoss.jpg')) # else: # plt.savefig(os.path.join(StoreFolder_selfeval, timestr + '_' + str(epochs) + '_' + str(batch_size) + '_' + str(params) + '_TrainLoss.jpg')) # if final_submission == False: # plt.figure(2) # plt.plot(range(1, len(test_accuracy_plot) + 1), test_accuracy_plot, label='Test Accuracy') # plt.title('Test Accuracy') # plt.xlabel('Epoch') # plt.ylabel('Accuracy') # plt.legend() # plt.savefig(os.path.join(StoreFolder_selfeval, timestr + '_' + str(epochs) + '_' + str(batch_size) + '_' + str(params) + '_TestAccuracy.jpg')) print('\nJob Done!') return np.average(test_accuracy_plot[-10:])
y_train = DataTrain[:, 0] classes = np.max(y_train) + 1 X_test = np.array(pd.read_hdf(CallFolder + "test.h5", "test")) print('Unpreprocessed Data') print('X_train_labeled: ', X_train.shape, end=' || ') print('y_train: ', y_train.shape) print('X_test: ', X_test.shape, '\n') (X_train, y_train) = shuffle(X_train, y_train) ######################################################### # FINAL DATA if final_submission == True: if preprocessing == True: X_train, X_test = centering(X_train, X_test) X_train = normalize(X_train) X_test = normalize(X_test) samples = len(X_train) X_valid = X_train[int(Val_split * samples):samples, :] y_valid = y_train[int(Val_split * samples):samples] X_train = X_train[0:int(Val_split * samples), :] y_train = y_train[0:int(Val_split * samples)] print('Final Data') print('Shape of X_train:', X_train.shape) print('Shape of y_train:', y_train.shape) print('Shape of X_valid:', X_valid.shape) print('Shape of y_valid:', y_valid.shape, '\n') ##################
classes = np.max(y_train_labeled) + 1 print('Unpreprocessed Data') print('X_train_labeled: ', X_train_labeled.shape, end=' || ') print('y_train: ', y_train_labeled.shape) X_test = np.array(pd.read_hdf(CallFolder + "train_unlabeled.h5", "train")) # X_test = unlabeled data print('X_test: ', X_test.shape, '\n') (X_train_labeled, y_train_labeled) = shuffle(X_train_labeled, y_train_labeled) X_train = np.concatenate((X_train_labeled, X_test), axis=0) np.save(os.path.join(StoreFolder_all_labeled, 'X_train.npy'), X_train) # STORE BEFORE PREPROCESSING, BUT AFTER SHUFFLING! ######################################################### if preprocessing == True: X_train_labeled, X_test = centering(X_train_labeled, X_test) X_train_labeled = normalize(X_train_labeled) X_test = normalize(X_test) samples = len(X_train_labeled) X_valid = X_train_labeled[int(Val_split * samples):samples, :] y_valid = y_train_labeled[int(Val_split * samples):samples] X_train = X_train_labeled[0:int(Val_split * samples), :] y_train = y_train_labeled[0:int(Val_split * samples)] print('Final Data') print('Shape of X_train:', X_train.shape) print('Shape of y_train:', y_train.shape) print('Shape of X_valid:', X_valid.shape) print('Shape of y_valid:', y_valid.shape, '\n') print('Shape of X_train - labeled:', X_train.shape)