Exemple #1
0
    def fit(self, epochs, batch_size, params):
        """Train on the labeled data and pseudo-label the unlabeled data.

        The labeled set is shuffled, optionally centered/normalized, and
        split into a training part (first ``Val_split`` fraction) and a
        validation part (the rest). After training, the epoch with the
        highest validation accuracy is reloaded from ``Models/`` and used
        to predict labels for the unlabeled samples. The raw
        (un-preprocessed) features and the labels (true labels followed by
        predicted labels) are written to ``All_labeled_data/X_train.npy``
        and ``All_labeled_data/y_train.npy``.

        Args:
            epochs: Number of training epochs, forwarded to ``train``.
            batch_size: Mini-batch size, forwarded to ``train``.
            params: Forwarded unchanged to ``NN.build_NN``
                (presumably the layer width -- TODO confirm).

        Returns:
            None. Results are written to disk.
        """
        config = tf.ConfigProto()
        # Do not assign the whole GPU memory up front, just use it on the go.
        config.gpu_options.allow_growth = True
        # If an operation is not defined on the default device, let it
        # execute on another one.
        config.allow_soft_placement = True

        random_seed = 123
        np.random.seed(random_seed)
        tf.set_random_seed(random_seed)

        # Data path
        CallFolder = '../../Raw_Data/'

        # Output folders (the first two are not written to by this method
        # but are still created, as before).
        StoreFolder = 'Final_Results/'
        StoreFolder_selfeval = 'Selfeval_Results/'
        StoreFolder_all_labeled = 'All_labeled_data/'
        for folder in (StoreFolder, StoreFolder_selfeval,
                       StoreFolder_all_labeled):
            os.makedirs(folder, exist_ok=True)

        # Start from an empty checkpoint folder. The original code
        # re-checked StoreFolder_selfeval here (copy-paste slip), so the
        # model folder was never recreated after being removed.
        StoreFolder_Model = 'Models/'
        if os.path.isdir(StoreFolder_Model):
            shutil.rmtree(StoreFolder_Model)
        os.makedirs(StoreFolder_Model, exist_ok=True)

        #########################################################
        # Fraction of the labeled samples used for training; the remainder
        # is the validation set.
        Val_split = 9.5 / 10

        # You want to preprocess the data?
        preprocessing = True

        # Hyperparameters (epochs, batch_size and params are arguments;
        # previously hard-coded as 400, 128 and 800).
        learning_rate = 0.0002
        activation = tf.nn.tanh

        #########################################################
        # LOAD AND SHUFFLE DATA!
        DataTrain = np.array(
            pd.read_hdf(CallFolder + "train_labeled.h5", "train"))
        # Column 0 holds the label, the remaining columns the features.
        X_train_labeled = DataTrain[:, 1:]
        features = X_train_labeled.shape[1]
        y_train_labeled = DataTrain[:, 0]
        classes = np.max(y_train_labeled) + 1

        # X_test = unlabeled data
        X_test = np.array(
            pd.read_hdf(CallFolder + "train_unlabeled.h5", "train"))
        print('Unpreprocessed Data')
        print('X_train_labeled:   ', X_train_labeled.shape, end=' ||  ')
        print('y_train:   ', y_train_labeled.shape)
        print('X_test:    ', X_test.shape, '\n')

        (X_train_labeled, y_train_labeled) = shuffle(X_train_labeled,
                                                     y_train_labeled)

        # STORE BEFORE PREPROCESSING, BUT AFTER SHUFFLING, so the row order
        # matches the labels saved at the end of this method.
        X_all = np.concatenate((X_train_labeled, X_test), axis=0)
        np.save(os.path.join(StoreFolder_all_labeled, 'X_train.npy'), X_all)

        #########################################################
        if preprocessing:
            X_train_labeled, X_test = centering(X_train_labeled, X_test)
            X_train_labeled = normalize(X_train_labeled)
            X_test = normalize(X_test)

        samples = len(X_train_labeled)
        split = int(Val_split * samples)
        X_valid = X_train_labeled[split:samples, :]
        y_valid = y_train_labeled[split:samples]
        X_train = X_train_labeled[0:split, :]
        y_train = y_train_labeled[0:split]
        print('Final Data')
        print('Shape of X_train:', X_train.shape)
        print('Shape of y_train:', y_train.shape)
        print('Shape of X_valid:', X_valid.shape)
        print('Shape of y_valid:', y_valid.shape, '\n')

        ##################
        # CREATE GRAPH
        g = tf.Graph()
        with g.as_default():
            # build the graph
            NN.build_NN(features, classes, learning_rate, params, activation)

        ##################
        # TRAINING
        print()
        print('Training... ')
        with tf.Session(graph=g, config=config) as sess:
            # Only the validation accuracies are needed afterwards.
            (_avg_loss_plot, valid_accuracy_plot,
             _test_accuracy_plot) = train(path=StoreFolder_Model,
                                          sess=sess,
                                          epochs=epochs,
                                          random_seed=random_seed,
                                          batch_size=batch_size,
                                          training_set=(X_train, y_train),
                                          validation_set=(X_valid, y_valid),
                                          test_set=None)

        del g

        ##################
        # CREATE GRAPH
        # A fresh graph is built for prediction and restored from disk.
        g2 = tf.Graph()
        with g2.as_default():
            # build the graph
            NN.build_NN(features, classes, learning_rate, params, activation)

            # Saver
            saver = tf.train.Saver()

        ##################
        # PREDICTION
        with tf.Session(graph=g2, config=config) as sess:
            # Epochs are 1-based: index of the best validation accuracy + 1.
            # (Assumes train stores one checkpoint per epoch -- TODO confirm.)
            epoch = np.argmax(valid_accuracy_plot) + 1
            load(saver=saver, sess=sess, epoch=epoch, path=StoreFolder_Model)
            y_test_pred = predict(sess, X_test)

        ##################
        #  CREATE NEW DATASET
        # True labels followed by predicted labels, same row order as the
        # X_train.npy saved above.
        y_all = np.concatenate((y_train_labeled, y_test_pred), axis=0)
        np.save(os.path.join(StoreFolder_all_labeled, 'y_train.npy'), y_all)
Exemple #2
0
    def fit(self, epochs, batch_size, params):
        """Train the network and self-evaluate it or write a submission.

        The labeled data is loaded and shuffled. Depending on the local
        ``final_submission`` switch (currently ``False``):

        * final submission: train with a validation split, reload the
          epoch with the best validation accuracy from ``Models/``,
          predict the test set and write the predictions with
          ``PrintOutput`` into ``Final_Results/``.
        * self-evaluation: hold out the last part of the labeled data as a
          test set, train, and save the loss and test-accuracy curves into
          ``Selfeval_Results/``.

        Args:
            epochs: Number of training epochs, forwarded to ``train``.
            batch_size: Mini-batch size, forwarded to ``train``.
            params: Forwarded unchanged to ``NN.build_NN``
                (presumably the layer width -- TODO confirm).

        Returns:
            The mean of the last ten entries of the ``test_accuracy_plot``
            returned by ``train``.
        """
        config = tf.ConfigProto()
        # Do not assign the whole GPU memory up front, just use it on the go.
        config.gpu_options.allow_growth = True
        # If an operation is not defined on the default device, let it
        # execute on another one.
        config.allow_soft_placement = True

        # Timestamp used as a prefix for every file written by this run.
        timestr = time.strftime("%Y%m%d-%H%M%S")

        random_seed = 123
        np.random.seed(random_seed)
        tf.set_random_seed(random_seed)

        # Data path
        CallFolder = '../../Raw_Data/'

        StoreFolder = 'Final_Results/'
        StoreFolder_selfeval = 'Selfeval_Results/'
        for folder in (StoreFolder, StoreFolder_selfeval):
            os.makedirs(folder, exist_ok=True)

        # Start from an empty checkpoint folder. The original code
        # re-checked StoreFolder_selfeval here (copy-paste slip), so the
        # model folder was never recreated after being removed.
        StoreFolder_Model = 'Models/'
        if os.path.isdir(StoreFolder_Model):
            shutil.rmtree(StoreFolder_Model)
        os.makedirs(StoreFolder_Model, exist_ok=True)

        #########################################################
        # Decide whether self-evaluation or final submission
        final_submission = False
        Test_split = 9.5 / 10
        Val_split = 9.5 / 10

        # You want to preprocess the data?
        preprocessing = True

        # Hyperparameters (epochs, batch_size and params are arguments;
        # previously hard-coded as 120, 128 and 200).
        learning_rate = 0.0002
        activation = tf.nn.relu

        # At which sample starts the prediction for the test data?
        sample_number = 30000

        #########################################################
        # LOAD AND SHUFFLE DATA!
        DataTrain = np.array(
            pd.read_hdf(CallFolder + "train_labeled.h5", "train"))
        # Column 0 holds the label, the remaining columns the features.
        X_train = DataTrain[:, 1:]
        features = X_train.shape[1]
        y_train = DataTrain[:, 0]
        classes = np.max(y_train) + 1

        X_test = np.array(pd.read_hdf(CallFolder + "test.h5", "test"))
        print('Unpreprocessed Data')
        print('X_train_labeled:   ', X_train.shape, end=' ||  ')
        print('y_train:   ', y_train.shape)
        print('X_test:    ', X_test.shape, '\n')

        (X_train, y_train) = shuffle(X_train, y_train)

        #########################################################
        # FINAL DATA
        if final_submission:
            if preprocessing:
                X_train, X_test = centering(X_train, X_test)
                X_train = normalize(X_train)
                X_test = normalize(X_test)

            samples = len(X_train)
            split = int(Val_split * samples)
            X_valid = X_train[split:samples, :]
            y_valid = y_train[split:samples]
            X_train = X_train[0:split, :]
            y_train = y_train[0:split]
            print('Final Data')
            print('Shape of X_train:', X_train.shape)
            print('Shape of y_train:', y_train.shape)
            print('Shape of X_valid:', X_valid.shape)
            print('Shape of y_valid:', y_valid.shape, '\n')

            ##################
            # CREATE GRAPH
            g = tf.Graph()
            with g.as_default():
                # build the graph
                NN.build_NN(features, classes, learning_rate, params,
                            activation)

            ##################
            # TRAINING
            print()
            print('Training... ')
            with tf.Session(graph=g, config=config) as sess:
                (avg_loss_plot, valid_accuracy_plot,
                 test_accuracy_plot) = train(path=StoreFolder_Model,
                                             sess=sess,
                                             epochs=epochs,
                                             random_seed=random_seed,
                                             batch_size=batch_size,
                                             training_set=(X_train, y_train),
                                             validation_set=(X_valid, y_valid),
                                             test_set=None)

                np.save(
                    os.path.join(StoreFolder, timestr + '_avg_loss_plot.npy'),
                    avg_loss_plot)
            del g

            ##################
            # CREATE GRAPH
            # A fresh graph is built for prediction and restored from disk.
            g2 = tf.Graph()
            with g2.as_default():
                # build the graph
                NN.build_NN(features, classes, learning_rate, params,
                            activation)

                # Saver
                saver = tf.train.Saver()

            ##################
            # PREDICTION
            with tf.Session(graph=g2, config=config) as sess:
                # Epochs are 1-based: index of the best validation
                # accuracy + 1. (Assumes train stores one checkpoint per
                # epoch -- TODO confirm.)
                epoch = np.argmax(valid_accuracy_plot) + 1
                load(saver=saver,
                     sess=sess,
                     epoch=epoch,
                     path=StoreFolder_Model)
                y_test_pred = predict(sess, X_test)

            PrintOutput(
                y_test_pred, sample_number,
                os.path.join(
                    StoreFolder, timestr + '_' + str(epochs) + '_' +
                    str(batch_size) + '_' + str(params) + '_y_test.csv'))

        #################################################################################################
        #################################################################################################
        # SELFEVALUATION
        else:
            samples = len(X_train)
            split = int(Test_split * samples)
            X_train_selfeval = X_train[0:split, :]
            y_train_selfeval = y_train[0:split]
            X_test_selfeval = X_train[split:samples, :]
            y_test_selfeval = y_train[split:samples]
            print('Self-evaluation data')
            print('Shape of X_train:', X_train_selfeval.shape)
            print('Shape of y_train:', y_train_selfeval.shape)
            print('Shape of X_test:', X_test_selfeval.shape)
            print('Shape of y_test:', y_test_selfeval.shape)

            if preprocessing:
                X_train_selfeval, X_test_selfeval = centering(
                    X_train_selfeval, X_test_selfeval)
                X_train_selfeval = normalize(X_train_selfeval)
                X_test_selfeval = normalize(X_test_selfeval)

            ##################
            # CREATE GRAPH TRAINING
            g = tf.Graph()
            with g.as_default():
                # build the graph
                NN.build_NN(features, classes, learning_rate, params,
                            activation)

            ##################
            # TRAINING
            print()
            print('Training... ')
            with tf.Session(graph=g, config=config) as sess:
                # No validation set here; the held-out part serves as the
                # test set.
                (avg_loss_plot, _valid_accuracy_plot,
                 test_accuracy_plot) = train(
                     path=StoreFolder_Model,
                     sess=sess,
                     epochs=epochs,
                     random_seed=random_seed,
                     batch_size=batch_size,
                     training_set=(X_train_selfeval, y_train_selfeval),
                     validation_set=None,
                     test_set=(X_test_selfeval, y_test_selfeval))

                np.save(
                    os.path.join(StoreFolder_selfeval,
                                 timestr + '_avg_loss_plot.npy'),
                    avg_loss_plot)
                np.save(
                    os.path.join(StoreFolder_selfeval,
                                 timestr + '_test_accuracy_plot.npy'),
                    test_accuracy_plot)

        print('\nJob Done!')
        # NOTE(review): in the final-submission branch train() is called
        # with test_set=None -- confirm test_accuracy_plot is non-empty
        # there, otherwise this average is not meaningful.
        return np.average(test_accuracy_plot[-10:])
    # NOTE(review): orphaned fragment -- the header of the enclosing
    # definition and the code that defines Val_split, samples, X_train,
    # y_train, features, classes, learning_rate, params, activation, config,
    # StoreFolder_Model, epochs, random_seed and batch_size are not visible
    # here.
    #
    # Split the data: rows from int(Val_split * samples) onwards become the
    # validation set, the rows before that index stay in the training set.
    X_valid = X_train[int(Val_split * samples):samples, :]
    y_valid = y_train[int(Val_split * samples):samples]
    X_train = X_train[0:int(Val_split * samples), :]
    y_train = y_train[0:int(Val_split * samples)]
    print('Final Data')
    print('Shape of X_train:', X_train.shape)
    print('Shape of y_train:', y_train.shape)
    print('Shape of X_valid:', X_valid.shape)
    print('Shape of y_valid:', y_valid.shape, '\n')

    ##################
    # CREATE GRAPH
    # The network is built inside its own tf.Graph.
    g = tf.Graph()
    with g.as_default():
        # build the graph
        NN.build_NN(features, classes, learning_rate, params, activation)

    ##################
    # TRAINING
    print()
    print('Training... ')
    with tf.Session(graph=g, config=config) as sess:
        # train() is given a training and a validation set but no test set;
        # it returns three sequences (loss, validation accuracy, test
        # accuracy).
        [avg_loss_plot, valid_accuracy_plot,
         test_accuracy_plot] = train(path=StoreFolder_Model,
                                     sess=sess,
                                     epochs=epochs,
                                     random_seed=random_seed,
                                     batch_size=batch_size,
                                     training_set=(X_train, y_train),
                                     validation_set=(X_valid, y_valid),
                                     test_set=None)
Exemple #4
0
# Script fragment: optional downsampling followed by training.
# NOTE(review): BDownsampling, BGridSearch, BFinalPrediction, preprocessing,
# X_train, y_train, classes, learning_rate, params, random_seed, epochs,
# batch_size and config are defined outside this excerpt.

# When the BDownsampling flag equals 1, replace the training data with the
# output of the downsampler's transform().
if BDownsampling == 1:
    downsampler = preprocessing.downsampling()
    (X_train, y_train) = downsampler.transform(X_train, y_train)

print(X_train.shape)
print(y_train.shape)
print(y_train)
# Train when grid search is switched off or a final prediction is requested.
if BGridSearch == 0 or BFinalPrediction == 1:
    ##################
    # CREATE GRAPH
    ## create a graph
    g = tf.Graph()
    with g.as_default():
        # Seed is set inside the graph context, before the network is built.
        tf.set_random_seed(random_seed)
        ## build the graph
        NN.build_NN(classes, learning_rate, params)

    ##################
    # TRAINING & PREDICTION
    print()
    print('Training... ')
    with tf.Session(graph=g, config=config) as sess:
        # train() is called without a test set and returns two sequences
        # (loss and test accuracy).
        [avg_loss_plot,
         test_accuracy_plot] = train(sess=sess,
                                     epochs=epochs,
                                     random_seed=random_seed,
                                     batch_size=batch_size,
                                     training_set=(X_train, y_train),
                                     test_set=None)

        # np.save(os.path.join(StoreFolder, timestr + '_avg_loss_plot.npy'), avg_loss_plot)