Example #1
0
def _min_max_scale(features):
    """Rescale ``features`` to the range [0, 1] with min-max scaling.

    The minimum and maximum are taken over the whole array (no ``axis``),
    so a single scalar offset and a single scalar span are applied to every
    entry rather than one pair per column.

    Args:
        features: NumPy array of numeric values.

    Returns:
        A new array with the same shape as ``features``. If every entry of
        ``features`` is identical, the result is all zeros.
    """
    lowest, highest = features.min(), features.max()
    span = highest - lowest
    if span == 0:
        # Constant input: dividing by a zero span would fill the result with
        # NaN. A unit span maps every entry to 0 instead.
        span = 1
    return (features - lowest) / span


def load_data(if_norm=True):
    """Load the train and test CSV files and split them into arrays.

    Reads ``./data/test.csv`` via ``read_test`` and ``./data/train.csv`` via
    ``read_train``, converts both results to NumPy arrays, and slices them:

    * train features: every column except the first and the last
    * train labels:   the last column, cast to ``int``
    * test features:  every column except the first

    The first column of both files is dropped (presumably a row ID --
    confirm against the CSV layout).

    Progress, array shapes and the elapsed read time are printed to stdout.

    Args:
        if_norm: When truthy, min-max scale both feature arrays to [0, 1].
            When falsy, return the raw feature slices.

    Returns:
        A ``(test_features, train_features, train_labels)`` tuple. Note that
        the test features come first.
    """
    print('Loading data set ...')
    load_time = time.time()
    test_data = np.array(read_test('./data/test.csv'))
    train_data = np.array(read_train('./data/train.csv'))
    # Only the file reading and array conversion are timed; slicing and
    # normalisation below are not included in the reported duration.
    loaded_time = time.time() - load_time
    print('test_data: {0}, train_data shape: {1}'.format(
        test_data.shape, train_data.shape))

    train_features = train_data[:, 1:-1]
    train_labels = train_data[:, -1].astype(int)
    test_features = test_data[:, 1:]

    # Truthiness rather than ``is True`` so flags such as ``1`` or
    # ``np.True_`` also enable normalisation.
    if if_norm:
        # NOTE(review): each set is scaled with its OWN min/max, so the same
        # raw value can map to different scaled values in train and test.
        # Kept as-is to preserve existing outputs; confirm this is intended.
        test_features = _min_max_scale(test_features)
        train_features = _min_max_scale(train_features)

    print('Data set loaded successfully in {0:.4f} seconds.'.format(
        loaded_time))
    return test_features, train_features, train_labels