예제 #1
0
def make_dataset():
    """
    To make tf.data.Dataset for training and test_x, test_y for testing and evaluation.
    :return: train_dataset, val_dataset, test_features, test_labels
    """
    reader = Reader()
    with open(os.path.join(os.path.dirname(__file__), 'config.json'),
              'r') as json_file:
        config = json.load(json_file)

    features = config['features']
    train = reader.read_many('train_resample', features)
    val = reader.read_many('val_resample', features)
    test = reader.read_many('test_resample', features)

    train_mean = train.mean(axis=0)['RPH']
    train_std = train.std(axis=0)['RPH']

    def normalization(dataframe):
        return (dataframe - train.mean(axis=0)) / train.std(axis=0)

    def dataframe_to_dataset(dataframe,
                             shuffle=True,
                             repeat=True,
                             batch_size=32):
        dataframe = dataframe.copy()
        labels = dataframe.pop('RPH')
        dataset = tf.data.Dataset.from_tensor_slices((dataframe, labels))
        if shuffle:
            dataset = dataset.shuffle(buffer_size=len(dataframe))
        dataset = dataset.batch(batch_size)
        if repeat:
            dataset = dataset.repeat()
        return dataset

    train, val, test = normalization(train), normalization(val), normalization(
        test)
    train_dataset = dataframe_to_dataset(train)
    val_dataset = dataframe_to_dataset(val, shuffle=False)
    test_labels = test.pop('RPH')
    test_features = test
    return train_dataset, val_dataset, test_features, test_labels, train_mean, train_std
예제 #2
0
def concat_data():
    """
    To concat raw data to one csv, used for training, validation, and testing.
    :return: None
    """
    with open(os.path.join(os.path.dirname(__file__), 'config.json'), 'r') as json_file:
        config = json.load(json_file)
    features = config['features_data']
    path = config['directory']
    reader = Reader()
    for year_used_for, years in config['years'].items():
        temp_list = []
        for year in years:
            temp = reader.read_many(year, features)
            temp_list.append(temp)
        temp_frame = pd.concat(temp_list)
        temp_frame.to_csv((os.path.join(path, '{}.csv').format(year_used_for)), index=False)