import json
import os

import pandas as pd
import tensorflow as tf

# Assumed import path for the project's Reader class; adjust to match the repo layout.
from reader import Reader


def make_dataset():
    """
    Build tf.data.Dataset objects for training and validation, plus
    (test_features, test_labels) for testing and evaluation.

    :return: train_dataset, val_dataset, test_features, test_labels,
             train_mean, train_std
    """
    reader = Reader()
    with open(os.path.join(os.path.dirname(__file__), 'config.json'), 'r') as json_file:
        config = json.load(json_file)
    features = config['features']

    train = reader.read_many('train_resample', features)
    val = reader.read_many('val_resample', features)
    test = reader.read_many('test_resample', features)

    # Normalization statistics come from the training split only, so no
    # information leaks from the validation/test splits. Computing them once
    # here also avoids recomputing mean/std on every normalization() call.
    train_stats_mean = train.mean(axis=0)
    train_stats_std = train.std(axis=0)
    train_mean = train_stats_mean['RPH']
    train_std = train_stats_std['RPH']

    def normalization(dataframe):
        return (dataframe - train_stats_mean) / train_stats_std

    def dataframe_to_dataset(dataframe, shuffle=True, repeat=True, batch_size=32):
        dataframe = dataframe.copy()
        labels = dataframe.pop('RPH')  # 'RPH' is the regression target
        dataset = tf.data.Dataset.from_tensor_slices((dataframe, labels))
        if shuffle:
            dataset = dataset.shuffle(buffer_size=len(dataframe))
        dataset = dataset.batch(batch_size)
        if repeat:
            dataset = dataset.repeat()
        return dataset

    train, val, test = normalization(train), normalization(val), normalization(test)

    train_dataset = dataframe_to_dataset(train)
    val_dataset = dataframe_to_dataset(val, shuffle=False)
    test_labels = test.pop('RPH')
    test_features = test
    return train_dataset, val_dataset, test_features, test_labels, train_mean, train_std
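
# Usage sketch (an assumption, not part of the original module): the two
# repeating tf.data pipelines returned above can feed a Keras model directly;
# because they repeat, fit() needs explicit steps_per_epoch/validation_steps.
# The model architecture and step counts below are illustrative placeholders.
#
#     train_dataset, val_dataset, test_features, test_labels, mean, std = make_dataset()
#     model = tf.keras.Sequential([
#         tf.keras.layers.Dense(64, activation='relu'),
#         tf.keras.layers.Dense(1),
#     ])
#     model.compile(optimizer='adam', loss='mse')
#     model.fit(train_dataset, validation_data=val_dataset,
#               steps_per_epoch=100, validation_steps=20, epochs=10)
#     preds = model.predict(test_features) * std + mean  # undo target normalization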

def concat_data():
    """
    Concatenate the raw yearly data into one CSV per split, used for
    training, validation, and testing.

    :return: None
    """
    with open(os.path.join(os.path.dirname(__file__), 'config.json'), 'r') as json_file:
        config = json.load(json_file)
    features = config['features_data']
    path = config['directory']
    reader = Reader()

    # config['years'] maps each split name to the list of years whose raw
    # files should be merged into that split's CSV.
    for year_used_for, years in config['years'].items():
        temp_list = []
        for year in years:
            temp = reader.read_many(year, features)
            temp_list.append(temp)
        temp_frame = pd.concat(temp_list)
        temp_frame.to_csv(os.path.join(path, '{}.csv'.format(year_used_for)), index=False)
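
# Sketch of the config.json layout both functions read. The key names come
# from the code above; the concrete values are illustrative assumptions
# ('RPH' must appear in "features" since it is popped as the label):
#
#     {
#         "features": ["RPH", "..."],
#         "features_data": ["..."],
#         "directory": "data/",
#         "years": {
#             "train": ["2015", "2016"],
#             "val": ["2017"],
#             "test": ["2018"]
#         }
#     }
#
#     concat_data()  # would then write data/train.csv, data/val.csv, data/test.csv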