def main(expt_name, force_download, output_folder):
    """Runs main download routine.

    If the processed csv for the experiment already exists and
    `force_download` is falsy, the process exits with status 0 without
    touching anything. Otherwise the data folder is reset and the download
    function registered for `expt_name` is run.

    Args:
      expt_name: Name of experiment
      force_download: Whether to force data download from scratch
      output_folder: Folder path for storing data

    Raises:
      ValueError: If no download function is registered for `expt_name`.
      SystemExit: With code 0 when the data already exists and
        `force_download` is falsy.
    """
    print('#### Running download script ####')

    expt_config = ExperimentConfig(expt_name, output_folder)

    if os.path.exists(expt_config.data_csv_path) and not force_download:
        print('Data has been processed for {}. Skipping download...'.format(
            expt_name))
        sys.exit(0)

    # Default download functions
    download_functions = {'dacon': process_dacon, 'ulsan': process_dacon}

    # Validate the experiment name *before* resetting the data folder, so an
    # unsupported name fails without first touching existing data
    # (recreate_folder presumably wipes the folder -- confirm against its
    # definition).
    if expt_name not in download_functions:
        raise ValueError('Unrecongised experiment! name={}'.format(expt_name))
    download_function = download_functions[expt_name]

    print('Resetting data folder...')
    recreate_folder(expt_config.data_folder)

    # Run data download
    print('Getting {} data...'.format(expt_name))
    download_function(expt_config)

    print('Download completed.')
tmp['categorical_id']: Series = label tmp['date']: Series = date tmp['id']: Series = label tmp['hour']: Series = date.hour tmp['day']: Series = date.day tmp['day_of_week']: Series = date.dayofweek tmp['month']: Series = date.month df_list.append(tmp) output: DataFrame = pd.concat(df_list, axis=0, join='outer').reset_index(drop=True) output['categorical_id']: Series = output['id'].copy() output['hours_from_start']: Series = output['t'] output['categorical_day_of_week']: Series = output['day_of_week'].copy() output['categorical_hour']: Series = output['hour'].copy() # Filter to match range used by other academic papers output: DataFrame = output[(output['days_from_start'] >= 1096) & (output['days_from_start'] < 1346)].copy() output.to_csv(config.data_csv_path) print(f'Saved in {config.data_csv_path}') print('Done.') if __name__ == "__main__": expt_config = ExperimentConfig('electricity', './outputs/data/electricity') csv_path: str = download_electricity(expt_config) preprocess_electricty(csv_path, expt_config)
help="Path to folder for data download") parser.add_argument("use_gpu", metavar="g", type=str, nargs="?", choices=["yes", "no"], default="no", help="Whether to use gpu for training.") args: Namespace = parser.parse_known_args()[0] root_folder = None if args.output_folder == "." else args.output_folder return args.expt_name, root_folder, args.use_gpu == 'yes' name, output_folder, use_tensorflow_with_gpu = get_args() print("Using output folder {}".format(output_folder)) config = ExperimentConfig(name, output_folder) formatter = config.make_data_formatter() # Customise inputs to main() for new datasets. main(expt_name=name, use_gpu=use_tensorflow_with_gpu, model_folder=os.path.join(config.model_folder, "fixed"), data_csv_path=config.data_csv_path, data_formatter=formatter, use_testing_mode=True ) # Change to false to use original default params
def main(exp_name: str, data_csv_path: str) -> None:
    """Trains a TFT model for a few batches and plots a sample forecast.

    Reads the csv at `data_csv_path`, splits it with the experiment's data
    formatter, wraps the training split in a `TSDataset`, and runs 10 epochs
    of at most 6 batches each using a quantile loss at (0.1, 0.5, 0.9).
    Afterwards it plots the three quantile outputs against the target for one
    randomly chosen sample of the last batch, plus the attention weights of
    the first sample.

    Note: this function reads and mutates a `config` mapping that is not
    defined inside it (it must exist in an enclosing/module scope).

    Args:
      exp_name: Experiment name passed to `ExperimentConfig`.
      data_csv_path: Path of the csv file holding the raw data.
    """
    exp_config = ExperimentConfig(exp_name, 'outputs')
    data_formatter = exp_config.make_data_formatter()

    print("*** Training from defined parameters for {} ***".format('electricity'))

    print("Loading & splitting data...")
    raw_data: DataFrame = pd.read_csv(data_csv_path, index_col=0)
    train, valid, test = data_formatter.split_data(raw_data)
    # NOTE(review): the values below are fetched but not used further down;
    # the calls are kept in case the formatter relies on them being made.
    train_samples, valid_samples = data_formatter.get_num_samples_for_calibration()

    # Sets up default params
    fixed_params: Dict = data_formatter.get_experiment_params()
    params: Dict = data_formatter.get_default_model_params()

    # TODO set the following in a proper config object
    id_col = 'id'
    time_col = 'hours_from_start'
    input_cols = ['power_usage', 'hour', 'day_of_week', 'hours_from_start',
                  'categorical_id']
    target_col = 'power_usage'
    static_cols = ['categorical_id']
    time_steps = 192
    num_encoder_steps = 168
    output_size = 1
    max_samples = 1000
    input_size = 5

    elect: TSDataset = ts_dataset.TSDataset(
        id_col, static_cols, time_col, input_cols, target_col, time_steps,
        max_samples, input_size, num_encoder_steps, 1, output_size, train)

    batch_size = 64
    loader = DataLoader(
        elect,
        batch_size=batch_size,
        num_workers=2,
        shuffle=True,
    )

    # Grab a single batch for the smoke-test forward pass below.
    batch = next(iter(loader))

    static_cols = ['meter']
    config['static_variables'] = len(static_cols)

    print(f"Using {config['device']}")

    # instantiate model
    model: TFT = TFT(config)

    # do a forward pass (results are not needed; this only checks that the
    # model runs on one batch). Call the module rather than .forward() so any
    # registered hooks run as well.
    model(batch)

    # define loss
    q_loss_func: QuantileLoss = QuantileLoss([0.1, 0.5, 0.9])

    # define optimizer
    optimizer = optim.Adam(model.parameters(), lr=0.0001)

    # start training cycle
    model.train()
    epochs = 10
    max_batches_per_epoch = 6
    losses = []
    for _ in range(epochs):
        epoch_loss = []
        for step, batch in enumerate(loader, start=1):
            # Clear gradients left over from the previous step; without this
            # backward() keeps accumulating into the same .grad buffers.
            optimizer.zero_grad()
            output, encoder_output, decoder_output, attn, attn_weights, emb_enc, emb_dec = model(batch)
            loss: Tensor = q_loss_func(
                output[:, :, :].view(-1, 3),
                batch['outputs'][:, :, 0].flatten().float())
            loss.backward()
            optimizer.step()
            epoch_loss.append(loss.item())

            if step >= max_batches_per_epoch:
                break

        mean_epoch_loss = np.mean(epoch_loss)
        losses.append(mean_epoch_loss)
        print(mean_epoch_loss)

    # NOTE(review): the model is still in training mode here (model.train()
    # above); confirm whether model.eval() is wanted for the plotted forecast.
    output, encoder_output, decoder_output, attn, attn_weights, emb_enc, emb_dec = model(batch)

    # Pick the sample from the actual batch dimension instead of assuming a
    # full batch of 64, so a short final batch cannot be indexed out of range.
    ind = np.random.choice(output.shape[0])
    print(ind)

    plt.plot(output[ind, :, 0].detach().cpu().numpy(), label='pred_1')
    plt.plot(output[ind, :, 1].detach().cpu().numpy(), label='pred_5')
    plt.plot(output[ind, :, 2].detach().cpu().numpy(), label='pred_9')

    plt.plot(batch['outputs'][ind, :, 0], label='true')
    plt.legend()

    # Move to host memory before converting, matching the output plots above;
    # .numpy() on a CUDA tensor raises.
    attn_map = attn_weights.detach().cpu().numpy()[0, :, :]
    plt.matshow(attn_map)

    plt.imshow(attn_map)