Example #1
import os
import sys

# ExperimentConfig, recreate_folder, and process_dacon are assumed to come
# from the surrounding project.

def main(expt_name, force_download, output_folder):
    """Runs main download routine.

    Args:
      expt_name: Name of experiment
      force_download: Whether to force data download from scratch
      output_folder: Folder path for storing data
    """

    print('#### Running download script ####')

    expt_config = ExperimentConfig(expt_name, output_folder)

    if os.path.exists(expt_config.data_csv_path) and not force_download:
        print('Data has been processed for {}. Skipping download...'.format(
            expt_name))
        sys.exit(0)
    else:
        print('Resetting data folder...')
        recreate_folder(expt_config.data_folder)

    # Default download functions
    download_functions = {'dacon': process_dacon, 'ulsan': process_dacon}

    if expt_name not in download_functions:
        raise ValueError('Unrecognised experiment! name={}'.format(expt_name))

    download_function = download_functions[expt_name]

    # Run data download
    print('Getting {} data...'.format(expt_name))
    download_function(expt_config)

    print('Download completed.')
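
A minimal sketch of driving this routine directly (hypothetical values; 'dacon' is one of the experiment names registered in download_functions above):

main(expt_name='dacon', force_download=False, output_folder='./outputs/data')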

Example #2

def preprocess_electricty(csv_path: str, config: ExperimentConfig):
    """Adds calendar features to each series and saves the processed CSV."""
    df_list = []
    # ... (start of the loop elided in this example: `label` is the series id,
    # `date` its DatetimeIndex, and `tmp` the per-series frame being built) ...
        tmp['categorical_id'] = label
        tmp['date'] = date
        tmp['id'] = label
        tmp['hour'] = date.hour
        tmp['day'] = date.day
        tmp['day_of_week'] = date.dayofweek
        tmp['month'] = date.month

        df_list.append(tmp)

    output = pd.concat(df_list, axis=0, join='outer').reset_index(drop=True)

    output['categorical_id'] = output['id'].copy()
    output['hours_from_start'] = output['t']
    output['categorical_day_of_week'] = output['day_of_week'].copy()
    output['categorical_hour'] = output['hour'].copy()

    # Filter to match range used by other academic papers
    output = output[(output['days_from_start'] >= 1096)
                    & (output['days_from_start'] < 1346)].copy()

    output.to_csv(config.data_csv_path)
    print(f'Saved in {config.data_csv_path}')
    print('Done.')


if __name__ == "__main__":
    expt_config = ExperimentConfig('electricity', './outputs/data/electricity')
    csv_path: str = download_electricity(expt_config)
    preprocess_electricty(csv_path, expt_config)
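
The truncated loop above builds one frame per series from a pandas DatetimeIndex. A self-contained sketch of the same calendar-feature pattern on a hypothetical toy series:

import pandas as pd

# Hourly toy series; hour/dayofweek/month are standard DatetimeIndex attributes.
date = pd.date_range('2014-01-01', periods=48, freq='H')
tmp = pd.DataFrame({'power_usage': range(48)}, index=date)
tmp['hour'] = date.hour
tmp['day_of_week'] = date.dayofweek
tmp['month'] = date.month
print(tmp.head())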
                            help="Path to folder for data download")
        parser.add_argument("use_gpu",
                            metavar="g",
                            type=str,
                            nargs="?",
                            choices=["yes", "no"],
                            default="no",
                            help="Whether to use gpu for training.")

        args = parser.parse_known_args()[0]

        root_folder = None if args.output_folder == "." else args.output_folder

        return args.expt_name, root_folder, args.use_gpu == 'yes'

    name, output_folder, use_tensorflow_with_gpu = get_args()

    print("Using output folder {}".format(output_folder))

    config = ExperimentConfig(name, output_folder)
    formatter = config.make_data_formatter()

    # Customise inputs to main() for new datasets.
    main(expt_name=name,
         use_gpu=use_tensorflow_with_gpu,
         model_folder=os.path.join(config.model_folder, "fixed"),
         data_csv_path=config.data_csv_path,
         data_formatter=formatter,
         use_testing_mode=True)  # Change to False to use original default params
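
parse_known_args() returns an (args, unknown) pair, hence the [0] above. A quick self-contained check of the nargs="?" positional pattern used by this parser:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("use_gpu", nargs="?", choices=["yes", "no"], default="no")
print(parser.parse_known_args([])[0].use_gpu)       # -> no
print(parser.parse_known_args(["yes"])[0].use_gpu)  # -> yes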
Example #4
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch.optim as optim
from torch.utils.data import DataLoader
from typing import Dict, List

from pandas import DataFrame
from torch import Tensor

# ts_dataset.TSDataset, TFT, QuantileLoss, and ExperimentConfig are assumed to
# come from the surrounding project.

def main(exp_name: str, data_csv_path: str):
    exp_config = ExperimentConfig(exp_name, 'outputs')
    data_formatter = exp_config.make_data_formatter()
    print("*** Training from defined parameters for {} ***".format('electricity'))
    print("Loading & splitting data...")
    raw_data: DataFrame = pd.read_csv(data_csv_path, index_col=0)
    train, valid, test = data_formatter.split_data(raw_data)
    train_samples, valid_samples = data_formatter.get_num_samples_for_calibration()
    # Sets up default params
    fixed_params: Dict = data_formatter.get_experiment_params()
    params: Dict = data_formatter.get_default_model_params()
    # TODO set the following in a proper config object
    id_col = 'id'
    time_col = 'hours_from_start'
    input_cols = ['power_usage', 'hour', 'day_of_week', 'hours_from_start', 'categorical_id']
    target_col = 'power_usage'
    static_cols = ['categorical_id']
    time_steps = 192
    num_encoder_steps = 168
    output_size = 1
    max_samples = 1000
    input_size = 5
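    # Window layout: 192 hourly steps per sample, of which the first 168 (one
    # week) condition the encoder, leaving 24 steps (one day) to forecast.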
    elect = ts_dataset.TSDataset(id_col, static_cols, time_col, input_cols,
                                 target_col, time_steps, max_samples,
                                 input_size, num_encoder_steps, 1, output_size, train)
    batch_size = 64
    loader = DataLoader(
        elect,
        batch_size=batch_size,
        num_workers=2,
        shuffle=True
    )
    # Pull a single batch to sanity-check the forward pass; `loader` yields
    # dict batches and `batch['outputs']` holds the targets.
    batch = next(iter(loader))
    static_cols = ['meter']
    categorical_cols = ['hour']
    real_cols: List = ['power_usage', 'hour', 'day']
    # `config` (the TFT hyperparameter dict) is assumed to be built in an
    # elided part of this example; only the keys touched here are visible.
    config['static_variables'] = len(static_cols)
    print(f"Using {config['device']}")
    # instantiate model
    model: TFT = TFT(config)
    # do a forward pass
    output, encoder_output, decoder_output, attn, attn_output_weights, \
        embeddings_encoder, embeddings_decoder = model(batch)
    # define loss
    q_loss_func: QuantileLoss = QuantileLoss([0.1, 0.5, 0.9])
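    # QuantileLoss here is assumed to implement the standard pinball loss: for
    # quantile q and error e = y - y_hat, the penalty is max(q * e, (q - 1) * e),
    # averaged over q in {0.1, 0.5, 0.9} to train the three output heads.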
    # define optimizer
    optimizer = optim.Adam(model.parameters(), lr=0.0001)
    # start training cycle
    model.train()
    epochs = 10
    losses = []
    for i in range(epochs):
        epoch_loss = []
        j = 0
        for batch in loader:
            optimizer.zero_grad()  # reset gradients accumulated by the previous step
            output, encoder_output, decoder_output, attn, attn_weights, emb_enc, emb_dec = model(batch)
            loss: Tensor = q_loss_func(output.view(-1, 3),
                                       batch['outputs'][:, :, 0].flatten().float())
            loss.backward()
            optimizer.step()
            epoch_loss.append(loss.item())
            j += 1
            if j > 5:
                break  # cap each epoch at a few mini-batches to keep the demo quick
        mean_loss = np.mean(epoch_loss)
        losses.append(mean_loss)
        print(mean_loss)

    output, encoder_output, decoder_output, attn, attn_weights, emb_enc, emb_dec = model(batch)
    ind = np.random.choice(output.shape[0])  # random sample index within the batch
    print(ind)
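    # Plot the 10th/50th/90th percentile forecasts for one random sample
    # against the ground truth, then visualise the attention pattern.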
    plt.plot(output[ind, :, 0].detach().cpu().numpy(), label='pred_1')
    plt.plot(output[ind, :, 1].detach().cpu().numpy(), label='pred_5')
    plt.plot(output[ind, :, 2].detach().cpu().numpy(), label='pred_9')

    plt.plot(batch['outputs'][ind, :, 0].cpu().numpy(), label='true')
    plt.legend()
    plt.matshow(attn_weights.detach().numpy()[0, :, :])
    plt.show()