Python create_datasets Examples, ts.utils.helper_funcs.create_datasets Python Examples

Example #1

0

Show file

def main():
    """Run the benchmark model for the "Monthly" configuration.

    Steps, in order: seed the run, load the config, derive the train/test CSV
    paths from ``config["variable"]``, build the datasets with
    ``create_val_dataset=False``, print length statistics, then construct a
    ``PinballLoss`` criterion and a ``Trainer`` and call
    ``Trainer.train_epochs()``.

    Takes no arguments and returns ``None``; all results are side effects of
    the helpers it calls (console output, plus whatever ``Trainer`` writes
    under the log and figure paths it is given).
    """
    set_seed(0)

    model_name = MODEL_TYPE.BENCHMARK.value

    # The run id is the current Unix time truncated to whole seconds.
    run_id = str(int(time.time()))
    print("Starting run={}, model={} ".format(run_id, model_name))

    raw_dir = Path("data/raw/")
    log_dir = Path("logs/" + model_name)
    figure_dir = Path("figures-temp/" + model_name)

    print("Loading config")
    config = get_config("Monthly")
    frequency = config["variable"]
    print("Frequency:{}".format(frequency))

    print("loading data")
    # NOTE(review): the metadata frame is read but not used afterwards in this
    # function; the read is kept so a missing file still fails here.
    info = pd.read_csv(str(raw_dir / "M4info.csv"))
    train_path = str(raw_dir / "train/%s-train.csv") % frequency
    test_path = str(raw_dir / "test/%s-test.csv") % frequency

    sample = config["sample"]
    # "sample_ids" is optional in the config; fall back to an empty list.
    if "sample_ids" in config:
        sample_ids = config["sample_ids"]
    else:
        sample_ids = []

    # Only the train and test parts of the result are used below.
    train, _ts_labels, _, test, _test_idx = create_datasets(
        train_path,
        test_path,
        config["output_size"],
        create_val_dataset=False,
        sample_ids=sample_ids,
        sample=sample,
        sampling_size=4,
    )
    generate_timeseries_length_stats(train)
    print("#.train:{}, #.test ts:{}".format(len(train), len(test)))

    reload = config["reload"]
    add_run_id = config["add_run_id"]

    training_tau = config["training_tau"]
    output_times_batch = config["output_size"] * config["batch_size"]
    criterion = PinballLoss(training_tau, output_times_batch, config["device"])

    # The second and third positional arguments are deliberately None here.
    trainer = Trainer(
        model_name,
        None,
        None,
        criterion,
        run_id,
        add_run_id,
        config,
        csv_path=log_dir,
        figure_path=figure_dir,
        sampling=sample,
        reload=reload,
    )
    trainer.train_epochs()

Example #2

0

Show file

def main():
    """Train an N-BEATS model for the "Quarterly" configuration.

    Steps, in order: seed the run, load the config, build the datasets from
    the train/test CSVs named after ``config["variable"]``, determine a chop
    value from the training data, filter the series with it, wrap the result
    in a ``SeriesDataset``/``DataLoader``, build the ``NBeatsNet`` model with
    an Adam optimizer and a ``PinballLoss`` criterion, and finally call
    ``Trainer.train_epochs()``.

    Takes no arguments and returns ``None``.
    """
    set_seed(0)

    model_name = MODEL_TYPE.NBEATS.value

    # The run id is the current Unix time truncated to whole seconds.
    run_id = str(int(time.time()))
    print("Starting run={}, model={} ".format(run_id, model_name))

    raw_dir = Path("data/raw/")
    log_dir = Path("logs/" + model_name)
    figure_dir = Path("figures-temp/" + model_name)

    print("Loading config")
    config = get_config("Quarterly")
    frequency = config["variable"]
    print("Frequency:{}".format(frequency))
    forecast_length = config["output_size"]
    # The backcast window is a multiple of the forecast horizon; the factor
    # is currently 1.
    backcast_length = 1 * forecast_length

    print("loading data")
    m4_info = pd.read_csv(str(raw_dir / "M4info.csv"))
    train_path = str(raw_dir / "train/%s-train.csv") % frequency
    test_path = str(raw_dir / "test/%s-test.csv") % frequency

    sample = config["sample"]
    # "sample_ids" is optional in the config; fall back to an empty list.
    if "sample_ids" in config:
        sample_ids = config["sample_ids"]
    else:
        sample_ids = []

    train, ts_labels, val, test, _test_idx = create_datasets(
        train_path, test_path, config["output_size"],
        sample_ids=sample_ids, sample=sample, sampling_size=4,
    )
    generate_timeseries_length_stats(train)

    # Remember the pre-filter size so the loss percentage can be reported.
    count_before = train.shape[0]
    print("#.Train before chopping:{}".format(count_before))

    chop_val = determine_chop_value(train, backcast_length, forecast_length)
    print("Chop value:{:6.3f}".format(chop_val))

    filtered = filter_timeseries(
        m4_info, frequency, sample, ts_labels, train, chop_val, val, test
    )
    train, val, test, infocat_ohe, infocat_headers, info_cat = filtered

    count_after = len(train)
    lost_pct = (count_before - count_after) / count_before * 100.
    print("#.Train after chopping:{}, lost:{:5.2f}%".format(count_after, lost_pct))
    print("#.train:{}, #.validation ts:{}, #.test ts:{}".format(
        count_after, len(val), len(test)))

    dataset = SeriesDataset(
        infocat_ohe, infocat_headers, info_cat,
        ts_labels, train, val, test, config["device"],
    )

    # Batches are drawn in dataset order (shuffle=False). An earlier variant
    # used collate_fn=collate_lines with shuffle=True instead.
    dataloader = DataLoader(dataset, batch_size=config["batch_size"], shuffle=False)

    # These hyper-parameters are forwarded from the config under the same names.
    config_keys = (
        "stack_types",
        "thetas_dims",
        "nb_blocks_per_stack",
        "hidden_layer_units",
        "share_weights_in_stack",
        "dropout",
        "device",
    )
    model = NBeatsNet(
        forecast_length=forecast_length,
        backcast_length=backcast_length,
        **{key: config[key] for key in config_keys},
    )

    reload = config["reload"]
    add_run_id = config["add_run_id"]
    optimizer = torch.optim.Adam(model.parameters(), lr=config["learning_rate"])

    training_tau = config["training_tau"]
    output_times_batch = config["output_size"] * config["batch_size"]
    criterion = PinballLoss(training_tau, output_times_batch, config["device"])

    trainer = Trainer(
        model_name, model, optimizer, criterion, dataloader,
        run_id, add_run_id, config, forecast_length, backcast_length,
        ohe_headers=dataset.data_info_cat_headers,
        csv_path=log_dir,
        figure_path=figure_dir,
        sampling=sample,
        reload=reload,
    )
    trainer.train_epochs()

Example #3

0

Show file

def main():
    """Run the benchmark model for the "Monthly" configuration with validation.

    Steps, in order: seed the run, load the config, build train/validation/
    test datasets from the CSVs named after ``config["variable"]``, filter the
    series once using ``config["chop_val"]``, wrap the result in a
    ``SeriesDataset``/``DataLoader``, and call ``Trainer.train()``.

    Takes no arguments and returns ``None``.
    """
    set_seed(0)

    # The run id is the current Unix time truncated to whole seconds.
    run_id = str(int(time.time()))
    print("Starting run={}, model={} ".format(run_id,
                                              MODEL_TYPE.BENCHMARK.value))

    BASE_DIR = Path("data/raw/")
    LOG_DIR = Path("logs/" + MODEL_TYPE.BENCHMARK.value)
    FIGURE_PATH = Path("figures-temp/" + MODEL_TYPE.BENCHMARK.value)

    print("Loading config")
    config = get_config("Monthly")
    print("Frequency:{}".format(config["variable"]))

    print("loading data")
    info = pd.read_csv(str(BASE_DIR / "M4info.csv"))
    train_path = str(BASE_DIR / "train/%s-train.csv") % (config["variable"])
    test_path = str(BASE_DIR / "test/%s-test.csv") % (config["variable"])

    sample = config["sample"]
    # "sample_ids" is optional in the config; fall back to an empty list.
    sample_ids = config["sample_ids"] if "sample_ids" in config else []
    train, ts_labels, val, test, test_idx = create_datasets(
        train_path,
        test_path,
        config["output_size"],
        create_val_dataset=True,
        sample_ids=sample_ids,
        sample=sample,
        sampling_size=4)
    generate_timeseries_length_stats(train)

    # Remember the pre-filter size so the loss percentage can be reported.
    train_before_chopping_count = train.shape[0]
    print("#.Train before chopping:{}".format(train.shape[0]))
    chop_val = config["chop_val"]
    print("Chop value:{:6.3f}".format(chop_val))

    # Filter exactly once. A second identical call would re-filter the
    # already-filtered train/val/test against the unfiltered ts_labels.
    train, val, test, data_infocat_ohe, data_infocat_headers, data_info_cat = \
        filter_timeseries(info, config["variable"], sample, ts_labels, train, chop_val, val, test)
    print("#.Train after chopping:{}, lost:{:5.2f}%".format(
        len(train), (train_before_chopping_count - len(train)) /
        train_before_chopping_count * 100.))
    print("#.train:{}, #.validation ts:{}, #.test ts:{}".format(
        len(train), len(val), len(test)))

    dataset = SeriesDataset(data_infocat_ohe, data_infocat_headers,
                            data_info_cat, ts_labels, train, val, test,
                            config["device"])

    # Batches are drawn in dataset order (shuffle=False). An earlier variant
    # used collate_fn=collate_lines with shuffle=True instead.
    dataloader = DataLoader(dataset,
                            batch_size=config["batch_size"],
                            shuffle=False)
    add_run_id = config["add_run_id"]
    trainer = Trainer(MODEL_TYPE.BENCHMARK.value,
                      dataloader,
                      run_id,
                      add_run_id,
                      config,
                      csv_path=LOG_DIR,
                      figure_path=FIGURE_PATH)
    trainer.train()

Example #4

0

Show file

File: main.py Project: zc674/DS-GA-3001-001-Project

# Script fragment: load the "Quarterly" configuration, build the datasets and
# filter the series by the configured chop value, printing progress as it goes.
# NOTE(review): BASE_DIR is defined outside this excerpt; it is combined with
# "/" below, so it is presumably a pathlib.Path -- confirm.
print("loading config")
config = get_config("Quarterly")
print("Frequency:{}".format(config["variable"]))

print("loading data")
info = pd.read_csv(str(BASE_DIR / "M4info.csv"))

# The train/test file names are derived from config["variable"] via %-formatting.
train_path = str(BASE_DIR / "train/%s-train.csv") % (config["variable"])
test_path = str(BASE_DIR / "test/%s-test.csv") % (config["variable"])

sample = config["sample"]
# "sample_ids" is optional in the config; fall back to an empty list.
sample_ids = config["sample_ids"] if "sample_ids" in config else []
# create_datasets returns five values; sampling_size is hard-coded to 4 here.
train, ts_labels, val, test, test_idx = create_datasets(train_path,
                                                        test_path,
                                                        config["output_size"],
                                                        sample_ids=sample_ids,
                                                        sample=sample,
                                                        sampling_size=4)
generate_timeseries_length_stats(train)
print("#.Train before chopping:{}".format(train.shape[0]))
# Remember the pre-filter size so the loss percentage can be reported below.
train_before_chopping_count = train.shape[0]
chop_val = config["chop_val"]
print("Chop value:{:6.3f}".format(chop_val))
# Rebinds train/val/test to the filtered results and also receives three
# info-category objects from filter_timeseries.
train, val, test, data_infocat_ohe, data_infocat_headers, data_info_cat = \
    filter_timeseries(info, config["variable"], sample, ts_labels, train, chop_val, val, test)
# Percentage of train entries removed by the filter, relative to the count
# before filtering (divides by that count, so it must be non-zero).
print("#.Train after chopping:{}, lost:{:5.2f}%".format(
    len(train), (train_before_chopping_count - len(train)) /
    train_before_chopping_count * 100.))
print("#.train:{}, #.validation ts:{}, #.test ts:{}".format(
    len(train), len(val), len(test)))