Example #1
def main():
    set_seed(0)

    run_id = str(int(time.time()))
    print("Starting run={}, model={} ".format(run_id, MODEL_TYPE.BENCHMARK.value))

    BASE_DIR = Path("data/raw/")
    LOG_DIR = Path("logs/" + MODEL_TYPE.BENCHMARK.value)
    FIGURE_PATH = Path("figures-temp/" + MODEL_TYPE.BENCHMARK.value)

    print("Loading config")
    config = get_config("Monthly")
    print("Frequency:{}".format(config["variable"]))

    print("loading data")
    info = pd.read_csv(str(BASE_DIR / "M4info.csv"))
    train_path = str(BASE_DIR / "train" / "{}-train.csv".format(config["variable"]))
    test_path = str(BASE_DIR / "test" / "{}-test.csv".format(config["variable"]))

    sample = config["sample"]
    sample_ids = config.get("sample_ids", [])
    train, ts_labels, _, test, test_idx = create_datasets(train_path, test_path, config["output_size"],
                                                            create_val_dataset=False,
                                                            sample_ids=sample_ids, sample=sample,
                                                            sampling_size=4)
    generate_timeseries_length_stats(train)
    print("#.train:{}, #.test ts:{}".format(len(train), len(test)))
    reload = config["reload"]
    add_run_id = config["add_run_id"]
    criterion = PinballLoss(config["training_tau"], config["output_size"] * config["batch_size"], config["device"])
    trainer = Trainer(MODEL_TYPE.BENCHMARK.value, None, None, criterion, run_id, add_run_id, config,
                      csv_path=LOG_DIR, figure_path=FIGURE_PATH,
                      sampling=sample, reload=reload)
    trainer.train_epochs()
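
Examples #1 and #2 both construct a PinballLoss(config["training_tau"], config["output_size"] * config["batch_size"], config["device"]) criterion. The project's implementation is not shown here, so the following is a minimal sketch of the standard pinball (quantile) loss with that constructor signature; everything inside the class body is an assumption.

import torch
import torch.nn as nn


class PinballLossSketch(nn.Module):
    """Standard pinball (quantile) loss for a single quantile level tau.

    Hypothetical stand-in for the PinballLoss used above; only the
    constructor signature is taken from the examples.
    """

    def __init__(self, training_tau, output_size, device):
        super().__init__()
        self.tau = training_tau
        self.output_size = output_size  # used here only to normalize the sum
        self.device = device

    def forward(self, predictions, actuals):
        diff = actuals - predictions
        # tau-weighted penalty for under-forecasting, (1 - tau) for over-forecasting
        loss = torch.max(self.tau * diff, (self.tau - 1.0) * diff)
        return loss.sum() / self.output_size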
Example #2
def main():
    set_seed(0)

    run_id = str(int(time.time()))
    print("Starting run={}, model={} ".format(run_id, MODEL_TYPE.NBEATS.value))

    BASE_DIR = Path("data/raw/")
    LOG_DIR = Path("logs/" + MODEL_TYPE.NBEATS.value)
    FIGURE_PATH = Path("figures-temp/" + MODEL_TYPE.NBEATS.value)

    print("Loading config")
    config = get_config("Quarterly")
    print("Frequency:{}".format(config["variable"]))
    forecast_length = config["output_size"]
    backcast_length = 1 * forecast_length  # lookback window; the multiplier of 1 keeps it equal to the horizon

    print("loading data")
    info = pd.read_csv(str(BASE_DIR / "M4info.csv"))
    train_path = str(BASE_DIR / "train" / "{}-train.csv".format(config["variable"]))
    test_path = str(BASE_DIR / "test" / "{}-test.csv".format(config["variable"]))

    sample = config["sample"]
    sample_ids = config.get("sample_ids", [])
    train, ts_labels, val, test, test_idx = create_datasets(
        train_path,
        test_path,
        config["output_size"],
        sample_ids=sample_ids,
        sample=sample,
        sampling_size=4)
    generate_timeseries_length_stats(train)
    print("#.Train before chopping:{}".format(train.shape[0]))
    train_before_chopping_count = train.shape[0]
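    # Assumption from the name: determine_chop_value picks a minimum-length
    # cutoff so every kept series can supply full backcast + forecast windows.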
    chop_val = determine_chop_value(train, backcast_length, forecast_length)
    print("Chop value:{:6.3f}".format(chop_val))
    train, val, test, data_infocat_ohe, data_infocat_headers, data_info_cat = \
        filter_timeseries(info, config["variable"], sample, ts_labels, train, chop_val, val, test)
    print("#.Train after chopping:{}, lost:{:5.2f}%".format(
        len(train), (train_before_chopping_count - len(train)) /
        train_before_chopping_count * 100.))
    print("#.train:{}, #.validation ts:{}, #.test ts:{}".format(
        len(train), len(val), len(test)))

    dataset = SeriesDataset(data_infocat_ohe, data_infocat_headers,
                            data_info_cat, ts_labels, train, val, test,
                            config["device"])

    # dataloader = DataLoader(dataset, batch_size=config["batch_size"], collate_fn=collate_lines, shuffle=True)
    dataloader = DataLoader(dataset,
                            batch_size=config["batch_size"],
                            shuffle=False)
    model = NBeatsNet(stack_types=config["stack_types"],
                      forecast_length=forecast_length,
                      thetas_dims=config["thetas_dims"],
                      nb_blocks_per_stack=config["nb_blocks_per_stack"],
                      backcast_length=backcast_length,
                      hidden_layer_units=config["hidden_layer_units"],
                      share_weights_in_stack=config["share_weights_in_stack"],
                      dropout=config["dropout"],
                      device=config["device"])
    reload = config["reload"]
    add_run_id = config["add_run_id"]
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config["learning_rate"])
    criterion = PinballLoss(config["training_tau"],
                            config["output_size"] * config["batch_size"],
                            config["device"])
    trainer = Trainer(MODEL_TYPE.NBEATS.value,
                      model,
                      optimizer,
                      criterion,
                      dataloader,
                      run_id,
                      add_run_id,
                      config,
                      forecast_length,
                      backcast_length,
                      ohe_headers=dataset.data_info_cat_headers,
                      csv_path=LOG_DIR,
                      figure_path=FIGURE_PATH,
                      sampling=sample,
                      reload=reload)
    trainer.train_epochs()
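
Example #2's NBeatsNet stacks blocks that each emit a backcast and a forecast; the residual x - backcast feeds the next block and the forecasts are summed across blocks. As a reference for what one generic block computes (Oreshkin et al., 2020), here is an illustrative PyTorch sketch; the layer layout and names are assumptions, not the ts.nbeats implementation.

import torch.nn as nn


class GenericNBeatsBlockSketch(nn.Module):
    """One generic N-BEATS block: a fully connected trunk producing theta,
    then two linear heads projecting theta onto backcast and forecast.
    Illustrative only; not the NBeatsNet wired up above."""

    def __init__(self, backcast_length, forecast_length, hidden_layer_units, theta_dim):
        super().__init__()
        self.trunk = nn.Sequential(
            nn.Linear(backcast_length, hidden_layer_units), nn.ReLU(),
            nn.Linear(hidden_layer_units, hidden_layer_units), nn.ReLU(),
            nn.Linear(hidden_layer_units, hidden_layer_units), nn.ReLU(),
            nn.Linear(hidden_layer_units, theta_dim),
        )
        self.backcast_head = nn.Linear(theta_dim, backcast_length)
        self.forecast_head = nn.Linear(theta_dim, forecast_length)

    def forward(self, x):
        theta = self.trunk(x)
        # Caller subtracts the backcast from x before the next block
        # and accumulates the forecasts across all blocks.
        return self.backcast_head(theta), self.forecast_head(theta)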
Example #3
def main():
    set_seed(0)

    run_id = str(int(time.time()))
    print("Starting run={}, model={} ".format(run_id,
                                              MODEL_TYPE.BENCHMARK.value))

    BASE_DIR = Path("data/raw/")
    LOG_DIR = Path("logs/" + MODEL_TYPE.BENCHMARK.value)
    FIGURE_PATH = Path("figures-temp/" + MODEL_TYPE.BENCHMARK.value)

    print("Loading config")
    config = get_config("Monthly")
    print("Frequency:{}".format(config["variable"]))

    print("loading data")
    info = pd.read_csv(str(BASE_DIR / "M4info.csv"))
    train_path = str(BASE_DIR / "train" / "{}-train.csv".format(config["variable"]))
    test_path = str(BASE_DIR / "test" / "{}-test.csv".format(config["variable"]))

    sample = config["sample"]
    sample_ids = config.get("sample_ids", [])
    train, ts_labels, val, test, test_idx = create_datasets(
        train_path,
        test_path,
        config["output_size"],
        create_val_dataset=True,
        sample_ids=sample_ids,
        sample=sample,
        sampling_size=4)
    generate_timeseries_length_stats(train)
    train_before_chopping_count = train.shape[0]
    print("#.Train before chopping:{}".format(train.shape[0]))
    chop_val = config["chop_val"]
    print("Chop value:{:6.3f}".format(chop_val))
    train, val, test, data_infocat_ohe, data_infocat_headers, data_info_cat = \
        filter_timeseries(info, config["variable"], sample, ts_labels, train, chop_val, val, test)
    print("#.Train after chopping:{}, lost:{:5.2f}%".format(
        len(train), (train_before_chopping_count - len(train)) /
        train_before_chopping_count * 100.))
    print("#.train:{}, #.validation ts:{}, #.test ts:{}".format(
        len(train), len(val), len(test)))

    dataset = SeriesDataset(data_infocat_ohe, data_infocat_headers,
                            data_info_cat, ts_labels, train, val, test,
                            config["device"])

    # dataloader = DataLoader(dataset, batch_size=config["batch_size"], collate_fn=collate_lines, shuffle=True)
    dataloader = DataLoader(dataset,
                            batch_size=config["batch_size"],
                            shuffle=False)
    add_run_id = config["add_run_id"]
    trainer = Trainer(MODEL_TYPE.BENCHMARK.value,
                      dataloader,
                      run_id,
                      add_run_id,
                      config,
                      csv_path=LOG_DIR,
                      figure_path=FIGURE_PATH)
    trainer.train()
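
Every example begins with set_seed(0). The helper's body is not shown; a typical implementation for a PyTorch project looks like the sketch below (whether the real ts.utils.helper_funcs.set_seed also seeds CUDA is an assumption).

import random

import numpy as np
import torch


def set_seed_sketch(seed):
    """Hypothetical stand-in for ts.utils.helper_funcs.set_seed: seed the
    common RNGs so repeated runs are reproducible."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # no-op on CPU-only machines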
Example #4
import os
import time
from pathlib import Path

import pandas as pd
import torch
from torch.nn import SmoothL1Loss
from torch.utils.data import DataLoader

from ts.es_rnn.config import get_config
from ts.es_rnn.model import ESRNN
from ts.es_rnn.trainer import ESRNNTrainer
from ts.utils.data_loading import SeriesDataset
from ts.utils.helper_funcs import MODEL_TYPE, set_seed, create_datasets, generate_timeseries_length_stats, \
    filter_timeseries

set_seed(0)

run_id = str(int(time.time()))
print("Starting run={}, model={} ".format(run_id, MODEL_TYPE.ESRNN.value))

try:
    user_paths = os.environ["PYTHONPATH"].split(os.pathsep)
    print(user_paths)
except KeyError:
    user_paths = []

BASE_DIR = Path("data/raw/")
LOG_DIR = Path("logs/" + MODEL_TYPE.ESRNN.value)
FIGURE_PATH = Path("figures-temp/" + MODEL_TYPE.ESRNN.value)

print("loading config")