Ejemplo n.º 1
0
    def download() -> None:
        """
        Download the M4 dataset and build its caches if it does not exist yet.

        Only logs a message and returns when ``DATASET_PATH`` is already a
        directory. Otherwise the info file, the training and test archives
        and the Naive2 forecast archive are fetched, the archives are
        extracted into ``DATASET_PATH`` and the training/test series are
        dumped to their cache files.
        """
        if os.path.isdir(DATASET_PATH):
            logging.info(f'skip: {DATASET_PATH} directory already exists.')
            return

        # NOTE(review): the two-argument ``download`` calls below are assumed
        # to hit the module-level helper (this looks like a method, whose name
        # is not visible from inside its own body) -- confirm.
        download(INFO_URL, INFO_FILE_PATH)
        m4_ids = pd.read_csv(INFO_FILE_PATH).M4id.values

        def build_cache(files: str, cache_path: str) -> None:
            """
            Gather the series of every CSV matching ``files`` and dump them.

            :param files: Glob pattern, relative to ``DATASET_PATH``.
            :param cache_path: Destination file of the dumped array.
            """
            # One independent placeholder per id, in the order of the info
            # file; ids missing from the CSV files keep an empty list.
            timeseries_dict = OrderedDict((m4id, []) for m4id in m4_ids)
            logging.info(f'Caching {files}')
            for train_csv in tqdm(glob(os.path.join(DATASET_PATH, files))):
                dataset = pd.read_csv(train_csv)
                dataset.set_index(dataset.columns[0], inplace=True)
                for m4id, row in dataset.iterrows():
                    values = row.values
                    # Rows are NaN-padded; keep only the actual observations.
                    timeseries_dict[m4id] = values[~np.isnan(values)]
            # The series have different lengths. Fill an object array
            # explicitly instead of relying on implicit ragged-array creation,
            # which NumPy deprecated and later turned into a ValueError.
            series = np.empty(len(timeseries_dict), dtype=object)
            for position, timeseries in enumerate(timeseries_dict.values()):
                series[position] = timeseries
            series.dump(cache_path)

        download(TRAINING_DATASET_URL, TRAINING_DATASET_FILE_PATH)
        patoolib.extract_archive(TRAINING_DATASET_FILE_PATH,
                                 outdir=DATASET_PATH)
        build_cache('*-train.csv', TRAINING_DATASET_CACHE_FILE_PATH)
        download(TEST_DATASET_URL, TEST_DATASET_FILE_PATH)
        patoolib.extract_archive(TEST_DATASET_FILE_PATH, outdir=DATASET_PATH)
        build_cache('*-test.csv', TEST_DATASET_CACHE_FILE_PATH)

        naive2_archive = os.path.join(DATASET_PATH,
                                      url_file_name(NAIVE2_FORECAST_URL))
        download(NAIVE2_FORECAST_URL, naive2_archive)
        patoolib.extract_archive(naive2_archive, outdir=DATASET_PATH)
Ejemplo n.º 2
0
"""
import logging
import os
from dataclasses import dataclass

import numpy as np
import pandas as pd
import patoolib

from common.http_utils import download, url_file_name
from common.settings import DATASETS_PATH

# Remote zip archive that holds the tourism dataset.
DATASET_URL = 'https://robjhyndman.com/data/27-3-Athanasopoulos1.zip'

# Local dataset directory, and the path of the archive inside it
# (file name derived from the URL by url_file_name).
DATASET_PATH = os.path.join(DATASETS_PATH, 'tourism')
DATASET_FILE_PATH = os.path.join(DATASET_PATH, url_file_name(DATASET_URL))


@dataclass()
class TourismMeta:
    """
    Static metadata of the tourism dataset, kept as class-level attributes.

    None of the attributes is annotated, so the dataclass decorator registers
    no fields; the values live on the class and are shared by all instances.
    """
    # Seasonal pattern names; the two lists below follow the same order.
    seasonal_patterns = ['Yearly', 'Quarterly', 'Monthly']
    horizons = [4, 8, 24]
    frequency = [1, 4, 12]
    # Lookups keyed by pattern name, built from the aligned lists above.
    horizons_map = dict(zip(seasonal_patterns, horizons))
    frequency_map = dict(zip(seasonal_patterns, frequency))


@dataclass()
class TourismDataset:
    """
    Tourism dataset container whose fields are NumPy arrays.

    NOTE(review): the arrays are presumably aligned, one entry per time
    series -- confirm against the code that builds this object.
    """
    # Identifiers of the time series (assumed from the field name -- TODO confirm).
    ids: np.ndarray
    # Group label of each series; presumably a seasonal pattern name -- verify.
    groups: np.ndarray
Ejemplo n.º 3
0
"""
M3 summary unit test
"""
import os
import unittest

import numpy as np
import pandas as pd

from common.http_utils import download, url_file_name
from common.settings import TESTS_STORAGE_PATH
from summary.m3 import M3Summary

# Spreadsheet with M3 forecasts; the test below reads its 'NAIVE2' sheet.
FORECASTS_URL = 'https://forecasters.org/data/m3comp/M3Forecast.xls'
# Local copy, kept in the 'm3' subdirectory of the tests storage path.
FORECASTS_FILE_PATH = os.path.join(TESTS_STORAGE_PATH, 'm3', url_file_name(FORECASTS_URL))

class TestM3Summary(unittest.TestCase):
    """Checks the M3Summary result for the Naive2 forecasts against published numbers."""

    def setUp(self) -> None:
        """Fetch the M3 forecasts spreadsheet through the download helper."""
        download(FORECASTS_URL, FORECASTS_FILE_PATH)

    def test_summary(self):
        """Evaluate the 'NAIVE2' sheet and compare the 'M3Year' entry with the reference."""
        summary = M3Summary()
        naive2 = pd.read_excel(FORECASTS_FILE_PATH, sheet_name='NAIVE2', header=None)
        # The first two columns are skipped and NaN cells are dropped per row,
        # so rows can end up with different lengths. Fill an object array
        # explicitly: implicit ragged-array creation via np.array([...]) is
        # deprecated and raises ValueError on newer NumPy releases.
        rows = naive2[naive2.columns[2:]].values
        naive2_forecast = np.empty(len(rows), dtype=object)
        for index, ts in enumerate(rows):
            naive2_forecast[index] = ts[~np.isnan(ts)]
        result = summary.evaluate(naive2_forecast)

        # based on http://www.forecastingprinciples.com/paperpdf/Makridakia-The%20M3%20Competition.pdf
        # Tables 13-16 and Table 6 for Average.
        # NOTE(review): exact float comparison -- presumably evaluate() rounds
        # to two decimals; confirm.
        self.assertEqual(result['M3Year'], 17.88)
Ejemplo n.º 4
0
 def setUp(self) -> None:
     """
     Fetch the winner forecast archive and unpack it when needed.

     The archive is requested through the download helper on every run;
     extraction is skipped once WINNER_FORECAST_PATH is an existing file.
     """
     archive_name = url_file_name(WINNER_FORECAST_URL)
     archive_path = os.path.join(TEST_STORAGE_PATH, archive_name)
     download(WINNER_FORECAST_URL, archive_path)
     if os.path.isfile(WINNER_FORECAST_PATH):
         return
     patoolib.extract_archive(archive_path, outdir=TEST_STORAGE_PATH)
Ejemplo n.º 5
0
import pandas as pd
import patoolib
from tqdm import tqdm

from common.http_utils import download, url_file_name
from common.settings import DATASETS_PATH

# Remote sources: training and test archives, the M4 info CSV and the
# archive with the Naive2 point forecasts.
TRAINING_DATASET_URL = 'https://www.m4.unic.ac.cy/wp-content/uploads/2017/12/M4DataSet.zip'
TEST_DATASET_URL = 'https://www.m4.unic.ac.cy/wp-content/uploads/2018/07/M-test-set.zip'
INFO_URL = 'https://www.m4.unic.ac.cy/wp-content/uploads/2018/12/M4Info.csv'
NAIVE2_FORECAST_URL = 'https://github.com/M4Competition/M4-methods/raw/master/Point%20Forecasts/submission-Naive2.rar'

# Local directory that holds everything related to the M4 dataset.
DATASET_PATH = os.path.join(DATASETS_PATH, 'm4')

# Local paths of the downloaded files; names are derived from the URLs,
# except the Naive2 CSV, whose name is hard-coded.
TRAINING_DATASET_FILE_PATH = os.path.join(DATASET_PATH,
                                          url_file_name(TRAINING_DATASET_URL))
TEST_DATASET_FILE_PATH = os.path.join(DATASET_PATH,
                                      url_file_name(TEST_DATASET_URL))
INFO_FILE_PATH = os.path.join(DATASET_PATH, url_file_name(INFO_URL))
NAIVE2_FORECAST_FILE_PATH = os.path.join(DATASET_PATH, 'submission-Naive2.csv')

# Cache files for the training and test series.
# NOTE(review): despite the '.npz' suffix these appear to be written with
# ndarray.dump (a pickle) rather than np.savez -- confirm against the reader.
TRAINING_DATASET_CACHE_FILE_PATH = os.path.join(DATASET_PATH, 'training.npz')
TEST_DATASET_CACHE_FILE_PATH = os.path.join(DATASET_PATH, 'test.npz')


@dataclass()
class M4Dataset:
    ids: np.ndarray
    groups: np.ndarray
    frequencies: np.ndarray
    horizons: np.ndarray