def download() -> None:
    """
    Download the M4 dataset if it does not exist yet.

    Fetches the M4 info file, the training and test archives and the Naive2 forecast archive
    into DATASET_PATH, extracts the archives there and writes one cache file per split
    (TRAINING_DATASET_CACHE_FILE_PATH and TEST_DATASET_CACHE_FILE_PATH).

    The whole procedure is skipped when DATASET_PATH already exists as a directory.
    """
    if os.path.isdir(DATASET_PATH):
        logging.info(f'skip: {DATASET_PATH} directory already exists.')
        return

    # NOTE(review): the `download(url, path)` calls below take two arguments, so they are
    # presumably meant to resolve to the helper imported from common.http_utils rather than
    # to this zero-argument function -- confirm the enclosing scope does not shadow it.
    download(INFO_URL, INFO_FILE_PATH)
    m4_ids = pd.read_csv(INFO_FILE_PATH).M4id.values

    def build_cache(files: str, cache_path: str) -> None:
        """
        Collect the time series of all CSV files matching a pattern into one cache file.

        :param files: Glob pattern, relative to DATASET_PATH, of the CSV files to read.
        :param cache_path: File the resulting array is dumped to.
        """
        # One entry per id listed in the info file, in info-file order. Ids that never
        # show up in the matched CSV files keep their empty list.
        timeseries_dict = OrderedDict((m4id, []) for m4id in m4_ids)

        logging.info(f'Caching {files}')
        for train_csv in tqdm(glob(os.path.join(DATASET_PATH, files))):
            dataset = pd.read_csv(train_csv)
            dataset.set_index(dataset.columns[0], inplace=True)
            for m4id, row in dataset.iterrows():
                values = row.values
                # Drop the NaN cells of the row, keeping only the observed values.
                timeseries_dict[m4id] = values[~np.isnan(values)]

        # The series generally differ in length. Building an array from such a ragged
        # sequence without an explicit object dtype raises ValueError on NumPy >= 1.24,
        # so fill a 1-D object array element by element instead.
        series = np.empty(len(timeseries_dict), dtype=object)
        for position, values in enumerate(timeseries_dict.values()):
            series[position] = values
        series.dump(cache_path)

    download(TRAINING_DATASET_URL, TRAINING_DATASET_FILE_PATH)
    patoolib.extract_archive(TRAINING_DATASET_FILE_PATH, outdir=DATASET_PATH)
    build_cache('*-train.csv', TRAINING_DATASET_CACHE_FILE_PATH)

    download(TEST_DATASET_URL, TEST_DATASET_FILE_PATH)
    patoolib.extract_archive(TEST_DATASET_FILE_PATH, outdir=DATASET_PATH)
    build_cache('*-test.csv', TEST_DATASET_CACHE_FILE_PATH)

    naive2_archive = os.path.join(DATASET_PATH, url_file_name(NAIVE2_FORECAST_URL))
    download(NAIVE2_FORECAST_URL, naive2_archive)
    patoolib.extract_archive(naive2_archive, outdir=DATASET_PATH)
""" import logging import os from dataclasses import dataclass import numpy as np import pandas as pd import patoolib from common.http_utils import download, url_file_name from common.settings import DATASETS_PATH DATASET_URL = 'https://robjhyndman.com/data/27-3-Athanasopoulos1.zip' DATASET_PATH = os.path.join(DATASETS_PATH, 'tourism') DATASET_FILE_PATH = os.path.join(DATASET_PATH, url_file_name(DATASET_URL)) @dataclass() class TourismMeta: seasonal_patterns = ['Yearly', 'Quarterly', 'Monthly'] horizons = [4, 8, 24] frequency = [1, 4, 12] horizons_map = {'Yearly': 4, 'Quarterly': 8, 'Monthly': 24} frequency_map = {'Yearly': 1, 'Quarterly': 4, 'Monthly': 12} @dataclass() class TourismDataset: ids: np.ndarray groups: np.ndarray
""" M3 summary unit test """ import os import unittest import numpy as np import pandas as pd from common.http_utils import download, url_file_name from common.settings import TESTS_STORAGE_PATH from summary.m3 import M3Summary FORECASTS_URL = 'https://forecasters.org/data/m3comp/M3Forecast.xls' FORECASTS_FILE_PATH = os.path.join(TESTS_STORAGE_PATH, 'm3', url_file_name(FORECASTS_URL)) class TestM3Summary(unittest.TestCase): def setUp(self) -> None: download(FORECASTS_URL, FORECASTS_FILE_PATH) def test_summary(self): summary = M3Summary() naive2 = pd.read_excel(FORECASTS_FILE_PATH, sheet_name='NAIVE2', header=None) naive2_forecast = np.array([ts[~np.isnan(ts)] for ts in naive2[naive2.columns[2:]].values]) result = summary.evaluate(naive2_forecast) # based on http://www.forecastingprinciples.com/paperpdf/Makridakia-The%20M3%20Competition.pdf # Tables 13-16 and Table 6 for Average. self.assertEqual(result['M3Year'], 17.88)
def setUp(self) -> None:
    """
    Prepare the winner forecast fixture.

    The archive is requested through `download` on every run; it is unpacked into
    TEST_STORAGE_PATH only when WINNER_FORECAST_PATH is not an existing file.
    """
    archive_name = url_file_name(WINNER_FORECAST_URL)
    winner_archive = os.path.join(TEST_STORAGE_PATH, archive_name)
    download(WINNER_FORECAST_URL, winner_archive)

    # Nothing left to do when the extracted forecast file is already in place.
    if os.path.isfile(WINNER_FORECAST_PATH):
        return
    patoolib.extract_archive(winner_archive, outdir=TEST_STORAGE_PATH)
import pandas as pd
import patoolib
from tqdm import tqdm

from common.http_utils import download, url_file_name
from common.settings import DATASETS_PATH

# Remote locations of the M4 training set, test set, series info table and the
# Naive2 point-forecast submission.
TRAINING_DATASET_URL = 'https://www.m4.unic.ac.cy/wp-content/uploads/2017/12/M4DataSet.zip'
TEST_DATASET_URL = 'https://www.m4.unic.ac.cy/wp-content/uploads/2018/07/M-test-set.zip'
INFO_URL = 'https://www.m4.unic.ac.cy/wp-content/uploads/2018/12/M4Info.csv'
NAIVE2_FORECAST_URL = 'https://github.com/M4Competition/M4-methods/raw/master/Point%20Forecasts/submission-Naive2.rar'

# Local directory that holds everything belonging to the M4 dataset.
DATASET_PATH = os.path.join(DATASETS_PATH, 'm4')

# Local paths of the downloaded files; names are derived from the URLs via url_file_name().
TRAINING_DATASET_FILE_PATH = os.path.join(DATASET_PATH, url_file_name(TRAINING_DATASET_URL))
TEST_DATASET_FILE_PATH = os.path.join(DATASET_PATH, url_file_name(TEST_DATASET_URL))
INFO_FILE_PATH = os.path.join(DATASET_PATH, url_file_name(INFO_URL))
# NOTE(review): presumably the CSV contained in the Naive2 archive -- confirm.
NAIVE2_FORECAST_FILE_PATH = os.path.join(DATASET_PATH, 'submission-Naive2.csv')

# Cache files for the two splits. NOTE(review): the download routine writes them with
# ndarray.dump(), i.e. as pickles, despite the '.npz' extension -- readers need
# np.load(..., allow_pickle=True) rather than the usual .npz handling.
TRAINING_DATASET_CACHE_FILE_PATH = os.path.join(DATASET_PATH, 'training.npz')
TEST_DATASET_CACHE_FILE_PATH = os.path.join(DATASET_PATH, 'test.npz')


@dataclass()
class M4Dataset:
    ids: np.ndarray
    groups: np.ndarray
    frequencies: np.ndarray
    horizons: np.ndarray