# Shared imports for the training / evaluation snippets below. Repo-internal
# helpers (M4Dataset, M4Sources, NBeatsTF, NBeatsParams, TrainParams, Subset,
# mk_clear_dir, load_config, etc.) are assumed importable from the package.
import os
import pickle
from collections import defaultdict
from functools import partial
from pathlib import Path
from typing import Dict, Iterable, Tuple

import tensorflow as tf
import yaml
from tensorflow import keras


def train(config: dict, save_model: bool):
    name = '{}_{}H_{}'.format(config['model_type'], config['h_mult'], config['loss'])
    results_dir = mk_clear_dir(get_results_dir(config['load_subset']), False)
    model_dir = mk_clear_dir(get_model_dir(results_dir, name), False)
    m4 = M4Dataset(sources=M4Sources()).update_param(**config).read_source()
    config.update({'backcast_length': m4.H[Subset(config['load_subset'])] * m4.h_mult,
                   'forecast_length': m4.H[Subset(config['load_subset'])],
                   'name': name})
    interpretable_mod(config)
    trainparms = TrainParams().update_param(**config)
    netparams = NBeatsParams().update_param(**config)
    train_data_fn = partial(m4.dataset, trainparms.epoch_sample_size, lh=config['lh'])
    model = NBeatsTF.create_model(netparams, trainparms)
    model_file_name = os.path.join(model_dir, 'model.mdl')
    model.train(train_data_fn, trainparms.epochs, trainparms.batch_size, save_model, model_file_name)
    with open(os.path.join(model_dir, 'config.yaml'), 'w') as f:
        yaml.dump(config, f)
def train(config: dict, save_model: bool, delete_existing: bool = False) -> tf.keras.models.Model:
    results_dir = mk_clear_dir(get_results_dir(config), delete_existing)
    model_dir = mk_clear_dir(get_model_dir(results_dir), delete_existing)
    m4 = M4Dataset(sources=M4Sources()).update_param(**config).read_source(config['test'])

    # Update backcast and forecast lengths
    config.update({'backcast_length': m4.H[Subset(config['load_subset'])] * m4.h_mult,
                   'forecast_length': m4.H[Subset(config['load_subset'])]})

    # For the interpretable model, set the seasonality theta dim to the forecast horizon H
    if config['model_type'] == 'interpretable':
        th_dim = eval(config['thetas_dim'])
        assert eval(config['stack_types']) == (BlockType.TREND_BLOCK, BlockType.SEASONALITY_BLOCK)
        config['thetas_dim'] = str((th_dim[0], config['forecast_length']))

    trainparms = TrainParams().update_param(**config)
    netparams = NBeatsParams().update_param(**config)
    train_data_fn = partial(m4.dataset, trainparms.epoch_sample_size, lh=config['lh'])
    model = NBeatsTF.create_model(netparams, trainparms)
    model_file_name = os.path.join(model_dir, 'model.mdl')
    model.train(train_data_fn, trainparms.epochs, trainparms.batch_size, save_model, model_file_name)

    with open(os.path.join(results_dir, 'config.yaml'), 'w') as f:
        yaml.dump(config, f)
    return model
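# Usage sketch for the train() variant above, assuming a YAML config that
# carries the keys referenced there ('model_type', 'load_subset', 'lh',
# 'test', ...). 'config.yaml' and the Subset member name are placeholders,
# not confirmed by the repo.
cfg = load_config('config.yaml')
cfg['load_subset'] = Subset.YEARLY.value  # assumed enum member name
trained_model = train(cfg, save_model=True, delete_existing=False)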
def load_ensemble(load_subset: Subset) -> tf.keras.Model:
    results_dir = get_results_dir(load_subset.value)
    ensemble_model = NBeatsEnsemble(EnsembleParams().update_param(**load_config('ensemble.yaml')))
    params = []
    for d in Path(results_dir).rglob('mdl_*'):
        with open(os.path.join(d, 'config.yaml')) as f:
            params.append((NBeatsParams().update_param(**yaml.safe_load(f)),
                           os.path.join(d, 'model.mdl')))
    ensemble_model.load(params)
    return ensemble_model
def load_ensemble(ensemble_root_dir: str,
                  trainparms: TrainParams,
                  metric: str = 'smape',
                  run_eagerly: bool = False,
                  ignore_loading_mixer_weights: bool = False) -> Tuple[keras.Model, Dict, Iterable[Dict]]:
    """
    Load an already generated ensemble saved with 'NBeatsWeightedEnsemble.save_ensemble()'.
    Returns: model, ensemble config, submodel configs
    """
    # A: Instantiate ensemble
    # 1 - ensemble params
    ensemble_cfg = load_config(os.path.join(ensemble_root_dir, 'weighted_ensemble.yaml'))
    ensemble_params = WeightedEnsembleParams().update_param(**ensemble_cfg)

    # 2 - submodel params
    submodel_params = dict()
    submodel_files = dict()
    submodel_configs = dict()
    for d in Path(ensemble_root_dir).rglob('mdl_*'):
        cfg_file = os.path.join(d, 'config.yaml')
        mdl_file = os.path.join(d, 'model.mdl')
        parms = NBeatsParams().update_param(**load_config(cfg_file))
        if parms.name in ensemble_params.submodel_names:
            submodel_params[parms.name] = parms
            submodel_files[parms.name] = mdl_file
            submodel_configs[parms.name] = load_config(cfg_file)
    assert len(submodel_files) > 0, 'Ensemble submodels could not be loaded'

    # 3 - instantiate ensemble
    ensemble_model = NBeatsWeightedEnsemble.create_model(ensemble_params,
                                                         list(submodel_params.values()),
                                                         trainparms,
                                                         metric,
                                                         freeze_submodels=False,
                                                         run_eagerly=run_eagerly)

    # B: Load weights
    # 1 - weights of sub-models
    for nm, mdl in ensemble_model.sub_models.items():
        mdl.load(submodel_files[nm])

    # 2 - weights of mixer
    if not ignore_loading_mixer_weights:
        mixer_file = str(list(Path(ensemble_root_dir).rglob('mixer_*'))[0])
        mixer_file = os.path.join(mixer_file, 'model.mdl')
        ensemble_model.mixer.load(mixer_file)

    return ensemble_model, ensemble_cfg, submodel_configs.values()
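# Usage sketch for load_ensemble() above: reload a saved weighted ensemble
# for evaluation. The root directory and 'ensemble.yaml' path are placeholders.
trainparms = TrainParams().update_param(**load_config('ensemble.yaml'))
ensemble, ens_cfg, sub_cfgs = load_ensemble('results/ensemble_root', trainparms,
                                            metric='smape',
                                            ignore_loading_mixer_weights=False)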
def test_ensemble(subset: Subset, gen_plots: bool = False, show_plots: bool = False):
    config = load_config('ensemble.yaml')
    config['load_subset'] = subset.value
    results_dir = get_results_dir(subset.value)

    # Load model
    model = NBeatsEnsemble(EnsembleParams().update_param(**config))
    params = []
    for d in Path(results_dir).rglob('mdl_*'):
        with open(os.path.join(d, 'config.yaml')) as f:
            params.append((NBeatsParams().update_param(**yaml.safe_load(f)),
                           os.path.join(d, 'model.mdl')))
    model.load(params)

    m4 = M4Dataset(sources=M4Sources()).update_param(**config).read_source(True)
    test_dset = m4.test_dataset()
    x, y, w = next(test_dset.batch(len(test_dset)).as_numpy_iterator())

    model_outs = dict()
    yhat = model.call(x, False, model_outputs=model_outs).numpy()

    test_result = 'test smape: {:.3f}'.format(smape_simple(y, yhat, w))
    print(test_result)
    with open(os.path.join(results_dir, f'{model.aggregation_method}_results.txt'), 'w') as f:
        f.write(test_result)

    if len(model_outs) > 0:
        with open(os.path.join(results_dir, f'{model.aggregation_method}_model_outputs.pkl'), 'wb') as f:
            pickle.dump({k: v.numpy() for k, v in model_outs.items()}, f)

    if gen_plots:
        plot_dir = mk_clear_dir(get_plot_dir(results_dir, model.aggregation_method), True)
        samples2print = list(range(64))
        plot_past_future(x[..., 0], yhat[..., 0], y[..., 0], plot_dir,
                         n=samples2print, show=show_plots)
def train(ensemble_config: Dict, submodel_configs: Iterable[Dict],
          save_model=True, resume_training=False) -> tf.keras.models.Model:
    results_dir = mk_clear_dir(get_results_dir(ensemble_config), False)
    model_dir = mk_clear_dir(get_model_dir(results_dir, ensemble_config), False)
    trainparms = TrainParams().update_param(**ensemble_config)
    m4 = M4Dataset(sources=M4Sources()).update_param(**ensemble_config).read_source()

    if resume_training:
        model = NBeatsWeightedEnsemble.load_ensemble(model_dir, trainparms)
    else:
        # Instantiate a new ensemble
        ensemble_params = WeightedEnsembleParams().update_param(**ensemble_config)
        submodel_params = []
        for cfg in submodel_configs:
            name = '{}_{}H'.format(cfg['model_type'], cfg['h_mult'])
            cfg.update({'backcast_length': m4.H[m4.load_subset] * cfg['h_mult'],
                        'forecast_length': m4.H[m4.load_subset],
                        'name': name})
            interpretable_mod(cfg)
            submodel_params.append(NBeatsParams().update_param(**cfg))
        model = NBeatsWeightedEnsemble.create_model(ensemble_params, submodel_params)

    save_fun = partial(NBeatsWeightedEnsemble.save_ensemble,
                       ensemble_config=ensemble_config,
                       model_configs=submodel_configs,
                       ensemble_root_dir=model_dir,
                       delete_root_dir=False)
    train_data_fn = partial(m4.dataset, trainparms.epoch_sample_size, lh=ensemble_config['lh'])
    model.train(trainparms=trainparms,
                train_data_fn=train_data_fn,
                epochs=trainparms.epochs,
                batch_size=trainparms.batch_size,
                save_model=save_model,
                save_callback=save_fun,
                run_eagerly=False)
    return model
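# Usage sketch for the ensemble train() above: one ensemble config plus an
# iterable of submodel configs. All file names are placeholders.
ensemble_cfg = load_config('weighted_ensemble.yaml')
sub_cfgs = [load_config(f) for f in ('generic.yaml', 'interpretable.yaml')]
ensemble = train(ensemble_cfg, sub_cfgs, save_model=True, resume_training=False)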
def load_ensemble(ensemble_root_dir: str, trainparms: TrainParams,
                  metric: str = 'smape', run_eagerly: bool = False) -> keras.Model:
    """
    Load an already generated ensemble saved with 'NBeatsWeightedEnsemble.save_ensemble()'.
    """
    # A: Instantiate ensemble
    # 1 - ensemble params
    cfg = load_config(os.path.join(ensemble_root_dir, 'weighted_ensemble.yaml'))
    ensemble_params = WeightedEnsembleParams().update_param(**cfg)

    # 2 - submodel params
    submodel_params = dict()
    submodel_files = dict()
    for d in Path(ensemble_root_dir).rglob('mdl_*'):
        cfg_file = os.path.join(d, 'config.yaml')
        mdl_file = os.path.join(d, 'model.mdl')
        parms = NBeatsParams().update_param(**load_config(cfg_file))
        if parms.name in ensemble_params.submodel_names:
            submodel_params[parms.name] = parms
            submodel_files[parms.name] = mdl_file
    assert len(submodel_files) > 0, 'Ensemble submodels could not be loaded'

    # 3 - instantiate ensemble
    ensemble_model = NBeatsWeightedEnsemble.create_model(ensemble_params, list(submodel_params.values()))
    ensemble_model.m_compile(trainparms, metric, run_eagerly)

    # B: Load weights
    # 1 - weights of sub-models
    for nm, mdl in ensemble_model.sub_models.items():
        mdl.load(submodel_files[nm])

    # 2 - weights of mixer
    mixer_file = str(list(Path(ensemble_root_dir).rglob('mixer_*'))[0])
    mixer_file = os.path.join(mixer_file, 'model.mdl')
    ensemble_model.mixer.load(mixer_file)

    # 3 - training history and optimizer state
    with open(os.path.join(ensemble_root_dir, 'meta.pkl'), 'rb') as f:
        meta = pickle.load(f)
    ensemble_model.m_history = meta[0]
    ensemble_model.optimizer = meta[1]
    return ensemble_model
def train(config: dict, save_model: bool, delete_existing: bool = False) -> tf.keras.models.Model:
    results_dir = mk_clear_dir(get_results_dir(config), delete_existing)
    model_dir = mk_clear_dir(get_model_dir(results_dir), delete_existing)
    m4 = M4Dataset(sources=M4SourcesLite()).update_param(**config).read_source(config['test'])

    # Update backcast and forecast lengths
    config.update({'backcast_length': m4.H[Subset(config['load_subset'])] * m4.h_mult,
                   'forecast_length': m4.H[Subset(config['load_subset'])]})

    trainparms = TrainParams().update_param(**config)
    netparams = NBeatsParams().update_param(**config)
    train_data_fn = partial(m4.dataset, trainparms.epoch_sample_size, lh=config['lh'])
    model = NBeatsTF.create_model(netparams, trainparms)
    model_file_name = os.path.join(model_dir, 'model.mdl')
    model.train(train_data_fn, trainparms.epochs, trainparms.batch_size, save_model, model_file_name)

    with open(os.path.join(results_dir, 'config.yaml'), 'w') as f:
        yaml.dump(config, f)
    return model
def test(config: dict, gen_plots: bool = False):
    results_dir = get_results_dir(config)
    plot_dir = mk_clear_dir(get_plot_dir(results_dir), True) if gen_plots else None
    model_dir = get_model_dir(results_dir)
    model_file_name = os.path.join(model_dir, 'model.mdl')

    m4 = M4Dataset(sources=M4SourcesLite()).update_param(**config).read_source(True)
    model = NBeatsTF.create_model(NBeatsParams().update_param(**config),
                                  TrainParams().update_param(**config))
    model.load(model_file_name)

    test_dset = m4.test_dataset()
    x, y, w = next(test_dset.batch(len(test_dset)).as_numpy_iterator())

    stack_coll = defaultdict(lambda: 0)
    yhat = model.call(x, False, stack_out=stack_coll).numpy()

    if gen_plots:
        samples2print = list(range(8))
        try:
            if config['model_type'] == 'generic':
                labels = ['Stack1-Generic', 'Stack2-Generic']
            elif config['model_type'] == 'interpretable':
                labels = ['Stack1-Interp', 'Stack2-Interp']
            else:
                raise ValueError(f"Unknown model_type: {config['model_type']}")
            plot_stack(y, yhat, dict(stack_coll), samples2print, labels=labels,
                       block_plot_cnt=2, plot_dir=plot_dir, show=False)
            plot_past_future(x[..., 0], yhat[..., 0], y[..., 0], plot_dir,
                             n=samples2print, show=False)
        except Exception:
            # Plotting is best-effort; don't fail the evaluation if it breaks
            pass

    test_result = 'test smape: {:.3f}'.format(smape_simple(y, yhat, w))
    print(test_result)
    with open(os.path.join(results_dir, 'results.txt'), 'w') as f:
        f.write(test_result)
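# End-to-end sketch pairing the train() and test() variants directly above:
# train a single model, then score it on the M4 test split. 'config.yaml'
# is a placeholder path.
if __name__ == '__main__':
    cfg = load_config('config.yaml')
    train(cfg, save_model=True)
    test(cfg, gen_plots=True)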