def save_ensemble(ensemble_model,
                  ensemble_config: Dict,
                  model_configs: Iterable[Dict],
                  ensemble_root_dir: str,
                  delete_root_dir: bool = False):
    """
    Save an ensemble. The saved model is loaded with 'NBeatsWeightedEnsemble.load_ensemble()'.

    ensemble_model: model to be saved
    ensemble_config: ensemble config object
    model_configs: iterable of submodel config dicts (each containing a 'name' key)
    ensemble_root_dir: ensemble root directory
    delete_root_dir: whether to delete an existing ensemble root directory.
        If False, the individual model directories are still cleared.
    """
    assert isinstance(ensemble_model, NBeatsWeightedEnsemble)
    ensemble_root_dir = mk_clear_dir(ensemble_root_dir, delete_root_dir)

    # Save ensemble config
    ensemble_config['submodel_names'] = list(ensemble_model.submodel_names)
    save_config(ensemble_config, os.path.join(ensemble_root_dir, 'weighted_ensemble.yaml'))

    # Save submodel configs and weights
    model_configs = {cfg['name']: cfg for cfg in model_configs}
    for mdl_name, mdl in ensemble_model.sub_models.items():
        cfg = model_configs[mdl_name]
        d = mk_clear_dir(os.path.join(ensemble_root_dir, 'mdl_' + mdl_name), True)
        mdl.save(os.path.join(d, 'model.mdl'))
        save_config(cfg, os.path.join(d, 'config.yaml'))

    # Save mixer weights
    mixer_dir = mk_clear_dir(os.path.join(ensemble_root_dir, 'mixer_' + ensemble_model.mixer.name), True)
    ensemble_model.mixer.save(os.path.join(mixer_dir, 'model.mdl'))

    # Save training metadata (history and optimizer state)
    import pickle
    with open(os.path.join(ensemble_root_dir, 'meta.pkl'), 'wb') as f:
        meta = (ensemble_model.m_history, ensemble_model.optimizer)
        pickle.dump(meta, f)
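# --- Hedged usage sketch (illustrative, not part of the original source) ---
# Shows one way save_ensemble might be called after training. The names
# `trained_ensemble`, `ensemble_cfg`, and `submodel_cfgs` are assumed to come from
# the surrounding training code, and 'results/ensemble' is a hypothetical path.
def _example_save_ensemble(trained_ensemble, ensemble_cfg: Dict, submodel_cfgs: Iterable[Dict]):
    NBeatsWeightedEnsemble.save_ensemble(trained_ensemble,
                                         ensemble_config=ensemble_cfg,
                                         model_configs=submodel_cfgs,
                                         ensemble_root_dir='results/ensemble',  # hypothetical directory
                                         delete_root_dir=False)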
def train(config: dict, save_model: bool):
    name = '{}_{}H_{}'.format(config['model_type'], config['h_mult'], config['loss'])
    results_dir = mk_clear_dir(get_results_dir(config['load_subset']), False)
    model_dir = mk_clear_dir(get_model_dir(results_dir, name), False)

    m4 = M4Dataset(sources=M4Sources()).update_param(**config).read_source()
    config.update({
        'backcast_length': m4.H[Subset(config['load_subset'])] * m4.h_mult,
        'forecast_length': m4.H[Subset(config['load_subset'])],
        'name': name
    })
    interpretable_mod(config)

    trainparms = TrainParams().update_param(**config)
    netparams = NBeatsParams().update_param(**config)
    train_data_fn = partial(m4.dataset, trainparms.epoch_sample_size, lh=config['lh'])

    model = NBeatsTF.create_model(netparams, trainparms)
    model_file_name = os.path.join(model_dir, 'model.mdl')
    model.train(train_data_fn, trainparms.epochs, trainparms.batch_size, save_model, model_file_name)

    with open(os.path.join(model_dir, 'config.yaml'), 'w') as f:
        yaml.dump(config, f)
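# --- Hedged example (illustrative, not part of the original source) ---
# A minimal config dict containing only the keys that train() above reads directly
# ('model_type', 'h_mult', 'loss', 'load_subset', 'lh'). The concrete values are
# assumptions; 'load_subset' must map to a valid Subset, and TrainParams/NBeatsParams
# may require additional keys in practice.
def _example_train_call():
    example_config = {
        'model_type': 'generic',   # or 'interpretable' (assumed value)
        'h_mult': 2,               # backcast length multiplier (assumed value)
        'loss': 'smape',           # loss name used in the run name (assumed value)
        'load_subset': 'Yearly',   # M4 subset identifier (assumed value)
        'lh': 10,                  # lookback-horizon sampling parameter (assumed value)
    }
    train(example_config, save_model=True)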
def test(ensemble_config: Dict, show_plots: bool = False, gen_plots=False):
    results_dir = mk_clear_dir(get_results_dir(ensemble_config), False)
    model_dir = mk_clear_dir(get_model_dir(results_dir, ensemble_config), False)
    plot_dir = mk_clear_dir(get_plot_dir(model_dir), True) if gen_plots else None

    trainparms = TrainParams().update_param(**ensemble_config)
    m4 = M4Dataset(sources=M4Sources()).update_param(**ensemble_config).read_source(True)
    model = NBeatsWeightedEnsemble.load_ensemble(model_dir, trainparms)

    test_dset = m4.test_dataset()
    x, y, w = next(test_dset.batch(len(test_dset)).as_numpy_iterator())
    yhat = model.call(x, False).numpy()

    test_result = 'test smape: {:.3f}'.format(smape_simple(y, yhat, w))
    print(test_result)
    with open(os.path.join(model_dir, 'results.txt'), 'w') as f:
        f.write(test_result)

    if gen_plots:
        samples2print = list(range(8))
        plot_past_future(x[..., 0], yhat[..., 0], y[..., 0], plot_dir, n=samples2print, show=show_plots)
def train(config: dict, save_model: bool, delete_existing: bool = False) -> tf.keras.models.Model:
    results_dir = mk_clear_dir(get_results_dir(config), delete_existing)
    model_dir = mk_clear_dir(get_model_dir(results_dir), delete_existing)
    m4 = M4Dataset(sources=M4Sources()).update_param(**config).read_source(config['test'])

    # Update backcast and forecast lengths
    config.update({'backcast_length': m4.H[Subset(config['load_subset'])] * m4.h_mult,
                   'forecast_length': m4.H[Subset(config['load_subset'])]})

    # For the interpretable model, set the seasonality theta dimension to the forecast horizon H
    if config['model_type'] == 'interpretable':
        th_dim = eval(config['thetas_dim'])
        assert eval(config['stack_types']) == (BlockType.TREND_BLOCK, BlockType.SEASONALITY_BLOCK)
        config['thetas_dim'] = str((th_dim[0], config['forecast_length']))

    trainparms = TrainParams().update_param(**config)
    netparams = NBeatsParams().update_param(**config)
    train_data_fn = partial(m4.dataset, trainparms.epoch_sample_size, lh=config['lh'])

    model = NBeatsTF.create_model(netparams, trainparms)
    model_file_name = os.path.join(model_dir, 'model.mdl')
    model.train(train_data_fn, trainparms.epochs, trainparms.batch_size, save_model, model_file_name)

    with open(os.path.join(results_dir, 'config.yaml'), 'w') as f:
        yaml.dump(config, f)
    return model
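# --- Hedged example (illustrative, not part of the original source) ---
# Demonstrates the interpretable-model adjustment performed in train() above: the
# seasonality theta dimension is replaced by the forecast horizon. The concrete
# values (thetas_dim '(4, 8)', forecast_length 6) are assumptions for illustration.
def _example_interpretable_theta_fix():
    cfg = {'model_type': 'interpretable', 'thetas_dim': '(4, 8)', 'forecast_length': 6}
    th_dim = eval(cfg['thetas_dim'])                           # -> (4, 8)
    cfg['thetas_dim'] = str((th_dim[0], cfg['forecast_length']))
    return cfg['thetas_dim']                                   # -> '(4, 6)'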
def train(ensemble_config: Dict, submodel_configs: Iterable[Dict],
          save_model=True, resume_training=False) -> tf.keras.models.Model:
    results_dir = mk_clear_dir(get_results_dir(ensemble_config), False)
    model_dir = mk_clear_dir(get_model_dir(results_dir, ensemble_config), False)

    trainparms = TrainParams().update_param(**ensemble_config)
    m4 = M4Dataset(sources=M4Sources()).update_param(**ensemble_config).read_source()

    if resume_training:
        model = NBeatsWeightedEnsemble.load_ensemble(model_dir, trainparms)
    else:
        # Instantiate a new ensemble
        ensemble_params = WeightedEnsembleParams().update_param(**ensemble_config)
        submodel_params = []
        for cfg in submodel_configs:
            name = '{}_{}H'.format(cfg['model_type'], cfg['h_mult'])
            cfg.update({
                'backcast_length': m4.H[m4.load_subset] * cfg['h_mult'],
                'forecast_length': m4.H[m4.load_subset],
                'name': name
            })
            interpretable_mod(cfg)
            submodel_params.append(NBeatsParams().update_param(**cfg))
        model = NBeatsWeightedEnsemble.create_model(ensemble_params, submodel_params)

    save_fun = partial(NBeatsWeightedEnsemble.save_ensemble,
                       ensemble_config=ensemble_config,
                       model_configs=submodel_configs,
                       ensemble_root_dir=model_dir,
                       delete_root_dir=False)

    train_data_fn = partial(m4.dataset, trainparms.epoch_sample_size, lh=ensemble_config['lh'])
    model.train(trainparms=trainparms,
                train_data_fn=train_data_fn,
                epochs=trainparms.epochs,
                batch_size=trainparms.batch_size,
                save_model=save_model,
                save_callback=save_fun,
                run_eagerly=False)
    return model
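# --- Hedged usage sketch (illustrative, not part of the original source) ---
# Shows one way the ensemble train() above might be driven: one ensemble config plus
# a list of submodel configs. The file names 'ensemble.yaml', 'generic.yaml' and
# 'interpretable.yaml' are assumptions; only the keys read by train() are known here.
def _example_ensemble_training():
    ensemble_cfg = load_config('ensemble.yaml')              # hypothetical config file
    submodel_cfgs = [load_config('generic.yaml'),            # hypothetical config file
                     load_config('interpretable.yaml')]      # hypothetical config file
    return train(ensemble_cfg, submodel_cfgs, save_model=True, resume_training=False)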
def train(config: dict, save_model: bool, delete_existing: bool = False) -> tf.keras.models.Model:
    results_dir = mk_clear_dir(get_results_dir(config), delete_existing)
    model_dir = mk_clear_dir(get_model_dir(results_dir), delete_existing)
    m4 = M4Dataset(sources=M4SourcesLite()).update_param(**config).read_source(config['test'])

    # Update backcast and forecast lengths
    config.update({'backcast_length': m4.H[Subset(config['load_subset'])] * m4.h_mult,
                   'forecast_length': m4.H[Subset(config['load_subset'])]})

    trainparms = TrainParams().update_param(**config)
    netparams = NBeatsParams().update_param(**config)
    train_data_fn = partial(m4.dataset, trainparms.epoch_sample_size, lh=config['lh'])

    model = NBeatsTF.create_model(netparams, trainparms)
    model_file_name = os.path.join(model_dir, 'model.mdl')
    model.train(train_data_fn, trainparms.epochs, trainparms.batch_size, save_model, model_file_name)

    with open(os.path.join(results_dir, 'config.yaml'), 'w') as f:
        yaml.dump(config, f)
    return model
def test_ensemble(subset: Subset, gen_plots: bool = False, show_plots: bool = False):
    config = load_config('ensemble.yaml')
    config['load_subset'] = subset.value
    results_dir = get_results_dir(subset.value)

    # Load the ensemble: one (params, weights path) pair per submodel directory
    model = NBeatsEnsemble(EnsembleParams().update_param(**config))
    params = []
    for d in Path(results_dir).rglob('mdl_*'):
        with open(os.path.join(d, 'config.yaml')) as f:
            params.append((NBeatsParams().update_param(**yaml.safe_load(f)),
                           os.path.join(d, 'model.mdl')))
    model.load(params)

    m4 = M4Dataset(sources=M4Sources()).update_param(**config).read_source(True)
    test_dset = m4.test_dataset()
    x, y, w = next(test_dset.batch(len(test_dset)).as_numpy_iterator())

    model_outs = dict()
    yhat = model.call(x, False, model_outputs=model_outs).numpy()

    test_result = 'test smape: {:.3f}'.format(smape_simple(y, yhat, w))
    print(test_result)
    with open(os.path.join(results_dir, f'{model.aggregation_method}_results.txt'), 'w') as f:
        f.write(test_result)

    if len(model_outs) > 0:
        import pickle
        with open(os.path.join(results_dir, f'{model.aggregation_method}_model_outputs.pkl'), 'wb') as f:
            pickle.dump({k: v.numpy() for k, v in model_outs.items()}, f)

    if gen_plots:
        plot_dir = mk_clear_dir(get_plot_dir(results_dir, model.aggregation_method), True)
        samples2print = list(range(64))
        plot_past_future(x[..., 0], yhat[..., 0], y[..., 0], plot_dir, n=samples2print, show=show_plots)
def test(config: dict, gen_plots: bool = False):
    results_dir = get_results_dir(config)
    plot_dir = mk_clear_dir(get_plot_dir(results_dir), True) if gen_plots else None
    model_dir = get_model_dir(results_dir)
    model_file_name = os.path.join(model_dir, 'model.mdl')

    m4 = M4Dataset(sources=M4SourcesLite()).update_param(**config).read_source(True)
    model = NBeatsTF.create_model(NBeatsParams().update_param(**config),
                                  TrainParams().update_param(**config))
    model.load(model_file_name)

    test_dset = m4.test_dataset()
    x, y, w = next(test_dset.batch(len(test_dset)).as_numpy_iterator())

    # Collect per-stack outputs for plotting
    stack_coll = defaultdict(lambda: 0)
    yhat = model.call(x, False, stack_out=stack_coll).numpy()

    if gen_plots:
        samples2print = list(range(8))
        try:
            if config['model_type'] == 'generic':
                labels = ['Stack1-Generic', 'Stack2-Generic']
            elif config['model_type'] == 'interpretable':
                labels = ['Stack1-Interp', 'Stack2-Interp']
            else:
                raise ValueError('Unknown model_type: {}'.format(config['model_type']))
            plot_stack(y, yhat, dict(stack_coll), samples2print, labels=labels,
                       block_plot_cnt=2, plot_dir=plot_dir, show=False)
            plot_past_future(x[..., 0], yhat[..., 0], y[..., 0], plot_dir,
                             n=samples2print, show=False)
        except Exception:
            # Plotting is best-effort; a plotting failure should not abort the evaluation
            pass

    test_result = 'test smape: {:.3f}'.format(smape_simple(y, yhat, w))
    print(test_result)
    with open(os.path.join(results_dir, 'results.txt'), 'w') as f:
        f.write(test_result)
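# --- Hedged usage sketch (illustrative, not part of the original source) ---
# A minimal train-then-test cycle, assuming the train()/test() pair above that uses
# M4SourcesLite. The config file name 'generic.yaml' and the 'test' flag value are
# assumptions; the same config dict is reused so directories and lengths line up.
def _example_train_then_test():
    cfg = load_config('generic.yaml')   # hypothetical config file
    cfg['test'] = False                 # assumed flag passed to read_source() during training
    train(cfg, save_model=True)
    test(cfg, gen_plots=True)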