import numpy as np
from tqdm import tqdm

from lib.utils import load_pickle


def create_dataset_Xy(files, full_event_table, added_samples_path=None):
    # TODO CHECK
    X, y, file_name = [], [], []
    for file in tqdm(files):
        X_file, y_file = create_Xy(file, full_event_table)
        print(f'create_dataset_Xy: len(y_file)={len(y_file)}')
        X += X_file
        y += y_file
        file_name += [file] * len(y_file)
    # Optionally append extra labelled samples, oversampled K_SAMPLES_REP times.
    if added_samples_path is not None:
        X_added, y_added = load_pickle(added_samples_path)
        X += list(X_added) * K_SAMPLES_REP
        y += list(y_added) * K_SAMPLES_REP
    return np.array(X), np.array(y), np.array(file_name)
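# Usage sketch: build a training matrix from a single video. This is a hedged
# example, not part of the original code; 'train_table.csv' is an illustrative
# placeholder, and create_Xy / K_SAMPLES_REP are assumed to be defined
# elsewhere in this module.
if __name__ == '__main__':
    import pandas as pd

    demo_table = pd.read_csv('train_table.csv')  # hypothetical event table
    X, y, names = create_dataset_Xy(['645001_5.mp4'], demo_table)
    print(X.shape, y.shape, names.shape)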
from lib.utils import parse_args, load_pickle, seconds_to_hours


def create_X(file_name):
    # Read every frame of the resized video, preprocess it and optionally
    # flatten it into a 1-D feature vector.
    path = VIDEO_DIR + 'resized_' + file_name
    X = []
    for frame in get_reader(path):
        frame = PREPROC_FUN(frame)
        if IS_FLATTEN:
            frame = frame.reshape(-1)
        X.append(frame)
    return X


model = load_pickle('../output/models/model_ui.pickle')
results = []
output = '../data/test/real_start_test.csv'
FILES = [
    '645001_5.mp4', '645066_5.mp4', '645098_5.mp4', '645195_5.mp4',
    '645286_5.mp4', '645310_5.mp4', '646186_5.mp4', '648559_5.mp4'
]
for file in FILES:
    print(file)
    prediction = model.predict(create_X(file))
    print('predict')
    first, last = None, None
    second = None
    for i, y in enumerate(prediction):
        time = i
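# The scanning loop above is truncated in the source. Below is a hedged sketch
# of one plausible completion, factored into a hypothetical helper (assumption:
# `prediction` holds one binary label per frame, with 1 marking an event frame,
# and the frame index doubles as the timestamp).
def first_second_last(prediction):
    first, second, last = None, None, None
    for i, y in enumerate(prediction):
        if y == 1:
            if first is None:
                first = i
            elif second is None:
                second = i
            last = i
    return first, second, last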
import os

import matplotlib.pyplot as plt
import torch


def evaluate_generator(model_name, seed, experiment_dir, dataset, use_cuda=True):
    """
    Args:
        model_name: identifier of the generator being evaluated.
        seed: seed of the training run; recorded in the summary.
        experiment_dir: directory holding 'G_weights.torch' and the output plots.
        dataset: dataset name, e.g. 'VAR', 'ARCH', 'STOCKS' or 'ECG'.
        use_cuda: evaluate on GPU if True.

    Returns:
        Dict summarising the evaluation metrics of the experiment.
    """
    # Evaluation randomness is fixed at 0; `seed` identifies the training run.
    torch.random.manual_seed(0)
    device = 'cuda' if use_cuda else 'cpu'

    experiment_summary = dict()
    experiment_summary['model_id'] = model_name
    experiment_summary['seed'] = seed

    sig_config = get_algo_config(dataset, experiment_dir)
    # shorthands
    base_config = BaseConfig(device=device)
    p, q = base_config.p, base_config.q
    # ----------------------------------------------
    # Load and prepare real path.
    # ----------------------------------------------
    x_real = load_pickle(os.path.join(os.path.dirname(experiment_dir), 'x_real.torch')).to(device)
    x_past, x_future = x_real[:, :p], x_real[:, p:p + q]
    dim = x_real.shape[-1]
    # ----------------------------------------------
    # Load generator weights and hyperparameters.
    # ----------------------------------------------
    G_weights = load_pickle(os.path.join(experiment_dir, 'G_weights.torch'))
    G = SimpleGenerator(dim * p, dim, 3 * (50,), dim).to(device)
    G.load_state_dict(G_weights)
    # ----------------------------------------------
    # Compute predictive score - TSTR (train on synthetic, test on real).
    # ----------------------------------------------
    with torch.no_grad():
        x_fake = G.sample(1, x_past)
    predict_score_dict = compute_predictive_score(x_past, x_future, x_fake)
    experiment_summary.update(predict_score_dict)
    # ----------------------------------------------
    # Compute metrics and scores of the unconditional distribution.
    # ----------------------------------------------
    with torch.no_grad():
        x_fake = G.sample(q, x_past)
    test_metrics_dict = compute_test_metrics(x_fake, x_real)
    experiment_summary.update(test_metrics_dict)
    # ----------------------------------------------
    # Compute Sig-W_1 distance.
    # ----------------------------------------------
    if dataset in ['VAR', 'ARCH']:
        x_past = x_past[::10]
        x_future = x_future[::10]
    sigs_pred = calibrate_sigw1_metric(sig_config, x_future, x_past)
    # Generate fake paths in chunks to bound memory usage.
    sigs_conditional = list()
    with torch.no_grad():
        steps = 100
        size = x_past.size(0) // steps
        for i in range(steps):
            x_past_sample = x_past[i * size:(i + 1) * size] if i < (steps - 1) else x_past[i * size:]
            sigs_fake_ce = sample_sig_fake(G, q, sig_config, x_past_sample)[0]
            sigs_conditional.append(sigs_fake_ce)
        sigs_conditional = torch.cat(sigs_conditional, dim=0)
        sig_w1_metric = sigcwgan_loss(sigs_pred, sigs_conditional)
        experiment_summary['sig_w1_metric'] = sig_w1_metric.item()
    # ----------------------------------------------
    # Create the relevant summary plots.
    # ----------------------------------------------
    with torch.no_grad():
        _x_past = x_past.clone().repeat(5, 1, 1) if dataset in ['STOCKS', 'ECG'] else x_past.clone()
        x_fake_future = G.sample(q, _x_past)
    plot_summary(x_fake=x_fake_future, x_real=x_real, max_lag=q)
    plt.savefig(os.path.join(experiment_dir, 'summary.png'))
    plt.close()
    if is_multivariate(x_real):
        compare_cross_corr(x_fake=x_fake_future, x_real=x_real)
        plt.savefig(os.path.join(experiment_dir, 'cross_correl.png'))
        plt.close()
    # ----------------------------------------------
    # Generate long paths.
    # ----------------------------------------------
    with torch.no_grad():
        x_fake = G.sample(8000, x_past[0:1])
    plot_summary(x_fake=x_fake, x_real=x_real, max_lag=q)
    plt.savefig(os.path.join(experiment_dir, 'summary_long.png'))
    plt.close()
    plt.plot(to_numpy(x_fake[0, :1000]))
    plt.savefig(os.path.join(experiment_dir, 'long_path.png'))
    plt.close()
    return experiment_summary
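# Usage sketch (hedged: the experiment path and model name below are
# hypothetical placeholders, not values from this codebase; the function only
# requires 'G_weights.torch' inside experiment_dir and 'x_real.torch' in its
# parent directory).
def demo_evaluate_generator():
    summary = evaluate_generator(
        model_name='SigCWGAN',
        seed=0,
        experiment_dir='./numerical_results/VAR/seed=0',  # hypothetical path
        dataset='VAR',
        use_cuda=False,
    )
    print(summary.get('sig_w1_metric'))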
from lib import Processor
from lib.utils import load_pickle
import pandas as pd

# Load trained models when the model endpoint is spun up.
models = load_pickle("data/models/fbmodels.pickle")


def get_predictions(location_name, end_date):
    # Take the first model matching the given location name (two models may
    # share a location name but have different zip codes).
    result = [None, None, None, None]
    found_models = [x for x in models.keys() if location_name in x]
    model = models[found_models[0]]
    # We predict for the last 14 days.
    start_date = pd.date_range(end=end_date, periods=14).tolist()[0]
    if model:
        proc = Processor()
        forecast = proc.get_forcast(model, start_date, end_date)
        # Half the width of the prediction interval, relative to the point
        # forecast, averaged over the horizon.
        mean_percentage_uncertainty = (
            (forecast.yhat_upper - forecast.yhat_lower) / 2 / forecast.yhat.abs()
        ).mean()
        slope_data = proc.get_slope(forecast.yhat, sample_size=14)
        # If mean_percentage_uncertainty exceeds 10% of the predicted value,
        # we call the forecast low confidence.
        confidence = "high"
        if mean_percentage_uncertainty > 0.1:
            confidence = "low"
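# Usage sketch (hedged: the location name and date below are illustrative
# inputs, not values from this codebase, and get_predictions is truncated in
# the source, so it does not yet return anything meaningful).
if __name__ == '__main__':
    get_predictions('Seattle', end_date='2021-01-31')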
def load_graph_data(pkl_filename):
    sensor_ids, sensor_id_to_ind, adj_mx = load_pickle(pkl_filename)
    return sensor_ids, sensor_id_to_ind, adj_mx
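# Usage sketch (hedged: 'data/sensor_graph/adj_mx.pkl' is a placeholder path;
# any pickle holding the (sensor_ids, sensor_id_to_ind, adj_mx) triple works,
# with adj_mx assumed to be a numpy adjacency matrix).
if __name__ == '__main__':
    sensor_ids, sensor_id_to_ind, adj_mx = load_graph_data('data/sensor_graph/adj_mx.pkl')
    print(len(sensor_ids), adj_mx.shape)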
    train_table_path = args['train_table']
    added_samples_path = args.get('added_samples', None)
    if added_samples_path is not None:
        print('Train with samples from', added_samples_path)
    full_event_table = pd.read_csv(train_table_path)
    full_event_table = full_event_table[full_event_table['event_type'].isin(GOOD_EVENTS)]
    model = train(files, full_event_table, added_samples_path)
    save_pickle(model, output)
elif mode == 'test':
    print('Test')
    time_table_path = args['time_table']
    model_path = args['model']
    model = load_pickle(model_path)
    time_table = pd.read_csv(time_table_path)
    starts_dict = start_table_to_dict(time_table)
    for file in files:
        assert 'resized_' + file in starts_dict
    prediction = predict(files, model, starts_dict)
    prediction.to_csv(output)
elif mode == 'neg':
    print('Negsampling')
    model_path = args['model']
    model = load_pickle(model_path)
    train_table_path = args['train_table']
    full_event_table = pd.read_csv(train_table_path)
    full_event_table = full_event_table[full_event_table['event_type'].isin(GOOD_EVENTS)]
    X, y = generate_negsamples(files, model, full_event_table)