def _summarize_predictions(predictions, y_test, train_size, coregionalized):
    """Collect the metrics of one fitted model (pair) into a single flat dict.

    Args:
        predictions: sequence of ``(y_pred, variance)`` pairs, one per output;
            the position in the sequence is used as the output index.
        y_test: test targets, indexed as ``y_test[:, output_index]``.
        train_size: number of training points, stored under ``'train_size'``.
        coregionalized: flag stored under ``'coregionalized'``.

    Returns:
        dict with the per-output error metrics, the per-output variance
        descriptors, and the two bookkeeping entries above.
    """
    summary = {}
    # Error metrics first, then variance descriptors, each tagged with the
    # index of the output it belongs to (via add_postfix_to_keys).
    for target, (y_pred, _) in enumerate(predictions):
        summary.update(add_postfix_to_keys(get_metrics(y_pred, y_test[:, target]), target))
    for target, (_, variance) in enumerate(predictions):
        summary.update(add_postfix_to_keys(get_variance_descriptors(variance), target))
    summary['train_size'] = train_size
    summary['coregionalized'] = coregionalized
    return summary


def main():
    """Compare a coregionalized model with two independent models.

    For every training-set size in ``TRAIN_SIZES`` and every one of ``REPEAT``
    repetitions, a fresh training subset is drawn from the initial training
    pool, both model variants are fitted, and their metrics on the fixed test
    set are logged to wandb together with a parity plot.
    """
    # Keep the full pool under its own names: reusing X_train/y_train for the
    # subsample would make every iteration draw from the previous subsample.
    X_pool, y_pool, X_test, y_test = get_initial_split(df_full_factorial_feat, y)

    for train_size in TRAIN_SIZES:
        for i in range(REPEAT):
            X_train, _, y_train, _ = train_test_split(X_pool, y_pool, train_size=train_size)

            # Train coregionalized model
            # reinit=True + wandb.join() give each model its own wandb run.
            wandb.init(project='dispersant_screener',
                       tags=['coregionalized', 'matern32'],
                       save_code=True,
                       reinit=True)
            m = build_coregionalized_model(X_train, y_train)
            y0, var0 = predict_coregionalized(m, X_test, 0)
            y1, var1 = predict_coregionalized(m, X_test, 1)
            overall_metrics = _summarize_predictions([(y0, var0), (y1, var1)], y_test, len(X_train), True)
            plot_parity(y0, y_test[:, 0], var0, y1, y_test[:, 1], var1,
                        'coregionalized_{}_{}.pdf'.format(len(X_train), i))
            wandb.log(overall_metrics)
            wandb.join()

            # Train "simple models"
            wandb.init(project='dispersant_screener', tags=['matern32'], save_code=True, reinit=True)
            m0 = build_model(X_train, y_train[:, 0])
            m1 = build_model(X_train, y_train[:, 1])
            y0, var0 = predict(m0, X_test)
            y1, var1 = predict(m1, X_test)
            overall_metrics = _summarize_predictions([(y0, var0), (y1, var1)], y_test, len(X_train), False)
            # Distinct file name so this plot does not overwrite the
            # coregionalized one written above.
            plot_parity(y0, y_test[:, 0], var0, y1, y_test[:, 1], var1,
                        'simple_{}_{}.pdf'.format(len(X_train), i))
            wandb.log(overall_metrics)
            wandb.join()
def _summarize_predictions(predictions, y_test, train_size, coregionalized):
    """Collect the metrics of one fitted model (pair) into a single flat dict.

    Args:
        predictions: sequence of ``(y_pred, variance)`` pairs, one per output;
            the position in the sequence is used as the output index.
        y_test: test targets, indexed as ``y_test[:, output_index]``.
        train_size: number of training points, stored under ``'train_size'``.
        coregionalized: flag stored under ``'coregionalized'``.

    Returns:
        dict with the per-output error metrics, the per-output variance
        descriptors, and the two bookkeeping entries above.
    """
    summary = {}
    # Error metrics first, then variance descriptors, each tagged with the
    # index of the output it belongs to (via add_postfix_to_keys).
    for target, (y_pred, _) in enumerate(predictions):
        summary.update(add_postfix_to_keys(get_metrics(y_pred, y_test[:, target]), target))
    for target, (_, variance) in enumerate(predictions):
        summary.update(add_postfix_to_keys(get_variance_descriptors(variance), target))
    summary['train_size'] = train_size
    summary['coregionalized'] = coregionalized
    return summary


def main():  # pylint:disable=too-many-locals, too-many-statements
    """Runs everything

    For every training-set size in ``TRAIN_SIZES`` and every one of ``REPEAT``
    repetitions, a training subset is drawn from the initial training pool, a
    coregionalized model and two single-output models are fitted (20
    optimizer restarts each), and their test-set metrics are logged to wandb,
    appended to ``METRICS`` and finally written to ``metrics.csv``.
    """
    X_train_, y_train_, X_test, y_test = get_initial_split(df_full_factorial_feat, y)

    for train_size in TRAIN_SIZES:
        for i in range(REPEAT):
            X_train, _, y_train, _ = train_test_split(X_train_, y_train_, train_size=train_size)

            # Train coregionalized model
            wandb.init(project='dispersant_screener', tags=['coregionalized', 'matern32'], reinit=True)
            m = build_coregionalized_model(X_train, y_train)
            m.optimize_restarts(20)
            y0, var0 = predict_coregionalized(m, X_test, 0)
            y1, var1 = predict_coregionalized(m, X_test, 1)
            overall_metrics = _summarize_predictions([(y0, var0), (y1, var1)], y_test, len(X_train), True)
            METRICS.append(overall_metrics)
            plot_parity([(y0, y_test[:, 0], var0), (y1, y_test[:, 1], var1)],
                        'coregionalized_{}_{}.pdf'.format(len(X_train), i))
            wandb.log(overall_metrics)
            wandb.join()

            # Train "simple models"
            wandb.init(project='dispersant_screener', tags=['matern32'], reinit=True)
            m0 = build_model(X_train, y_train, 0)
            m0.optimize_restarts(20)
            m1 = build_model(X_train, y_train, 1)
            m1.optimize_restarts(20)
            y0, var0 = predict(m0, X_test)
            y1, var1 = predict(m1, X_test)
            overall_metrics = _summarize_predictions([(y0, var0), (y1, var1)], y_test, len(X_train), False)
            METRICS.append(overall_metrics)
            plot_parity([(y0, y_test[:, 0], var0), (y1, y_test[:, 1], var1)],
                        'simple_{}_{}.pdf'.format(len(X_train), i))
            wandb.log(overall_metrics)
            wandb.join()

    df = pd.DataFrame(METRICS)
    df.to_csv('metrics.csv')