예제 #1
0
def main():
    """Compare coregionalized and independent models over training-set sizes.

    For each size in ``TRAIN_SIZES`` and each of ``REPEAT`` repetitions, a
    subsample is drawn from the initial training split. A coregionalized
    model and two single-output models are built on that subsample and
    evaluated on the fixed test split. For each model family the metrics of
    both targets are sent to Weights & Biases and a parity plot is written.
    """
    # Keep the full training pool under its own names. Re-binding
    # X_train/y_train inside the loop would make every draw subsample the
    # previous (already reduced) draw instead of the full pool.
    X_pool, y_pool, X_test, y_test = get_initial_split(
        df_full_factorial_feat, y)

    def summarize(y0, var0, y1, var1, n_train, coregionalized):
        """Merge error metrics and variance descriptors of both targets."""
        summary = {}
        summary.update(add_postfix_to_keys(get_metrics(y0, y_test[:, 0]), 0))
        # Target-1 metrics must come from the target-1 predictions; the
        # previous code re-labelled the target-0 metrics instead.
        summary.update(add_postfix_to_keys(get_metrics(y1, y_test[:, 1]), 1))
        summary.update(add_postfix_to_keys(get_variance_descriptors(var0), 0))
        summary.update(add_postfix_to_keys(get_variance_descriptors(var1), 1))
        summary['train_size'] = n_train
        summary['coregionalized'] = coregionalized
        return summary

    for train_size in TRAIN_SIZES:
        for i in range(REPEAT):
            X_train, _, y_train, _ = train_test_split(X_pool,
                                                      y_pool,
                                                      train_size=train_size)
            n_train = len(X_train)

            # Train coregionalized model.
            # reinit=True: wandb.init is called many times in this process
            # and each call is meant to be its own run with its own tags.
            wandb.init(project='dispersant_screener',
                       tags=['coregionalized', 'matern32'],
                       save_code=True,
                       reinit=True)
            m = build_coregionalized_model(X_train, y_train)
            y0, var0 = predict_coregionalized(m, X_test, 0)
            y1, var1 = predict_coregionalized(m, X_test, 1)
            overall_metrics = summarize(y0, var0, y1, var1, n_train, True)

            plot_parity(y0, y_test[:, 0], var0, y1, y_test[:, 1], var1,
                        'coregionalized_{}_{}.pdf'.format(n_train, i))
            wandb.log(overall_metrics)

            # Train "simple models": one independent model per target.
            wandb.init(project='dispersant_screener',
                       tags=['matern32'],
                       save_code=True,
                       reinit=True)
            m0 = build_model(X_train, y_train[:, 0])
            m1 = build_model(X_train, y_train[:, 1])

            y0, var0 = predict(m0, X_test)
            y1, var1 = predict(m1, X_test)
            overall_metrics = summarize(y0, var0, y1, var1, n_train, False)

            # Use a distinct filename so this plot does not overwrite the
            # coregionalized parity plot written above.
            plot_parity(y0, y_test[:, 0], var0, y1, y_test[:, 1], var1,
                        'simple_{}_{}.pdf'.format(n_train, i))

            wandb.log(overall_metrics)
예제 #2
0
def main():  # pylint:disable=too-many-locals, too-many-statements
    """Benchmark coregionalized against independent models and save metrics.

    For each size in ``TRAIN_SIZES`` and each of ``REPEAT`` repetitions, a
    subsample of the initial training split is drawn. A coregionalized model
    and two single-output models are built, optimized with 20 restarts, and
    evaluated on the fixed test split. Each model family gets its own
    Weights & Biases run and parity plot; every metrics dict is also appended
    to the module-level ``METRICS`` list, which is finally written to
    ``metrics.csv``.
    """
    X_train_, y_train_, X_test, y_test = get_initial_split(df_full_factorial_feat, y)

    def summarize(y0, var0, y1, var1, n_train, coregionalized):
        """Merge error metrics and variance descriptors of both targets."""
        summary = {}
        summary.update(add_postfix_to_keys(get_metrics(y0, y_test[:, 0]), 0))
        # Target-1 metrics must come from the target-1 predictions; the
        # previous code re-labelled the target-0 metrics instead.
        summary.update(add_postfix_to_keys(get_metrics(y1, y_test[:, 1]), 1))
        summary.update(add_postfix_to_keys(get_variance_descriptors(var0), 0))
        summary.update(add_postfix_to_keys(get_variance_descriptors(var1), 1))
        summary['train_size'] = n_train
        summary['coregionalized'] = coregionalized
        return summary

    for train_size in TRAIN_SIZES:
        for i in range(REPEAT):
            # Always subsample from the untouched pool (X_train_, y_train_).
            X_train, _, y_train, _ = train_test_split(X_train_, y_train_, train_size=train_size)
            n_train = len(X_train)

            # Train coregionalized model
            wandb.init(project='dispersant_screener', tags=['coregionalized', 'matern32'], reinit=True)
            m = build_coregionalized_model(X_train, y_train)
            m.optimize_restarts(20)
            y0, var0 = predict_coregionalized(m, X_test, 0)
            y1, var1 = predict_coregionalized(m, X_test, 1)
            overall_metrics = summarize(y0, var0, y1, var1, n_train, True)

            METRICS.append(overall_metrics)

            plot_parity([(y0, y_test[:, 0], var0), (y1, y_test[:, 1], var1)],
                        'coregionalized_{}_{}.pdf'.format(n_train, i))
            wandb.log(overall_metrics)
            wandb.join()

            # Train "simple models": one independent model per target
            wandb.init(project='dispersant_screener', tags=['matern32'], reinit=True)
            m0 = build_model(X_train, y_train, 0)
            m0.optimize_restarts(20)
            m1 = build_model(X_train, y_train, 1)
            m1.optimize_restarts(20)

            y0, var0 = predict(m0, X_test)
            y1, var1 = predict(m1, X_test)
            overall_metrics = summarize(y0, var0, y1, var1, n_train, False)

            METRICS.append(overall_metrics)

            plot_parity([(y0, y_test[:, 0], var0), (y1, y_test[:, 1], var1)],
                        'simple_{}_{}.pdf'.format(n_train, i))

            wandb.log(overall_metrics)
            wandb.join()

    df = pd.DataFrame(METRICS)
    df.to_csv('metrics.csv')