def learning_curve(seed=2, runs=10, light=False, K=2, outlier_threshold=10, granularity=1.0):
    """Run the experiment `runs` times and checkpoint RMSE/accuracy results after each run."""
    sk = Sidekick(data_dir=data_dir, seed=seed)
    sk.load(light=light)

    rmse_failed_all = []
    rmse_success_all = []
    rmse_all = []
    accuracy_all = []
    for r in range(runs):
        # Shuffle and split the projects: 70% for training, 30% held out
        projects_train, projects_test = sk.split(threshold=0.7, shuffle=True)
        n_test = len(projects_test)
        # Reserve 3/5 of the held-out projects for validation (unused below); keep the rest for testing
        projects_validation = projects_test[:floor(n_test * 3 / 5)]
        projects_test = projects_test[floor(n_test * 3 / 5):]

        _, _, rmse_run, accuracy_run = one_run(projects_train, projects_test, K, outlier_threshold, granularity)
        # rmse_failed_all.append(rmse_failed_run)
        # rmse_success_all.append(rmse_success_run)
        rmse_all.append(rmse_run)
        accuracy_all.append(accuracy_run)
        # with open('rmse_failed_outlier_%s.pkl' % outlier_threshold, 'wb') as f:
        #     cp.dump(rmse_failed_all, f)
        # with open('rmse_success_outlier_%s.pkl' % outlier_threshold, 'wb') as f:
        #     cp.dump(rmse_success_all, f)
        # Checkpoint the accumulated results to disk after every run
        with open('rmse_K_%s_outlier_%s_granularity_%s.pkl' % (K, outlier_threshold, granularity), 'wb') as f:
            cp.dump(rmse_all, f)
        with open('accuracy_K_%s_outlier_%s_granularity_%s.pkl' % (K, outlier_threshold, granularity), 'wb') as f:
            cp.dump(accuracy_all, f)

    # rmse_failed_all and rmse_success_all stay empty because the corresponding appends are commented out
    return rmse_failed_all, rmse_success_all, rmse_all, accuracy_all
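A minimal driver sketch for the function above. It assumes the surrounding module already provides `Sidekick`, `data_dir`, `one_run`, `floor`, and a pickle-like `cp`; the grid of K values and outlier thresholds is purely illustrative.

# Hypothetical sweep over K and outlier thresholds; values are illustrative only.
if __name__ == '__main__':
    for K in (2, 3, 5):
        for outlier_threshold in (10, 100):
            _, _, rmse_all, accuracy_all = learning_curve(
                seed=2, runs=10, light=True, K=K,
                outlier_threshold=outlier_threshold, granularity=1.0)
            print('K=%s, outlier=%s: %s runs completed' % (K, outlier_threshold, len(rmse_all)))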
Example #3
def learning_curve(seed=2, runs=10, light=False, outlier_threshold=10):
    """Run the experiment `runs` times and checkpoint RMSE/accuracy results after each run."""
    sk = Sidekick(data_dir=data_dir, seed=seed)
    sk.load(light=light)

    rmse_all = []
    accuracy_all = []
    for r in range(runs):
        # Shuffle and split the projects: 70% for training, 30% held out
        projects_train, projects_test = sk.split(threshold=0.7, shuffle=True)
        n_test = len(projects_test)
        # Reserve 3/5 of the held-out projects for validation (unused below); keep the rest for testing
        projects_validation = projects_test[:floor(n_test * 3 / 5)]
        projects_test = projects_test[floor(n_test * 3 / 5):]

        rmse_run, accuracy_run = one_run(projects_train, projects_test, outlier_threshold)
        rmse_all.append(rmse_run)
        accuracy_all.append(accuracy_run)

        # Checkpoint the accumulated results to disk after every run
        with open('gp_rmse_outlier_%s_normalized_false.pkl' % outlier_threshold, 'wb') as f:
            cp.dump(rmse_all, f)
        with open('gp_accuracy_outlier_%s_normalized_false.pkl' % outlier_threshold, 'wb') as f:
            cp.dump(accuracy_all, f)

    return rmse_all, accuracy_all
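A sketch of how the checkpoint files written above might be consumed. It assumes `cp` is the standard pickle module and that each per-run entry in `rmse_all` is a sequence of RMSE values; the file name and threshold are illustrative.

import pickle
import numpy as np

# Load a checkpoint written by learning_curve and average RMSE across runs.
with open('gp_rmse_outlier_10_normalized_false.pkl', 'rb') as f:
    rmse_all = pickle.load(f)
rmse_mean = np.mean(np.asarray(rmse_all), axis=0)  # mean RMSE per entry, averaged over runs
print(rmse_mean)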
Example #4
def experiment(args):
    """
    Run the experiment for the given number of times.

    :param seed:                Seed to use when shuffling the data set
    :param runs:                Number of times to run the experiment
    :param light:               Whether to use a light data set (1000 projects)
    :param outlier_threshold:   Threshold of outliers to discard
    :param normalized:          Whether to use the normalized money
    :param granularity:         Level of granularity
    :return:
    """
    features = _get_extractor(args.features)

    sk = Sidekick(data_dir=data_dir, seed=args.seed)
    sk.load(light=args.light)

    relative_time = np.linspace(0.025, 1, 40)

    # Construct data dict
    data_rmse = {
        "plot_label": args.features,
        "x": relative_time,
        "y": [],
        "args": vars(args),
        "timestamp": time.time()
    }
    data_accuracy = {
        "plot_label": args.features,
        "x": relative_time,
        "y": [],
        "args": vars(args),
        "timestamp": time.time()
    }
    rmse_all = []
    accuracy_all = []
    for r in range(args.runs):
        projects_train, projects_test = sk.split(threshold=0.7, shuffle=True)

        # Set which money time series to use
        for p in np.append(projects_train, projects_test):
            p.normalized = args.normalized

        # n_test = len(projects_test)
        # projects_validation = projects_test[:floor(n_test*3/5)]
        # projects_test = projects_test[floor(n_test*3/5):]

        # Run the experiment once
        rmse_run, accuracy_run = _one_run(projects_train, projects_test,
                                          relative_time, features,
                                          args.outlierThreshold,
                                          args.normalized, args.granularity)

        # Record the results
        rmse_all.append(rmse_run)
        accuracy_all.append(accuracy_run)

    data_rmse["y"] = rmse_all
    data_accuracy["y"] = accuracy_all

    # Save the results to disk
    args.metric = "rmse"
    u.save_args(data_rmse, vars(args))
    args.metric = "accuracy"
    u.save_args(data_accuracy, vars(args))

    return rmse_all, accuracy_all
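A hedged sketch of how `experiment` might be driven from the command line. The argument names mirror the attributes accessed above (`seed`, `runs`, `light`, `features`, `outlierThreshold`, `normalized`, `granularity`), but the parser and its defaults are illustrative, not taken from the original repository.

import argparse

def _parse_args():
    # Build an args namespace with the fields experiment() reads; defaults are illustrative.
    parser = argparse.ArgumentParser(description='Run the learning-curve experiment.')
    parser.add_argument('--seed', type=int, default=2)
    parser.add_argument('--runs', type=int, default=10)
    parser.add_argument('--light', action='store_true')
    parser.add_argument('--features', default='money')
    parser.add_argument('--outlierThreshold', type=int, default=10)
    parser.add_argument('--normalized', action='store_true')
    parser.add_argument('--granularity', type=float, default=1.0)
    return parser.parse_args()

if __name__ == '__main__':
    rmse_all, accuracy_all = experiment(_parse_args())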