def learning_curve(seed=2, runs=10, light=False, K=2, outlier_threshold=10, granularity=1.0):
    """Run the model `runs` times on random train/test splits and pickle the RMSE
    and accuracy of every run.

    Note: rmse_failed_all and rmse_success_all are returned but stay empty, since
    the code that fills and saves them is commented out below.
    """
    sk = Sidekick(data_dir=data_dir, seed=seed)
    sk.load(light=light)
    rmse_failed_all = []
    rmse_success_all = []
    rmse_all = []
    accuracy_all = []
    for r in range(runs):
        projects_train, projects_test = sk.split(threshold=0.7, shuffle=True)
        # A validation split is carved out of the test set (first 3/5) but is not used below.
        n_test = len(projects_test)
        projects_validation = projects_test[:floor(n_test * 3 / 5)]
        projects_test = projects_test[floor(n_test * 3 / 5):]
        _, _, rmse_run, accuracy_run = one_run(projects_train, projects_test, K,
                                               outlier_threshold, granularity)
        # rmse_failed_all.append(rmse_failed_run)
        # rmse_success_all.append(rmse_success_run)
        rmse_all.append(rmse_run)
        accuracy_all.append(accuracy_run)
    # with open('rmse_failed_outlier_%s.pkl' % outlier_threshold, 'wb') as f:
    #     cp.dump(rmse_failed_all, f)
    # with open('rmse_success_outlier_%s.pkl' % outlier_threshold, 'wb') as f:
    #     cp.dump(rmse_success_all, f)
    with open('rmse_K_%s_outlier_%s_granularity_%s.pkl' % (K, outlier_threshold, granularity), 'wb') as f:
        cp.dump(rmse_all, f)
    with open('accuracy_K_%s_outlier_%s_granularity_%s.pkl' % (K, outlier_threshold, granularity), 'wb') as f:
        cp.dump(accuracy_all, f)
    return rmse_failed_all, rmse_success_all, rmse_all, accuracy_all
def learning_curve(seed=2, runs=10, light=False, outlier_threshold=10):
    """Variant of learning_curve without the K and granularity parameters;
    results are pickled under gp_* file names.
    """
    sk = Sidekick(data_dir=data_dir, seed=seed)
    sk.load(light=light)
    rmse_all = []
    accuracy_all = []
    for r in range(runs):
        projects_train, projects_test = sk.split(threshold=0.7, shuffle=True)
        # A validation split is carved out of the test set (first 3/5) but is not used below.
        n_test = len(projects_test)
        projects_validation = projects_test[:floor(n_test * 3 / 5)]
        projects_test = projects_test[floor(n_test * 3 / 5):]
        rmse_run, accuracy_run = one_run(projects_train, projects_test, outlier_threshold)
        rmse_all.append(rmse_run)
        accuracy_all.append(accuracy_run)
    with open('gp_rmse_outlier_%s_normalized_false.pkl' % outlier_threshold, 'wb') as f:
        cp.dump(rmse_all, f)
    with open('gp_accuracy_outlier_%s_normalized_false.pkl' % outlier_threshold, 'wb') as f:
        cp.dump(accuracy_all, f)
    return rmse_all, accuracy_all
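# A minimal usage sketch for the two learning_curve variants above. The parameter
# values are illustrative only, and the sketch assumes the module-level names each
# variant relies on (Sidekick, data_dir, one_run, cp, floor) are imported at the
# top of its file:
#
#     # K/granularity variant: returns (rmse_failed, rmse_success, rmse, accuracy)
#     _, _, rmse_all, accuracy_all = learning_curve(seed=2, runs=10, light=True,
#                                                   K=2, outlier_threshold=10,
#                                                   granularity=1.0)
#
#     # variant above: returns (rmse, accuracy) and pickles gp_* files
#     rmse_all, accuracy_all = learning_curve(seed=2, runs=10, light=True,
#                                             outlier_threshold=10)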
def experiment(args):
    """
    Run the experiment for the given number of times.

    ``args`` is an argparse-style namespace with the following attributes:

    :param seed: Seed to use when shuffling the data set
    :param runs: Number of times to run the experiment
    :param light: Whether to use a light data set (1000 projects)
    :param features: Name of the feature extractor to use
    :param outlierThreshold: Threshold of outliers to discard
    :param normalized: Whether to use the normalized money
    :param granularity: Level of granularity
    :return: Lists of RMSE and accuracy results, one entry per run
    """
    features = _get_extractor(args.features)
    sk = Sidekick(data_dir=data_dir, seed=args.seed)
    sk.load(light=args.light)
    relative_time = np.linspace(0.025, 1, 40)

    # Construct data dicts holding the results and the arguments used
    data_rmse = {
        "plot_label": args.features,
        "x": relative_time,
        "y": [],
        "args": vars(args),
        "timestamp": time.time()
    }
    data_accuracy = {
        "plot_label": args.features,
        "x": relative_time,
        "y": [],
        "args": vars(args),
        "timestamp": time.time()
    }

    rmse_all = []
    accuracy_all = []
    for r in range(args.runs):
        projects_train, projects_test = sk.split(threshold=0.7, shuffle=True)

        # Set which money time series to use
        for p in np.append(projects_train, projects_test):
            p.normalized = args.normalized

        # n_test = len(projects_test)
        # projects_validation = projects_test[:floor(n_test * 3 / 5)]
        # projects_test = projects_test[floor(n_test * 3 / 5):]

        # Run the experiment once
        rmse_run, accuracy_run = _one_run(projects_train, projects_test, relative_time, features,
                                          args.outlierThreshold, args.normalized, args.granularity)

        # Record the results
        rmse_all.append(rmse_run)
        accuracy_all.append(accuracy_run)

    data_rmse["y"] = rmse_all
    data_accuracy["y"] = accuracy_all

    # Save the results to disk
    args.metric = "rmse"
    u.save_args(data_rmse, vars(args))
    args.metric = "accuracy"
    u.save_args(data_accuracy, vars(args))

    return rmse_all, accuracy_all
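# A hedged driver sketch for experiment(): the attribute names below mirror the ones
# the function reads from `args`. The project's real command-line parser lives
# elsewhere, so this Namespace and the 'money' extractor name are illustrative only.
if __name__ == '__main__':
    from argparse import Namespace

    args = Namespace(
        features='money',        # hypothetical extractor name, resolved by _get_extractor
        seed=2,
        runs=10,
        light=True,              # use the light data set (1000 projects)
        normalized=False,        # use the raw (non-normalized) money series
        outlierThreshold=10,
        granularity=1.0,
    )
    rmse_all, accuracy_all = experiment(args)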