def main(srl_method_name, evaluator_name, example_name, fold, seed, alpha,
         study, out_directory):
    """Driver for BOWLOS weight learning"""
    # Initialize logging level, switch to DEBUG for more info.
    initLogging(logging_level=logging.INFO)

    logging.info("Performing BowlOS on {}:{}:{}".format(
        srl_method_name, evaluator_name, example_name))

    # model specific parameters
    num_weights = HELPER_METHODS[srl_method_name]['get_num_weights'](example_name)
    predicate = EVAL_PREDICATE[example_name]

    logging.info("Optimizing over {} weights".format(num_weights))

    # the dataframes we will be using for evaluation
    truth_df = load_truth_frame(example_name, fold, predicate, 'learn')
    observed_df = load_observed_frame(example_name, fold, predicate, 'learn')
    target_df = load_target_frame(example_name, fold, predicate, 'learn')

    get_function_value = write_get_function_value_fun(
        srl_method_name, example_name, fold, seed, evaluator_name,
        out_directory, study, truth_df, observed_df, target_df)

    best_weights = doLearn(num_weights, seed, get_function_value, alpha)

    HELPER_METHODS[srl_method_name]['write_learned_weights'](best_weights,
                                                             example_name)
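
# A minimal sketch (hypothetical, not part of the project) of the contract
# doLearn expects from get_function_value: a callable mapping a weight vector
# to a single scalar score. The toy quadratic below makes the stub runnable
# standalone; the real closure built by write_get_function_value_fun above
# writes the weights, runs inference, and evaluates the predictions.
import numpy as np

def _toy_get_function_value(weights):
    # Toy objective: squared distance of the weights from a fixed target.
    target = np.full(len(weights), 0.5)
    return float(np.sum((np.asarray(weights) - target) ** 2))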
def calculate_experiment_performance(dataset, wl_method, evaluator, acq, folds):
    # initialize the array that will be populated in the following loop
    # with the performance outcome of each fold
    experiment_performance = np.array([])

    for fold in folds:
        # load the prediction dataframe
        try:
            if METHOD == 'psl':
                predicted_df = load_psl_prediction_frame(
                    dataset, wl_method, evaluator, fold,
                    dataset_properties[dataset]['evaluation_predicate'],
                    "acquisition_study", acq=acq)
            elif METHOD == 'tuffy':
                predicted_df = load_tuffy_prediction_frame(
                    dataset, wl_method, evaluator, fold,
                    dataset_properties[dataset]['evaluation_predicate'],
                    "acquisition_study", acq=acq)
            else:
                raise ValueError(
                    "{} not supported. Try: ['psl', 'tuffy']".format(METHOD))
        except FileNotFoundError as err:
            print(err)
            continue

        # truth dataframe
        truth_df = load_truth_frame(
            dataset, fold, dataset_properties[dataset]['evaluation_predicate'])
        # observed dataframe
        observed_df = load_observed_frame(
            dataset, fold, dataset_properties[dataset]['evaluation_predicate'])
        # target dataframe
        target_df = load_target_frame(
            dataset, fold, dataset_properties[dataset]['evaluation_predicate'])

        experiment_performance = np.append(
            experiment_performance,
            evaluator_name_to_method[evaluator](predicted_df, truth_df,
                                                observed_df, target_df))

    # organize into a performance series
    performance_series = pd.Series(index=PERFORMANCE_COLUMNS, dtype=float)
    performance_series['Dataset'] = dataset
    performance_series['Wl_Method'] = wl_method
    performance_series['Acquisition_Function'] = acq
    performance_series['Evaluation_Method'] = evaluator
    performance_series['Mean'] = experiment_performance.mean()
    performance_series['Standard_Deviation'] = experiment_performance.std()

    return performance_series
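
# A usage sketch for the function above. The argument lists here (datasets,
# wl_methods, evaluators, acqs) are hypothetical configuration lists, not
# project constants: each call yields one series, and the series stack into
# a single results table.
def build_acquisition_table(datasets, wl_methods, evaluators, acqs, folds):
    rows = [calculate_experiment_performance(d, w, e, a, folds)
            for d in datasets
            for w in wl_methods
            for e in evaluators
            for a in acqs]
    return pd.DataFrame(rows, columns=PERFORMANCE_COLUMNS)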
def calculate_experiment_training_performance(dataset, wl_method, evaluator, folds):
    dirname = os.path.dirname(__file__)

    # initialize the array that will be populated in the following loop
    # with the training performance outcome of each fold
    training_performance = np.array([])

    for fold in folds:
        path = '{}/../results/weightlearning/{}/performance_study/{}/{}/{}/{}'.format(
            dirname, METHOD, dataset, wl_method, evaluator, fold)

        # load the prediction dataframe
        try:
            if METHOD == 'psl':
                predicted_df = load_psl_prediction_frame(
                    dataset, wl_method, evaluator, fold,
                    dataset_properties[dataset]['evaluation_predicate'],
                    "performance_study", "learn",
                    inferred_predicates_file='inferred-train-predicates.txt')
            elif METHOD == 'tuffy':
                if wl_method == 'DiagonalNewton':
                    predicted_df = load_tuffy_prediction_frame(
                        dataset, wl_method, evaluator, fold,
                        dataset_properties[dataset]['evaluation_predicate'],
                        "performance_study", "learn",
                        inferred_predicates_file='inferred-train-predicates.txt')
            else:
                raise ValueError(
                    "{} not supported. Try: ['psl', 'tuffy']".format(METHOD))
        except FileNotFoundError as err:
            print(err)
            continue

        if wl_method == 'DiagonalNewton':
            # truth dataframe
            truth_df = load_truth_frame(
                dataset, fold,
                dataset_properties[dataset]['evaluation_predicate'],
                phase='learn')
            # observed dataframe
            observed_df = load_observed_frame(
                dataset, fold,
                dataset_properties[dataset]['evaluation_predicate'],
                phase='learn')
            # target dataframe
            target_df = load_target_frame(
                dataset, fold,
                dataset_properties[dataset]['evaluation_predicate'],
                phase='learn')

            training_performance = np.append(
                training_performance,
                evaluator_name_to_method[evaluator](predicted_df, truth_df,
                                                    observed_df, target_df))
        else:
            # the training objective is only reported in the learn log;
            # parse_training_objective is a hypothetical helper (sketched just
            # after this function) that pulls the final numeric value out of it
            output = subprocess.getoutput("cat {}/learn_out.txt".format(path))
            training_performance = np.append(training_performance,
                                             parse_training_objective(output))

    # organize into a performance series
    performance_series = pd.Series(index=PERFORMANCE_COLUMNS, dtype=float)
    performance_series['Dataset'] = dataset
    performance_series['Wl_Method'] = wl_method
    performance_series['Evaluation_Method'] = evaluator
    performance_series['Mean'] = training_performance.mean()
    performance_series['Standard_Deviation'] = training_performance.std()

    return performance_series
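
# Hypothetical helper used by the Tuffy branch above. The learn_out.txt
# format is project-specific, so treat this parser as an assumption to be
# adjusted: it simply returns the last number that appears in the log text.
import re

def parse_training_objective(log_text):
    """Return the last numeric value found in a Tuffy learn log."""
    matches = re.findall(r'[-+]?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?', log_text)
    if not matches:
        raise ValueError("no numeric value found in learn output")
    return float(matches[-1])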
def calculate_experiment_performance(dataset, inference_method, evaluator, folds):
    # initialize the array that will be populated in the following loop
    # with the performance outcome of each fold
    fold_performance = np.array([])

    for fold in folds:
        experiment_performance = np.array([])
        path = '{}/../results/online/performance_study/{}/{}/{}/{}'.format(
            DIRNAME, inference_method, dataset, evaluator, fold)
        time_steps = [time_step for time_step in os.listdir(path)
                      if os.path.isdir(os.path.join(path, time_step))]

        for time_step in time_steps:
            # load the prediction dataframe
            try:
                predicted_df = load_psl_prediction_frame(
                    dataset, inference_method, evaluator, fold, time_step,
                    dataset_properties[dataset]['evaluation_predicate'],
                    "performance_study")
            except FileNotFoundError as err:
                print(err)
                continue

            # truth dataframe
            truth_df = load_truth_frame(
                dataset, fold, time_step,
                dataset_properties[dataset]['evaluation_predicate'])
            # observed dataframe
            observed_df = load_observed_frame(
                dataset, fold, time_step,
                dataset_properties[dataset]['evaluation_predicate'])
            # target dataframe
            target_df = load_target_frame(
                dataset, fold, time_step,
                dataset_properties[dataset]['evaluation_predicate'])

            # experiment_performance is an np array of performance values
            # indexed by time step
            experiment_performance = np.append(
                experiment_performance,
                evaluator_name_to_method[evaluator](predicted_df, truth_df,
                                                    observed_df, target_df))

        if fold_performance.shape[0] == 0:
            fold_performance = np.array([experiment_performance])
        else:
            fold_performance = np.append(fold_performance,
                                         [experiment_performance], axis=0)

    # organize into a performance dataframe
    performance_df = pd.DataFrame(columns=PERFORMANCE_COLUMNS, index=time_steps)
    performance_df['Dataset'] = dataset
    performance_df['Inference_Method'] = inference_method
    performance_df['Evaluation_Method'] = evaluator
    performance_df['Time_Step'] = time_steps
    performance_df['Mean'] = fold_performance.mean(axis=0)
    # standard error of the mean across folds: sample std / sqrt(num folds)
    performance_df['Standard_Error'] = (fold_performance.std(axis=0, ddof=1)
                                        / np.sqrt(fold_performance.shape[0]))

    return performance_df
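
# A standalone illustration (toy numbers, not project data) of the
# aggregation above: rows are folds, columns are time steps, so axis=0
# statistics summarize each time step across folds.
import numpy as np

scores = np.array([[0.70, 0.72, 0.75],   # fold 0 over three time steps
                   [0.68, 0.71, 0.77]])  # fold 1 over three time steps
mean_per_step = scores.mean(axis=0)                     # [0.69, 0.715, 0.76]
sem_per_step = scores.std(axis=0, ddof=1) / np.sqrt(2)  # standard error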
def calculate_experiment_robustness(dataset, wl_method, evaluator, iters):
    # initialize the array that will be populated in the following loop
    # with the performance outcome of each iteration
    experiment_performance = np.array([])

    # truth dataframe
    truth_df = load_truth_frame(
        dataset, FOLD, dataset_properties[dataset]['evaluation_predicate'])
    # observed dataframe
    observed_df = load_observed_frame(
        dataset, FOLD, dataset_properties[dataset]['evaluation_predicate'])
    # target dataframe
    target_df = load_target_frame(
        dataset, FOLD, dataset_properties[dataset]['evaluation_predicate'])

    for iteration in iters:
        # load the prediction dataframe
        try:
            if METHOD == 'psl':
                predicted_df = load_psl_prediction_frame(
                    dataset, wl_method, evaluator, iteration,
                    dataset_properties[dataset]['evaluation_predicate'],
                    "robustness_study")
            elif METHOD == 'tuffy':
                predicted_df = load_tuffy_prediction_frame(
                    dataset, wl_method, evaluator, iteration,
                    dataset_properties[dataset]['evaluation_predicate'],
                    "robustness_study")
            else:
                raise ValueError(
                    "{} not supported. Try: ['psl', 'tuffy']".format(METHOD))
        except FileNotFoundError as err:
            print(err)
            continue

        experiment_performance = np.append(
            experiment_performance,
            evaluator_name_to_method[evaluator](predicted_df, truth_df,
                                                observed_df, target_df))

    # organize into a robustness series
    robustness_series = pd.Series(index=ROBUSTNESS_COLUMNS, dtype=float)
    robustness_series['Dataset'] = dataset
    robustness_series['Wl_Method'] = wl_method
    robustness_series['Evaluation_Method'] = evaluator
    robustness_series['Mean'] = experiment_performance.mean()
    robustness_series['Standard_Deviation'] = experiment_performance.std()

    return robustness_series
def main(srl_method_name, evaluator_name, example_name, fold, seed, alpha,
         study, out_directory):
    """Driver for RGS weight learning"""
    # path to this file relative to caller
    dirname = os.path.dirname(__file__)

    # Initialize logging level, switch to DEBUG for more info.
    initLogging(logging_level=logging.INFO)

    logging.info("Performing RGS on {}:{}:{}".format(
        srl_method_name, evaluator_name, example_name))

    # the same grid as the default psl core implementation of RGS
    grid = GRID[srl_method_name]
    # the same number of iterations as the default psl RGS for this experiment
    n = 50

    # model specific parameters
    num_weights = HELPER_METHODS[srl_method_name]['get_num_weights'](example_name)
    predicate = EVAL_PREDICATE[example_name]

    # the dataframes we will be using for evaluation
    truth_df = load_truth_frame(example_name, fold, predicate, 'learn')
    observed_df = load_observed_frame(example_name, fold, predicate, 'learn')
    target_df = load_target_frame(example_name, fold, predicate, 'learn')

    # initial state
    if IS_HIGHER_REP_BETTER[evaluator_name]:
        best_performance = -np.inf
    else:
        best_performance = np.inf
    best_weights = np.zeros(num_weights)

    np.random.seed(int(seed))

    for i in range(n):
        logging.info("Iteration {}".format(i))

        # obtain a random weight configuration for the model
        weights = np.random.choice(grid, num_weights)
        logging.info("Trying Configuration: {}".format(weights))

        # assign the weight configuration to the model file
        HELPER_METHODS[srl_method_name]['write_learned_weights'](weights,
                                                                 example_name)

        # perform inference
        # TODO: psl file structure does not fit this pattern: wrapper_learn
        process = subprocess.Popen(
            'cd {}/../{}_scripts; ./run_inference.sh {} {} {} {} {} {}'.format(
                dirname, srl_method_name, example_name, 'RGS', 'wrapper_learn',
                fold, evaluator_name, out_directory),
            shell=True)
        process.wait()

        # fetch results
        if study == "robustness_study":
            predicted_df = HELPER_METHODS[srl_method_name]['load_prediction_frame'](
                example_name, 'RGS', evaluator_name, seed, predicate, study,
                "learn", alpha)
        else:
            predicted_df = HELPER_METHODS[srl_method_name]['load_prediction_frame'](
                example_name, 'RGS', evaluator_name, fold, predicate, study,
                "learn", alpha)

        performance = EVALUATE_METHOD[evaluator_name](predicted_df, truth_df,
                                                      observed_df, target_df)
        logging.info("Configuration Performance: {}: {}".format(evaluator_name,
                                                                performance))

        # update the best weight configuration if performance improved
        if IS_HIGHER_REP_BETTER[evaluator_name]:
            if performance > best_performance:
                best_performance = performance
                best_weights = weights
        else:
            if performance < best_performance:
                best_performance = performance
                best_weights = weights

    # assign the best weight configuration to the model file
    HELPER_METHODS[srl_method_name]['write_learned_weights'](best_weights,
                                                             example_name)
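
# A standalone illustration of the RGS sampling step above. The grid here is
# a hypothetical stand-in; the real grids live in the project's GRID
# dictionary, keyed by SRL method.
import numpy as np

example_grid = np.logspace(-2, 2, num=9)  # assumed grid: 0.01 ... 100
np.random.seed(4)
candidate = np.random.choice(example_grid, 5)  # one random 5-weight config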
def main(srl_method_name, evaluator_name, example_name, fold, seed, alpha,
         study, out_directory):
    """
    Driver for CRGS weight learning
    :param srl_method_name: SRL framework to learn weights for: 'psl' or 'tuffy'
    :param evaluator_name: name of the evaluation metric used as the objective
    :param example_name: name of the example (dataset) being learned on
    :param fold: data fold to learn on
    :param seed: random seed for the weight sampler
    :param alpha: concentration parameter of the Dirichlet sampling distribution
    :param study: name of the study being run, e.g. "robustness_study"
    :param out_directory: directory the inference output is written to
    :return: None; the best weights found are written to the model file
    """
    # path to this file relative to caller
    dirname = os.path.dirname(__file__)

    # Initialize logging level, switch to DEBUG for more info.
    initLogging(logging_level=logging.INFO)

    logging.info("Performing CRGS on {}:{}:{}".format(
        srl_method_name, evaluator_name, example_name))

    # the number of samples
    n = NUM_SAMPLES

    # the defaults from the psl core code, recentered for tuffy to allow for
    # negative weights
    weight_mean = MEAN[srl_method_name]
    variance = 0.20

    # model specific parameters
    num_weights = HELPER_METHODS[srl_method_name]['get_num_weights'](example_name)
    predicate = EVAL_PREDICATE[example_name]

    # parameters for the sampling distribution
    mean_vector = np.array([weight_mean] * num_weights)
    variance_matrix = np.eye(num_weights) * variance

    logging.info("Optimizing over {} weights".format(num_weights))

    # the dataframes we will be using for evaluation
    truth_df = load_truth_frame(example_name, fold, predicate, 'learn')
    observed_df = load_observed_frame(example_name, fold, predicate, 'learn')
    target_df = load_target_frame(example_name, fold, predicate, 'learn')

    # initial state
    if IS_HIGHER_REP_BETTER[evaluator_name]:
        best_performance = -np.inf
    else:
        best_performance = np.inf
    best_weights = np.zeros(num_weights)

    logging.info("Setting seed {}".format(seed))
    np.random.seed(int(seed))

    for i in range(n):
        logging.info("Iteration {}".format(i))

        # obtain a random weight configuration for the model:
        # sample from a Dirichlet and randomly set the orthant
        weights = (np.random.dirichlet(np.ones(num_weights) * alpha)
                   * np.random.choice([-1, 1], num_weights))
        logging.info("Trying Configuration: {}".format(weights))

        # assign the weight configuration to the model file
        HELPER_METHODS[srl_method_name]['write_learned_weights'](weights,
                                                                 example_name)

        # perform inference
        # TODO: (Charles.) psl file structure needs to fit this pattern: wrapper_learn
        logging.info("Writing to {}".format(out_directory))
        process = subprocess.Popen(
            'cd {}/../{}_scripts; ./run_inference.sh {} {} {} {} {}'.format(
                dirname, srl_method_name, example_name, 'wrapper_learn', fold,
                evaluator_name, out_directory),
            shell=True)
        logging.info("Waiting for inference")
        process.wait()

        # fetch results
        if study == "robustness_study":
            predicted_df = HELPER_METHODS[srl_method_name]['load_prediction_frame'](
                example_name, 'CRGS', evaluator_name, seed, predicate, study, alpha)
        else:
            predicted_df = HELPER_METHODS[srl_method_name]['load_prediction_frame'](
                example_name, 'CRGS', evaluator_name, fold, predicate, study, alpha)

        performance = EVALUATE_METHOD[evaluator_name](predicted_df, truth_df,
                                                      observed_df, target_df)
        logging.info("Configuration Performance: {}: {}".format(evaluator_name,
                                                                performance))

        # update the best weight configuration if performance improved
        if IS_HIGHER_REP_BETTER[evaluator_name]:
            if performance > best_performance:
                best_performance = performance
                best_weights = weights
        else:
            if performance < best_performance:
                best_performance = performance
                best_weights = weights

    # assign the best weight configuration to the model file
    HELPER_METHODS[srl_method_name]['write_learned_weights'](best_weights,
                                                             example_name)
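
# A standalone illustration (toy values) of the CRGS sampling step above: a
# Dirichlet draw lies on the probability simplex, so after the random sign
# flip every configuration has unit L1 norm. alpha=0.1 here is an assumed
# concentration value, not a project default.
import numpy as np

np.random.seed(0)
w = np.random.dirichlet(np.ones(5) * 0.1) * np.random.choice([-1, 1], 5)
assert np.isclose(np.abs(w).sum(), 1.0)  # signed simplex: ||w||_1 == 1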
def main(srl_method_name, evaluator_name, example_name, fold, seed, alpha,
         study, out_directory):
    """
    Driver for HB weight learning
    :param srl_method_name: SRL framework to learn weights for: 'psl' or 'tuffy'
    :param evaluator_name: name of the evaluation metric used as the objective
    :param example_name: name of the example (dataset) being learned on
    :param fold: data fold to learn on
    :param seed: random seed for the weight sampler
    :param alpha: concentration parameter of the Dirichlet sampling distribution
    :param study: name of the study being run, e.g. "robustness_study"
    :param out_directory: directory the inference output is written to
    :return: None; the best weights found are written to the model file
    """
    # path to this file relative to caller
    dirname = os.path.dirname(__file__)

    # Initialize logging level, switch to DEBUG for more info.
    initLogging(logging_level=logging.INFO)

    logging.info("Performing Hyperband on {}:{}:{}".format(
        srl_method_name, evaluator_name, example_name))

    # model specific parameters
    num_weights = HELPER_METHODS[srl_method_name]['get_num_weights'](example_name)
    predicate = EVAL_PREDICATE[example_name]

    logging.info("Optimizing over {} weights".format(num_weights))

    # the dataframes we will be using for evaluation
    truth_df = load_truth_frame(example_name, fold, predicate, 'learn')
    observed_df = load_observed_frame(example_name, fold, predicate, 'learn')
    target_df = load_target_frame(example_name, fold, predicate, 'learn')

    # initial state
    np.random.seed(int(seed))

    def get_random_configuration():
        # sample from a Dirichlet and randomly set the orthant
        weights = (np.random.dirichlet(np.ones(num_weights) * alpha)
                   * np.random.choice([-1, 1], num_weights))
        return weights

    def run_then_return_val_loss(num_iters, weights):
        # assign the weight configuration to the model file
        HELPER_METHODS[srl_method_name]['write_learned_weights'](weights,
                                                                 example_name)

        # extra options to set the max number of iterations
        extra_options = MAX_ITER_OPTION[srl_method_name] + str(int(np.ceil(num_iters)))

        # perform inference
        # TODO: (Charles.) psl file structure needs to fit this pattern if we
        # want to use this wrapper: wrapper_learn
        process = subprocess.Popen(
            'cd {}/../{}_scripts; ./run_inference.sh {} {} {} {} {} {}'.format(
                dirname, srl_method_name, example_name, 'wrapper_learn', fold,
                evaluator_name, out_directory, extra_options),
            shell=True)
        process.wait()

        # fetch results
        if study == "robustness_study":
            predicted_df = HELPER_METHODS[srl_method_name]['load_prediction_frame'](
                example_name, 'HB', evaluator_name, seed, predicate, study, alpha)
        else:
            predicted_df = HELPER_METHODS[srl_method_name]['load_prediction_frame'](
                example_name, 'HB', evaluator_name, fold, predicate, study, alpha)

        # return the negated metric if we are maximizing performance, so the
        # caller can always minimize
        if IS_HIGHER_REP_BETTER[evaluator_name]:
            return -EVALUATE_METHOD[evaluator_name](predicted_df, truth_df,
                                                    observed_df, target_df)
        else:
            return EVALUATE_METHOD[evaluator_name](predicted_df, truth_df,
                                                   observed_df, target_df)

    # maximum iterations/epochs per configuration
    max_iter = MAX_ITER_DEFAULT[srl_method_name]
    # defines the downsampling rate (default=4)
    eta = SURVIVAL_DEFAULT
    logeta = lambda x: np.log(x) / np.log(eta)
    # number of unique executions of Successive Halving (minus one)
    s_max = int(logeta(max_iter))
    # total number of iterations (without reuse) per execution of Successive
    # Halving (n,r)
    B = (s_max + 1) * max_iter

    # initialize
    best_val = np.inf
    best_weights = np.zeros(num_weights)

    # Begin Finite Horizon Hyperband outer loop.
    for s in reversed(range(s_max + 1)):
        # initial number of configurations
        n = int(np.ceil(int(B / max_iter / (s + 1)) * eta ** s))
        # initial number of iterations to run configurations for
        r = max_iter * eta ** (-s)

        # Begin Finite Horizon Successive Halving with (n,r)
        T = [get_random_configuration() for _ in range(n)]
        val_losses = []
        total_iter = 0
        for i in range(s + 1):
            # Run each of the n_i configs for r_i iterations and keep the
            # best n_i/eta
            n_i = n * eta ** (-i)
            r_i = r * eta ** i
            total_iter = total_iter + r_i

            # The standard algorithm would only run r_i further iterations,
            # but each call here restarts the optimization from scratch, so
            # to get the same effect we rerun the total number of iterations
            # accumulated so far. Note: very inefficient.
            val_losses = [run_then_return_val_loss(num_iters=total_iter,
                                                   weights=t) for t in T]
            T = [T[j] for j in np.argsort(val_losses)[0:int(np.ceil(n_i / eta))]]
            logging.info(
                "Successive halving: (n,r) = ({}, {}) Bracket winners: "
                "Configs: {} Vals: {}".format(
                    n_i, r_i, T,
                    np.sort(val_losses)[0:int(np.ceil(n_i / eta))]))

        tournament_winning_val = min(val_losses)
        logging.info(
            "Hyperband outer loop: (s) = ({}) Tournament winner: "
            "Config: {} Val: {}".format(s, T, tournament_winning_val))
        if tournament_winning_val < best_val:
            best_val = tournament_winning_val
            best_weights = T[0]

    # assign the best weight configuration to the model file
    HELPER_METHODS[srl_method_name]['write_learned_weights'](best_weights,
                                                             example_name)
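
# A standalone illustration of the bracket schedule computed above, using
# assumed budgets (max_iter=81, eta=3, which are not the project defaults):
# each bracket s trades off the number of configurations n against the
# initial per-configuration budget r.
import numpy as np

max_iter, eta = 81, 3
s_max = int(np.log(max_iter) / np.log(eta))
B = (s_max + 1) * max_iter
for s in reversed(range(s_max + 1)):
    n = int(np.ceil(int(B / max_iter / (s + 1)) * eta ** s))
    r = max_iter * eta ** (-s)
    print("bracket s={}: n={} configs, r={} initial iterations".format(s, n, r))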