def train_al_and_report(model_name, kernel, warp, ard): dataset_dir = os.path.join(MODEL_DIR, DATASET) try: os.makedirs(dataset_dir) except OSError: print "skipping output folder" for fold in xrange(1): fold_dir = os.path.join(SPLIT_DIR, DATASET, str(fold)) train_data = np.loadtxt(os.path.join(fold_dir, 'train')) test_data = np.loadtxt(os.path.join(fold_dir, 'test')) params_file = None output_dir = os.path.join(dataset_dir, str(fold)) try: os.makedirs(output_dir) except OSError: print "skipping output folder" if ard: iso_dir = output_dir.replace('True', 'False') params_file = os.path.join(iso_dir, 'params') # Split train into train and pool pool_data = train_data[50:] train_data = train_data[:50] metrics_list = [] while True: # Train gp gp = util.train_gp_model(train_data, kernel, warp, ard, params_file) # Get metrics on test metrics = util.get_metrics(gp, test_data) metrics_list.append([metrics[0], metrics[1], metrics[2][0], metrics[2][1], metrics[3]]) # Predict pool and select instance in pool with higher variance new_instance, new_i = util.query(gp, pool_data) # Update train and pool train_data = np.append(train_data, [new_instance], axis=0) pool_data = np.delete(pool_data, (new_i), axis=0) #if pool_data.shape[0] == 0: # break if train_data.shape[0] == 500: break print pool_data.shape gc.collect(2) # Final metrics on full train set (sanity check) gp = util.train_gp_model(train_data, kernel, warp, ard, params_file) metrics = util.get_metrics(gp, test_data) metrics_list.append([metrics[0], metrics[1], metrics[2][0], metrics[2][1], metrics[3]]) util.save_metrics_list(metrics_list, os.path.join(output_dir, 'metrics'))
def train_and_report(model_name, kernel, warp, ard): dataset_dir = os.path.join(MODEL_DIR, DATASET) try: os.makedirs(dataset_dir) except OSError: print "skipping output folder" for fold in xrange(10): fold_dir = os.path.join(SPLIT_DIR, DATASET, str(fold)) train_data = np.loadtxt(os.path.join(fold_dir, 'train')) test_data = np.loadtxt(os.path.join(fold_dir, 'test')) params_file = None output_dir = os.path.join(dataset_dir, str(fold)) try: os.makedirs(output_dir) except OSError: print "skipping output folder" if ard: iso_dir = output_dir.replace('True', 'False') params_file = os.path.join(iso_dir, 'params') gp = util.train_gp_model(train_data, kernel, warp, ard, params_file) util.save_parameters(gp, os.path.join(output_dir, 'params')) util.save_gradients(gp, os.path.join(output_dir, 'grads')) metrics = util.get_metrics(gp, test_data) util.save_metrics(metrics, os.path.join(output_dir, 'metrics')) util.save_cautious_curves(gp, test_data, os.path.join(output_dir, 'curves')) util.save_predictions(gp, test_data, os.path.join(output_dir, 'preds')) asym_metrics = util.get_asym_metrics(gp, test_data) util.save_asym_metrics(asym_metrics, os.path.join(output_dir, 'asym_metrics')) gc.collect(2) # buggy GPy has allocation cycles...
def train_and_report(model_name, kernel, warp, ard, likelihood='gaussian'): dataset_dir = os.path.join(MODEL_DIR, DATASET) try: os.makedirs(dataset_dir) except OSError: print "skipping output folder" for fold in xrange(10): fold_dir = os.path.join(SPLIT_DIR, DATASET, str(fold)) train_data = np.loadtxt(os.path.join(fold_dir, 'train')) test_data = np.loadtxt(os.path.join(fold_dir, 'test')) output_dir = os.path.join(dataset_dir, str(fold)) params_file = None if ard: iso_dir = output_dir.replace('True', 'False') params_file = os.path.join(iso_dir, 'params') gp = util.train_gp_model(train_data, kernel, warp, ard, params_file, likelihood=likelihood) metrics = util.get_metrics(gp, test_data) try: os.makedirs(output_dir) except OSError: print "skipping output folder" util.save_parameters(gp, os.path.join(output_dir, 'params')) util.save_metrics(metrics, os.path.join(output_dir, 'metrics')) #util.save_gradients(gp, os.path.join(output_dir, 'grads')) util.save_cautious_curves(gp, test_data, os.path.join(output_dir, 'curves')) util.save_predictions(gp, test_data, os.path.join(output_dir, 'preds'))