def main():
    model_path = ""
    data_path = ""
    if "RESULT_DIR" in os.environ:
        model_path = os.environ["RESULT_DIR"]
    if "DATA_DIR" in os.environ:
        data_path = os.environ["DATA_DIR"]
    checkpoint_path = os.path.join(model_path, "model", "checkpoint")
    data_path = os.path.join(data_path, flags.FLAGS.data)
    data = create_dataset(data_path, split=0.8)
    if flags.FLAGS.hpo:  # run hyperparameter optimization when --hpo is set
        print("Running hyperparameter optimization")
        params = {
            "learning_rate": [1e-3, 5e-3, 1e-2],
            "n_factors": [8, 16, 32],
            "n_epochs": [50, 100]
        }
        grid = GridSearch(model_fn=NCF, param_grid=params,
                          scoring_fn=evaluate_model_spark)
        optimized_params = grid.run(data)
        # retrain on the full dataset with the best parameters found
        full_data = create_dataset(data_path)
        train_model(full_data, checkpoint_path, **optimized_params)
    else:
        train_model(data, checkpoint_path)
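# Example invocation (hypothetical script name and paths; RESULT_DIR and
# DATA_DIR are read from the environment, e.g. on a managed training service):
#
#   RESULT_DIR=/results DATA_DIR=/data python train.py --data ratings.csv --hpo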
def calc_performance(X):
    res = []
    grid = {
        'click_weight': [0.0, 0.01, 0.1, 1.0],
        'n_components': [10, 30, 50]
    }
    grid_search = GridSearch(grid=grid)
    for param in grid_search.iter_grid():
        mapk = cross_val_performance(X=X, **param)
        param['mapk'] = mapk
        res.append(param)
    pd.DataFrame(res).sort_values('mapk', ascending=False).to_csv(
        '~/Downloads/collab-filtering.csv', index=False)
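# For reference, iter_grid() above yields one parameter dict per point in the
# Cartesian product of the grid's values. A minimal sketch of that pattern,
# assuming nothing beyond the standard library (hypothetical; the project's
# GridSearch class is defined elsewhere):
import itertools

def iter_grid(grid):
    """Yield a dict for every combination of the grid's values."""
    keys = list(grid)
    for values in itertools.product(*(grid[k] for k in keys)):
        yield dict(zip(keys, values))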
def main():
    parser = argparse.ArgumentParser(
        description='Optimizing number of polygons',
        prog='genetic-optimization')
    parser.add_argument('image', type=str,
                        help='Path to image e.g. /images/pic.jpg')
    results = parser.parse_args()
    parameters = {
        'image': results.image,
        'population_size': 50,
        'polygons_count': 25,
        'time_to_run': [.1],
        'retain': [0.1, 0.2, 0.3, .5, .8],
        'mutate': [.5, .6, .9]
    }
    grid_search = GridSearch(**parameters)
    grid_search.search()
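# Example invocation (hypothetical entry-point file name; the positional
# image argument is the only required input):
#
#   python genetic-optimization.py /images/pic.jpg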
def calc_cv_performance(train):
    res = []
    grid = {
        'max_depth': [None, 3],
        'max_features': ['sqrt', .4]
    }
    grid_search = GridSearch(grid=grid)
    time_aggs = [None, 'day']
    for time_agg in time_aggs:
        X_train, y_train, _, _ = preprocess_X(X=train, time_agg=time_agg)
        for param in grid_search.iter_grid():
            print(f'{datetime.datetime.now()} \tTesting {param} '
                  f'with {time_agg} time_agg')
            model = RandomForestClassifier(**param)
            train_mapk, val_mapk = cross_val_performance(
                X=X_train, y=y_train, model=model,
                calc_train_performance=True)
            param['train_mapk'] = train_mapk
            param['val_mapk'] = val_mapk
            param['time_agg'] = time_agg
            res.append(param)
    res = pd.DataFrame(res)
    res.sort_values('val_mapk', ascending=False).to_csv(
        '~/Downloads/rf.csv', index=False)
    return res
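# A hedged sketch of what cross_val_performance might look like, assuming
# NumPy arrays and a model exposing fit/score (hypothetical; the project's
# real helper scores MAP@K rather than model.score):
import numpy as np
from sklearn.model_selection import KFold

def cross_val_performance(X, y, model, n_splits=5, calc_train_performance=False):
    train_scores, val_scores = [], []
    for train_idx, val_idx in KFold(n_splits=n_splits, shuffle=True).split(X):
        model.fit(X[train_idx], y[train_idx])
        val_scores.append(model.score(X[val_idx], y[val_idx]))
        if calc_train_performance:
            train_scores.append(model.score(X[train_idx], y[train_idx]))
    if calc_train_performance:
        return np.mean(train_scores), np.mean(val_scores)
    return np.mean(val_scores)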
def main():
    MODEL_NAME = input(
        "Please enter a model name (dgn, conv3d, lstm_bucketing, lstm_sliding): ")
    GA = input("Do you want to use a genetic algorithm for your model? (yes/no): ")
    GS = input("Do you want to use grid search for your model? (yes/no): ")
    time_str = time.strftime("%Y-%m-%d_%H %M")
    path = PATH_SAVE_FIG + str(time_str)

    if MODEL_NAME == 'dgn':
        parameters = PARAMETERS_DGN
        model_function = model.dgn_model
    elif MODEL_NAME == 'conv3d':
        parameters = PARAMETERS_CONV3D
        model_function = model.conv3d_model
    elif MODEL_NAME in ('lstm_bucketing', 'lstm_sliding'):
        parameters = PARAMETERS_LSTM
        model_function = model.lstm_model
    else:
        raise ValueError(f"Unknown model name: {MODEL_NAME!r}")

    print('\nLoading dataset...')
    data_set = load_data(MODEL_NAME)
    print('Done!\n')

    if GA == 'no' and GS == 'no':
        one_train(path, data_set, model_function, parameters)
    if GA == 'yes':
        optim = GeneticAlgorithm(path, parameters, model_function, data_set)
        optim.run()
    if GS == 'yes':
        optim = GridSearch(path, parameters, model_function, data_set)
        optim.run()
# -------- import GridSearch and define/import the compile function -------- #
import sys
sys.path.append('SIGS-Grid-Search')
from grid_search import GridSearch

# -------- main file to run -------- #
main_file = '-m spinup.run'

# -------- define dictionary of arguments for grid search -------- #
args = {
    'algo': ['ppo'],
    'env': ['CartPole-v0'],
    'steps_per_epoch': [4000],
    'epochs': [500],
    'seed': [0, 1, 2],
    'num_cpu': [2]
}

# -------- create GridSearch object and run -------- #
myGridSearch = GridSearch(main_file, args, num_process=15)
myGridSearch.run()
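# For intuition only: a wrapper like this typically expands the args dict into
# one command line per combination and spawns each as a subprocess. A
# hypothetical sketch of that expansion (not SIGS-Grid-Search's actual
# implementation):
import itertools

def build_commands(main_file, args):
    keys = list(args)
    for values in itertools.product(*(args[k] for k in keys)):
        flags = ' '.join(f'--{k} {v}' for k, v in zip(keys, values))
        yield f'python {main_file} {flags}'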
class Agent(object):
    """Agent for dispatching and repositioning drivers."""

    def __init__(self):
        with open(os.path.join(dir_path, "value_function.pkl"), "rb") as f:
            self.v_f = pickle.load(f)
        self.gamma = 0.9
        self.advantage_fun = None
        self.gridsearch = GridSearch()

    def dispatch(self, dispatch_observ):
        """ Compute the assignment between drivers and passengers at each time step
        :param dispatch_observ: a list of dict, the key in the dict includes:
            order_id, int
            driver_id, int
            order_driver_distance, float
            order_start_location, a list as [lng, lat], float
            order_finish_location, a list as [lng, lat], float
            driver_location, a list as [lng, lat], float
            timestamp, int
            order_finish_timestamp, int
            day_of_week, int
            reward_units, float
            pick_up_eta, float
        :return: a list of dict, the key in the dict includes:
            order_id and driver_id, the pair indicating the assignment
        """
        # Re-index driver ids to a compact 0..n-1 range.
        order_set = set()
        driver_set = set()
        for od in dispatch_observ:
            order_set.add(od["order_id"])
            driver_set.add(od["driver_id"])
        driver_id_dict = dict(zip(driver_set, range(len(driver_set))))
        # driver_id_refresh_dict = dict(zip(range(len(driver_set)), driver_set))

        # Compute the advantage function for every (driver, order) pair.
        self.advantage_fun = pd.DataFrame(
            np.zeros([len(driver_set), len(order_set)]),
            columns=list(order_set),
            index=driver_id_dict.values())
        for od in dispatch_observ:
            oid, did = od["order_id"], driver_id_dict[od["driver_id"]]
            fgrid, lgrid = self.gridsearch.cal_loc_grid(
                [od["driver_location"], od["order_finish_location"]])
            ftid = self.cal_time_index_for_day(od["timestamp"])
            ltid = self.cal_time_index_for_day(od["order_finish_timestamp"])
            self.advantage_fun[oid][did] = pow(
                self.gamma, ltid - ftid) * self.v_f[ltid][lgrid] - self.v_f[
                    ftid][fgrid] + od["reward_units"]

        # Standardize the input format: build sparse order/driver incidence
        # matrices over the candidate (order, driver) pairs.
        rec_num = len(dispatch_observ)
        order_mat_row, order_mat_col = [None] * rec_num, [None] * rec_num
        driver_mat_row, driver_mat_col = [None] * rec_num, [None] * rec_num
        order_idx_gmv = np.zeros(rec_num)
        order_num, driver_num = 0, 0
        for idx, od in enumerate(dispatch_observ):
            oid, did = od["order_id"], driver_id_dict[od["driver_id"]]
            order_num = max(order_num, oid + 1)
            driver_num = max(driver_num, did + 1)
            order_idx_gmv[idx] = self.advantage_fun[oid][did]
            order_mat_row[idx], order_mat_col[idx] = oid, idx
            driver_mat_row[idx], driver_mat_col[idx] = did, idx
        order_mat = sp.coo_matrix(
            (np.ones(rec_num), (order_mat_row, order_mat_col)),
            shape=(order_num, rec_num))
        driver_mat = sp.coo_matrix(
            (np.ones(rec_num), (driver_mat_row, driver_mat_col)),
            shape=(driver_num, rec_num))

        # Solve the assignment as a convex (integer) program with cvxpy.
        X = cvx.Variable(rec_num, boolean=True)
        obj = order_idx_gmv * X
        constr = [
            order_mat * X <= 1,
            driver_mat * X <= 1,
        ]
        prob = cvx.Problem(cvx.Maximize(obj), constr)
        prob.solve(solver=cvx.GLPK_MI,
                   glpk={'msg_lev': 'GLP_MSG_OFF', 'presolve': 'GLP_ON'})
        # opt_v = prob.value
        opt_X = X.value

        # Standardize the output format.
        dispatch_action = []
        for idx, od in enumerate(dispatch_observ):
            if opt_X[idx] == 1:
                oid, did = od["order_id"], od["driver_id"]
                dispatch_action.append(dict(order_id=oid, driver_id=did))
        return dispatch_action

    def reposition(self, repo_observ):
        """ Compute the reposition action for the given drivers
        :param repo_observ: a dict, the key in the dict includes:
            timestamp: int
            driver_info: a list of dict, the key in the dict includes:
                driver_id: driver_id of the idle driver in the treatment group, int
                grid_id: id of the grid the driver is located at, str
                day_of_week: int
        :return: a list of dict, the key in the dict includes:
            driver_id: corresponding to the driver_id in the od_list
            destination: id of the grid the driver is repositioned to, str
        """
        repo_action = []
        for driver in repo_observ['driver_info']:
            # the default reposition is to let drivers stay where they are
            repo_action.append({
                'driver_id': driver['driver_id'],
                'destination': driver['grid_id']
            })
        return repo_action

    def cal_time_index_for_day(self, timestamp,
                               dispatch_freqency_gap=300, BASEHOUR=0) -> int:
        """Convert an absolute timestamp into a time index (tid) within the day.
        The tid interval defaults to 5 minutes; tid = 0 corresponds to the
        start of the day (0:00 with the default BASEHOUR).
        """
        ts = time.localtime(timestamp)
        tid = ((ts[3] - BASEHOUR) * 3600 + ts[4] * 60 + ts[5]) // dispatch_freqency_gap
        return tid
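# Sanity check of the time-index arithmetic above (hypothetical values,
# assuming BASEHOUR = 0 and the default 300-second gap): 10:35:00 local time
# maps to index ((10 * 3600) + (35 * 60)) // 300 == 127.
assert ((10 - 0) * 3600 + 35 * 60 + 0) // 300 == 127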
[X_est, y_est] = load_data(data_path, names[i], estimation_range[i],
                           grayscale=False)
[X_pred, y_pred] = load_data(data_path, names[i], prediction_range[i],
                             grayscale=False)

alpha_range = np.arange(a[i].get('min'), a[i].get('max'), a[i].get('step'))
rho_range = np.arange(r[i].get('min'), r[i].get('max'), r[i].get('step')) / 10
parameters = {'alpha': alpha_range, 'rho': rho_range}

gs = GridSearch(rgbEstimatorAdaptative(metric="f1"), parameters)
gs.fitAndPredict(X_est, X_pred, None, y_pred)

fig = plt.figure()
ax = fig.gca(projection='3d')
X, Y = np.meshgrid(rho_range, alpha_range)
Z = np.array(gs.results).reshape(len(alpha_range), len(rho_range))

print('best_metric: ' + str(gs.best_score))
print('best_params: ' + str(gs.best_params))

# Plot the surface.
ax.set_zlim(0, 1)
ax.set_title(names[i])
ax.set_xlabel('rho')
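# The surface itself would typically be drawn with matplotlib's standard 3-D
# API; a minimal, hypothetical continuation of the truncated snippet above:
surf = ax.plot_surface(X, Y, Z, cmap='coolwarm', linewidth=0)
fig.colorbar(surf)
plt.show()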
"n_estimators": [10, 50, 100, 200, 400, 750, 800, 1000, 2000], "base_estimator__max_depth": [1, 2, 3, 5], "base_estimator__random_state": [0], "random_state": [0] } # params = {"C": [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000]} datamanager = CaltechManager() categories = [ c for c in os.listdir(datamanager.PATHS["CATEGORIES_DIR"]) if c != datamanager.BACKGROUND and os.path.splitext(c)[1] != ".py" ] #kernels, gammas = build_train_kernels(categories, datamanager) #print "Finished building kernels" #grids = (GridSearch(SVC(kernel="precomputed"), c) for c in categories) # grids = (GridSearch(RandomForestClassifier(), c) for c in categories) grids = [ GridSearch(AdaBoostClassifier(), datamanager, c) for c in categories ] with warnings.catch_warnings(): warnings.simplefilter("ignore") for g in grids: g.grid_search(params, weight_samples=False) generate_evaluation_summary(grids, "grid_test.csv") print "Total execution time: %f minutes" % ((time.time() - total) / 60.0)
    metavar='N', type=str, nargs='+',
    help='.pkl grid search file.')
parser.add_argument('--topk', help='show top k examples.',
                    type=int, default=3)
parser.add_argument('--show', help='show grid search plots.',
                    action='store_true')
args = parser.parse_args()

# load grid search file
print("Loading file", args.file)
gs = GridSearch.load(args.file[0])

# print statistics
stats = gs.get_stats()
print("")
print("Stats:")
print(" - mean: %.5f, med: %.5f, std: %.5f" %
      (stats['mean'], stats['median'], stats['std']))
print(" - min: %.5f, max: %.5f" % (stats['min'], stats['max']))
print(" - eval: %d, total: %d" % (stats['n_eval'], stats['n_total']))

# print best configuration
print("")
gs.print_best()
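# GridSearch.load above reads a pickled search object from disk; a minimal
# save/load pair compatible with that pattern might look like this
# (hypothetical, assuming plain pickle serialization):
import pickle

def save_grid_search(gs, path):
    with open(path, 'wb') as f:
        pickle.dump(gs, f)

def load_grid_search(path):
    with open(path, 'rb') as f:
        return pickle.load(f)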