Example #1
0
def main():
    """Train the model, optionally preceded by a hyperparameter grid search.

    The output and input roots are read from the ``RESULT_DIR`` and
    ``DATA_DIR`` environment variables (each falling back to an empty
    string). When ``flags.FLAGS.hpo`` is set, a grid search is run on an
    80/20 split and the model is then retrained on the unsplit dataset with
    the parameters returned by the search; otherwise the model is trained
    directly on the split dataset.
    """
    result_root = os.environ.get("RESULT_DIR", "")
    data_root = os.environ.get("DATA_DIR", "")

    checkpoint_path = os.path.join(result_root, "model", "checkpoint")
    data_path = os.path.join(data_root, flags.FLAGS.data)

    data = create_dataset(data_path, split=0.8)

    if not flags.FLAGS.hpo:
        # No search requested: train once with the default parameters.
        train_model(data, checkpoint_path)
        return

    print("Running hyperparameter optimization")
    search_space = {
        "learning_rate": [1e-3, 5e-3, 1e-2],
        "n_factors": [8, 16, 32],
        "n_epochs": [50, 100]
    }
    grid = GridSearch(model_fn=NCF,
                      param_grid=search_space,
                      scoring_fn=evaluate_model_spark)
    best_params = grid.run(data)

    # Retrain on the dataset created without a split, using the parameters
    # the search returned.
    train_model(create_dataset(data_path), checkpoint_path, **best_params)
Example #2
0
def calc_performance(X, out_path='~/Downloads/collab-filtering.csv'):
    """Grid-search ``click_weight`` / ``n_components`` and save the scores.

    Every parameter combination produced by ``GridSearch.iter_grid()`` is
    scored with ``cross_val_performance``; the results are written to a CSV
    sorted by ``mapk`` in descending order.

    :param X: data forwarded unchanged to ``cross_val_performance`` as ``X``.
    :param out_path: destination of the CSV report. Defaults to the path
        that was previously hard-coded.
    """
    grid = {
        'click_weight': [0.0, 0.01, 0.1, 1.0],
        'n_components': [10, 30, 50]
    }
    grid_search = GridSearch(grid=grid)

    res = []
    for param in grid_search.iter_grid():
        mapk = cross_val_performance(X=X, **param)
        # Record the score on a copy: writing into the dict owned by the
        # grid iterator would leak 'mapk' into later **param calls if the
        # iterator reuses or stores its dicts.
        res.append({**param, 'mapk': mapk})

    report = pd.DataFrame(res).sort_values('mapk', ascending=False)
    report.to_csv(out_path, index=False)
def main():
    """Parse the image path from the command line and launch a grid search.

    The single positional argument ``image`` is forwarded to ``GridSearch``
    together with fixed population settings and the candidate lists for
    ``time_to_run``, ``retain`` and ``mutate``.
    """
    cli = argparse.ArgumentParser(
        prog='genetic-optimization',
        description='Optimizing number of polygons')
    cli.add_argument('image',
                     help='Path to image e.g. /images/pic.jpg',
                     type=str)
    cli_args = cli.parse_args()

    # Scalars are fixed settings; lists are the values handed to the search.
    searcher = GridSearch(
        image=cli_args.image,
        population_size=50,
        polygons_count=25,
        time_to_run=[.1],
        retain=[0.1, 0.2, 0.3, .5, .8],
        mutate=[.5, .6, .9],
    )
    searcher.search()
Example #4
0
def calc_cv_performance(train, out_path='~/Downloads/rf.csv'):
    """Cross-validate random forests over a parameter grid and time aggregations.

    For each ``time_agg`` setting the training data is preprocessed once,
    then every parameter combination from ``GridSearch.iter_grid()`` is used
    to build a ``RandomForestClassifier`` that is scored with
    ``cross_val_performance``.

    :param train: raw training data passed to ``preprocess_X`` as ``X``.
    :param out_path: destination of the CSV report (sorted by ``val_mapk``
        descending). Defaults to the path that was previously hard-coded.
    :return: unsorted ``pandas.DataFrame`` with one row per
        (time_agg, parameter combination), holding the parameters plus
        ``train_mapk``, ``val_mapk`` and ``time_agg``.
    """
    grid = {
        'max_depth': [None, 3],
        'max_features': ['sqrt', .4]
    }
    grid_search = GridSearch(grid=grid)

    time_aggs = [None, 'day']

    records = []
    for time_agg in time_aggs:
        X_train, y_train, _, _ = preprocess_X(X=train, time_agg=time_agg)
        for param in grid_search.iter_grid():
            print(f'{datetime.datetime.now()} \tTesting {param} with {time_agg} time_agg')
            model = RandomForestClassifier(**param)
            train_mapk, val_mapk = cross_val_performance(
                X=X_train, y=y_train, model=model, calc_train_performance=True)
            # Build the result row as a copy. Writing the scores into
            # `param` itself would pollute the grid's dicts, and on the next
            # time_agg pass those extra keys would be handed to
            # RandomForestClassifier(**param) if the iterator reuses them.
            records.append({
                **param,
                'train_mapk': train_mapk,
                'val_mapk': val_mapk,
                'time_agg': time_agg,
            })

    res = pd.DataFrame(records)
    res.sort_values('val_mapk', ascending=False).to_csv(out_path, index=False)

    return res
Example #5
0
def main():
    """Interactively pick a model and run a single training and/or a search.

    Prompts for the model name and for whether to run the genetic algorithm
    and/or the grid search. With both answers ``no`` a single training run
    is performed; each ``yes`` answer triggers the corresponding optimizer.

    :raises ValueError: if the entered model name is not one of the
        supported names. Previously this surfaced later as an
        ``UnboundLocalError``, after the dataset had already been loaded.
    """
    MODEL_NAME = input(
        "Please introduce model name (dgn, conv3d, lstm_bucketing, lstm_sliding): "
    )
    GA = input(
        "Do you want to use genetic algorithm for your model? (yes/no): ")
    GS = input("Do you want to use grid search for your model? (yes/no): ")

    time_str = time.strftime("%Y-%m-%d_%H %M")
    path = PATH_SAVE_FIG + str(time_str)

    if MODEL_NAME == 'dgn':
        parameters = PARAMETERS_DGN
        model_function = model.dgn_model
    elif MODEL_NAME == 'conv3d':
        parameters = PARAMETERS_CONV3D
        model_function = model.conv3d_model
    elif MODEL_NAME in ('lstm_bucketing', 'lstm_sliding'):
        # Both LSTM variants share one parameter set and model builder.
        parameters = PARAMETERS_LSTM
        model_function = model.lstm_model
    else:
        # Fail fast, before the dataset load, instead of hitting unbound
        # locals further down.
        raise ValueError(
            "Unknown model name {!r}; expected one of: dgn, conv3d, "
            "lstm_bucketing, lstm_sliding".format(MODEL_NAME))

    print('\nLoad dataset...')
    data_set = load_data(MODEL_NAME)
    print('Done! \n')

    if GA == 'no' and GS == 'no':
        one_train(path, data_set, model_function, parameters)

    if GA == 'yes':
        optim = GeneticAlgorithm(path, parameters, model_function, data_set)
        optim.run()

    if GS == 'yes':
        optim = GridSearch(path, parameters, model_function, data_set)
        optim.run()
Example #6
0
# -------- make SIGS-Grid-Search importable, then import it -------- #
import sys

# The directory must be on sys.path before grid_search can be imported.
sys.path.append('SIGS-Grid-Search')

import grid_search
from grid_search import GridSearch

# Print the module object that was imported.
print(grid_search)

# -------- main file to run -------- #
main_file = '-m spinup.run'

# -------- dictionary of arguments for the grid search -------- #
# Each key maps to the list of values to try for that argument.
args = {
    'algo': ['ppo'],
    'env': ['CartPole-v0'],
    'steps_per_epoch': [4000],
    'epochs': [500],
    'seed': [0, 1, 2],
    'num_cpu': [2]
}

# -------- create the GridSearch object and run it -------- #
myGridSearch = GridSearch(main_file, args, num_process=15)
myGridSearch.run()
Example #7
0
 def __init__(self):
     """Load the pickled value function and set the agent's initial state."""
     value_fn_path = os.path.join(dir_path, "value_function.pkl")
     with open(value_fn_path, "rb") as fh:
         self.v_f = pickle.load(fh)
     self.gamma, self.advantage_fun = 0.9, None
     self.gridsearch = GridSearch()
Example #8
0
class Agent(object):
    """Agent for dispatching and reposition"""

    def __init__(self):
        """Load the pickled value function and set the agent's initial state."""
        # NOTE(review): pickle.load can execute arbitrary code, so
        # value_function.pkl must be a trusted file shipped with the agent.
        with open(os.path.join(dir_path, "value_function.pkl"), "rb") as f:
            self.v_f = pickle.load(f)
        # Discount applied per elapsed time index in dispatch().
        self.gamma = 0.9
        # Driver-by-order table rebuilt on every dispatch() call.
        self.advantage_fun = None
        self.gridsearch = GridSearch()

    def dispatch(self, dispatch_observ):
        """ Compute the assignment between drivers and passengers at each time step
        :param dispatch_observ: a list of dict, the key in the dict includes:
            order_id, int
            driver_id, int
            order_driver_distance, float
            order_start_location, a list as [lng, lat], float
            order_finish_location, a list as [lng, lat], float
            driver_location, a list as [lng, lat], float
            timestamp, int
            order_finish_timestamp, int
            day_of_week, int
            reward_units, float
            pick_up_eta, float

        :return: a list of dict, the key in the dict includes:
            order_id and driver_id, the pair indicating the assignment.
            An empty list is returned when there are no candidate pairs or
            the solver produces no solution.
        """
        if not dispatch_observ:
            return []

        # Map order ids and driver ids to dense 0..n-1 indices so the
        # constraint matrices are sized by the number of distinct ids rather
        # than by the largest id value.
        order_set = set()
        driver_set = set()
        for od in dispatch_observ:
            order_set.add(od["order_id"])
            driver_set.add(od["driver_id"])
        order_id_dict = {oid: i for i, oid in enumerate(order_set)}
        driver_id_dict = {did: i for i, did in enumerate(driver_set)}

        # Compute the advantage table: rows are dense driver indices,
        # columns are the original order ids.
        self.advantage_fun = pd.DataFrame(
            np.zeros([len(driver_id_dict), len(order_id_dict)]),
            columns=list(order_id_dict),
            index=list(driver_id_dict.values()))
        for od in dispatch_observ:
            oid, did = od["order_id"], driver_id_dict[od["driver_id"]]
            fgrid, lgrid = self.gridsearch.cal_loc_grid(
                [od["driver_location"], od["order_finish_location"]])
            ftid = self.cal_time_index_for_day(od["timestamp"])
            ltid = self.cal_time_index_for_day(od["order_finish_timestamp"])
            advantage = (pow(self.gamma, ltid - ftid) * self.v_f[ltid][lgrid]
                         - self.v_f[ftid][fgrid] + od["reward_units"])
            # Single-step label assignment; chained `df[col][row] = v` is
            # not guaranteed to write through to the DataFrame.
            self.advantage_fun.at[did, oid] = advantage

        # Build the solver inputs: one column per candidate pair, one row
        # per distinct order (resp. driver).
        rec_num = len(dispatch_observ)
        order_mat_row = [None] * rec_num
        driver_mat_row = [None] * rec_num
        order_idx_gmv = np.zeros(rec_num)
        for idx, od in enumerate(dispatch_observ):
            oid, did = od["order_id"], driver_id_dict[od["driver_id"]]
            order_idx_gmv[idx] = self.advantage_fun.at[did, oid]
            order_mat_row[idx] = order_id_dict[oid]
            driver_mat_row[idx] = did
        pair_cols = list(range(rec_num))
        order_mat = sp.coo_matrix(
            (np.ones(rec_num), (order_mat_row, pair_cols)),
            shape=(len(order_id_dict), rec_num))
        driver_mat = sp.coo_matrix(
            (np.ones(rec_num), (driver_mat_row, pair_cols)),
            shape=(len(driver_id_dict), rec_num))

        # Solve the 0/1 assignment problem with cvxpy: maximise the total
        # advantage with each order and each driver used at most once.
        # `@` is the matrix product; `*` is deprecated for that in cvxpy.
        X = cvx.Variable(rec_num, boolean=True)
        obj = order_idx_gmv @ X
        constr = [
            order_mat @ X <= 1,
            driver_mat @ X <= 1,
        ]
        prob = cvx.Problem(cvx.Maximize(obj), constr)
        prob.solve(solver=cvx.GLPK_MI,
                   glpk={
                       'msg_lev': 'GLP_MSG_OFF',
                       'presolve': 'GLP_ON'
                   })
        opt_X = X.value
        if opt_X is None:
            # The solver did not return a solution; assign nothing.
            return []

        # Convert the selected pairs back to the output format. The 0.5
        # threshold tolerates solver round-off around 0/1.
        dispatch_action = []
        for idx, od in enumerate(dispatch_observ):
            if opt_X[idx] > 0.5:
                oid, did = od["order_id"], od["driver_id"]
                dispatch_action.append(dict(order_id=oid, driver_id=did))
        return dispatch_action

    def reposition(self, repo_observ):
        """ Compute the reposition action for the given drivers
        :param repo_observ: a dict, the key in the dict includes:
            timestamp: int
            driver_info: a list of dict, the key in the dict includes:
                driver_id: driver_id of the idle driver in the treatment group, int
                grid_id: id of the grid the driver is located at, str
            day_of_week: int

        :return: a list of dict, the key in the dict includes:
            driver_id: corresponding to the driver_id in the od_list
            destination: id of the grid the driver is repositioned to, str
        """
        # the default reposition is to let drivers stay where they are
        return [{
            'driver_id': driver['driver_id'],
            'destination': driver['grid_id']
        } for driver in repo_observ['driver_info']]

    def cal_time_index_for_day(self,
                               timestamp,
                               dispatch_freqency_gap=300,
                               BASEHOUR=0) -> int:
        """Convert an absolute timestamp into a time-slot index within the day.

        :param timestamp: absolute time in seconds, interpreted in the
            local time zone via ``time.localtime``.
        :param dispatch_freqency_gap: slot width in seconds (default 300,
            i.e. 5 minutes).
        :param BASEHOUR: local hour that corresponds to slot 0 (default 0,
            i.e. 0:00).
        :return: the slot index (tid).
        """
        ts = time.localtime(timestamp)
        seconds_since_base = ((ts.tm_hour - BASEHOUR) * 3600
                              + ts.tm_min * 60 + ts.tm_sec)
        # Honour the caller-supplied slot width; it used to be hard-coded
        # to 300 regardless of the argument.
        return seconds_since_base // dispatch_freqency_gap
Example #9
0
    # Load two sets for sequence names[i]: one over estimation_range[i] and
    # one over prediction_range[i], both with grayscale disabled.
    [X_est, y_est] = load_data(data_path,
                               names[i],
                               estimation_range[i],
                               grayscale=False)
    [X_pred, y_pred] = load_data(data_path,
                                 names[i],
                                 prediction_range[i],
                                 grayscale=False)

    # Candidate values for the two searched parameters, built from the
    # min/max/step entries of a[i] and r[i]; the rho values are additionally
    # divided by 10.
    alpha_range = np.arange(a[i].get('min'), a[i].get('max'), a[i].get('step'))
    rho_range = np.arange(r[i].get('min'), r[i].get('max'),
                          r[i].get('step')) / 10

    # Grid search over alpha and rho; y_est is not passed (None is given in
    # its place), only y_pred is.
    parameters = {'alpha': alpha_range, 'rho': rho_range}
    gs = GridSearch(rgbEstimatorAdaptative(metric="f1"), parameters)
    gs.fitAndPredict(X_est, X_pred, None, y_pred)

    fig = plt.figure()
    ax = fig.gca(projection='3d')

    # Lay the search results out on an (alpha, rho) grid for a surface plot.
    # NOTE(review): assumes gs.results is ordered alpha-major (all rho values
    # for the first alpha, then the next alpha, ...) - TODO confirm against
    # GridSearch.
    X, Y = np.meshgrid(rho_range, alpha_range)
    Z = np.array(gs.results).reshape(len(alpha_range), len(rho_range))

    print('best_metric: ' + str(gs.best_score))
    print('best_params: ' + str(gs.best_params))

    # Plot the surface.
    ax.set_zlim(0, 1)
    ax.set_title(names[i])
    ax.set_xlabel('rho')
Example #10
0
        "n_estimators": [10, 50, 100, 200, 400, 750, 800, 1000, 2000],
        "base_estimator__max_depth": [1, 2, 3, 5],
        "base_estimator__random_state": [0],
        "random_state": [0]
    }
    #    params = {"C": [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000]}

    datamanager = CaltechManager()
    categories = [
        c for c in os.listdir(datamanager.PATHS["CATEGORIES_DIR"])
        if c != datamanager.BACKGROUND and os.path.splitext(c)[1] != ".py"
    ]

    #kernels, gammas = build_train_kernels(categories, datamanager)
    #print "Finished building kernels"

    #grids = (GridSearch(SVC(kernel="precomputed"), c) for c in categories)
    # grids = (GridSearch(RandomForestClassifier(), c) for c in categories)

    grids = [
        GridSearch(AdaBoostClassifier(), datamanager, c) for c in categories
    ]

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for g in grids:
            g.grid_search(params, weight_samples=False)
        generate_evaluation_summary(grids, "grid_test.csv")

    print "Total execution time: %f minutes" % ((time.time() - total) / 60.0)
                        metavar='N',
                        type=str,
                        nargs='+',
                        help='.pkl grid search file.')
    parser.add_argument('--topk',
                        help='show top k examples.',
                        type=int,
                        default=3)
    parser.add_argument('--show',
                        help='show grid search plots.',
                        action='store_true')
    args = parser.parse_args()

    # load grid search file
    print("Loading file", args.file)
    gs = GridSearch.load(args.file[0])

    # print statistics
    stats = gs.get_stats()
    print("")
    print("Stats:")
    print(" - mean: %.5f, med: %.5f, std: %.5f" %
          (stats['mean'], stats['median'], stats['std']))
    print(" - min:  %.5f, max: %.5f" % (stats['min'], stats['max']))
    print(" - eval: %d, total: %d" % (stats['n_eval'], stats['n_total']))

    # print best configuration
    print("")
    gs.print_best()

    # print best configuration