def main():
    """Hyper-search entry point.

    Encodes the hyperparameter space into a flat sklearn feature vector
    (one-hot for categorical hypers, raw value for bounded numerics), then
    loops forever: re-fetch all prior runs from the database, re-fit a
    surrogate model, and launch the next run via gradient boosting
    (``--boost``) or GP Bayesian optimization. Refitting from the DB each
    pass lets multiple machines share one search.
    """
    import gp
    from sklearn.feature_extraction import DictVectorizer

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--guess', type=int, default=-1,
        help="Run the hard-coded 'guess' values first before exploring")
    parser.add_argument(
        '--boost', action="store_true", default=False,
        help="Use custom gradient-boosting optimization, or bayesian optimization?")
    utils.add_common_args(parser)
    args = parser.parse_args()

    # Encode features. Instantiate once just to read the hyper definitions;
    # hard-coded (non-searched) hypers are dropped from the space.
    hsearch = HSearchEnv(cli_args=args)
    hypers_, hardcoded = hsearch.hypers, hsearch.hardcoded
    hypers_ = {k: v for k, v in hypers_.items() if k not in hardcoded}
    hsearch.close()

    # Build a matrix of features, length = max feature size. Shorter columns
    # are forward-filled so DictVectorizer sees every possible value.
    max_num_vals = max((len(v['vals']) for v in hypers_.values()), default=0)
    empty_obj = {k: None for k in hypers_}
    mat = pd.DataFrame([empty_obj.copy() for _ in range(max_num_vals)])
    for k, hyper in hypers_.items():
        for i, v in enumerate(hyper['vals']):
            mat.loc[i, k] = v
    mat.ffill(inplace=True)

    # Above is Pandas-friendly stuff; now convert to sklearn-friendly & pipe
    # through OneHotEncoder (via DictVectorizer).
    vectorizer = DictVectorizer()
    vectorizer.fit(mat.T.to_dict().values())
    feat_names = vectorizer.get_feature_names()

    # Map TensorForce actions to GP-compatible `domain`. One-hot columns are
    # named "attr=val" and are NOT keys in hypers_, so they get [0, 1].
    bounds = []
    for k in feat_names:
        hyper = hypers_.get(k, False)
        # BUG FIX: reset per iteration. Previously `bounded`/`min_`/`max_`
        # were only assigned inside `if hyper:`, so a one-hot column raised
        # NameError on the first pass or inherited the previous bounded
        # hyper's [min_, max_] — wrong bounds for that feature.
        bounded, min_, max_ = False, 0, 1
        if hyper:
            bounded = hyper['type'] == 'bounded'
            min_, max_ = min(hyper['vals']), max(hyper['vals'])
        bounds.append([min_, max_] if bounded else [0, 1])

    def hypers2vec(obj):
        """Flatten a {hyper: value} dict into the vectorizer's float vector."""
        h = dict()
        for k, v in obj.items():
            if k in hardcoded:
                continue
            # Bools must be numeric for DictVectorizer; None/falsy becomes 0.
            h[k] = float(v) if type(v) == bool else (v or 0.)
        return vectorizer.transform(h).toarray()[0]

    def vec2hypers(vec):
        """Reverse the encoding: feature vector -> {hyper: value} dict.

        https://stackoverflow.com/questions/22548731/how-to-reverse-sklearn-onehotencoder-transform-to-recover-original-data
        https://github.com/scikit-learn/scikit-learn/issues/4414
        """
        decoded = vectorizer.inverse_transform([vec])[0]
        obj = {}
        for k, v in decoded.items():
            if '=' not in k:
                obj[k] = v
                continue
            if k in obj:
                continue  # we already handled this attr=val group (below)
            # Find the winner (max-scored) option for this categorical key.
            score, attr, val = v, k.split('=')[0], k.split('=')[1]
            for k2, score2 in decoded.items():
                if k2.startswith(attr + '=') and score2 > score:
                    score, val = score2, k2.split('=')[1]
            obj[attr] = val
        # Bools come in as floats. Also, if the result is False they don't
        # come in at all! So iterate the hyper definitions here instead of
        # nesting this logic in the decoded-iteration above.
        for k, v in hypers_.items():
            if v['type'] == 'bool':
                obj[k] = bool(round(obj.get(k, 0.)))
        return obj

    def loss_fn(params):
        """One search step: instantiate-and-run rl_hsearch with these hypers
        and return the reward (which we maximize)."""
        hsearch = HSearchEnv(cli_args=args)
        reward = hsearch.execute(vec2hypers(params))
        hsearch.close()
        return [reward]

    guess_i = 0
    while True:
        # Every iteration, re-fetch from the database & pre-train a new model.
        # Acts the same as saving/loading a model to disk, but this allows
        # distributing the search across servers easily.
        conn_runs = data.engine_runs.connect()
        sql = "select hypers, advantages, advantage_avg from runs where flag=:f"
        runs = conn_runs.execute(text(sql), f=args.net_type).fetchall()
        conn_runs.close()
        X, Y = [], []
        for run in runs:
            X.append(hypers2vec(run.hypers))
            Y.append([utils.calculate_score(run.advantages)])
        boost_model = print_feature_importances(X, Y, feat_names)

        if args.guess != -1:
            # Run the hard-coded guess overrides first, one per loop pass.
            guess = {k: v['guess'] for k, v in hypers_.items()}
            guess.update(utils.guess_overrides[args.guess][guess_i])
            loss_fn(hypers2vec(guess))
            guess_i += 1
            if guess_i > len(utils.guess_overrides[args.guess]) - 1:
                args.guess = -1  # guesses exhausted: start on GP
            continue

        if args.boost:
            print('Using gradient-boosting')
            boost_optimization(
                model=boost_model,
                loss_fn=loss_fn,
                bounds=np.array(bounds),
                x_list=X,
                y_list=Y)
        else:
            # Evidently duplicate values break GP. Many of these are ints, so
            # they're definite duplicates; tack on a small epsilon (1e-6, below
            # gp.py's min threshold) to make them distinct.
            # NOTE(review): hypers whose scale is below this epsilon (e.g.
            # learning-rate) may be perturbed meaningfully — confirm.
            for x in X:
                for i, v in enumerate(x):
                    x[i] += np.random.random() * 1e-6
            gp.bayesian_optimisation2(
                loss_fn=loss_fn,
                bounds=np.array(bounds),
                x_list=X,
                y_list=Y)
def main():
    """Hyper-search driver (DB-backed, distributable).

    Encodes the hyperparameter definitions into a flat sklearn feature space
    (one-hot categoricals + bounded numerics), then loops: re-fetch all runs
    from the database, re-fit a surrogate, and execute the next run via
    gradient boosting (--boost) or GP Bayesian optimization.

    NOTE(review): this is a second definition of ``main`` in the same file;
    if the earlier one is also present at import time, this one shadows it —
    confirm which version is intended to survive.
    """
    import gp
    from sklearn.feature_extraction import DictVectorizer
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--guess',
        type=int,
        default=-1,
        help="Run the hard-coded 'guess' values first before exploring")
    parser.add_argument(
        '--boost',
        action="store_true",
        default=False,
        help=
        "Use custom gradient-boosting optimization, or bayesian optimization?")
    utils.add_common_args(parser)
    args = parser.parse_args()

    # Encode features: instantiate once just to read the hyper definitions,
    # dropping hard-coded (non-searched) hypers from the space.
    hsearch = HSearchEnv(cli_args=args)
    hypers_, hardcoded = hsearch.hypers, hsearch.hardcoded
    hypers_ = {k: v for k, v in hypers_.items() if k not in hardcoded}
    hsearch.close()

    # Build a matrix of features, length = max feature size; shorter columns
    # get forward-filled below so DictVectorizer sees every possible value.
    max_num_vals = 0
    for v in hypers_.values():
        l = len(v['vals'])
        if l > max_num_vals: max_num_vals = l
    empty_obj = {k: None for k in hypers_}
    mat = pd.DataFrame([empty_obj.copy() for _ in range(max_num_vals)])
    for k, hyper in hypers_.items():
        for i, v in enumerate(hyper['vals']):
            mat.loc[i, k] = v
    mat.ffill(inplace=True)

    # Above is Pandas-friendly stuff, now convert to sklearn-friendly & pipe
    # through OneHotEncoder (via DictVectorizer).
    vectorizer = DictVectorizer()
    vectorizer.fit(mat.T.to_dict().values())
    feat_names = vectorizer.get_feature_names()

    # Map TensorForce actions to GP-compatible `domain`.
    # One-hot columns are named "attr=val" and are not keys in hypers_, so
    # they fall through to the [0, 1] bounds (bounded stays False for them).
    bounds = []
    for k in feat_names:
        hyper = hypers_.get(k, False)
        bounded = False  # reset per feature so one-hot columns get [0, 1]
        if hyper:
            bounded, min_, max_ = hyper['type'] == 'bounded', min(
                hyper['vals']), max(hyper['vals'])
        b = [min_, max_] if bounded else [0, 1]
        bounds.append(b)

    def hypers2vec(obj):
        # Flatten a {hyper: value} dict into the vectorizer's float vector.
        # Bools must be numeric for DictVectorizer; None/falsy becomes 0.
        h = dict()
        for k, v in obj.items():
            if k in hardcoded: continue
            if type(v) == bool: h[k] = float(v)
            else: h[k] = v or 0.
        return vectorizer.transform(h).toarray()[0]

    def vec2hypers(vec):
        # Reverse the encoding: feature vector -> {hyper: value} dict.
        # https://stackoverflow.com/questions/22548731/how-to-reverse-sklearn-onehotencoder-transform-to-recover-original-data
        # https://github.com/scikit-learn/scikit-learn/issues/4414
        reversed = vectorizer.inverse_transform([vec])[0]
        obj = {}
        for k, v in reversed.items():
            if '=' not in k:
                obj[k] = v
                continue
            if k in obj: continue  # we already handled this x=y logic (below)
            # Find the winner (max) option for this key
            score, attr, val = v, k.split('=')[0], k.split('=')[1]
            for k2, score2 in reversed.items():
                if k2.startswith(attr + '=') and score2 > score:
                    score, val = score2, k2.split('=')[1]
            obj[attr] = val
        # Bools come in as floats. Also, if the result is False they don't
        # come in at all! So we iterate the hyper definitions here instead of
        # nesting this logic in the reversed-iteration above.
        for k, v in hypers_.items():
            if v['type'] == 'bool': obj[k] = bool(round(obj.get(k, 0.)))
        return obj

    # Specify the "loss" function (which we'll maximize) as a single
    # rl_hsearch instantiate-and-run.
    def loss_fn(params):
        hsearch = HSearchEnv(cli_args=args)
        reward = hsearch.execute(vec2hypers(params))
        hsearch.close()
        return [reward]

    guess_i = 0
    while True:
        # Every iteration, re-fetch from the database & pre-train new model.
        # Acts same as saving/loading a model to disk, but this allows
        # distributing across servers easily.
        conn_runs = data.engine_runs.connect()
        sql = "select hypers, returns from runs where flag=:f"
        runs = conn_runs.execute(text(sql), f=args.net_type).fetchall()
        conn_runs.close()
        X, Y = [], []
        for run in runs:
            X.append(hypers2vec(run.hypers))
            Y.append([utils.calculate_score(run.returns)])
        boost_model = print_feature_importances(X, Y, feat_names)

        if args.guess != -1:
            # Run the hard-coded guess overrides first, one per loop pass.
            guess = {k: v['guess'] for k, v in hypers_.items()}
            guess.update(utils.guess_overrides[args.guess][guess_i])
            loss_fn(hypers2vec(guess))
            guess_i += 1
            if guess_i > len(utils.guess_overrides[args.guess]) - 1:
                args.guess = -1  # start on GP
            continue

        if args.boost:
            print('Using gradient-boosting')
            boost_optimization(model=boost_model,
                               loss_fn=loss_fn,
                               bounds=np.array(bounds),
                               x_list=X,
                               y_list=Y)
        else:
            # Evidently duplicate values break GP. Many of these are ints, so
            # they're definite duplicates. Either way, tack on some small
            # epsilon to make them different (1e-6 < gp.py's min threshold,
            # make sure that #'s not a problem). I'm concerned about this
            # since many hypers can go below that epislon (eg learning-rate).
            for x in X:
                for i, v in enumerate(x):
                    x[i] += np.random.random() * 1e-6
            gp.bayesian_optimisation2(loss_fn=loss_fn,
                                      bounds=np.array(bounds),
                                      x_list=X,
                                      y_list=Y)
def main_gp():
    """GPyOpt-flavored hyper-search entry point.

    Same scheme as ``main``: encode the hyper space into a flat sklearn
    feature vector, then loop — re-fetch all runs from the database, re-fit,
    and run ONE optimization step per pass (``max_iter=1``/``n_iters=1``) so
    multiple machines sharing the database can parallelize the search.
    Uses GPyOpt when ``--gpyopt`` is set, else the local sklearn-based gp.py.
    """
    import gp, GPyOpt
    from sklearn.feature_extraction import DictVectorizer

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-a', '--agent', type=str, default='ppo_agent',
        help="Agent to use (ppo_agent|dqn_agent|etc)")
    parser.add_argument(
        '-g', '--gpu_split', type=float, default=1,
        help="Num ways we'll split the GPU (how many tabs you running?)")
    parser.add_argument(
        '-n', '--net_type', type=str, default='lstm',
        help="(lstm|conv2d) Which network arch to use")
    parser.add_argument(
        '--guess', action="store_true", default=False,
        help="Run the hard-coded 'guess' values first before exploring")
    parser.add_argument(
        '--gpyopt', action="store_true", default=False,
        help="Use GPyOpt library, or use basic sklearn GP implementation? "
             "GpyOpt shows more promise, but has bugs.")
    args = parser.parse_args()

    # Encode features. Instantiate once just to read the hyper definitions;
    # hard-coded (non-searched) hypers are dropped from the space.
    hsearch = HSearchEnv(gpu_split=args.gpu_split, net_type=args.net_type)
    hypers_, hardcoded = hsearch.hypers, hsearch.hardcoded
    hypers_ = {k: v for k, v in hypers_.items() if k not in hardcoded}
    hsearch.close()

    # Build a matrix of features, length = max feature size. Shorter columns
    # are forward-filled so DictVectorizer sees every possible value.
    max_num_vals = max((len(v['vals']) for v in hypers_.values()), default=0)
    empty_obj = {k: None for k in hypers_}
    mat = pd.DataFrame([empty_obj.copy() for _ in range(max_num_vals)])
    for k, hyper in hypers_.items():
        for i, v in enumerate(hyper['vals']):
            mat.loc[i, k] = v
    mat.ffill(inplace=True)

    # Above is Pandas-friendly stuff; now convert to sklearn-friendly & pipe
    # through OneHotEncoder (via DictVectorizer).
    vectorizer = DictVectorizer()
    vectorizer.fit(mat.T.to_dict().values())
    feat_names = vectorizer.get_feature_names()

    # Map TensorForce actions to GPyOpt-compatible `domain`. One-hot columns
    # are named "attr=val" and are NOT keys in hypers_ -> discrete/[0, 1].
    bounds = []
    for k in feat_names:
        hyper = hypers_.get(k, False)
        # BUG FIX: reset per iteration. Previously `bounded`/`min_`/`max_`
        # were only assigned inside `if hyper:`, so a one-hot column raised
        # NameError on the first pass or inherited the previous bounded
        # hyper's stale [min_, max_] — wrong domain for that feature.
        bounded, min_, max_ = False, 0, 1
        if hyper:
            bounded = hyper['type'] == 'bounded'
            min_, max_ = min(hyper['vals']), max(hyper['vals'])
        if args.gpyopt:
            b = {'name': k, 'type': 'discrete', 'domain': (0, 1)}
            if bounded:
                b.update(type='continuous', domain=(min_, max_))
        else:
            b = [min_, max_] if bounded else [0, 1]
        bounds.append(b)

    def hypers2vec(obj):
        """Flatten a {hyper: value} dict into the vectorizer's float vector."""
        h = dict()
        for k, v in obj.items():
            if k in hardcoded:
                continue
            # Bools must be numeric for DictVectorizer; None/falsy becomes 0.
            h[k] = float(v) if type(v) == bool else (v or 0.)
        return vectorizer.transform(h).toarray()[0]

    def vec2hypers(vec):
        """Reverse the encoding: feature vector -> {hyper: value} dict.

        https://stackoverflow.com/questions/22548731/how-to-reverse-sklearn-onehotencoder-transform-to-recover-original-data
        https://github.com/scikit-learn/scikit-learn/issues/4414
        """
        if not args.gpyopt:
            vec = [vec]  # gp.py passes as flat, GPyOpt as wrapped
        decoded = vectorizer.inverse_transform(vec)[0]
        obj = {}
        for k, v in decoded.items():
            if '=' not in k:
                obj[k] = v
                continue
            if k in obj:
                continue  # we already handled this attr=val group (below)
            # Find the winner (max-scored) option for this categorical key.
            score, attr, val = v, k.split('=')[0], k.split('=')[1]
            for k2, score2 in decoded.items():
                if k2.startswith(attr + '=') and score2 > score:
                    score, val = score2, k2.split('=')[1]
            obj[attr] = val
        # Bools come in as floats. Also, if the result is False they don't
        # come in at all! So iterate the hyper definitions here instead of
        # nesting this logic in the decoded-iteration above.
        for k, v in hypers_.items():
            if v['type'] == 'bool':
                obj[k] = bool(round(obj.get(k, 0.)))
        return obj

    def loss_fn(params):
        """One search step: instantiate-and-run rl_hsearch with these hypers
        and return the reward (which we maximize)."""
        hsearch = HSearchEnv(gpu_split=args.gpu_split, net_type=args.net_type)
        reward = hsearch.execute(vec2hypers(params))
        hsearch.close()
        return [reward]

    while True:
        conn = data.engine.connect()
        sql = "SELECT hypers, reward_avg FROM runs WHERE flag=:f"
        runs = conn.execute(text(sql), f=args.net_type).fetchall()
        conn.close()
        X, Y = [], []
        for run in runs:
            X.append(hypers2vec(run.hypers))
            Y.append([run.reward_avg])
        print_feature_importances(X, Y, feat_names)

        if args.guess:
            # Seed the search with the hard-coded guesses once.
            # NOTE(review): this appends [None] to Y, which then goes through
            # np.array(Y) in the GPyOpt pretrain dict below — confirm GPyOpt
            # tolerates a None observation, or that this path predates it.
            guesses = {k: v['guess'] for k, v in hypers_.items()}
            X.append(hypers2vec(guesses))
            Y.append([None])
            args.guess = False

        if args.gpyopt:
            pretrain = {'X': np.array(X), 'Y': np.array(Y)} if X else {}
            opt = GPyOpt.methods.BayesianOptimization(
                f=loss_fn, domain=bounds, maximize=True, **pretrain)
            # Using max_iter=1 because of the database setup. Normally you'd
            # go until convergence, but since the runs live in a database we
            # can parallelize across machines (connected to the same DB).
            # Between runs we grab results from the other machines and merge
            # with our own; so run once, reset the model-fitting with the
            # full (possibly grown) database, and repeat.
            opt.run_optimization(max_iter=1)
        else:
            gp.bayesian_optimisation2(
                n_iters=1,
                loss_fn=loss_fn,
                bounds=np.array(bounds),
                x_list=X,
                y_list=Y)