def main():
    config = tf.ConfigProto()

    # Avoid warning message errors
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    # GPU memory growth disabled here (allocate memory up front)
    config.gpu_options.allow_growth = False
    K.clear_session()

    with tf.Session(config=config):
        model.learn(policy=policies.PPOPolicy,
                    env=SubprocVecEnv([
                        env.make_gumball_env(),
                        env.make_gumball_env(),
                        env.make_gumball_env(),
                        env.make_gumball_env(),
                    ]),
                    nsteps=16,  # Steps per environment
                    # nsteps=2048,  # Steps per environment
                    # total_timesteps=10000000,
                    total_timesteps=10000000,
                    gamma=0.99,
                    lam=0.95,
                    vf_coef=0.5,
                    ent_coef=0.01,
                    lr=lambda _: 2e-4,
                    cliprange=lambda _: 0.1,  # 0.1 * learning_rate
                    max_grad_norm=0.5,
                    log_interval=10)
def main(): config = tf.ConfigProto() # Avoid warning message errors os.environ["CUDA_VISIBLE_DEVICES"] = "0" # Allowing GPU memory growth config.gpu_options.allow_growth = True with tf.Session(config=config): model.learn(policy=policies.A2CPolicy, env=SubprocVecEnv( [env.make_train_0, env.make_train_1, env.make_train_2, env.make_train_3, env.make_train_4, env.make_train_5, env.make_train_6, env.make_train_7, env.make_train_8, env.make_train_9, env.make_train_10, env.make_train_11, env.make_train_12]), nsteps=2048, # Steps per environment total_timesteps=10000000, gamma=0.99, #discount rate lam=0.95, #lambda used to generalize advantage estimation vf_coef=0.5, # the value coefficient ent_coef=0.01, # entropy coefficient lr=2e-4, max_grad_norm=0.5, #used to have maximum gradient to avoid very big movements during the gredient log_interval=10 #print in the console each 10 updates )
def main(): # data data_path = "/home/kzk/datasets/uci_csv/spam.csv" data = np.loadtxt(data_path, delimiter=" ") y = data[:, 0] X = data[:, 1:] n = X.shape[0] X = np.hstack((X, np.reshape(np.ones(n), (n, 1)))) X_l = X X_u = X # learn C = 1 model = LSVMClassifier(multi_class="ovo", C=C) model.learn(X_l, y, X_u) # predict outputs = [] for i, x in enumerate(X): outputs_ = model.predict(x) outputs.append(outputs_[0][0]) # confusion matrix cm = confusion_matrix(y, outputs) print cm print 100.0 * np.sum(cm.diagonal()) / len(y)
def main(): # data data_path = "/home/kzk/datasets/uci_csv/iris.csv" data = np.loadtxt(data_path, delimiter=" ") y = data[:, 0] X = data[:, 1:] n = X.shape[0] X = np.hstack((X, np.reshape(np.ones(n), (n, 1)))) # learn model = RVMClassifier( max_itr=50, threshold=1e-4, learn_type="batch", multi_class="ovo", alpha_threshold=1e-24, ) model.learn(X, y) # predict outputs = [] for i, x in enumerate(X): outputs_ = model.predict(x) outputs.append(outputs_[0][0]) # confusion matrix cm = confusion_matrix(y, outputs) print cm print 100.0 * np.sum(cm.diagonal()) / len(y)
def get_score(data):
    target = 'label'
    feature = [x for x in data.columns if x not in [target, 'id']]
    score_list = []
    for i in range(5):
        x_train, x_test, y_train, y_test = train_test_split(
            data[feature], data[target], test_size=0.4, random_state=1)
        pred_proba_gdt = 0
        pred_proba_xgb = 0
        pred_proba_rf = 0
        # ff = []
        # use a separate index for the inner ensemble loop so the fold index i is not shadowed
        for j in range(5):
            pred_proba_gdt += learn(x_train, y_train, x_test, j, 'GDBT')[1]
            pred_proba_xgb += learn(x_train, y_train, x_test, j, 'XGB')[1]
            pred_proba_rf += learn(x_train, y_train, x_test, j, 'RF')
        pred_proba = pred_proba_gdt + pred_proba_rf * 1.5 + pred_proba_xgb * 2.0
        pred_max = np.max(pred_proba, axis=1)
        zipa = list(zip(*pred_proba))
        zz = pd.DataFrame()
        zz['max'] = pred_max
        zz['p0'] = (zipa[0] / zz['max']).astype('int') * 0
        zz['p1000'] = (zipa[1] / zz['max']).astype('int') * 1000
        zz['p1500'] = (zipa[2] / zz['max']).astype('int') * 1500
        zz['p2000'] = (zipa[3] / zz['max']).astype('int') * 2000
        zz['label'] = zz['p0'] + zz['p1000'] + zz['p1500'] + zz['p2000']
        pred = zz['label'].values
        score = f1(pred, y_test)
        print('final_score : ' + str(i), score)
        score_list.append(score)
    return score_list
def main(): config = tf.ConfigProto() os.environ["CUDA_VISIBLE_DEVICES"]="0" #Allow GPU Memory Growth config.gpu_options.allow_growth = True #note: SubprocVecEnv places all our environments in a vector which will allow for us to run them simultaneously with tf.Session(config=config): #Call the learn function with all the required A2C Policy Params model.learn( policy=policies.A2CPolicy, env=SubprocVecEnv([env.make_train_0, env.make_train_1, env.make_train_2, env.make_train_3, env.make_train_4, env.make_train_5, env.make_train_6, env.make_train_7, env.make_train_8, env.make_train_9, env.make_train_10, env.make_train_11, env.make_train_12]), nsteps=2048, total_timesteps=10000000, gamma=0.99, lam=0.95, vf_coeff=0.5, ent_coeff=0.01, lr = lambda _: 2e-4, max_grad_norm = 0.5, #Avoid big gradient steps log_interval = 10 #print in our console every 10 weight updates )
def main():
    config = tf.ConfigProto()
    # Avoid warning message errors
    # os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    #
    # Allowing GPU memory growth
    # In some cases it is desirable for the process to only allocate a subset of the available
    # memory, or to only grow the memory usage as it is needed by the process. TensorFlow provides
    # two configuration options on the session to control this. The first is the allow_growth
    # option, which attempts to allocate only as much GPU memory as needed based on runtime
    # allocations: it starts out allocating very little memory, and as sessions get run and more
    # GPU memory is needed, we extend the GPU memory region needed by the TensorFlow process.
    # config.gpu_options.allow_growth = True

    # [env.make_train_0, env.make_train_1, env.make_train_2, env.make_train_3,
    #  env.make_train_4, env.make_train_5, env.make_train_6, env.make_train_7]
    # env.make_train_0, env.make_train_0, env.make_train_0, env.make_train_0,
    # env.make_train_0, env.make_train_0, env.make_train_0, env.make_train_0

    flag.on_desktop = True
    if flag.ON_DESKTOP:
        nsteps = 1
    else:
        nsteps = 2048

    with tf.Session(config=config):
        model.learn(policy=policies.A2CPolicy,
                    env=SubprocVecEnv([env.make_train_0]),
                    nsteps=nsteps,
                    total_timesteps=1000000000,
                    gamma=0.99,
                    lam=0.95,
                    vf_coef=0.5,
                    ent_coef=0.001,
                    lr=2e-4,
                    max_grad_norm=0.5,
                    log_interval=5,
                    save_interval=5)
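# The comment block in the snippet above paraphrases TensorFlow 1.x's session-level GPU memory
# controls. A minimal sketch of the two options it mentions, assuming TF 1.x (allow_growth is
# used throughout these snippets; per_process_gpu_memory_fraction appears in a later one):
import tensorflow as tf

# Option 1: start with a small allocation and grow the GPU memory region on demand.
growth_config = tf.ConfigProto()
growth_config.gpu_options.allow_growth = True

# Option 2: cap the process at a fixed fraction of total GPU memory (here 50%).
capped_config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.5))

# Either config object is then passed when the session is created, e.g.:
# with tf.Session(config=growth_config):
#     ...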
def main(): config = tf.ConfigProto() # Avoid warning message errors os.environ["CUDA_VISIBLE_DEVICES"] = "0" # Allowing GPU memory growth config.gpu_options.allow_growth = False with tf.Session(config=config): model.learn( policy=policies.PPOPolicy, env=SubprocVecEnv([env.make_train_0]), # env.make_train_1, # env.make_train_2, # env.make_train_3]), # env.make_train_4, # env.make_train_5, # env.make_train_6, # env.make_train_7, # env.make_train_8, # env.make_train_9, # env.make_train_10, # env.make_train_11, # env.make_train_12]), nsteps=2048, # Steps per environment total_timesteps=100000, gamma=0.99, lam=0.95, vf_coef=0.5, ent_coef=0.01, lr=lambda _: 2e-2, cliprange=lambda _: 0.3, # 0.1 * learning_rate max_grad_norm=0.5, log_interval=10)
def main(): config = tf.ConfigProto() # Avoid warning message errors os.environ["CUDA_VISIBLE_DEVICES"]="0" # Allowing GPU memory growth config.gpu_options.allow_growth = True with tf.Session(config=config): model.learn(policy=policies.PPOPolicy, env=SubprocVecEnv([env.make_train_0, env.make_train_1, env.make_train_2, env.make_train_3, env.make_train_4, env.make_train_5, env.make_train_6, env.make_train_7, env.make_train_8, env.make_train_9, env.make_train_10, env.make_train_11, env.make_train_12]), nsteps=2048, # Steps per environment total_timesteps=10000000, gamma=0.99, lam = 0.95, vf_coef=0.5, ent_coef=0.01, lr = lambda _: 2e-4, cliprange = lambda _: 0.1, # 0.1 * learning_rate max_grad_norm = 0.5, log_interval = 10 )
def sparse_main():
    from sklearn.datasets import load_svmlight_file
    from sklearn.metrics import confusion_matrix
    import time

    # data
    data_path = "/home/kzk/datasets/news20/news20.dat"
    (X, y) = load_svmlight_file(data_path)
    n = X.shape[0]
    X = sp.sparse.hstack(
        (X, sp.sparse.csr_matrix(np.reshape(np.ones(n), (n, 1)))))
    X_l = sp.sparse.csr_matrix(X)
    X_u = sp.sparse.csr_matrix(X)

    st = time.time()

    # learn
    model = RegularizedHPFSSLClassifier(max_itr=0, threshold=1e-4,
                                        learn_type="online", multi_class="ovo")
    model.learn(X_l, y, X_u)
    et = time.time()
    print("Elapsed time: %f [s]" % (et - st))

    # predict
    outputs = []
    for i, x in enumerate(X):
        outputs_ = model.predict(x)
        outputs.append(outputs_[0][0])

    # confusion matrix
    cm = confusion_matrix(y, outputs)
    print(cm)
    print(100.0 * np.sum(cm.diagonal()) / len(y))
def main(): config = tf.ConfigProto() os.environ["CUDA_VISIBLE_DEVICES"] = "0" config.gpu_options.allow_growth = True environment_list = [] for i in range(10): environment_list.append(env.make_env) env_vector = SubprocVecEnv(environment_list) with tf.Session(config=config): model.learn(policy=policies.A2CNetwork, env=env_vector, nsteps=2048, total_timesteps=10000000, gamma=0.99, lam=0.95, vf_coef=0.5, ent_coef=0.01, lr=2e-4, max_grad_norm=0.5, log_interval=2, restart=True)
def main():
    config = tf.ConfigProto(gpu_options=tf.GPUOptions(
        per_process_gpu_memory_fraction=0.5))

    # Avoid warning message errors
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Allowing GPU memory growth
    config.gpu_options.allow_growth = True

    with tf.Session(config=config):
        model.learn(
            policy=policies.A2CPolicy,
            env=SubprocVecEnv([
                env.make_train_0, env.make_train_1, env.make_train_2,
                env.make_train_3, env.make_train_4, env.make_train_5,
                env.make_train_6, env.make_train_7, env.make_train_8,
                env.make_train_9, env.make_train_10, env.make_train_11,
                env.make_train_12
            ]),
            nsteps=2048,  # Steps per environment
            total_timesteps=10000000,
            gamma=0.99,
            lam=0.95,
            vf_coef=0.5,
            ent_coef=0.01,
            lr=2e-4,
            max_grad_norm=0.5,
            log_interval=10)
def main(): # data data_path = "/home/kzk/datasets/uci_csv/glass.csv" data = np.loadtxt(data_path, delimiter=" ") y = data[:, 0] X = data[:, 1:] n = X.shape[0] X = np.hstack((X, np.reshape(np.ones(n), (n, 1)))) X_l = X X_u = X # learn lam = 100 gamma_s = .001 model = LapRLSClassifier(lam=lam, normalized=False, kernel="rbf", gamma_s=gamma_s, multi_class="ovo") model.learn(X_l, y, X_u) # predict outputs = [] for i, x in enumerate(X): outputs_ = model.predict(x) outputs.append(outputs_[0][0]) # confusion matrix cm = confusion_matrix(y, outputs) print cm print 100.0 * np.sum(cm.diagonal()) / len(y)
def POST(self):
    partial = json.loads(web.data())
    rawdata = model.build_raw_data(partial, web.ctx.environ, web.ctx.ip)
    identity = model.create_user(partial["name"])
    whorls = model.create_get_whorls(rawdata)
    model.learn(whorls, identity)
    Session.commit()
    return ""
def main(game, representation, experiment, steps, n_cpu, render, logging, **kwargs):
    env_name = '{}-{}-v0'.format(game, representation)
    exp_name = get_exp_name(game, representation, experiment, **kwargs)
    resume = kwargs.get('resume', False)
    if representation == 'wide':
        policy = FullyConvPolicyBigMap
        if game == "sokoban":
            policy = FullyConvPolicySmallMap
    else:
        policy = CustomPolicyBigMap
        if game == "sokoban":
            policy = CustomPolicySmallMap
    if game == "binary":
        kwargs['cropped_size'] = 28
    elif game == "zelda":
        kwargs['cropped_size'] = 22
    elif game == "sokoban":
        kwargs['cropped_size'] = 10
    n = max_exp_idx(exp_name)
    global log_dir
    if not resume:
        n = n + 1
    log_dir = 'runs/{}_{}_{}'.format(exp_name, n, 'log')
    if not resume:
        os.mkdir(log_dir)
    else:
        model = load_model(log_dir)
    kwargs = {
        **kwargs,
        'render_rank': 0,
        'render': render,
    }
    used_dir = log_dir
    if not logging:
        used_dir = None
    env = make_vec_envs(env_name, representation, log_dir, n_cpu, **kwargs)
    if not resume or model is None:
        model = PPO2(policy, env, verbose=1, tensorboard_log="./runs")
    else:
        model.set_env(env)
    if not logging:
        model.learn(total_timesteps=int(steps), tb_log_name=exp_name)
    else:
        model.learn(total_timesteps=int(steps), tb_log_name=exp_name, callback=callback)
def main():
    import time
    from sklearn.metrics import confusion_matrix

    # labeled sample
    l_data_path = "/home/kzk/datasets/uci_csv_ssl_lrate_fixed_1_50_1_98/car/4_l.csv"
    data_l = np.loadtxt(l_data_path, delimiter=" ")
    data_l = np.hstack(
        (data_l, np.reshape(np.ones(data_l.shape[0]), (data_l.shape[0], 1))))
    y_l = data_l[:, 0]
    X_l = data_l[:, 1:]

    # unlabeled sample
    u_data_path = "/home/kzk/datasets/uci_csv_ssl_lrate_fixed_1_50_1_98/car/4_u.csv"
    data_u = np.loadtxt(u_data_path, delimiter=" ")
    data_u = np.hstack(
        (data_u, np.reshape(np.ones(data_u.shape[0]), (data_u.shape[0], 1))))
    X_u = data_u[:, 1:]

    # test sample
    t_data_path = "/home/kzk/datasets/uci_csv_ssl_lrate_fixed_1_50_1_98/car/4_t.csv"
    data_t = np.loadtxt(t_data_path, delimiter=" ")
    data_t = np.hstack(
        (data_t, np.reshape(np.ones(data_t.shape[0]), (data_t.shape[0], 1))))
    y_t = data_t[:, 0]
    X_t = data_t[:, 1:]

    # learn
    st = time.time()
    model = RegularizedHPFSSLClassifier(max_itr=10, threshold=1e-4,
                                        learn_type="online", multi_class="ovo")
    model.learn(X_l, y_l, X_u)
    et = time.time()
    print("Elapsed time: %f [s]" % (et - st))

    # predict
    outputs = []
    for i, x in enumerate(X_t):
        outputs_ = model.predict(x)
        outputs.append(outputs_[0][0])

    # confusion matrix
    cm = confusion_matrix(y_t, outputs)
    print(cm)
    print(100.0 * np.sum(cm.diagonal()) / len(y_t))
def main(): config = tf.ConfigProto() # Avoid warning message errors os.environ["CUDA_VISIBLE_DEVICES"] = "0" # Allowing GPU memory growth config.gpu_options.allow_growth = True with tf.Session(config=config): #load_path = "./model/mario/1-1/scratch/action_repeat_4/30/PPO/300000/model.ckpt" model.learn( policy=policies.PPOPolicy, env=SubprocVecEnv([ env.make_train_0, env.make_train_0, env.make_train_0, env.make_train_0, #env.make_train_1, #env.make_train_2, #env.make_train_3, #env.make_train_4, #env.make_train_5, #env.make_train_6, #env.make_train_7, #env.make_train_8, #env.make_train_9, #env.make_train_10, #env.make_train_11, #env.make_train_12 ]), nsteps=512, # Steps per environment total_timesteps=100000000, gamma=0.99, lam=0.95, vf_coef=0.5, ent_coef=0.01, lr=lambda _: 2e-4, cliprange=lambda _: 0.1, # 0.1 * learning_rate max_grad_norm=0.5, log_interval=4)
def main():
    config = tf.ConfigProto()

    # Allowing GPU memory growth
    config.gpu_options.allow_growth = True

    with tf.Session(config=config):
        model.learn(
            policy=policies.A2CPolicy,
            env=DummyVecEnv([
                env.make_train_0, env.make_train_1,
                env.make_train_2, env.make_train_3
            ]),
            nsteps=2048,  # Steps per environment
            total_timesteps=10000000,
            gamma=0.99,
            lam=0.95,
            vf_coef=0.5,
            ent_coef=0.01,
            lr=2e-4,
            max_grad_norm=0.5,
            log_interval=10)
def main(): config = tf.ConfigProto() os.environ["CURA_VISIBLE_DEVICES"] = "0" config.gpu_options_allow_growth = True with tf.Session(Config=config): model.learn( policy=policies.A2CPolicy, env=SubprocVecEnv([ env.make_train_0, env.make_train_1, env.make_train_2, env.make_train_3, env.make_train_4, env.make_train_5, env.make_train_6, env.make_train_7, env.make_train_8, env.make_train_9, env.make_train_10, env.make_train_11, env.make_train_12 ]), nsteps=2048, # Steps per environment total_timesteps=10000000, gamma=0.99, lam=0.95, vf_coef=0.5, ent_coef=0.01, lr=2e-4, max_grad_norm=0.5, log_interval=10)
def speak(channel, target='', target2=None):
    if target[:2] == '<#':
        modelkey = target.split('|')[0][2:]
    elif target[:2] == '<@':
        if target2:
            modelkey = (target, target2.split('|')[0][2:])
        else:
            modelkey = (target, channel)
    else:
        modelkey = channel

    if modelkey not in CACHE:
        o = learn(channel, target, target2)
        model = CACHE[modelkey]
        s = model.make_short_sentence(max_sentence_length, tries=100)
        if s is not None:
            return o + '\n\n' + s
        else:
            return o + '\n\n:robot_face: Beep Boop'
    else:
        model = CACHE[modelkey]
        s = model.make_short_sentence(max_sentence_length, tries=100)
        if s:
            return s
        return ':robot_face: Beep Boop'
def train(env_id, num_timesteps, seed, lrschedule, num_cpu):
    def make_env(rank):
        def _thunk():
            env = make_atari(env_id)
            env.seed(seed + rank)
            return wrap_deepmind(env)
        return _thunk

    set_global_seeds(seed)
    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--vf_coef', help='critic coefficient', default=0.5)
    parser.add_argument('--ent_coef', help='entropy coefficient', default=0.01)
    parser.add_argument('--opt_eps', help='option eps', default=0.01)
    parser.add_argument('--delib_cost', help='deliberation cost', default=0.001)
    parser.add_argument('--max_grad_norm', help='max gradient norm', default=0.5)
    parser.add_argument('--lrschedule', help='learning rate schedule', default='linear')
    parser.add_argument('--epsilon', help='epsilon for exploration', default=1e-5)
    parser.add_argument('--alpha', help='alpha', default=0.99)
    parser.add_argument('--gamma', help='gamma (discounting)', default=0.99)
    parser.add_argument('--log_interval', help='log_interval', default=100)
    parser.add_argument('--lr', help='learning rate', default=0.001)
    parser.add_argument('--nopts', help='number of options', default=4)
    parser.add_argument('--log_dir', help='log directory', default='log')
    args = parser.parse_args()

    model_template = [
        {"model_type": "conv", "filter_size": [8, 8], "pool": [1, 1],
         "stride": [4, 4], "out_size": 32, "name": "conv1"},
        {"model_type": "conv", "filter_size": [4, 4], "pool": [1, 1],
         "stride": [2, 2], "out_size": 64, "name": "conv2"},
        {"model_type": "conv", "filter_size": [3, 3], "pool": [1, 1],
         "stride": [1, 1], "out_size": 64, "name": "conv3"},
        {"model_type": "flatten"},
        {"model_type": "mlp", "out_size": 512, "activation": "relu", "name": "fc1"},
        {"model_type": "option"},
        {"model_type": "value"},
    ]

    learn(model_template, env, seed,
          total_timesteps=int(num_timesteps * 1.1), args=args)
    env.close()
if args.non_linear == 'relu':
    non_linear = torch.nn.ReLU()
elif args.non_linear == 'elu':
    non_linear = torch.nn.ELU()

# New actor and critic policies
actor = Actor(use_gpu=use_gpu, non_linear=non_linear, batch_norm=args.batch_norm)
critic = Critic(use_gpu=use_gpu, non_linear=non_linear, batch_norm=args.batch_norm)

for i in range(args.num_train_cycles):
    print('Training cycle %s of %s' % (i, args.num_train_cycles))
    act(actor, env, task, B,
        num_trajectories=args.num_trajectories,
        task_period=30, writer=writer)
    learn(actor, critic, task, B,
          num_learning_iterations=args.num_learning_iterations,
          episode_batch_size=args.episode_batch_size,
          lr=0.0002, writer=writer, loss=args.loss)
    run(actor, env, min_rate=0.05, writer=writer)
    # Remove early trajectories when buffer gets too large
    B = B[-args.buffer_size:]

# Save the model to local directory
if args.saveas is not None:
    save_path = str(root_dir / 'local' / 'models' / args.saveas)
    print('Saving models to %s' % save_path)
    torch.save(actor, save_path + '_actor.pt')
    torch.save(critic, save_path + '_critic.pt')
    print('...done')

# Close writer
try:
episode_reward += reward
total_reward += reward
episode_step += 1
total_step += 1

# Renders the game to screen
if (args.render):
    env.render()

# Add experience to replay buffer
rp_buffer.append(obs, act_probs, act_taken_v, reward, done)

# Learn from experience and clear rp buffer
if (total_step % args.batch_size == 0):
    # Calculates/Applies grads
    pl, cl, tl, dr, ce, ad = model.learn(rp_buffer)

    # Write outputs out for visualization
    tb_writer.add_scalar('Misc/CrossEntropyMean', ce.mean(), total_step)
    tb_writer.add_scalar('Misc/Advantage', ad.mean(), total_step)
    tb_writer.add_scalar('Loss/PolicyLoss', pl, total_step)
    tb_writer.add_scalar('Loss/CriticLoss', cl, total_step)
    tb_writer.add_scalar('Loss/TotalLoss', tl, total_step)
    tb_writer.add_scalar('Rewards/DiscountedReward', dr, total_step)
    tb_writer.add_histogram('Actions/ActionsTaken',
                            rp_buffer.actions_scalar().cpu().numpy(),
                            total_step,
                            bins=np.arange(-1, env.action_space.n + 1, 0.2))

    # Clears the replay buffer
    rp_buffer = ReplayBuffer(args.batch_size, env.observation_space.shape,
from data_manager import ClutteredMNIST
from model import STN, learn

dataset_path = "./dataset/mnist_cluttered_60x60_6distortions.npz"
batch_size = 256
num_epochs = 30

data_manager = ClutteredMNIST(dataset_path)
train_data, val_data, test_data = data_manager.load()
x_train, y_train = train_data
print(x_train.shape, y_train.shape)

learn(STN(input_shape=(60, 60, 1), num_classes=10),
      x_train, y_train, val_data[0], val_data[1])
#! /usr/bin/env python
"""
@author: dell
"""
if __name__ == "__main__":
    import music
    import model

    train_examples = music.load_examples('data/train.pkl')
    model.learn(train_examples)

    test_examples = music.load_examples('data/test.pkl')
    test_ratings = model.predict(test_examples)
    for i in range(len(test_examples)):
        test_examples[i]['rating'] = test_ratings[i]
    music.write_examples('submissions/zmusic_predictions.csv', test_examples)
        transforms.ToTensor(),
        MyNormalize()
    ]))

train_loader, test_loader, validation_loader = divide_dataset(
    imgDataset, 0.2, 16, 16)

model = Net()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
criterion = nn.CrossEntropyLoss()

epoch = [1, 3]
train_results = []
test_results = []
for ep in epoch:
    train_result, test_result, epoch = learn(train_loader, test_loader, ep)
    train_results.append([train_result, epoch])
    loss, accuracy = test_result
    print(loss)
    print(accuracy)
    print(test_result)
    print(epoch)
    test_results.append([loss, accuracy, epoch])

print(train_results)
for result in train_results:
    print('epoch: ' + str(result[1]) + ", " + "Average loss: " + str(result[0]))
for result in test_results:
def main():
    if not (os.path.exists(f"./api_key/key.json")):
        print(
            "\n\n..oops you need a JSON file called 'key.json' inside the path './api_key/'\n(see the README.md to find out how to structure it)\n\n"
        )
        exit()
    print(
        "\n\nQ-Learning for the greater good.. choose how to interact with the API:\n"
    )
    mode = str(
        input(
            "\noption 't' is train (default)\noption 'c' is train-cycle\noption 'e' is exploit\n\nENTER OPTION: "
        ) or "t")

    if mode == "t":
        world = int(
            input(
                "\nwhich World [0-10] would you like to train on? (default is World 0)\nWORLD: "
            ) or "0")
        epochs = int(
            input(
                f"\nhow many epochs would you like to train the agent on World {world} for? (default is 1 epoch)\nEPOCHS: "
            ) or "1")
        print(
            f"\ntraining from scratch for {epochs} epochs on world {world}! \n(visualizations will be saved to './runs/world_{world}/')\n(Q-tables will be saved to './runs/Q-table_world_{world}')"
        )
        verbose = str(input(f"\nverbosity? (default is yes)\n([y]/n)? ") or "y")
        if verbose == "y":
            v = True
        else:
            v = False
        epsilon = 0.9
        q_table = model.init_q_table()
        if not (os.path.exists(f"./runs/world_{world}/")):
            os.makedirs(f"./runs/world_{world}/")
        run_num = len([i for i in os.listdir(f"runs/world_{world}")])
        file_path = f"./runs/Q-table_world_{world}"
        good_term_states = []
        bad_term_states = []
        obstacles = []
        for epoch in range(epochs):
            print("EPOCH #" + str(epoch) + ":\n\n")
            q_table, good_term_states, bad_term_states, obstacles = model.learn(
                q_table,
                worldId=world,
                mode='train',
                learning_rate=0.0001,
                gamma=0.9,
                epsilon=epsilon,
                good_term_states=good_term_states,
                bad_term_states=bad_term_states,
                epoch=epoch,
                obstacles=obstacles,
                run_num=run_num,
                verbose=v)
            epsilon = utils.epsilon_decay(epsilon, epoch, epochs)
        np.save(file_path, q_table)
        np.save(f"./runs/obstacles_world_{world}", obstacles)
        np.save(f"./runs/good_term_states_world_{world}", good_term_states)
        np.save(f"./runs/bad_term_states_world_{world}", bad_term_states)

    elif mode == "e":
        world = int(
            input(
                "\nwhich World [0-10] would you like the agent to exploit? (default is World 0)\nWORLD: "
            ) or "0")
        epochs = int(
            input(
                f"\nhow many times would you like the agent to run on World {world}? (default is 1 time)\nEPOCHS: "
            ) or "1")
        verbose = str(input(f"\nverbosity? (default is yes)\n([y]/n)? ") or "y")
        if verbose == "y":
            v = True
        else:
            v = False
        print(
            f"\nExploiting world {world} for {epochs} iterations! \n(visualizations will be saved to './runs/world_{world}/')"
        )
        file_path = f"./runs/Q-table_world_{world}"
        q_table = np.load(file_path + ".npy")
        obstacles = np.load(f"./runs/obstacles_world_{world}" + ".npy")
        good_term_states = np.load(f"./runs/good_term_states_world_{world}" + ".npy")
        bad_term_states = np.load(f"./runs/bad_term_states_world_{world}" + ".npy")
        obstacles = obstacles.tolist()
        good_term_states = good_term_states.tolist()
        bad_term_states = bad_term_states.tolist()
        epsilon = 0.9
        run_num = len([i for i in os.listdir(f"runs/world_{world}")])
        for epoch in range(epochs):
            print("EPOCH #" + str(epoch) + ":\n\n")
            q_table, good_term_states, bad_term_states, obstacles = model.learn(
                q_table,
                worldId=world,
                mode='expl',
                learning_rate=0.0001,
                gamma=0.9,
                epsilon=epsilon,
                good_term_states=good_term_states,
                bad_term_states=bad_term_states,
                epoch=epoch,
                obstacles=obstacles,
                run_num=run_num,
                verbose=v)

    if mode == "c":
        confirm = str(
            input(
                f"\nyou've chosen to train the agent on all Worlds [1-10], this could take a while.. (are you sure?)\nProceed ([y]/n)? "
            ) or "y")
        cont = str(
            input(
                f"\nWould you like to continue training from previous runs?\nProceed ([y]/n)? "
            ) or "y")
        if cont.lower() == "y":
            epochs_computed = int(
                input(
                    f"\nHow many epochs were used in previous training runs?\nEPOCHS: "
                ))
            epochs = int(
                input(
                    f"\nhow many more epochs would you like the agent to train on each World? (default is 10 epochs)\nEPOCHS: "
                ) or "10")
            init_eps = epsilon = utils.epsilon_decay(0.9, 6, epochs_computed + epochs)
        else:
            epochs = int(
                input(
                    f"\nhow many epochs would you like the agent to train on each World? (default is 10 epochs)\nEPOCHS: "
                ) or "10")
            epochs_computed = 0
            init_eps = epsilon = 0.9
        verbose = str(input(f"\nverbosity? (default is yes)\n([y]/n)? ") or "y")
        if verbose == "y":
            v = True
        else:
            v = False
        if confirm == "y":
            for i in range(10):
                world = i + 1
                print(
                    f"\ntraining from scratch for {epochs} epochs on world {world}! \n(visualizations will be saved to './runs/world_{world}/')\n(Q-tables will be saved to './runs/Q-table_world_{world}')"
                )
                if not (os.path.exists(f"./runs/world_{world}/")):
                    os.makedirs(f"./runs/world_{world}/")
                run_num = len([i for i in os.listdir(f"runs/world_{world}")])
                file_path = f"./runs/Q-table_world_{world}"
                if cont.lower() == 'y':
                    good_term_states = np.load(
                        open(f"./runs/good_term_states_world_{world}.npy", "rb"))
                    bad_term_states = np.load(
                        open(f"./runs/bad_term_states_world_{world}.npy", "rb"))
                    obstacles = np.load(
                        open(f"./runs/obstacles_world_{world}.npy", "rb"))
                    q_table = np.load(
                        open(f"./runs/Q-table_world_{world}.npy", "rb"))
                else:
                    good_term_states = []
                    bad_term_states = []
                    obstacles = []
                    q_table = model.init_q_table()
                t = trange(epochs, desc='Training on all worlds', leave=True)
                for epoch in t:
                    t.set_description('Current World={}'.format(i + 1))
                    print("EPOCH #" + str(epoch) + ":\n\n")
                    q_table, good_term_states, bad_term_states, obstacles = model.learn(
                        q_table,
                        worldId=world,
                        mode='train',
                        learning_rate=0.0001,
                        gamma=0.9,
                        epsilon=epsilon,
                        good_term_states=good_term_states,
                        bad_term_states=bad_term_states,
                        epoch=epoch,
                        obstacles=obstacles,
                        run_num=run_num,
                        verbose=v)
                    epsilon = utils.epsilon_decay(init_eps, epoch + epochs_computed,
                                                  epochs + epochs_computed)
                np.save(file_path, q_table)
                np.save(f"./runs/obstacles_world_{world}", obstacles)
                np.save(f"./runs/good_term_states_world_{world}", good_term_states)
                np.save(f"./runs/bad_term_states_world_{world}", bad_term_states)
        else:  # confirmation not given
            exit()
    else:
        print("that option doesn't exist yet :'(")
        exit()