def try_random_agent(num_episodes=const.num_episodes_test):
    env = utils_env.Environment()
    brain_name = env.brain_names[0]
    for i in range(num_episodes):
        env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
        states = env_info.vector_observations  # get the current state (for each agent)
        scores = np.zeros(const.num_agents)  # initialize the score (for each agent)
        while True:
            actions = np.random.randn(const.num_agents, action_size)  # select a random action (for each agent)
            actions = np.clip(actions, -1, 1)  # all actions between -1 and 1
            env_info = env.step(actions)[brain_name]  # send all actions to the environment
            next_states = env_info.vector_observations  # get next state (for each agent)
            rewards = env_info.rewards  # get reward (for each agent)
            dones = env_info.local_done  # see if episode finished
            scores += rewards  # update the score (for each agent)
            states = next_states  # roll over states to next time step
            if np.any(dones):  # exit loop if episode finished
                break
        print('Episode {}, score (max over agents): {}'.format(i, np.max(scores)))
        print('Episode {}, score of each agent: ['.format(i),
              '; '.join(['{:.3f}'.format(s) for s in scores]), ']')
    env.close()
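# NOTE (assumption): `action_size` is used above but not defined in this
# snippet; it is expected at module scope. With the Unity ML-Agents API it
# would typically come from the brain parameters, e.g.:
# brain = env.brains[brain_name]
# action_size = brain.vector_action_space_size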
def grid_search():
    env = utils_env.Environment()
    print('=' * 30, 'Grid Search', '=' * 30)
    params = {
        'num_episodes': [200, 250, 500],  # --> 250
        'batch_size': [32, 64, 128, 256],  # --> 32
        'expl_noise': [0.1, 0.3],  # --> 0.3
        'gamma': [0.95, 0.99],  # --> 0.95
        'model_learning_rate': [0.001, 0.0001, 0.00001],  # --> 0.001
        'num_fc_actor': [128, 64, 32],  # --> 128
        'num_fc_critic': [128, 64, 32],  # --> 128
        'memory_size': [20000, 40000]  # --> 40000
    }
    grid = ParameterGrid(params)
    rf = MyNavigator()
    best_score = -10.
    best_grid = None
    best_grid_index = 0
    result_dict = {}
    key_list = list(params.keys()) + ['score']
    df = pd.DataFrame(columns=key_list)
    for i, g in enumerate(grid):
        rf.set_params(**g)
        score = rf.fit(i, env)
        result_dict[i] = {'score': score, 'grid': g}
        d = dict(g)  # copy so the score entry does not leak into the grid dict
        d['score'] = score
        df = pd.concat([df, pd.DataFrame([d])], ignore_index=True)  # DataFrame.append was removed in pandas 2.0
        print('Evaluated candidate:', i, result_dict[i])
        # save if best
        if score >= best_score:
            best_score = score
            best_grid = g
            best_grid_index = i
    for k, v in result_dict.items():
        print(k, v)
    print("==> Best score:", best_score)
    print("==> Best grid:", best_grid_index, best_grid)
    if len(key_list) == 3:  # better overview as pivot table (only for 2 hyperparams)
        for c in params.keys():
            # if one hyperparam is a list of values
            if df[c].dtype == object:
                df[c] = df[c].astype(str)
        print(df.pivot(index=key_list[0], columns=key_list[1], values=key_list[2]))
    else:
        print(df)
    env.close()  # finally, close the Env
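# The grid search above assumes a MyNavigator wrapper with a scikit-learn-like
# surface: set_params(**g) stores one hyperparameter combination and
# fit(run_id, env) trains with those settings and returns a scalar score used
# to rank the candidate. A minimal sketch of that assumed interface (names
# and behavior are illustrative, not the repo's actual implementation):
class MyNavigator:

    def __init__(self):
        self.params = {}

    def set_params(self, **params):
        # keep the current grid candidate's hyperparameters
        self.params.update(params)

    def fit(self, run_id, env):
        # train an agent on env with self.params and return the score
        # (e.g. the final rolling-mean episode reward) for this run
        raise NotImplementedError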
def test_default_algo(use_ref_model: bool = False):
    env = utils_env.Environment()
    # use default params
    ag = agent.DRLAgent()
    if use_ref_model:
        print('... Test the agent using reference model ...')
        ag.set_model_path('ref')
    al = algo.DRLAlgo(env, ag)
    al.test()
def test_default_algo(use_ref_model: bool = False):
    env = utils_env.Environment()
    model_name_suffix = ''
    if use_ref_model:
        print('... Test the agent using reference model ...')
        model_name_suffix = 'ref_'
    # use default params
    ag_1 = agent.DRLAgent()
    ag_1.set_model_path(model_name_suffix + str(1))
    ag_2 = agent.DRLAgent()
    ag_2.set_model_path(model_name_suffix + str(2))
    al = algo.DRLAlgo(env, ag_1, ag_2)
    al.test()
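# Example usage: evaluate the two stored agents against the reference
# checkpoints ('ref_1' / 'ref_2') instead of the last training run:
# test_default_algo(use_ref_model=True)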
def train_two_agents():
    env = utils_env.Environment()
    # use default params
    ag_1 = agent.DRLAgent()
    ag_1.set_model_path(1)
    ag_2 = agent.DRLAgent()
    ag_2.set_model_path(2)
    al = algo.DRLAlgo(env, ag_1, ag_2)
    history, best_e, best_score = al.train()
    print('\nFinal score: {:.3f}'.format(np.mean(history[-const.rolling_mean_N:])))
    print('Final memory length:', ag_1.memory.get_length())
    print('Best score in {:d} episodes, avg_score: {:.3f}'.format(best_e, best_score))
    # plot losses
    losses_lists = [
        ag_1.actor_loss_list, ag_2.actor_loss_list,
        ag_1.critic_loss_list, ag_2.critic_loss_list
    ]
    losses_labels = [
        'agent_1_actor', 'agent_2_actor',
        'agent_1_critic', 'agent_2_critic'
    ]
    utils_plot.plot_loss(losses_lists, losses_labels)
    # plot noise
    utils_plot.plot_scatter(ag_1.noise_list,
                            title_text='Noise',
                            fp=const.file_path_img_noise)
    # plot memory actions (each transition t is assumed to be stored as
    # (state, action, reward, next_state, done), so t[1] is the action)
    memory_actions = np.array([t[1] for t in ag_1.memory.memory])
    utils_plot.plot_scatter(memory_actions,
                            title_text='Actions',
                            fp=const.file_path_img_actions)
    # show mean/std of memory actions, split into each agent's action pair
    mean_a = np.mean(memory_actions, axis=0)
    std_a = np.std(memory_actions, axis=0)
    print('Mean/std actions agent_1:', mean_a[:2], std_a[:2])
    print('Mean/std actions agent_2:', mean_a[2:], std_a[2:])
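# utils_plot.plot_scatter is called above as plot_scatter(data, title_text=...,
# fp=...). A minimal matplotlib sketch matching that call signature
# (illustrative only; the repo's actual implementation may differ):
import matplotlib.pyplot as plt
import numpy as np


def plot_scatter(data, title_text='', fp=None):
    data = np.asarray(data)
    fig, ax = plt.subplots()
    for col in range(data.shape[1]):
        # one point cloud per column (e.g. per action/noise dimension)
        ax.scatter(np.arange(data.shape[0]), data[:, col],
                   s=2, label='dim {}'.format(col))
    ax.set_title(title_text)
    ax.legend()
    if fp is not None:
        fig.savefig(fp)  # fp: target file path for the image
    plt.close(fig)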
def try_random_agent():
    env = utils_env.Environment()
    brain_name = env.brain_names[0]
    env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
    states = env_info.vector_observations  # get the current state (for each agent)
    scores = np.zeros(const.num_agents)  # initialize the score (for each agent)
    while True:
        actions = np.random.randn(const.num_agents, action_size)  # select a random action (for each agent)
        actions = np.clip(actions, -1, 1)  # all actions between -1 and 1
        env_info = env.step(actions)[brain_name]  # send all actions to the environment
        next_states = env_info.vector_observations  # get next state (for each agent)
        rewards = env_info.rewards  # get reward (for each agent)
        dones = env_info.local_done  # see if episode finished
        scores += rewards  # update the score (for each agent)
        states = next_states  # roll over states to next time step
        if np.any(dones):  # exit loop if episode finished
            break
    print('Total score (averaged over agents) this episode: {}'.format(np.mean(scores)))
    env.close()
def grid_search():
    env = utils_env.Environment()
    print('=' * 30, 'Grid Search', '=' * 30)
    params = {
        # 'num_episodes': [5, 10],  # test
        'batch_size': [32, 64, 128],
        'use_double_dqn': [True, False],
        'eps_decay_factor': [0.99, 0.95, 0.9],
        'gamma': [0.95, 0.9],
        'update_target_each_iter': [2, 4, 8, 16],
        'model_learning_rate': [0.001, 0.0001, 0.00001],
        'model_fc1_num': [32, 20],
        'model_fc2_num': [16, 10],
        'num_episodes': [625, 700, 1000, 2000],
        'memory_size': [20000, 40000]
    }
    grid = ParameterGrid(params)
    rf = MyNavigator()
    best_score = -10.
    best_grid = None
    best_grid_index = 0
    result_dict = {}
    key_list = list(params.keys()) + ['score']
    df = pd.DataFrame(columns=key_list)
    for i, g in enumerate(grid):
        rf.set_params(**g)
        score = rf.fit(i, env)
        result_dict[i] = {'score': score, 'grid': g}
        d = dict(g)  # copy so the score entry does not leak into the grid dict
        d['score'] = score
        df = pd.concat([df, pd.DataFrame([d])], ignore_index=True)  # DataFrame.append was removed in pandas 2.0
        print('Evaluated candidate:', i, result_dict[i])
        # save if best
        if score >= best_score:
            best_score = score
            best_grid = g
            best_grid_index = i
    for k, v in result_dict.items():
        print(k, v)
    print("==> Best score:", best_score)
    print("==> Best grid:", best_grid_index, best_grid)
    if len(key_list) == 3:  # better overview as pivot table (only for 2 hyperparams)
        for c in params.keys():
            # if one hyperparam is a list of values
            if df[c].dtype == object:
                df[c] = df[c].astype(str)
        print(df.pivot(index=key_list[0], columns=key_list[1], values=key_list[2]))
    else:
        print(df)
    env.close()  # finally, close the Env
def get_env_info():
    env = utils_env.Environment()
    env.get_info()
def train_default_algo():
    env = utils_env.Environment()
    # use default params
    ag = agent.DRLAgent()
    al = algo.DRLAlgo(env, ag)
    al.train()
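# The train/test helpers above assume this DRLAlgo surface: the constructor
# takes the environment plus one or more agents, train() returns
# (score_history, best_episode, best_score) as unpacked in train_two_agents,
# and test() runs evaluation episodes with the stored models. A sketch of the
# assumed interface (illustrative only):
class DRLAlgo:

    def __init__(self, env, *agents):
        self.env = env
        self.agents = agents

    def train(self):
        # returns (list of episode scores, best episode index, best avg score)
        raise NotImplementedError

    def test(self):
        # roll out the trained agents without exploration noise
        raise NotImplementedError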
def grid_search():
    env = utils_env.Environment()
    print('=' * 30, 'Grid Search', '=' * 30)
    params = {
        'num_episodes': [500, 1000, 1500],  # --> 1500
        'max_action': [0.1, 0.5, 1.0],  # --> 1.0
        'memory_size': [100000, 200000],  # --> 200000
        'gamma': [0.95, 0.99],  # --> 0.99
        'batch_size': [64, 128, 256],  # --> 128
        'tau': [0.01, 0.05, 0.06, 0.07, 0.1],  # --> 0.06
        'policy_freq': [1, 2, 3],  # --> 3
        'model_learning_rate': [0.001, 0.0001],  # --> 0.001
        'num_fc_1': [256, 128, 64, 32, 16]  # --> 256
    }
    grid = ParameterGrid(params)
    rf = MyNavigator()
    best_score = -10.
    best_grid = None
    best_grid_index = 0
    result_dict = {}
    key_list = list(params.keys()) + ['score']
    df = pd.DataFrame(columns=key_list)
    for i, g in enumerate(grid):
        if 'num_fc_1' in key_list:
            # the second fully connected layer is tied to half the first
            g['num_fc_2'] = g['num_fc_1'] // 2
        rf.set_params(**g)
        score = rf.fit(i, env)
        result_dict[i] = {'score': score, 'grid': g}
        d = dict(g)  # copy so the score entry does not leak into the grid dict
        d['score'] = score
        df = pd.concat([df, pd.DataFrame([d])], ignore_index=True)  # DataFrame.append was removed in pandas 2.0
        print('\nEvaluated candidate:', i, result_dict[i])
        # save if best
        if score >= best_score:
            best_score = score
            best_grid = g
            best_grid_index = i
    for k, v in result_dict.items():
        print(k, v)
    print("==> Best score:", best_score)
    print("==> Best grid:", best_grid_index, best_grid)
    if len(key_list) == 3:  # better overview as pivot table (only for 2 hyperparams)
        for c in params.keys():
            # if one hyperparam is a list of values
            if df[c].dtype == object:
                df[c] = df[c].astype(str)
        print(df.pivot(index=key_list[0], columns=key_list[1], values=key_list[2]))
    else:
        print(df)
    env.close()  # finally, close the Env
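# The pivot-table branch only triggers when exactly two hyperparameters are
# searched (key_list is then [param_a, param_b, 'score']). Example of a
# two-parameter search that would produce a tau x batch_size score matrix
# (values taken from the full grid above):
# params = {
#     'tau': [0.01, 0.05, 0.06, 0.07, 0.1],
#     'batch_size': [64, 128, 256],
# }
# df.pivot(index='tau', columns='batch_size', values='score')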