def main():
    args = vars(parser.parse_args())
    agent_config = configs.get_agent_config(args)
    game_config = configs.get_game_config(args)
    training_config = configs.get_training_config(args)
    print("Training with config:")
    print(training_config)
    print(game_config)
    print(agent_config)

    agent = AgentModule(agent_config)
    if training_config.use_cuda:
        agent.cuda()

    optimizer = RMSprop(agent.parameters(), lr=training_config.learning_rate)
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True, cooldown=5)
    losses = defaultdict(lambda: defaultdict(list))
    dists = defaultdict(lambda: defaultdict(list))

    for epoch in range(training_config.num_epochs):
        num_agents = np.random.randint(game_config.min_agents,
                                       game_config.max_agents + 1)
        num_landmarks = np.random.randint(game_config.min_landmarks,
                                          game_config.max_landmarks + 1)
        agent.reset()
        game = GameModule(game_config, num_agents, num_landmarks)
        if training_config.use_cuda:
            game.cuda()

        optimizer.zero_grad()
        total_loss, _ = agent(game)
        # .item() replaces the deprecated .data[0] indexing on 0-dim tensors.
        per_agent_loss = total_loss.item() / num_agents / game_config.batch_size
        losses[num_agents][num_landmarks].append(per_agent_loss)

        dist = game.get_avg_agent_to_goal_distance()
        avg_dist = dist.item() / num_agents / game_config.batch_size
        dists[num_agents][num_landmarks].append(avg_dist)

        print_losses(epoch, losses, dists, game_config)
        total_loss.backward()
        optimizer.step()

        # Step the LR scheduler only on the largest game configuration.
        if num_agents == game_config.max_agents and num_landmarks == game_config.max_landmarks:
            scheduler.step(
                losses[game_config.max_agents][game_config.max_landmarks][-1])

    if training_config.save_model:
        torch.save(agent, training_config.save_model_file)
        print("Saved agent model weights at %s" % training_config.save_model_file)
"""
def train(kwargs):
    args = defaultdict(lambda: False, kwargs)
    agent_config = configs.get_agent_config(args)
    game_config = configs.get_game_config(args)
    training_config = configs.get_training_config(args)

    agent = AgentModule(agent_config)
    if training_config.use_cuda:
        agent.cuda()

    optimizer = RMSprop(agent.parameters(), lr=training_config.learning_rate)
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True, cooldown=5)
    losses = defaultdict(lambda: defaultdict(list))
    dists = defaultdict(lambda: defaultdict(list))
    run_logs = defaultdict(list)

    # for epoch in tqdm(range(training_config.num_epochs), desc='Epochs'):
    for epoch in range(training_config.num_epochs):
        epoch_logs = train_one_epoch(epoch, agent_config, game_config,
                                     training_config, agent, optimizer,
                                     scheduler, losses, dists)
        for name, values in epoch_logs.items():
            run_logs[name].append(values)
    return run_logs
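# Sketch (assumption, not part of the original code): one way the run_logs
# returned by train() might be consumed. The metric names are whatever
# train_one_epoch logs, so no specific keys are assumed; each logged value is
# assumed to be a scalar per epoch.
def plot_run_logs(run_logs):
    import matplotlib.pyplot as plt
    for name, per_epoch_values in run_logs.items():
        plt.figure()
        plt.plot(per_epoch_values)
        plt.title(name)
        plt.xlabel('epoch')
    plt.show()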
def main():
    args = vars(parser.parse_args())
    agent_config = configs.get_agent_config(args)
    game_config = configs.get_game_config(args)
    training_config = configs.get_training_config(args)
    print("Training with config:")
    print(training_config)
    print(game_config)
    print(agent_config)

    agent = AgentModule(agent_config)
    if training_config.use_cuda:
        agent.cuda()

    optimizer = RMSprop(agent.parameters(), lr=training_config.learning_rate)
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True, cooldown=5)
    losses = defaultdict(lambda: defaultdict(list))
    dists = defaultdict(lambda: defaultdict(list))

    for epoch in range(training_config.num_epochs):
        num_agents = np.random.randint(game_config.min_agents,
                                       game_config.max_agents + 1)
        num_landmarks = np.random.randint(game_config.min_landmarks,
                                          game_config.max_landmarks + 1)
        agent.reset()
        game = GameModule(game_config, num_agents, num_landmarks)
        if training_config.use_cuda:
            game.cuda()

        optimizer.zero_grad()
        total_loss, timesteps = agent(game)
        # .item() replaces the deprecated .data[0] indexing on 0-dim tensors.
        per_agent_loss = total_loss.item() / num_agents / game_config.batch_size
        losses[num_agents][num_landmarks].append(per_agent_loss)

        dist = game.get_avg_agent_to_goal_distance()
        avg_dist = dist.item() / num_agents / game_config.batch_size
        dists[num_agents][num_landmarks].append(avg_dist)

        print_losses(epoch, losses, dists, game_config)
        # print("total loss:", total_loss.detach().numpy()[0])
        total_loss.backward()
        optimizer.step()

        if num_agents == game_config.max_agents and num_landmarks == game_config.max_landmarks:
            scheduler.step(
                losses[game_config.max_agents][game_config.max_landmarks][-1])

        '''
        This visualizes the trajectories of agents (circles) and target
        locations (crosses), and displays communication symbol usage: the
        alpha channel of the i-th letter shows how heavily an agent used the
        i-th symbol during the epoch (at each step communication is a
        [1, 20] float vector; these vectors are summed over all steps).
        '''
        if epoch < 3 or epoch > training_config.num_epochs - 3:
            import matplotlib.pyplot as plt
            fig, ax = plt.subplots()
            ax.set_xticks([])
            ax.set_yticks([])
            colors = ['red', 'green', 'blue']
            agent_markers = ['o', '^']
            landmark_markers = ['P', '*']
            # Accumulate each agent's utterance vectors over the episode.
            utterances = np.zeros_like(
                timesteps[0]['utterances'][0].detach().numpy())

            for time, timestep in enumerate(timesteps):
                agent_legends = []
                for idx, point in enumerate(
                        timestep['locations'][0][:num_agents]):
                    agent_legends.append(
                        plt.scatter(
                            *list(point.detach().numpy()),
                            color=colors[int(game.physical[0, idx, 0].item())],
                            marker=agent_markers[int(game.physical[0, idx, 1].item())],
                            s=20,
                            alpha=0.75))
                for idx, point in enumerate(
                        timestep['locations'][0][-num_landmarks:]):
                    if time == 0:
                        plt.scatter(
                            *list(point.detach().numpy()),
                            color='dark' + colors[int(game.physical[0, idx, 0].item())],
                            marker=landmark_markers[int(game.physical[0, idx, 1].item())],
                            s=300,
                            alpha=0.75)
                utterances += timestep['utterances'][0].detach().numpy()

            # The exponent controls how much non-frequent symbols are
            # highlighted or suppressed in the display:
            #   pow < 1 brings up low-frequency symbols that were emitted once
            #     and would otherwise be lost in the sum;
            #   pow >= 1 highlights the dominant symbols of the epoch when the
            #     display is too noisy.
            utterances = np.power(
                utterances / utterances.max(axis=1)[..., np.newaxis], 2)
            for agent_idx in range(utterances.shape[0]):
                agent_color = colors[int(game.physical[0, agent_idx, 0].item())]
                # Draw the agent label once per row, not once per symbol.
                plt.text(0,
                         1 + 0.01 + 0.05 * agent_idx,
                         str(agent_idx + 1) + ': ',
                         color=agent_color,
                         transform=ax.transAxes)
                for symbol_idx in range(utterances.shape[1]):
                    plt.text(0.05 + 0.03 * symbol_idx,
                             1 + 0.01 + 0.05 * agent_idx,
                             'ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890'[symbol_idx],
                             alpha=utterances[agent_idx, symbol_idx],
                             color=agent_color,
                             transform=ax.transAxes)

            plt.legend(reversed(agent_legends),
                       reversed([str(i + 1) for i in range(len(agent_legends))]),
                       bbox_to_anchor=(0, 1.15))
            for a in range(game_config.min_agents, game_config.max_agents + 1):
                for l in range(game_config.min_landmarks,
                               game_config.max_landmarks + 1):
                    loss = losses[a][l][-1] if len(losses[a][l]) > 0 else 0
                    min_loss = min(losses[a][l]) if len(losses[a][l]) > 0 else 0
                    plt.text(
                        0,
                        -0.05 - 0.05 * ((a - game_config.min_agents) +
                                        (l - game_config.min_landmarks)),
                        "[epoch %d][%d as, %d ls][last loss: %s][min loss: %s]"
                        % (epoch, a, l, ("%.7f" % loss)[:7],
                           ("%.7f" % min_loss)[:7]),
                        transform=ax.transAxes)
            plt.show()

    # if training_config.save_model:
    #     torch.save(agent, training_config.save_model_file)
    #     print("Saved agent model weights at %s" % training_config.save_model_file)
"""
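# Sketch (assumption, not part of the original script): for headless or batch
# runs, the visualization block above could save its figure instead of calling
# plt.show(). `fig` and `epoch` refer to the names used in that block.
def save_trajectory_figure(fig, epoch):
    import matplotlib.pyplot as plt
    fig.savefig('trajectories_epoch_%04d.png' % epoch, bbox_inches='tight')
    plt.close(fig)  # free the figure so long runs don't accumulate memory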
def main():
    args = vars(parser.parse_args())
    mode = args['mode']
    selfplay = (mode == 'selfplay')
    one_sentence_mode = args['one_sentence_data_set']
    run_default_config = configs.get_run_config(args)
    folder_dir = run_default_config.folder_dir
    agent_config = configs.get_agent_config(args)
    game_config = configs.get_game_config(args)
    utterance_config = configs.get_utterance_config()
    training_config = configs.get_training_config(args, folder_dir)

    corpus = data.WordCorpus('data' + os.sep, freq_cutoff=20, verbose=True)
    agent = AgentModule(agent_config, utterance_config, corpus,
                        run_default_config.creating_data_set_mode,
                        run_default_config.create_utterance_using_old_code)
    utter = Utterance(agent_config.action_processor, utterance_config, corpus,
                      run_default_config.create_utterance_using_old_code)
    # Unless we are training the utterance module, load its pretrained weights.
    if mode != "train_utter":
        folder_dir_fb_model = utterance_config.fb_dir
        with open(folder_dir_fb_model, 'rb') as f:
            utter.load_state_dict(torch.load(f))
    action = ActionModule(agent_config.action_processor, utterance_config,
                          corpus,
                          run_default_config.create_utterance_using_old_code)
    create_data_set = PredefinedUtterancesModule()

    if one_sentence_mode:
        num_agents = np.random.randint(game_config.min_agents,
                                       game_config.max_agents + 1)
        num_landmarks = np.random.randint(game_config.min_landmarks,
                                          game_config.max_landmarks + 1)
        agent.reset()
        game = GameModule(game_config, num_agents, num_landmarks, folder_dir)
        df_utterance = [
            pd.DataFrame(index=range(game.batch_size),
                         columns=agent.df_utterance_col_name,
                         dtype=np.int64) for i in range(game.num_agents)
        ]
        iter = random.randint(0, game.time_horizon)
        df_utterance = create_data_set.generate_sentences(
            game, iter, df_utterance, one_sentence_mode, mode=mode)

    for epoch in range(training_config.num_epochs):
        if not one_sentence_mode:
            num_agents = np.random.randint(game_config.min_agents,
                                           game_config.max_agents + 1)
            num_landmarks = np.random.randint(game_config.min_landmarks,
                                              game_config.max_landmarks + 1)
            agent.reset()
            game = GameModule(game_config, num_agents, num_landmarks,
                              folder_dir)
            df_utterance = [
                pd.DataFrame(index=range(game.batch_size),
                             columns=agent.df_utterance_col_name,
                             dtype=np.int64) for i in range(game.num_agents)
            ]
            iter = random.randint(0, game.time_horizon)
            df_utterance = create_data_set.generate_sentences(
                game, iter, df_utterance, one_sentence_mode, mode=mode)

        # Pick one agent at random and build its processed observation.
        agent_num = random.randint(0, game.num_agents - 1)
        physical_feat = agent.get_physical_feat(game, agent_num)
        mem = Variable(
            torch.zeros(game.batch_size, game.num_agents,
                        game_config.memory_size)[:, agent_num])
        utterance_feat = torch.zeros([game.batch_size, 1, 256],
                                     dtype=torch.float)
        goal = game.observed_goals[:, agent_num]
        processed, mem = action.processed_data(physical_feat, goal, mem,
                                               utterance_feat)

        if selfplay and one_sentence_mode:
            processed = torch.load(args['folder_dir'] + os.sep + 'processed.pt')
        elif not selfplay and one_sentence_mode:
            torch.save(processed, args['folder_dir'] + os.sep + 'processed.pt')

        full_sentence = df_utterance[agent_num]['Full Sentence' + str(iter)]
        if selfplay:
            loss, utterance, _ = utter(processed, full_sentence, epoch=epoch)
            with open(folder_dir + os.sep +
                      "utterance_selfplay_annotation.csv", 'a',
                      newline='') as f:
                for index in range(len(utterance)):
                    f.write(' '.join(
                        corpus.word_dict.i2w(utterance[index].data.cpu())))
                    f.write(" " + 'agent_color' + " " + colors_dict[
                        df_utterance[agent_num]['agent_color'][index]])
                    f.write(" " + 'agent_shape' + " " + shapes_dict[
                        df_utterance[agent_num]['agent_shape'][index]])
                    f.write(" " + 'lm_color' + " " + colors_dict[
                        df_utterance[agent_num]['lm_color'][index]])
                    f.write(" " + 'lm_shape' + " " + shapes_dict[
                        df_utterance[agent_num]['lm_shape'][index]])
                    f.write('\n')
        else:
            loss, utterance, folder_dir = utter(processed, full_sentence,
                                                epoch=epoch)
            with open(folder_dir + os.sep + "utterance_out_fb.csv", 'a',
                      newline='') as f:
                f.write("-----")
                f.write(full_sentence[1])
                f.write("----")
                f.write(colors_dict[df_utterance[agent_num]['agent_color'][1]])
                f.write(" " + str(df_utterance[agent_num]['dist'][1]))
                f.write(" " + str(iter))
                f.write('\n')

    if mode == 'train_utter':
        with open(training_config.save_model_file, 'wb') as f:
            torch.save(utter.state_dict(), f)
        print("Saved agent model weights at %s" %
              training_config.save_model_file)
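# Sketch (assumption, not part of the original script): read back the self-play
# annotation file written above. It is appended as plain space-separated text
# rather than strict CSV, so a line-wise read is the safest approach.
def print_selfplay_annotations(folder_dir):
    with open(folder_dir + os.sep + "utterance_selfplay_annotation.csv") as f:
        for line in f:
            print(line.rstrip())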
def main():
    args = vars(parser.parse_args())
    run_config = configs.get_run_config(args)
    agent_config = configs.get_agent_config(args)
    game_config = configs.get_game_config(args)
    training_config = configs.get_training_config(args, run_config.folder_dir)
    utterance_config = configs.get_utterance_config()
    print("Training with config:")
    print(training_config)
    print(game_config)
    print(agent_config)
    print(run_config)

    # TensorBoard: set where the event files will be saved.
    writer = SummaryWriter(run_config.folder_dir + 'tensorboard' + os.sep)

    agent = AgentModule(agent_config, utterance_config, run_config.corpus,
                        run_config.creating_data_set_mode,
                        run_config.create_utterance_using_old_code)
    if run_config.upload_trained_model:
        folder_dir_trained_model = run_config.dir_upload_model
        agent.load_state_dict(torch.load(folder_dir_trained_model))
        agent.eval()

    if training_config.use_cuda:
        agent.cuda()

    optimizer = RMSprop(agent.parameters(), lr=training_config.learning_rate)
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True, cooldown=5)
    losses = defaultdict(lambda: defaultdict(list))
    dists = defaultdict(lambda: defaultdict(list))

    if args['one_sentence_data_set']:
        num_agents = np.random.randint(game_config.min_agents,
                                       game_config.max_agents + 1)
        num_landmarks = np.random.randint(game_config.min_landmarks,
                                          game_config.max_landmarks + 1)
        agent.reset()
        game_init = GameModule(game_config, num_agents, num_landmarks,
                               run_config.folder_dir)

    for epoch in range(training_config.num_epochs):
        if not args['one_sentence_data_set']:
            num_agents = np.random.randint(game_config.min_agents,
                                           game_config.max_agents + 1)
            num_landmarks = np.random.randint(game_config.min_landmarks,
                                              game_config.max_landmarks + 1)
            agent.reset()
            game = GameModule(game_config, num_agents, num_landmarks,
                              run_config.folder_dir)
        else:
            agent.reset()
            game = game_init
        if training_config.use_cuda:
            game.cuda()

        optimizer.zero_grad()
        total_loss, _ = agent(game)
        # .item() replaces the deprecated .data[0] indexing on 0-dim tensors.
        per_agent_loss = total_loss.item() / num_agents / game_config.batch_size
        losses[num_agents][num_landmarks].append(per_agent_loss)

        # Log per-agent distances to HDF5; the averages go to TensorBoard
        # via print_losses below.
        dist, dist_per_agent = game.get_avg_agent_to_goal_distance()
        dist_per_agent_file_name = run_config.folder_dir + 'dist_from_goal.h5'
        file_mode = 'a' if os.path.isfile(dist_per_agent_file_name) else 'w'
        plot.save_dataset(dist_per_agent_file_name, 'dist_from_goal',
                          dist_per_agent.detach().numpy(), file_mode)

        avg_dist = dist.item() / num_agents / game_config.batch_size
        dists[num_agents][num_landmarks].append(avg_dist)
        print_losses(epoch, losses, dists, game_config, writer)

        # Debugging aid; slows training and can be removed once stable.
        torch.autograd.set_detect_anomaly(True)
        total_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if num_agents == game_config.max_agents and num_landmarks == game_config.max_landmarks:
            scheduler.step(
                losses[game_config.max_agents][game_config.max_landmarks][-1])

    torch.save(agent.state_dict(), training_config.save_model_file)
    print("Saved agent model weights at %s" % training_config.save_model_file)
    writer.close()  # flush and close the TensorBoard event files
"""
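# Sketch (assumption, not part of the original script): read back the per-agent
# distances logged to HDF5 above. This assumes plot.save_dataset writes a
# standard dataset named 'dist_from_goal'; the exact layout is defined by the
# project's plot module.
def load_dist_from_goal(folder_dir):
    import h5py
    import numpy as np
    with h5py.File(folder_dir + 'dist_from_goal.h5', 'r') as f:
        return np.asarray(f['dist_from_goal'])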
import torch

from modules.agent import AgentModule
import configs
from train import parser

args = vars(parser.parse_args())
agent_config = configs.get_agent_config(args)

agent = AgentModule(agent_config)
agent.load_state_dict(
    torch.load(
        r'C:\Users\user\Desktop\emergent-language\2249-08042019\modules_weights.pt'
    ))
agent.eval()

# Print the name and shape of every tensor in the loaded state dict.
for param_tensor in agent.state_dict():
    print(param_tensor, "\t", agent.state_dict()[param_tensor].size())
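# Small extension (not in the original script): report the total number of
# trainable parameters alongside the per-tensor shapes printed above.
num_params = sum(p.numel() for p in agent.parameters() if p.requires_grad)
print("Trainable parameters:", num_params)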