Example 1
def main():
    args = vars(parser.parse_args())
    agent_config = configs.get_agent_config(args)
    game_config = configs.get_game_config(args)
    training_config = configs.get_training_config(args)
    print("Training with config:")
    print(training_config)
    print(game_config)
    print(agent_config)
    agent = AgentModule(agent_config)
    if training_config.use_cuda:
        agent.cuda()
    optimizer = RMSprop(agent.parameters(), lr=training_config.learning_rate)
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True, cooldown=5)
    losses = defaultdict(lambda: defaultdict(list))
    dists = defaultdict(lambda: defaultdict(list))
    for epoch in range(training_config.num_epochs):
        num_agents = np.random.randint(game_config.min_agents,
                                       game_config.max_agents + 1)
        num_landmarks = np.random.randint(game_config.min_landmarks,
                                          game_config.max_landmarks + 1)
        agent.reset()
        game = GameModule(game_config, num_agents, num_landmarks)
        if training_config.use_cuda:
            game.cuda()
        optimizer.zero_grad()

        total_loss, _ = agent(game)
        per_agent_loss = total_loss.item() / num_agents / game_config.batch_size
        losses[num_agents][num_landmarks].append(per_agent_loss)

        dist = game.get_avg_agent_to_goal_distance()
        avg_dist = dist.item() / num_agents / game_config.batch_size
        dists[num_agents][num_landmarks].append(avg_dist)

        print_losses(epoch, losses, dists, game_config)

        total_loss.backward()
        optimizer.step()

        if num_agents == game_config.max_agents and num_landmarks == game_config.max_landmarks:
            scheduler.step(
                losses[game_config.max_agents][game_config.max_landmarks][-1])

    if training_config.save_model:
        torch.save(agent, training_config.save_model_file)
        print("Saved agent model weights at %s" %
              training_config.save_model_file)
    """
def train(kwargs):
    args = defaultdict(lambda: False, kwargs)
    agent_config = configs.get_agent_config(args)
    game_config = configs.get_game_config(args)
    training_config = configs.get_training_config(args)
    agent = AgentModule(agent_config)
    if training_config.use_cuda:
        agent.cuda()
    optimizer = RMSprop(agent.parameters(), lr=training_config.learning_rate)
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True, cooldown=5)
    losses = defaultdict(lambda: defaultdict(list))
    dists = defaultdict(lambda: defaultdict(list))

    run_logs = defaultdict(lambda: list())

    # for epoch in tqdm(range(training_config.num_epochs), desc='Epochs'):
    for epoch in range(training_config.num_epochs):
        epoch_logs = train_one_epoch(epoch, agent_config, game_config,
                                     training_config, agent, optimizer,
                                     scheduler, losses, dists)
        for name, values in epoch_logs.items():
            run_logs[name].append(values)
    return run_logs
Example 3
def main():
    args = vars(parser.parse_args())
    agent_config = configs.get_agent_config(args)
    game_config = configs.get_game_config(args)
    training_config = configs.get_training_config(args)
    print("Training with config:")
    print(training_config)
    print(game_config)
    print(agent_config)
    agent = AgentModule(agent_config)
    if training_config.use_cuda:
        agent.cuda()
    optimizer = RMSprop(agent.parameters(), lr=training_config.learning_rate)
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True, cooldown=5)
    losses = defaultdict(lambda: defaultdict(list))
    dists = defaultdict(lambda: defaultdict(list))
    for epoch in range(training_config.num_epochs):
        num_agents = np.random.randint(game_config.min_agents,
                                       game_config.max_agents + 1)
        num_landmarks = np.random.randint(game_config.min_landmarks,
                                          game_config.max_landmarks + 1)
        agent.reset()
        game = GameModule(game_config, num_agents, num_landmarks)
        if training_config.use_cuda:
            game.cuda()
        optimizer.zero_grad()

        total_loss, timesteps = agent(game)
        per_agent_loss = total_loss.item() / num_agents / game_config.batch_size
        losses[num_agents][num_landmarks].append(per_agent_loss)

        dist = game.get_avg_agent_to_goal_distance()
        avg_dist = dist.data / num_agents / game_config.batch_size
        dists[num_agents][num_landmarks].append(avg_dist)

        print_losses(epoch, losses, dists, game_config)
        # print("total loss:", total_loss.detach().numpy()[0])

        total_loss.backward()
        optimizer.step()

        if num_agents == game_config.max_agents and num_landmarks == game_config.max_landmarks:
            scheduler.step(
                losses[game_config.max_agents][game_config.max_landmarks][-1])
        '''
        This visualizes the trajectories of agents (circles) and target locations (crosses).
        It also displays communication symbol usage: the alpha channel of a letter represents
        how much the agent used the i-th symbol during the epoch (at each step,
        communication is a [1, 20] float vector). These vectors are summed over all steps;
        a standalone toy sketch of the display rescaling appears after this example.
        '''
        if epoch < 3 or epoch > training_config.num_epochs - 3:
            import matplotlib.pyplot as plt
            fig, ax = plt.subplots()
            ax.set_xticks([])
            ax.set_yticks([])
            colors = ['red', 'green', 'blue']
            agent_markers = ['o', '^']
            landmark_markers = ['P', '*']
            utterances = np.zeros_like(timesteps[0]['utterances'][0].detach())
            for time, timestep in enumerate(timesteps):
                agent_legends = []
                for idx, point in enumerate(
                        timestep['locations'][0][:num_agents]):
                    agent_legends.append(
                        plt.scatter(
                            *list(point.detach().numpy()),
                            color=colors[int(game.physical[0, idx, 0].item())],
                            marker=agent_markers[int(game.physical[0, idx,
                                                                   1].item())],
                            s=20,
                            alpha=0.75))
                for idx, point in enumerate(
                        timestep['locations'][0][-num_landmarks:]):
                    if time == 0:
                        plt.scatter(
                            *list(point.detach().numpy()),
                            color='dark' +
                            colors[int(game.physical[0, idx, 0].item())],
                            marker=landmark_markers[int(
                                game.physical[0, idx, 1].item())],
                            s=300,
                            alpha=0.75)
                utterances += timestep['utterances'][0].detach().numpy()
            # this controls how much we highlight or suppress non-frequent symbols when displaying
            # pow < 1 helps bring out low-frequency symbols that were emitted once and get lost in the sum
            # pow >= 1 can highlight the dominant symbols of the epoch if the display is too noisy
            utterances = np.power(
                utterances / utterances.max(axis=1)[..., np.newaxis], 2)
            for agent_idx in range(utterances.shape[0]):
                for symbol_idx in range(utterances.shape[1]):
                    plt.text(0,
                             1 + 0.01 + 0.05 * agent_idx,
                             str(agent_idx + 1) + ': ',
                             color=colors[int(game.physical[0, agent_idx,
                                                            0].item())],
                             transform=ax.transAxes)
                    plt.text(0.05 + 0.03 * symbol_idx,
                             1 + 0.01 + 0.05 * agent_idx,
                             'ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890'[symbol_idx],
                             alpha=utterances[agent_idx, symbol_idx],
                             color=colors[int(game.physical[0, agent_idx,
                                                            0].item())],
                             transform=ax.transAxes)
            plt.legend(reversed(agent_legends),
                       reversed(
                           [str(i + 1) for i in range(len(agent_legends))]),
                       bbox_to_anchor=(0, 1.15))
            for a in range(game_config.min_agents, game_config.max_agents + 1):
                for l in range(game_config.min_landmarks,
                               game_config.max_landmarks + 1):
                    loss = losses[a][l][-1] if len(losses[a][l]) > 0 else 0
                    min_loss = min(
                        losses[a][l]) if len(losses[a][l]) > 0 else 0
                    plt.text(
                        0,
                        -0.05 - 0.05 * ((a - game_config.min_agents) +
                                        (l - game_config.min_landmarks)),
                        "[epoch %d][%d as, %d ls][last loss: %s][min loss: %s]"
                        % (epoch, a, l, ("%.7f" % loss)[:7],
                           ("%.7f" % min_loss)[:7]),
                        transform=ax.transAxes)
            plt.show()

    # if training_config.save_model:
    #     torch.save(agent, training_config.save_model_file)
    #     print("Saved agent model weights at %s" % training_config.save_model_file)
    """
Example 4
def main():
    args = vars(parser.parse_args())
    mode = args['mode']
    if mode == 'selfplay':
        selfplay = True
    else:
        selfplay = False
    one_sentence_mode = args['one_sentence_data_set']
    run_default_config = configs.get_run_config(args)
    folder_dir = run_default_config.folder_dir
    agent_config = configs.get_agent_config(args)
    game_config = configs.get_game_config(args)
    utterance_config = configs.get_utterance_config()
    training_config = configs.get_training_config(args, folder_dir)
    corpus = data.WordCorpus('data' + os.sep, freq_cutoff=20, verbose=True)
    agent = AgentModule(agent_config, utterance_config, corpus,
                        run_default_config.creating_data_set_mode,
                        run_default_config.create_utterance_using_old_code)
    utter = Utterance(agent_config.action_processor, utterance_config, corpus,
                      run_default_config.create_utterance_using_old_code)
    if not mode == "train_utter":
        folder_dir_fb_model = utterance_config.fb_dir
        with open(folder_dir_fb_model, 'rb') as f:
            utter.load_state_dict(torch.load(f))
    action = ActionModule(agent_config.action_processor, utterance_config,
                          corpus,
                          run_default_config.create_utterance_using_old_code)
    create_data_set = PredefinedUtterancesModule()
    if one_sentence_mode:
        num_agents = np.random.randint(game_config.min_agents,
                                       game_config.max_agents + 1)
        num_landmarks = np.random.randint(game_config.min_landmarks,
                                          game_config.max_landmarks + 1)
        agent.reset()
        game = GameModule(game_config, num_agents, num_landmarks, folder_dir)
        df_utterance = [
            pd.DataFrame(index=range(game.batch_size),
                         columns=agent.df_utterance_col_name,
                         dtype=np.int64) for i in range(game.num_agents)
        ]
        iter = random.randint(0, game.time_horizon)
        df_utterance = create_data_set.generate_sentences(game,
                                                          iter,
                                                          df_utterance,
                                                          one_sentence_mode,
                                                          mode=mode)
    for epoch in range(training_config.num_epochs):
        if not one_sentence_mode:
            num_agents = np.random.randint(game_config.min_agents,
                                           game_config.max_agents + 1)
            num_landmarks = np.random.randint(game_config.min_landmarks,
                                              game_config.max_landmarks + 1)
            agent.reset()
            game = GameModule(game_config, num_agents, num_landmarks,
                              folder_dir)
            df_utterance = [
                pd.DataFrame(index=range(game.batch_size),
                             columns=agent.df_utterance_col_name,
                             dtype=np.int64) for i in range(game.num_agents)
            ]
            iter = random.randint(0, game.time_horizon)
            df_utterance = create_data_set.generate_sentences(
                game, iter, df_utterance, one_sentence_mode, mode=mode)
        agent_num = random.randint(0, game.num_agents - 1)
        physical_feat = agent.get_physical_feat(game, agent_num)
        mem = Variable(
            torch.zeros(game.batch_size, game.num_agents,
                        game_config.memory_size)[:, agent_num])
        utterance_feat = torch.zeros([game.batch_size, 1, 256],
                                     dtype=torch.float)
        goal = game.observed_goals[:, agent_num]
        processed, mem = action.processed_data(physical_feat, goal, mem,
                                               utterance_feat)
        if selfplay and one_sentence_mode:
            processed = torch.load(args['folder_dir'] + os.sep +
                                   'processed.pt')
        elif not selfplay and one_sentence_mode:
            torch.save(processed, args['folder_dir'] + os.sep + 'processed.pt')
        full_sentence = df_utterance[agent_num]['Full Sentence' + str(iter)]

        if selfplay:
            loss, utterance, _ = utter(processed, full_sentence, epoch=epoch)
            with open(folder_dir + os.sep +
                      "utterance_selfplay_annotation.csv",
                      'a',
                      newline='') as f:
                for index in range(len(utterance)):
                    f.write(' '.join(
                        corpus.word_dict.i2w(utterance[index].data.cpu())))
                    f.write(" " + 'agent_color' + " " + colors_dict[
                        df_utterance[agent_num]['agent_color'][index]])
                    f.write(" " + 'agent_shape' + " " + shapes_dict[
                        df_utterance[agent_num]['agent_shape'][index]])
                    f.write(" " + 'lm_color' + " " + colors_dict[
                        df_utterance[agent_num]['lm_color'][index]])
                    f.write(" " + 'lm_shape' + " " + shapes_dict[
                        df_utterance[agent_num]['lm_shape'][index]])
                    f.write('\n')
        else:
            loss, utterance, folder_dir = utter(processed,
                                                full_sentence,
                                                epoch=epoch)
            with open(folder_dir + os.sep + "utterance_out_fb.csv",
                      'a',
                      newline='') as f:
                f.write("-----")
                f.write(full_sentence[1])
                f.write("----")
                f.write(colors_dict[df_utterance[agent_num]['agent_color'][1]])
                f.write(" " + str(df_utterance[agent_num]['dist'][1]))
                f.write(" " + str(iter))
                f.write('\n')
    if mode == 'train_utter':
        with open(training_config.save_model_file, 'wb') as f:
            torch.save(utter.state_dict(), f)
    print("Saved agent model weights at %s" % training_config.save_model_file)
Example 5
def main():
    args = vars(parser.parse_args())
    run_config = configs.get_run_config(args)
    agent_config = configs.get_agent_config(args)
    game_config = configs.get_game_config(args)
    training_config = configs.get_training_config(args, run_config.folder_dir)
    utterance_config = configs.get_utterance_config()
    print("Training with config:")
    print(training_config)
    print(game_config)
    print(agent_config)
    print(run_config)
    writer = SummaryWriter(
        run_config.folder_dir + 'tensorboard' +
        os.sep)  # TensorBoard: directory where the event log files are written
    agent = AgentModule(agent_config, utterance_config, run_config.corpus,
                        run_config.creating_data_set_mode,
                        run_config.create_utterance_using_old_code)
    if run_config.upload_trained_model:
        folder_dir_trained_model = run_config.dir_upload_model
        agent.load_state_dict(torch.load(folder_dir_trained_model))
        agent.eval()
    else:
        pass
    if training_config.use_cuda:
        agent.cuda()
    optimizer = RMSprop(agent.parameters(), lr=training_config.learning_rate)
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True, cooldown=5)
    losses = defaultdict(lambda: defaultdict(list))
    dists = defaultdict(lambda: defaultdict(list))
    if args['one_sentence_data_set']:
        num_agents = np.random.randint(game_config.min_agents,
                                       game_config.max_agents + 1)
        num_landmarks = np.random.randint(game_config.min_landmarks,
                                          game_config.max_landmarks + 1)
        agent.reset()
        game_init = GameModule(game_config, num_agents, num_landmarks,
                               run_config.folder_dir)

    for epoch in range(training_config.num_epochs):
        if args['one_sentence_data_set'] == False:
            num_agents = np.random.randint(game_config.min_agents,
                                           game_config.max_agents + 1)
            num_landmarks = np.random.randint(game_config.min_landmarks,
                                              game_config.max_landmarks + 1)
            agent.reset()
            game = GameModule(game_config, num_agents, num_landmarks,
                              run_config.folder_dir)
        else:
            agent.reset()
            game = game_init
        if training_config.use_cuda:
            game.cuda()
        optimizer.zero_grad()

        total_loss, _ = agent(game)
        per_agent_loss = total_loss.item() / num_agents / game_config.batch_size
        losses[num_agents][num_landmarks].append(per_agent_loss)

        dist, dist_per_agent = game.get_avg_agent_to_goal_distance(
        )  #add to tensorboard
        dist_per_agent_file_name = run_config.folder_dir + 'dist_from_goal.h5'
        if os.path.isfile(dist_per_agent_file_name):
            plot.save_dataset(dist_per_agent_file_name, 'dist_from_goal',
                              dist_per_agent.detach().numpy(), 'a')
        else:
            plot.save_dataset(dist_per_agent_file_name, 'dist_from_goal',
                              dist_per_agent.detach().numpy(), 'w')

        avg_dist = dist.data.item() / num_agents / game_config.batch_size
        dists[num_agents][num_landmarks].append(avg_dist)

        print_losses(epoch, losses, dists, game_config, writer)
        torch.autograd.set_detect_anomaly(True)
        total_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if num_agents == game_config.max_agents and num_landmarks == game_config.max_landmarks:
            scheduler.step(
                losses[game_config.max_agents][game_config.max_landmarks][-1])

    torch.save(agent.state_dict(), training_config.save_model_file)
    print("Saved agent model weights at %s" % training_config.save_model_file)
    writer.close()  # close the TensorBoard event files
    """
Example 6
import torch
from modules.agent import AgentModule

import configs
from train import parser

args = vars(parser.parse_args())
agent_config = configs.get_agent_config(args)
agent = AgentModule(agent_config)

agent.load_state_dict(
    torch.load(
        r'C:\Users\user\Desktop\emergent-language\2249-08042019\modules_weights.pt'
    ))
agent.eval()

for param_tensor in agent.state_dict():
    print(param_tensor, "\t", agent.state_dict()[param_tensor].size())