Esempio n. 1
0
def agent_train():
    """Train a PPO agent on the environment named by module-level `env_name`.

    Relies on module-level configuration globals: `lr`, `betas`, `gamma`,
    `K_epochs`, `eps_clip`, `device`, `max_episodes`, `update_timestep`,
    `log_interval`, `render`.  Periodically logs averaged episode length /
    reward and checkpoints the policy every 500 episodes.
    """
    env = gym.make(env_name)

    agent = PPO(env.observation_space.shape, env.action_space.n, lr, betas,
                gamma, K_epochs, eps_clip, device)
    memory = Memory()

    # Logging accumulators.
    running_reward = 0
    avg_length = 0
    timestep = 0

    for i_episode in range(1, max_episodes + 1):
        state = env.reset()
        state = preProcess(state)
        # BUG FIX: `done` must be reset every episode.  Previously it was set
        # once before the episode loop, so after the first episode finished
        # the inner while-loop never executed again.
        done = False
        t = 0
        while not done:
            t += 1
            timestep += 1

            # Running policy_old (BUG FIX: attribute was misspelled
            # `polciy_old`, which raises AttributeError at runtime).
            action = agent.policy_old.act(state, memory)
            state, reward, done, _ = env.step(action)
            state = preProcess(state)

            # Save reward for the next PPO update.
            memory.rewards.append(reward)

            # Update the policy every `update_timestep` environment steps.
            if timestep % update_timestep == 0:
                agent.update(memory)
                memory.clear_memory()
                timestep = 0

            running_reward += reward
            if render:
                env.render()

        avg_length += t

        # Log averages every `log_interval` episodes, then reset accumulators.
        if i_episode % log_interval == 0:
            avg_length = int(avg_length / log_interval)
            running_reward = int((running_reward / log_interval))

            print('Episode {} \t avg length: {} \t reward: {}'.format(
                i_episode, avg_length, running_reward))
            running_reward = 0
            avg_length = 0

        # Checkpoint the policy every 500 episodes.
        if i_episode % 500 == 0:
            torch.save(agent.policy.state_dict(),
                       './PPO_{}.pth'.format(env_name))
Esempio n. 2
0
    def run_assistant(self, name):
        """Hide the root window, greet the user, and loop on voice commands
        until the user says "salir", at which point the UI is destroyed."""
        self.__root.withdraw()
        Memory.Memory()
        self.__controller.set_name(name)
        self.__controller.speak("Hola, " + name + ". ¿Qué puedo hacer por ti?")

        while True:
            data = self.__controller.record_audio()
            if data == "salir":
                # Exit command: tear down the UI and stop listening.
                self.__root.destroy()
                break
            if data != "":
                # Non-empty utterance: dispatch it to the assistant.
                self.__controller.assistant(data)
Esempio n. 3
0
def knowledge_celebrity_test(context_text_encoder: TextEncoder,
                             context_image_encoder: ImageEncoder,
                             context_encoder: ContextEncoder,
                             to_hidden: ToHidden, celebrity_memory: Memory,
                             text_decoder: TextDecoder, test_dataset: Dataset,
                             celebrity_scores, text_length: int,
                             vocab: Dict[str, int]):
    """Knowledge celebrity test.

    Runs the celebrity-knowledge text-generation pipeline over the full test
    dataset in eval mode and writes the decoded text to
    'knowledge_celebrity.out'.  NOTE: the models are left in eval mode on
    return (no matching `.train()` calls, unlike the valid routine).

    Args:
        context_text_encoder (TextEncoder): Context text encoder.
        context_image_encoder (ImageEncoder): Context image encoder.
        context_encoder (ContextEncoder): Context encoder.
        to_hidden (ToHidden): Context to hidden.
        celebrity_memory (Memory): Celebrity Memory.
        text_decoder (TextDecoder): Text decoder.
        test_dataset (Dataset): Valid dataset.
        celebrity_scores: Celebrity scores.
        text_length (int): Text length.
        vocab (Dict[str, int]): Vocabulary.

    """

    # Invert the vocabulary (word -> id) into an id -> word table for
    # decoding model output back into text.
    id2word: List[str] = [None] * len(vocab)
    for word, wid in vocab.items():
        id2word[wid] = word

    # Test dataset loader.
    test_data_loader = DataLoader(
        test_dataset,
        batch_size=KnowledgeCelebrityTestConfig.batch_size,
        num_workers=KnowledgeCelebrityTestConfig.num_data_loader_workers)

    # NOTE(review): `sum_loss` is never updated or returned in this function;
    # it appears to be vestigial (copied from the valid routine).
    sum_loss = 0

    # Switch to eval mode.
    context_text_encoder.eval()
    context_image_encoder.eval()
    context_encoder.eval()
    to_hidden.eval()
    celebrity_memory.eval()
    text_decoder.eval()

    # Decoded outputs are appended here by `text_eval`; closed at the end.
    output_file = open('knowledge_celebrity.out', 'w')

    with torch.no_grad():
        for batch_id, test_data in enumerate(test_data_loader):
            texts, text_lengths, images, utter_types = test_data
            # Sizes:
            # texts: (batch_size, dialog_context_size + 1, dialog_text_max_len)
            # text_lengths: (batch_size, dialog_context_size + 1)
            # images: (batch_size, dialog_context_size + 1,
            #          pos_images_max_num, 3, image_size, image_size)
            # utter_types: (batch_size, )

            # To device.
            texts = texts.to(GlobalConfig.device)
            text_lengths = text_lengths.to(GlobalConfig.device)
            images = images.to(GlobalConfig.device)

            # In-place transposes put the dialog-context dimension first so
            # the encoders can iterate over turns.
            texts.transpose_(0, 1)
            # (dialog_context_size + 1, batch_size, dialog_text_max_len)

            text_lengths.transpose_(0, 1)
            # (dialog_context_size + 1, batch_size)

            images.transpose_(0, 1)
            images.transpose_(1, 2)
            # (dialog_context_size + 1, pos_images_max_num, batch_size, 3,
            #  image_size, image_size)

            # Encode context.
            context, hiddens = encode_context(context_text_encoder,
                                              context_image_encoder,
                                              context_encoder, texts,
                                              text_lengths, images)
            # (batch_size, context_vector_size)

            # Bind the celebrity scores as the fixed knowledge entry of the
            # memory module; the decoder calls the resulting partial.
            knowledge_entry = celebrity_scores
            encode_knowledge_func = partial(celebrity_memory, knowledge_entry)

            # texts[-1] is the final (target) utterance of the dialog.
            text_eval(to_hidden,
                      text_decoder,
                      text_length,
                      id2word,
                      context,
                      texts[-1],
                      hiddens,
                      encode_knowledge_func,
                      output_file=output_file)

    output_file.close()
def knowledge_celebrity_valid(
        context_text_encoder: TextEncoder, context_image_encoder: ImageEncoder,
        context_encoder: ContextEncoder, to_hidden: ToHidden,
        celebrity_memory: Memory, text_decoder: TextDecoder,
        valid_dataset: Dataset, celebrity_scores, text_length: int):
    """Knowledge celebrity valid.

    Evaluates at most `KnowledgeCelebrityValidConfig.num_batches` batches in
    eval mode, then restores all models to train mode.

    Args:
        context_text_encoder (TextEncoder): Context text encoder.
        context_image_encoder (ImageEncoder): Context image encoder.
        context_encoder (ContextEncoder): Context encoder.
        to_hidden (ToHidden): Context to hidden.
        celebrity_memory (Memory): Celebrity Memory.
        text_decoder (TextDecoder): Text decoder.
        valid_dataset (Dataset): Valid dataset.
        celebrity_scores: Celebrity scores.
        text_length (int): Text length.

    Returns:
        Average per-batch loss (each batch loss is normalized by
        `text_length` before summing).

    """

    # Valid dataset loader.
    valid_data_loader = DataLoader(
        valid_dataset,
        batch_size=KnowledgeCelebrityValidConfig.batch_size,
        shuffle=True,
        num_workers=KnowledgeCelebrityValidConfig.num_data_loader_workers)

    sum_loss = 0
    num_batches = 0

    # Switch to eval mode.
    context_text_encoder.eval()
    context_image_encoder.eval()
    context_encoder.eval()
    to_hidden.eval()
    celebrity_memory.eval()
    text_decoder.eval()

    with torch.no_grad():
        for batch_id, valid_data in enumerate(valid_data_loader):
            # Only valid `ValidConfig.num_batches` batches.
            if batch_id >= KnowledgeCelebrityValidConfig.num_batches:
                break

            num_batches += 1

            texts, text_lengths, images, utter_types = valid_data
            # Sizes:
            # texts: (batch_size, dialog_context_size + 1, dialog_text_max_len)
            # text_lengths: (batch_size, dialog_context_size + 1)
            # images: (batch_size, dialog_context_size + 1,
            #          pos_images_max_num, 3, image_size, image_size)
            # utter_types: (batch_size, )

            # To device.
            texts = texts.to(GlobalConfig.device)
            text_lengths = text_lengths.to(GlobalConfig.device)
            images = images.to(GlobalConfig.device)
            # utter_types = utter_types.to(GlobalConfig.device)

            # In-place transposes put the dialog-context dimension first so
            # the encoders can iterate over turns.
            texts.transpose_(0, 1)
            # (dialog_context_size + 1, batch_size, dialog_text_max_len)

            text_lengths.transpose_(0, 1)
            # (dialog_context_size + 1, batch_size)

            images.transpose_(0, 1)
            images.transpose_(1, 2)
            # (dialog_context_size + 1, pos_images_max_num, batch_size, 3,
            #  image_size, image_size)

            # Encode context.
            context, hiddens = encode_context(context_text_encoder,
                                              context_image_encoder,
                                              context_encoder, texts,
                                              text_lengths, images)
            # (batch_size, context_vector_size)

            # Bind the celebrity scores as the fixed knowledge entry of the
            # memory module; the loss routine calls the resulting partial.
            knowledge_entry = celebrity_scores
            encode_knowledge_func = partial(celebrity_memory, knowledge_entry)

            # texts[-1] / text_lengths[-1] are the final (target) utterance.
            # NOTE(review): `n_totals` is unused here.
            loss, n_totals = text_loss(to_hidden, text_decoder, text_length,
                                       context, texts[-1], text_lengths[-1],
                                       hiddens, encode_knowledge_func)
            sum_loss += loss / text_length

    # Switch to train mode.
    context_text_encoder.train()
    context_image_encoder.train()
    context_encoder.train()
    to_hidden.train()
    celebrity_memory.train()
    text_decoder.train()

    # NOTE(review): raises ZeroDivisionError if the loader yields no batches.
    return sum_loss / num_batches
Esempio n. 5
0
def knowledge_styletip_train(context_text_encoder: TextEncoder,
                             context_image_encoder: ImageEncoder,
                             context_encoder: ContextEncoder,
                             train_dataset: Dataset,
                             valid_dataset: Dataset,
                             test_dataset: Dataset,
                             model_file: str,
                             styletips_data: StyleTipsData,
                             vocab: Dict[str, int],
                             embed_init=None):
    """Knowledge styletip train.

    Builds the style-tip-specific modules (graph encoder, memory, decoder),
    optionally resumes from `model_file`, then trains with periodic logging,
    validation, best-model checkpointing, and patience-based early stopping
    (which triggers a final test run).

    Args:
        context_text_encoder (TextEncoder): Context text encoder.
        context_image_encoder (ImageEncoder): Context image encoder.
        context_encoder (ContextEncoder): Context encoder.
        train_dataset (Dataset): Train dataset.
        valid_dataset (Dataset): Valid dataset.
        test_dataset (Dataset): Test dataset.
        model_file (str): Saved model file.
        styletips_data (StyleTipsData): Style tips data.
        vocab (Dict[str, int]): Vocabulary.
        embed_init: Initial embedding (vocab_size, embed_size).

    """

    # Data loader.
    train_data_loader = DataLoader(
        dataset=train_dataset,
        batch_size=KnowledgeStyletipTrainConfig.batch_size,
        shuffle=True,
        num_workers=KnowledgeStyletipTrainConfig.num_data_loader_workers)

    # Stack the (x, y) style-tip graph edges into a single tensor of edge
    # pairs and move it to the device once, up front.
    styletip_edges = torch.stack(
        [torch.tensor([x, y]) for x, y in styletips_data.edges])
    styletip_edges = styletip_edges.to(GlobalConfig.device)

    # Model.
    vocab_size = len(vocab)

    graph_encoder_config = GraphEncoderConfig(len(styletips_data.vocab))
    styletip_memory_config = StyletipMemoryConfig(len(styletips_data.edges))
    text_decoder_config = KnowledgeTextDecoderConfig(vocab_size,
                                                     MemoryConfig.memory_size,
                                                     MemoryConfig.output_size,
                                                     embed_init)

    to_hidden = ToHidden(text_decoder_config)
    to_hidden = to_hidden.to(GlobalConfig.device)

    graph_encoder = GraphEncoder(graph_encoder_config)
    graph_encoder = graph_encoder.to(GlobalConfig.device)

    styletip_memory = Memory(styletip_memory_config)
    styletip_memory = styletip_memory.to(GlobalConfig.device)

    text_decoder = TextDecoder(text_decoder_config)
    text_decoder = text_decoder.to(GlobalConfig.device)

    # Model parameters: flatten the parameter lists of every trained module
    # into one list for a single optimizer.
    params = list(
        chain.from_iterable([
            list(model.parameters()) for model in [
                context_text_encoder, context_image_encoder, context_encoder,
                to_hidden, graph_encoder, styletip_memory, text_decoder
            ]
        ]))

    optimizer = Adam(params, lr=KnowledgeStyletipTrainConfig.learning_rate)
    epoch_id = 0
    min_valid_loss = None

    # Load saved state (resume training from a previous checkpoint).
    # NOTE(review): the context encoders' states are saved below but not
    # restored here — confirm whether they are loaded elsewhere.
    if isfile(model_file):
        state = torch.load(model_file)
        to_hidden.load_state_dict(state['to_hidden'])
        graph_encoder.load_state_dict(state['graph_encoder'])
        styletip_memory.load_state_dict(state['styletip_memory'])
        text_decoder.load_state_dict(state['text_decoder'])
        optimizer.load_state_dict(state['optimizer'])
        epoch_id = state['epoch_id']
        min_valid_loss = state['min_valid_loss']

    # Loss.
    sum_loss = 0
    # Consecutive validations without improvement (for early stopping).
    bad_loss_cnt = 0

    # Switch to train mode.
    context_text_encoder.train()
    context_image_encoder.train()
    context_encoder.train()
    to_hidden.train()
    graph_encoder.train()
    styletip_memory.train()
    text_decoder.train()

    # Set by the early-stopping branch to break out of the epoch loop.
    finished = False

    for epoch_id in range(epoch_id,
                          KnowledgeStyletipTrainConfig.num_iterations):
        for batch_id, train_data in enumerate(train_data_loader):
            # Set gradients to 0.
            optimizer.zero_grad()

            texts, text_lengths, images, utter_types = train_data
            # Sizes:
            # texts: (batch_size, dialog_context_size + 1, dialog_text_max_len)
            # text_lengths: (batch_size, dialog_context_size + 1)
            # images: (batch_size, dialog_context_size + 1,
            #          pos_images_max_num, 3, image_size, image_size)
            # utter_types: (batch_size, )

            # To device.

            texts = texts.to(GlobalConfig.device)
            text_lengths = text_lengths.to(GlobalConfig.device)
            images = images.to(GlobalConfig.device)
            utter_types = utter_types.to(GlobalConfig.device)

            # In-place transposes put the dialog-context dimension first so
            # the encoders can iterate over turns.
            texts.transpose_(0, 1)
            # (dialog_context_size + 1, batch_size, dialog_text_max_len)

            text_lengths.transpose_(0, 1)
            # (dialog_context_size + 1, batch_size)

            images.transpose_(0, 1)
            images.transpose_(1, 2)
            # (dialog_context_size + 1, pos_images_max_num, batch_size, 3,
            #  image_size, image_size)

            # Encode context.
            context, hiddens = encode_context(context_text_encoder,
                                              context_image_encoder,
                                              context_encoder, texts,
                                              text_lengths, images)
            # (batch_size, context_vector_size)

            # Encode the style-tip graph and bind the result as the memory's
            # knowledge entry; the loss routine calls the resulting partial.
            knowledge_entry = graph_encoder(styletip_edges)
            encode_knowledge_func = partial(styletip_memory, knowledge_entry)

            # texts[-1] / text_lengths[-1] are the final (target) utterance.
            # NOTE(review): `n_totals` is unused here.
            loss, n_totals = text_loss(to_hidden, text_decoder,
                                       text_decoder_config.text_length,
                                       context, texts[-1], text_lengths[-1],
                                       hiddens, encode_knowledge_func)
            sum_loss += loss / text_decoder_config.text_length

            loss.backward()
            optimizer.step()

            # Print loss every `TrainConfig.print_freq` batches.
            if (batch_id + 1) % KnowledgeStyletipTrainConfig.print_freq == 0:
                cur_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                sum_loss /= KnowledgeStyletipTrainConfig.print_freq
                print('epoch: {} \tbatch: {} \tloss: {} \ttime: {}'.format(
                    epoch_id + 1, batch_id + 1, sum_loss, cur_time))
                sum_loss = 0

            # Valid every `TrainConfig.valid_freq` batches.
            if (batch_id + 1) % KnowledgeStyletipTrainConfig.valid_freq == 0:
                valid_loss = knowledge_styletip_valid(
                    context_text_encoder, context_image_encoder,
                    context_encoder, to_hidden, graph_encoder, styletip_memory,
                    text_decoder, valid_dataset, styletip_edges,
                    text_decoder_config.text_length)
                cur_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                print('valid_loss: {} \ttime: {}'.format(valid_loss, cur_time))

                # Save current best model.
                if min_valid_loss is None or valid_loss < min_valid_loss:
                    min_valid_loss = valid_loss
                    bad_loss_cnt = 0
                    save_dict = {
                        'task': KNOWLEDGE_STYLETIP_SUBTASK,
                        'epoch_id': epoch_id,
                        'min_valid_loss': min_valid_loss,
                        'optimizer': optimizer.state_dict(),
                        'context_text_encoder':
                        context_text_encoder.state_dict(),
                        'context_image_encoder':
                        context_image_encoder.state_dict(),
                        'context_encoder': context_encoder.state_dict(),
                        'to_hidden': to_hidden.state_dict(),
                        'graph_encoder': graph_encoder.state_dict(),
                        'styletip_memory': styletip_memory.state_dict(),
                        'text_decoder': text_decoder.state_dict()
                    }
                    torch.save(save_dict, model_file)
                    print('Best model saved.')
                else:
                    # Early stopping: after `patience` consecutive
                    # non-improving validations, run the final test and stop.
                    bad_loss_cnt += 1
                    if bad_loss_cnt > KnowledgeStyletipTrainConfig.patience:
                        knowledge_styletip_test(
                            context_text_encoder, context_image_encoder,
                            context_encoder, to_hidden, graph_encoder,
                            styletip_memory, text_decoder, test_dataset,
                            styletip_edges, text_decoder_config.text_length,
                            vocab)
                        finished = True
                        break
        if finished:
            break
Esempio n. 6
0
 def __init__(self):
     """Create the assistant state: a fresh Memory and an empty user name."""
     self._memory = Memory.Memory()
     self.__name = ""
Esempio n. 7
0
from model import Model, Memory, GameRunner
import gym
import matplotlib.pyplot as plt  # BUG FIX: `plt` was used below but never imported
import tensorflow as tf

if __name__ == "__main__":
    # Train a DQN-style agent on MountainCar-v0 and plot the results.
    env_name = 'MountainCar-v0'
    env = gym.make(env_name)

    num_states = env.env.observation_space.shape[0]
    num_actions = env.env.action_space.n

    BATCH_SIZE = 100

    # BUG FIX: the epsilon-greedy schedule constants were referenced below
    # but never defined.  Values follow the common MountainCar DQN setup —
    # TODO confirm against the GameRunner implementation.
    MAX_EPSILON = 1.0    # initial exploration rate
    MIN_EPSILON = 0.01   # floor of the exploration rate
    LAMBDA = 0.0001      # exponential decay rate of epsilon

    model = Model(num_states, num_actions, BATCH_SIZE)
    mem = Memory(50000)

    with tf.Session() as sess:
        sess.run(model.var_init)
        gr = GameRunner(sess, model, env, mem, MAX_EPSILON, MIN_EPSILON,
                        LAMBDA)
        num_episodes = 300
        cnt = 0
        while cnt < num_episodes:
            if cnt % 10 == 0:
                print('Episode {} of {}'.format(cnt + 1, num_episodes))
            gr.run()
            cnt += 1
        # Per-episode total reward.
        plt.plot(gr.reward_store)
        plt.show()
        plt.close("all")
        # Per-episode maximum x-position reached.
        plt.plot(gr.max_x_store)
        plt.show()  # BUG FIX: the final plot was created but never displayed
Esempio n. 8
0
            ), False)
        p4.eval()
        print('model loaded')

    treesearcher = Robot("tree_searcher")
    treesearcher.add_soul(p4)
    treesearcher.set_device(device)
    Greed = Robot("MrGreed")
    IF = Robot("MrIf")
    #robot = [treesearcher, Greed, Greed, Greed]
    robot = [treesearcher, IF, IF, IF]
    tser_ct = 0
    for single_rbt in robot:
        if single_rbt.name == "tree_searcher":
            tser_ct += 1
    mbuffer = Memory(13 * tser_ct * (game_batch))

    R18_instance = Robot("R18")
    R18_instance.worker.device = device

    #imaginary_robot = [Greed, Greed, Greed, Greed]
    imaginary_robot = [IF, IF, IF, IF]
    treesearcher.set_imaginary_robot_list(imaginary_robot)

    #vis = visdom.Visdom()
    #gainline = vis.line(X=np.array([0]), Y=np.array([0]), opts=dict(showlegend=True, title='Gain over Mr.if'))
    gain_vec = []
    #game.save_model(robot_v4, prophet_v4, "robot-net.txt", "prophet-net.txt")

    for i in range(1000):
        trainingv = np.zeros((game_batch - 1, 4))
Esempio n. 9
0
def memory(memory_size, memory_feature_size):
    """Factory that builds a Memory with the given size and feature size."""
    instance = Memory(memory_size, memory_feature_size)
    return instance
Esempio n. 10
0
		gameExit = True

	text = font.render('SCORE : '+str(score)+'\n', True, (255,255,255))
	gameDisplay.blit(text,(400,400))

	loop = loop + 1

pygame.quit()
quit()





# Instantiate memory
memory = Memory(max_size = memory_size)
for i in range(pretrain_length):
    # If it's the first step
    if i == 0:
        state = Reset()
        
        state, stacked_frames = stack_frames(stacked_frames, state, True)
        
    # Get the next_state, the rewards, done by taking a random action
    choice = random.randint(1,len(possible_actions))-1
    action = possible_actions[choice]
    next_state, reward, done = Step(action)
    
    #env.render()
    
    # Stack the frames