def agent_train():
    """Train a PPO agent on `env_name`, logging progress and checkpointing.

    Relies on module-level configuration: env_name, lr, betas, gamma,
    K_epochs, eps_clip, device, max_episodes, update_timestep,
    log_interval, render.
    """
    env = gym.make(env_name)
    agent = PPO(env.observation_space.shape, env.action_space.n, lr, betas,
                gamma, K_epochs, eps_clip, device)
    memory = Memory()

    # Running statistics (reset every `log_interval` episodes).
    running_reward = 0
    avg_length = 0
    timestep = 0

    for i_episode in range(1, max_episodes + 1):
        state = env.reset()
        state = preProcess(state)
        # BUGFIX: reset per episode. `done` was initialized once before the
        # episode loop, so after the first episode finished every later
        # episode skipped its while-loop entirely.
        done = False
        t = 0
        while not done:
            t += 1
            timestep += 1

            # Running policy_old:
            # BUGFIX: was `agent.polciy_old` (typo) — AttributeError at runtime.
            action = agent.policy_old.act(state, memory)
            state, reward, done, _ = env.step(action)
            state = preProcess(state)

            # Saving reward:
            memory.rewards.append(reward)

            # Update the policy every `update_timestep` environment steps.
            if timestep % update_timestep == 0:
                agent.update(memory)
                memory.clear_memory()
                timestep = 0

            running_reward += reward
            if render:
                env.render()
            if done:
                break

        avg_length += t

        # Periodic logging.
        if i_episode % log_interval == 0:
            avg_length = int(avg_length / log_interval)
            running_reward = int((running_reward / log_interval))
            print('Episode {} \t avg length: {} \t reward: {}'.format(
                i_episode, avg_length, running_reward))
            running_reward = 0
            avg_length = 0

        # Periodic checkpoint.
        if i_episode % 500 == 0:
            torch.save(agent.policy.state_dict(),
                       './PPO_{}.pth'.format(env_name))
def run_assistant(self, name):
    """Hide the main window, greet the user, and loop the voice assistant
    until the user says "salir" (then destroy the root window)."""
    self.__root.withdraw()
    # NOTE(review): return value discarded — presumably initializes shared
    # state inside the Memory module; confirm.
    Memory.Memory()
    self.__controller.set_name(name)
    self.__controller.speak("Hola, " + name + ". ¿Qué puedo hacer por ti?")

    heard = ""
    while heard != "salir":
        heard = self.__controller.record_audio()
        if heard == "salir":
            # Exit command: tear down the UI; the loop condition ends it.
            self.__root.destroy()
        elif heard != "":
            # Non-empty utterance: hand it to the assistant pipeline.
            self.__controller.assistant(heard)
def knowledge_celebrity_test(context_text_encoder: TextEncoder,
                             context_image_encoder: ImageEncoder,
                             context_encoder: ContextEncoder,
                             to_hidden: ToHidden,
                             celebrity_memory: Memory,
                             text_decoder: TextDecoder,
                             test_dataset: Dataset,
                             celebrity_scores,
                             text_length: int,
                             vocab: Dict[str, int]):
    """Knowledge celebrity test.

    Runs text generation over the test set with all modules in eval mode
    and writes the decoded output to `knowledge_celebrity.out`.

    Args:
        context_text_encoder (TextEncoder): Context text encoder.
        context_image_encoder (ImageEncoder): Context image encoder.
        context_encoder (ContextEncoder): Context encoder.
        to_hidden (ToHidden): Context to hidden.
        celebrity_memory (Memory): Celebrity Memory.
        text_decoder (TextDecoder): Text decoder.
        test_dataset (Dataset): Test dataset.
        celebrity_scores: Celebrity scores.
        text_length (int): Text length.
        vocab (Dict[str, int]): Vocabulary.
    """
    # Inverse vocabulary (word id -> word), used to decode generated ids.
    id2word: List[str] = [None] * len(vocab)
    for word, wid in vocab.items():
        id2word[wid] = word

    # Test dataset loader.
    test_data_loader = DataLoader(
        test_dataset,
        batch_size=KnowledgeCelebrityTestConfig.batch_size,
        num_workers=KnowledgeCelebrityTestConfig.num_data_loader_workers)

    # Switch to eval mode.
    context_text_encoder.eval()
    context_image_encoder.eval()
    context_encoder.eval()
    to_hidden.eval()
    celebrity_memory.eval()
    text_decoder.eval()

    # BUGFIX: open the output file with a context manager so it is closed
    # even if evaluation raises; also removed the unused `sum_loss` local.
    with open('knowledge_celebrity.out', 'w') as output_file:
        with torch.no_grad():
            for batch_id, test_data in enumerate(test_data_loader):
                texts, text_lengths, images, utter_types = test_data
                # Sizes:
                # texts: (batch_size, dialog_context_size + 1, dialog_text_max_len)
                # text_lengths: (batch_size, dialog_context_size + 1)
                # images: (batch_size, dialog_context_size + 1,
                #          pos_images_max_num, 3, image_size, image_size)
                # utter_types: (batch_size, )

                # To device.
                texts = texts.to(GlobalConfig.device)
                text_lengths = text_lengths.to(GlobalConfig.device)
                images = images.to(GlobalConfig.device)

                # In-place transposes to batch-second layout.
                texts.transpose_(0, 1)
                # (dialog_context_size + 1, batch_size, dialog_text_max_len)
                text_lengths.transpose_(0, 1)
                # (dialog_context_size + 1, batch_size)
                images.transpose_(0, 1)
                images.transpose_(1, 2)
                # (dialog_context_size + 1, pos_images_max_num, batch_size, 3,
                #  image_size, image_size)

                # Encode context.
                context, hiddens = encode_context(context_text_encoder,
                                                  context_image_encoder,
                                                  context_encoder,
                                                  texts, text_lengths, images)
                # (batch_size, context_vector_size)

                # Celebrity scores act as the knowledge entry; bind them to
                # the memory module so the decoder can attend over them.
                knowledge_entry = celebrity_scores
                encode_knowledge_func = partial(celebrity_memory,
                                                knowledge_entry)

                text_eval(to_hidden, text_decoder, text_length, id2word,
                          context, texts[-1], hiddens, encode_knowledge_func,
                          output_file=output_file)
def knowledge_celebrity_valid(
        context_text_encoder: TextEncoder,
        context_image_encoder: ImageEncoder,
        context_encoder: ContextEncoder,
        to_hidden: ToHidden,
        celebrity_memory: Memory,
        text_decoder: TextDecoder,
        valid_dataset: Dataset,
        celebrity_scores,
        text_length: int):
    """Knowledge celebrity valid.

    Evaluates the text-generation loss over at most
    `KnowledgeCelebrityValidConfig.num_batches` validation batches with all
    modules in eval mode, then switches them back to train mode.

    Args:
        context_text_encoder (TextEncoder): Context text encoder.
        context_image_encoder (ImageEncoder): Context image encoder.
        context_encoder (ContextEncoder): Context encoder.
        to_hidden (ToHidden): Context to hidden.
        celebrity_memory (Memory): Celebrity Memory.
        text_decoder (TextDecoder): Text decoder.
        valid_dataset (Dataset): Valid dataset.
        celebrity_scores: Celebrity scores.
        text_length (int): Text length.

    Returns:
        Average validation loss: mean over evaluated batches of
        (loss / text_length).
    """
    # Valid dataset loader.
    valid_data_loader = DataLoader(
        valid_dataset,
        batch_size=KnowledgeCelebrityValidConfig.batch_size,
        shuffle=True,
        num_workers=KnowledgeCelebrityValidConfig.num_data_loader_workers)

    sum_loss = 0      # accumulated per-batch loss (already divided by text_length)
    num_batches = 0   # batches actually evaluated

    # Switch to eval mode.
    context_text_encoder.eval()
    context_image_encoder.eval()
    context_encoder.eval()
    to_hidden.eval()
    celebrity_memory.eval()
    text_decoder.eval()

    with torch.no_grad():
        for batch_id, valid_data in enumerate(valid_data_loader):
            # Only valid `ValidConfig.num_batches` batches.
            if batch_id >= KnowledgeCelebrityValidConfig.num_batches:
                break
            num_batches += 1

            texts, text_lengths, images, utter_types = valid_data
            # Sizes:
            # texts: (batch_size, dialog_context_size + 1, dialog_text_max_len)
            # text_lengths: (batch_size, dialog_context_size + 1)
            # images: (batch_size, dialog_context_size + 1,
            #          pos_images_max_num, 3, image_size, image_size)
            # utter_types: (batch_size, )

            # To device.
            texts = texts.to(GlobalConfig.device)
            text_lengths = text_lengths.to(GlobalConfig.device)
            images = images.to(GlobalConfig.device)
            # utter_types = utter_types.to(GlobalConfig.device)

            # In-place transposes to batch-second layout.
            texts.transpose_(0, 1)
            # (dialog_context_size + 1, batch_size, dialog_text_max_len)
            text_lengths.transpose_(0, 1)
            # (dialog_context_size + 1, batch_size)
            images.transpose_(0, 1)
            images.transpose_(1, 2)
            # (dialog_context_size + 1, pos_images_max_num, batch_size, 3,
            #  image_size, image_size)

            # Encode context.
            context, hiddens = encode_context(context_text_encoder,
                                              context_image_encoder,
                                              context_encoder,
                                              texts, text_lengths, images)
            # (batch_size, context_vector_size)

            # Celebrity scores act as the knowledge entry; bind them to the
            # memory module so the decoder can attend over them.
            knowledge_entry = celebrity_scores
            encode_knowledge_func = partial(celebrity_memory, knowledge_entry)

            loss, n_totals = text_loss(to_hidden, text_decoder, text_length,
                                       context, texts[-1], text_lengths[-1],
                                       hiddens, encode_knowledge_func)
            sum_loss += loss / text_length

    # Switch to train mode.
    context_text_encoder.train()
    context_image_encoder.train()
    context_encoder.train()
    to_hidden.train()
    celebrity_memory.train()
    text_decoder.train()

    # NOTE(review): raises ZeroDivisionError if the loader yields no batches.
    return sum_loss / num_batches
def knowledge_styletip_train(context_text_encoder: TextEncoder,
                             context_image_encoder: ImageEncoder,
                             context_encoder: ContextEncoder,
                             train_dataset: Dataset,
                             valid_dataset: Dataset,
                             test_dataset: Dataset,
                             model_file: str,
                             styletips_data: StyleTipsData,
                             vocab: Dict[str, int],
                             embed_init=None):
    """Knowledge styletip train.

    Trains the style-tip knowledge-grounded text decoder (graph encoder
    over the style-tip relation graph plus a Memory module), validating
    every `valid_freq` batches, checkpointing the best model, and running
    the test pass once validation has not improved for `patience` checks.

    Args:
        context_text_encoder (TextEncoder): Context text encoder.
        context_image_encoder (ImageEncoder): Context image encoder.
        context_encoder (ContextEncoder): Context encoder.
        train_dataset (Dataset): Train dataset.
        valid_dataset (Dataset): Valid dataset.
        test_dataset (Dataset): Test dataset.
        model_file (str): Saved model file.
        styletips_data (StyleTipsData): Style tips data.
        vocab (Dict[str, int]): Vocabulary.
        embed_init: Initial embedding (vocab_size, embed_size).
    """
    # Data loader.
    train_data_loader = DataLoader(
        dataset=train_dataset,
        batch_size=KnowledgeStyletipTrainConfig.batch_size,
        shuffle=True,
        num_workers=KnowledgeStyletipTrainConfig.num_data_loader_workers)

    # Style-tip relations as an edge-list tensor, one (x, y) pair per edge.
    styletip_edges = torch.stack(
        [torch.tensor([x, y]) for x, y in styletips_data.edges])
    styletip_edges = styletip_edges.to(GlobalConfig.device)

    # Model.
    vocab_size = len(vocab)
    graph_encoder_config = GraphEncoderConfig(len(styletips_data.vocab))
    styletip_memory_config = StyletipMemoryConfig(len(styletips_data.edges))
    text_decoder_config = KnowledgeTextDecoderConfig(vocab_size,
                                                    MemoryConfig.memory_size,
                                                    MemoryConfig.output_size,
                                                    embed_init)
    to_hidden = ToHidden(text_decoder_config)
    to_hidden = to_hidden.to(GlobalConfig.device)
    graph_encoder = GraphEncoder(graph_encoder_config)
    graph_encoder = graph_encoder.to(GlobalConfig.device)
    styletip_memory = Memory(styletip_memory_config)
    styletip_memory = styletip_memory.to(GlobalConfig.device)
    text_decoder = TextDecoder(text_decoder_config)
    text_decoder = text_decoder.to(GlobalConfig.device)

    # Model parameters (all trainable modules are optimized jointly).
    params = list(
        chain.from_iterable([
            list(model.parameters()) for model in [
                context_text_encoder, context_image_encoder, context_encoder,
                to_hidden, graph_encoder, styletip_memory, text_decoder
            ]
        ]))
    optimizer = Adam(params, lr=KnowledgeStyletipTrainConfig.learning_rate)
    epoch_id = 0
    min_valid_loss = None

    # Load saved state.
    # NOTE(review): the checkpoint also stores 'context_text_encoder',
    # 'context_image_encoder' and 'context_encoder' states (see save_dict
    # below) but they are not restored here — presumably restored by the
    # caller; confirm, otherwise resuming loses those weights.
    if isfile(model_file):
        state = torch.load(model_file)
        to_hidden.load_state_dict(state['to_hidden'])
        graph_encoder.load_state_dict(state['graph_encoder'])
        styletip_memory.load_state_dict(state['styletip_memory'])
        text_decoder.load_state_dict(state['text_decoder'])
        optimizer.load_state_dict(state['optimizer'])
        epoch_id = state['epoch_id']
        min_valid_loss = state['min_valid_loss']

    # Loss.
    sum_loss = 0
    bad_loss_cnt = 0  # consecutive validations without improvement

    # Switch to train mode.
    context_text_encoder.train()
    context_image_encoder.train()
    context_encoder.train()
    to_hidden.train()
    graph_encoder.train()
    styletip_memory.train()
    text_decoder.train()

    finished = False

    # Resume from the saved epoch (if any) up to num_iterations.
    for epoch_id in range(epoch_id,
                          KnowledgeStyletipTrainConfig.num_iterations):
        for batch_id, train_data in enumerate(train_data_loader):
            # Set gradients to 0.
            optimizer.zero_grad()

            texts, text_lengths, images, utter_types = train_data
            # Sizes:
            # texts: (batch_size, dialog_context_size + 1, dialog_text_max_len)
            # text_lengths: (batch_size, dialog_context_size + 1)
            # images: (batch_size, dialog_context_size + 1,
            #          pos_images_max_num, 3, image_size, image_size)
            # utter_types: (batch_size, )

            # To device.
            texts = texts.to(GlobalConfig.device)
            text_lengths = text_lengths.to(GlobalConfig.device)
            images = images.to(GlobalConfig.device)
            utter_types = utter_types.to(GlobalConfig.device)

            # In-place transposes to batch-second layout.
            texts.transpose_(0, 1)
            # (dialog_context_size + 1, batch_size, dialog_text_max_len)
            text_lengths.transpose_(0, 1)
            # (dialog_context_size + 1, batch_size)
            images.transpose_(0, 1)
            images.transpose_(1, 2)
            # (dialog_context_size + 1, pos_images_max_num, batch_size, 3,
            #  image_size, image_size)

            # Encode context.
            context, hiddens = encode_context(context_text_encoder,
                                              context_image_encoder,
                                              context_encoder,
                                              texts, text_lengths, images)
            # (batch_size, context_vector_size)

            # Encode the style-tip graph and bind the result to the memory
            # module so the decoder can attend over the knowledge entries.
            knowledge_entry = graph_encoder(styletip_edges)
            encode_knowledge_func = partial(styletip_memory, knowledge_entry)

            loss, n_totals = text_loss(to_hidden, text_decoder,
                                       text_decoder_config.text_length,
                                       context, texts[-1], text_lengths[-1],
                                       hiddens, encode_knowledge_func)
            sum_loss += loss / text_decoder_config.text_length
            loss.backward()
            optimizer.step()

            # Print loss every `TrainConfig.print_freq` batches.
            if (batch_id + 1) % KnowledgeStyletipTrainConfig.print_freq == 0:
                cur_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                sum_loss /= KnowledgeStyletipTrainConfig.print_freq
                print('epoch: {} \tbatch: {} \tloss: {} \ttime: {}'.format(
                    epoch_id + 1, batch_id + 1, sum_loss, cur_time))
                sum_loss = 0

            # Valid every `TrainConfig.valid_freq` batches.
            if (batch_id + 1) % KnowledgeStyletipTrainConfig.valid_freq == 0:
                valid_loss = knowledge_styletip_valid(
                    context_text_encoder, context_image_encoder,
                    context_encoder, to_hidden, graph_encoder,
                    styletip_memory, text_decoder, valid_dataset,
                    styletip_edges, text_decoder_config.text_length)
                cur_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                print('valid_loss: {} \ttime: {}'.format(valid_loss, cur_time))

                # Save current best model.
                if min_valid_loss is None or valid_loss < min_valid_loss:
                    min_valid_loss = valid_loss
                    bad_loss_cnt = 0
                    save_dict = {
                        'task': KNOWLEDGE_STYLETIP_SUBTASK,
                        'epoch_id': epoch_id,
                        'min_valid_loss': min_valid_loss,
                        'optimizer': optimizer.state_dict(),
                        'context_text_encoder':
                            context_text_encoder.state_dict(),
                        'context_image_encoder':
                            context_image_encoder.state_dict(),
                        'context_encoder': context_encoder.state_dict(),
                        'to_hidden': to_hidden.state_dict(),
                        'graph_encoder': graph_encoder.state_dict(),
                        'styletip_memory': styletip_memory.state_dict(),
                        'text_decoder': text_decoder.state_dict()
                    }
                    torch.save(save_dict, model_file)
                    print('Best model saved.')
                else:
                    bad_loss_cnt += 1
                    # Early stop: run the test pass once validation has not
                    # improved for more than `patience` checks.
                    if bad_loss_cnt > KnowledgeStyletipTrainConfig.patience:
                        knowledge_styletip_test(
                            context_text_encoder, context_image_encoder,
                            context_encoder, to_hidden, graph_encoder,
                            styletip_memory, text_decoder, test_dataset,
                            styletip_edges, text_decoder_config.text_length,
                            vocab)
                        finished = True
                        break
        if finished:
            break
def __init__(self):
    """Initialize the assistant state: no user name yet, fresh memory."""
    # User's name, filled in later (e.g. by set_name).
    self.__name = ""
    # Backing memory store for the assistant.
    self._memory = Memory.Memory()
from model import Model, Memory, GameRunner
import gym
import tensorflow as tf
# BUGFIX: `plt` was used below but matplotlib was never imported.
import matplotlib.pyplot as plt

if __name__ == "__main__":
    # Train a Q-network on MountainCar and plot the training curves.
    env_name = 'MountainCar-v0'
    env = gym.make(env_name)
    num_states = env.env.observation_space.shape[0]
    num_actions = env.env.action_space.n

    BATCH_SIZE = 100
    model = Model(num_states, num_actions, BATCH_SIZE)
    mem = Memory(50000)

    with tf.Session() as sess:
        sess.run(model.var_init)
        # NOTE(review): MAX_EPSILON, MIN_EPSILON and LAMBDA are not defined
        # in this file — presumably exported by `model` or defined in an
        # omitted section; confirm before running.
        gr = GameRunner(sess, model, env, mem,
                        MAX_EPSILON, MIN_EPSILON, LAMBDA)

        num_episodes = 300
        for cnt in range(num_episodes):
            if cnt % 10 == 0:
                print('Episode {} of {}'.format(cnt + 1, num_episodes))
            gr.run()

        # Per-episode reward curve.
        plt.plot(gr.reward_store)
        plt.show()
        plt.close("all")
        # Per-episode maximum x-position reached.
        plt.plot(gr.max_x_store)
        plt.show()  # BUGFIX: this plot was built but never displayed
), False) p4.eval() print('model loaded') treesearcher = Robot("tree_searcher") treesearcher.add_soul(p4) treesearcher.set_device(device) Greed = Robot("MrGreed") IF = Robot("MrIf") #robot = [treesearcher, Greed, Greed, Greed] robot = [treesearcher, IF, IF, IF] tser_ct = 0 for single_rbt in robot: if single_rbt.name == "tree_searcher": tser_ct += 1 mbuffer = Memory(13 * tser_ct * (game_batch)) R18_instance = Robot("R18") R18_instance.worker.device = device #imaginary_robot = [Greed, Greed, Greed, Greed] imaginary_robot = [IF, IF, IF, IF] treesearcher.set_imaginary_robot_list(imaginary_robot) #vis = visdom.Visdom() #gainline = vis.line(X=np.array([0]), Y=np.array([0]), opts=dict(showlegend=True, title='Gain over Mr.if')) gain_vec = [] #game.save_model(robot_v4, prophet_v4, "robot-net.txt", "prophet-net.txt") for i in range(1000): trainingv = np.zeros((game_batch - 1, 4))
def memory(memory_size, memory_feature_size):
    """Factory: build a Memory with the given slot count and feature size."""
    mem = Memory(memory_size, memory_feature_size)
    return mem
gameExit = True text = font.render('SCORE : '+str(score)+'\n', True, (255,255,255)) gameDisplay.blit(text,(400,400)) loop = loop + 1 pygame.quit() quit() # Instantiate memory memory = Memory(max_size = memory_size) for i in range(pretrain_length): # If it's the first step if i == 0: state = Reset() state, stacked_frames = stack_frames(stacked_frames, state, True) # Get the next_state, the rewards, done by taking a random action choice = random.randint(1,len(possible_actions))-1 action = possible_actions[choice] next_state, reward, done = Step(action) #env.render() # Stack the frames