class NeuMF(BaseMF):
    """Model that fuses a GMF branch and an MLP branch with a linear layer.

    Both branches are built from the same ``model_config``. Their outputs
    are concatenated along ``dim=1`` and passed through ``nn.Linear(2, 1)``,
    so each branch is expected to contribute one column per sample.
    """

    def __init__(self, model_config):
        """Create the GMF branch, the MLP branch and the fusion layer."""
        super(NeuMF, self).__init__(model_config)
        self.GMF = GMF(model_config)
        self.MLP = MLP(model_config)
        self.mapping = nn.Linear(2, 1)

    def _freeze_and_rebuild(self, branch, optimizer):
        """Freeze ``branch`` and return a new Adam over what is still trainable.

        Args:
            branch: sub-module whose parameters get ``requires_grad = False``.
            optimizer: current optimizer; only ``lr`` and ``betas`` of its
                first parameter group are carried over to the new one.

        Returns:
            A new ``optim.Adam`` over the parameters of ``self`` that still
            require gradients.
        """
        for param in branch.parameters():
            param.requires_grad = False
        group = optimizer.param_groups[0]
        # NOTE: any other setting of the old optimizer is not copied; the new
        # Adam uses its own defaults for everything except lr and betas.
        return optim.Adam(
            filter(lambda p: p.requires_grad, self.parameters()),
            lr=group["lr"],
            betas=group["betas"],
        )

    def fix_left(self, optimizer):
        """Freeze the GMF branch and return a rebuilt Adam optimizer."""
        return self._freeze_and_rebuild(self.GMF, optimizer)

    def fix_right(self, optimizer):
        """Freeze the MLP branch and return a rebuilt Adam optimizer."""
        return self._freeze_and_rebuild(self.MLP, optimizer)

    def load_pretrained_embedding(self, model_data):
        """Load pretrained embeddings into the branches.

        Args:
            model_data: indexable pair ``(gmf_source, mlp_source)``. An entry
                equal to ``"default"`` leaves that branch untouched.
        """
        model_data_1, model_data_2 = model_data[0], model_data[1]
        if model_data_1 != "default":
            print("Loading GMF embedding in NeuMF...")
            self.GMF.load_embedding_from_file(model_data_1)
        if model_data_2 != "default":
            print("Loading MLP embedding in NeuMF...")
            self.MLP.load_embedding_from_file(model_data_2)

    def load_pretrained_model(self, model_data):
        """Load pretrained models into the branches.

        Args:
            model_data: indexable pair ``(gmf_source, mlp_source)``. An entry
                equal to ``"default"`` leaves that branch untouched.
        """
        model_data_1, model_data_2 = model_data[0], model_data[1]
        if model_data_1 != "default":
            print("Loading GMF model in NeuMF...")
            self.GMF.load_model_from_file(model_data_1)
        if model_data_2 != "default":
            print("Loading MLP model in NeuMF...")
            self.MLP.load_model_from_file(model_data_2)

    def get_similarity(self, input):
        """Return the fused similarity for a ``(users, items)`` pair.

        Args:
            input: indexable whose first two entries are the users and the
                items handed to both branches.

        Returns:
            Output of the linear fusion layer applied to the concatenated
            GMF and MLP outputs.
        """
        # The parameter name ``input`` shadows the builtin; it is kept so
        # that existing keyword callers continue to work.
        users, items = input[0], input[1]
        sim_GMF = self.GMF([users, items])
        sim_MLP = self.MLP([users, items])
        features = torch.cat((sim_GMF, sim_MLP), dim=1)
        sim = self.mapping(features)
        return sim
def main(args):
    """Train the state-action and state embedding networks.

    Each epoch:
      1. Resets the environment at a random start and samples a goal.
      2. Rolls out up to ``args.max_step`` greedy steps (stopping early on
         termination), then ``args.random_step`` random steps, storing every
         transition in both replay buffers.
      3. Draws one batch from each buffer and minimises
         ``(1 - args.reg_term) * loss_1 + args.reg_term * loss_2`` where
         ``loss_1`` is the mean norm of ``f_s_a(sa) - f_s(ns)`` and
         ``loss_2`` is the mean absolute difference between
         ``||f_s_a(sa) - f_s(g)||`` and the one-step-ahead distance plus 1.
      4. Recomputes the errors of the sampled entries and writes them back
         to the buffers.

    Progress is printed every 100 epochs, a checkpoint is written every 1000
    epochs when the loss is below 1, and the loss curve is saved as a PDF
    every 10000 epochs.

    Args:
        args: configuration object; the attributes read here are
            ``env_size``, ``s_a_hidden_size``, ``s_hidden_size``,
            ``embedding_dim``, ``s_a_lr``, ``epoch_num``, ``max_step``,
            ``random_step`` and ``reg_term``.
    """
    # environment initialization
    env = BaseEnv(size=args.env_size)

    # embedding network initialization
    f_s_a = MLP(env.state_size + env.action_size, args.s_a_hidden_size,
                args.embedding_dim)
    f_s = MLP(env.state_size, args.s_hidden_size, args.embedding_dim)

    # buffer initialization
    replay_buffer_1 = ReplayBuffer(args, env)
    replay_buffer_2 = ReplayBuffer(args, env)
    goal_buffer = GoalBuffer()
    init_goal = tuple(np.random.randint(1, args.env_size[0] + 1, size=2))
    goal_buffer.store(init_goal)

    # agent initialization
    agent = Agent(env_size=args.env_size)

    # optimizer initialization
    s_a_optimizer = torch.optim.Adam(f_s_a.parameters(), lr=args.s_a_lr)
    # NOTE(review): the f_s optimizer also uses args.s_a_lr -- confirm that
    # sharing the learning rate is intended.
    s_optimizer = torch.optim.Adam(f_s.parameters(), lr=args.s_a_lr)

    log_loss = []

    def _step_and_record(s_feature, action):
        """Apply ``action``, store the transition in both buffers.

        Returns the next state and the termination flag from ``env.step``.
        """
        ns, _, terminate = env.step(action)
        goal_buffer.store(ns)
        ns_feature = agent.get_state_feature(ns)
        vec_action = vectorize_action(action)

        # store one step loss
        e_1 = agent.get_dist(s_feature, vec_action, ns_feature, f_s_a,
                             f_s)[0]
        replay_buffer_1.add([s_feature, vec_action, ns_feature, None, e_1])

        # store two step loss
        sub_g = goal_buffer.sample_batch_goal(size=1, with_weights=False)
        sub_g_feature = agent.get_states_feature(sub_g)[0]
        na, _ = agent.get_best_action(ns_feature, f_s_a, f_s, sub_g_feature)
        dist_ns = agent.get_dist(ns_feature, vectorize_action(na),
                                 sub_g_feature, f_s_a, f_s)
        target = dist_ns + 1
        dist_s = agent.get_dist(s_feature, vec_action, sub_g_feature, f_s_a,
                                f_s)
        e_2 = abs(dist_s - target)
        replay_buffer_2.add(
            [s_feature, vec_action, ns_feature, sub_g_feature, e_2])
        return ns, terminate

    for epoch in tqdm.tqdm(range(args.epoch_num)):
        start_position = np.random.randint(1, args.env_size[0] + 1, size=2)
        goal = goal_buffer.sample_batch_goal(size=1)[0]
        print("goal point is :{}".format(goal))
        g_feature = agent.get_state_feature(goal)
        ns, _, _ = env.reset(size=args.env_size, start_pos=start_position)

        # Greedy rollout towards the sampled goal.
        for _ in range(args.max_step):
            s_feature = agent.get_state_feature(ns)
            action, _ = agent.get_best_action(s_feature, f_s_a, f_s,
                                              g_feature)
            ns, terminate = _step_and_record(s_feature, action)
            if terminate:
                break

        # Random rollout, continuing from wherever the greedy rollout ended.
        for _ in range(args.random_step):
            s_feature = agent.get_state_feature(ns)
            action = agent.get_random_action()
            ns, _ = _step_and_record(s_feature, action)

        batch_1, _, index_1 = replay_buffer_1.get_batch_data()
        _, batch_2, index_2 = replay_buffer_2.get_batch_data()

        loss_1 = torch.mean(
            torch.norm((f_s_a(batch_1['sa']) - f_s(batch_1['ns'])), dim=1))

        na = agent.get_best_actions(batch_2['ns'], f_s_a, f_s, batch_2['g'])
        target = agent.get_dist(batch_2['ns'], na, batch_2['g'], f_s_a,
                                f_s) + 1
        pred = torch.norm((f_s_a(batch_2['sa']) - f_s(batch_2['g'])), dim=1)
        if (epoch + 1) % 100 == 0:
            print(pred - target)
            print(len(replay_buffer_1), len(replay_buffer_2))
            # goal_buffer.goal_visualize()
        loss_2 = torch.mean(torch.abs(pred - target))

        # if epoch >= 1500:
        #     args.reg_term = 0.5
        loss = (1 - args.reg_term) * loss_1 + args.reg_term * loss_2
        # Store a plain Python float: keeping the tensor would retain its
        # autograd history for every epoch and cannot be plotted, because a
        # tensor that requires grad cannot be converted to a NumPy array.
        log_loss.append(loss.item())

        s_a_optimizer.zero_grad()
        s_optimizer.zero_grad()
        loss.backward()
        # nn.utils.clip_grad_norm_(f_s.parameters(), args.grad_clip)
        # nn.utils.clip_grad_norm_(f_s_a.parameters(), args.grad_clip)
        s_a_optimizer.step()
        s_optimizer.step()

        # Update replay buffer
        with torch.no_grad():
            e_update_1 = torch.norm(torch.FloatTensor(
                f_s_a.predict(batch_1['sa']) - f_s.predict(batch_1['ns'])),
                dim=1)
            na = agent.get_best_actions(batch_2['ns'], f_s_a, f_s,
                                        batch_2['g'])
            target = agent.get_dist(batch_2['ns'], na, batch_2['g'], f_s_a,
                                    f_s) + 1
            pred = torch.norm(torch.FloatTensor(
                f_s_a.predict(batch_2['sa']) - f_s.predict(batch_2['g'])),
                dim=1)
            e_update_2 = torch.abs(pred - target)
            replay_buffer_1.update_error(index_1, e_update_1)
            replay_buffer_2.update_error(index_2, e_update_2)

        if (epoch + 1) % 100 == 0:
            print(
                "epoch number: {}/{}, total_loss: {}, loss_normal: {}, loss_update: {}"
                .format(epoch + 1, args.epoch_num, loss, loss_1, loss_2))

        if (epoch + 1) % 1000 == 0 and loss < 1:
            # NOTE(review): args.epoch_num (the total) is passed here, not
            # the current epoch -- confirm against save_model's signature.
            save_model("./model.pt", f_s_a, f_s, args.epoch_num, loss,
                       s_a_optimizer, s_optimizer)
            print("Saving model at epoch: {}".format(epoch))

        if (epoch + 1) % 10000 == 0:
            plt.plot(range(epoch + 1), log_loss, color='red')
            plt.savefig("./{}.pdf".format(int((epoch + 1) / 10000)),
                        dpi=1200, bbox_inches='tight')