distil_ret.append(val) end = time.time() print("Gumbel sampling. Hit :", sum(distil_ret), end - start) rl_file_name = "RL-p%d-t%d-d%d-l[%s, %s]" % ( args.num_procs, args.num_tasks, int(use_deadline), args.range_l, args.range_r) RLModel = Solver( args.num_procs, args.embedding_size, args.hidden_size, args.num_tasks, use_deadline=False, use_cuda=False ) RLModel = RLModel.to("cpu") with open("../Pandamodels/rlmodels/" + rl_file_name + ".torchmodel", "rb") as f: tmp = torch.load(f) RLModel.load_state_dict(tmp.state_dict()) RLModel.eval() SAMPLING_NUMBER = 1 ret = [] rlstart = time.time() for i, batch in eval_loader: _ret = [] for _ in range(SAMPLING_NUMBER): store = [] _, _, actions = RLModel(batch, argmax=True) for j, chosen in enumerate(actions.cpu().numpy()):
# for p in range(args.num_tasks): # order[chosen[p]] = args.num_tasks - p - 1 # if use_cuda: # ret.append(test_module(_batch[j].cpu().numpy(), args.num_procs, order, use_deadline, False)) # else: # ret.append(test_module(_batch[j].numpy(), args.num_procs, order, use_deadline, False)) # print("[Before training][RL model generates %d]" % (np.sum(ret))) linear_model = LinearSolver(args.num_procs, args.num_tasks, args.use_deadline, use_cuda) # TRAIN LOOP if use_cuda: linear_model = linear_model.to("cuda:0") rl_model = rl_model.to("cuda:0") linear_model = linear_model.train() criterion = nn.MSELoss() optimizer = optim.Adam(linear_model.parameters(), lr=5e-3) start = time.time() for epoch in range(args.num_epochs): loss_ = 0 avg_hit = [] for batch_idx, (_, sample_batch) in enumerate(train_loader): optimizer.zero_grad() rewards, probs, action = rl_model(sample_batch) # rl_order = rl_label[batch_idx] rl_order = torch.zeros_like(action) for i in range(rl_order.size(0)): # batch size
def _load_util_sets(util_range):
    """Unpickle the train/test task sets for the selected utilization range.

    Walks *util_range* in order, starts collecting at ``args.range_l`` and
    stops after ``args.range_r`` (both bounds inclusive).

    Returns:
        A ``(trsets, tesets)`` pair of lists with one entry per loaded
        utilization.
    """
    if positive:
        train_path = "../Pandadata/tr/%d-%d/positive/%s"
    else:
        train_path = "../Pandadata/tr/%d-%d/%s"
    test_path = "../Pandadata/te/%d-%d/%s"

    trsets = []
    tesets = []
    # The flag must persist across iterations: resetting it inside the loop
    # (as was done before) meant only the ``range_l`` bucket was ever loaded,
    # even though the loop runs until ``range_r``.
    in_range = False
    for util in util_range:
        if util == args.range_l:
            in_range = True
        if in_range:
            key = (args.num_procs, args.num_tasks, util)
            # NOTE: pickle executes arbitrary code on load; these files must
            # come from a trusted source.
            with open(train_path % key, 'rb') as f:
                trsets.append(pickle.load(f))
            with open(test_path % key, 'rb') as f:
                tesets.append(pickle.load(f))
        if util == args.range_r:
            break
    return trsets, tesets


def _drop_and_renormalize(distribution, chosen):
    """Zero the entries indexed by *chosen*, renormalizing after each removal.

    Works on a clone so the caller's tensor (a view of the model output) is
    never modified in place.
    """
    remaining = distribution.clone()
    for task in chosen:
        remaining[task] = 0
        remaining = remaining / torch.sum(remaining)
    return remaining


def _stepwise_divergence(distributions, actions, n_step):
    """Average divergence between masked-and-renormalized and later distributions.

    For ``n_step == 1`` and the fallback (5-step) branch the measure is
    ``torch.nn.KLDivLoss(reduction="batchmean")``; for ``n_step == 3`` it is a
    summed absolute difference computed in NumPy.

    NOTE(review): the ``n_step == 3`` branch removes only two sampled tasks
    and compares against step ``t + 2`` while averaging over
    ``num_tasks - 3`` terms -- confirm whether a third removal / ``t + 3``
    was intended.
    NOTE(review): ``torch.log`` of a distribution with zeroed entries yields
    ``-inf`` there -- confirm KLDivLoss behaves as wanted for those entries.
    """
    kl_loss = torch.nn.KLDivLoss(reduction="batchmean")
    actions = actions.squeeze()
    num_tasks = args.num_tasks
    total = 0

    # Timestep +1
    if n_step == 1:
        for t in range(num_tasks - 1):
            predicted = _drop_and_renormalize(
                distributions[t].squeeze(), actions[t:t + 1])
            total += kl_loss(torch.log(predicted), distributions[t + 1])
        return total / (num_tasks - 1)

    # Timestep +3
    if n_step == 3:
        for t in range(num_tasks - 3):
            predicted = _drop_and_renormalize(
                distributions[t].squeeze(), actions[t:t + 2])
            predicted = predicted.detach().cpu().numpy()
            observed = distributions[t + 2].detach().cpu().numpy()
            total += np.sum(np.abs(predicted - observed))
        return total / (num_tasks - 3)

    # Timestep +5
    for t in range(num_tasks - 5):
        predicted = _drop_and_renormalize(
            distributions[t].squeeze(), actions[t:t + 5])
        total += kl_loss(torch.log(predicted), distributions[t + 5])
    return total / (num_tasks - 5)


def kl_div(n_step):
    """Print the gap between two guided decoding distributions, then exit.

    Loads the train/test sets for the configured utilization range, restores
    the local RL ``Solver`` checkpoint, runs it on the first two samples of
    the first training batch under two guide orders (``0..31`` and
    ``31..0``), and prints half the summed absolute difference between
    ``distributions[0][5]`` and ``distributions[1][5]``.

    NOTE: the process is terminated with ``exit(0)`` right after printing, so
    the n-step divergence at the end is currently unreachable and *n_step*
    has no effect. That behaviour is preserved deliberately.

    Args:
        n_step: ``1`` or ``3`` selects the matching branch of the divergence
            computation; any other value selects the 5-step branch.

    Returns:
        The averaged divergence (only if the ``exit(0)`` call is removed).

    Raises:
        RuntimeError: if the training loader yields no batch.
        SystemExit: always, after the distance has been printed.
    """
    util_range = get_util_range(args.num_procs)
    trsets, tesets = _load_util_sets(util_range)

    train_dataset = Datasets(trsets)
    test_dataset = Datasets(tesets)
    train_dataset.setlen(args.num_train_dataset)
    # The test split is still built and sized as before, although nothing
    # below consumes it.
    test_dataset.setlen(args.num_test_dataset)

    train_loader = DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        pin_memory=True
    )

    temp_fname = "localRL-p%d-t%d-d%d-l[%s, %s].torchmodel" % \
                 (args.num_procs, args.num_tasks, int(use_deadline), args.range_l, args.range_r)
    # NOTE: torch.load unpickles a whole module object; only load checkpoints
    # from a trusted source. The unconditional .cuda() calls in this function
    # mean it needs a CUDA device regardless of ``use_cuda``.
    model = torch.load("../Pandamodels/localrlmodels/" + temp_fname).cuda()

    rl_model = Solver(
        args.num_procs,
        args.embedding_size,
        args.hidden_size,
        args.num_tasks,
        use_deadline=False,
        use_cuda=True,
        ret_embedded_vector=True,
    )
    rl_model.load_state_dict(model.state_dict())
    rl_model = rl_model.eval()
    if use_cuda:
        rl_model = rl_model.to("cuda:0")

    # Two guide rows: ascending and descending index order.
    # NOTE(review): 32 is hard-coded; presumably it should track
    # args.num_tasks -- confirm against Solver's ``guide`` handling.
    guide_len = 32
    ascending = torch.arange(guide_len)
    guide = torch.stack([ascending, ascending.flip(0)]).cuda()

    # Only the first training batch is needed.
    first_batch = next(iter(train_loader), None)
    if first_batch is None:
        raise RuntimeError("train_loader produced no batches; cannot evaluate kl_div")
    _, sample_batch = first_batch
    # Keep two samples so the batch lines up with the two guide rows.
    sample_batch = sample_batch[:2, :, :]
    _, actions, distributions = rl_model(sample_batch, guide=guide)

    a = distributions[0][5].detach().cpu().numpy()
    b = distributions[1][5].detach().cpu().numpy()
    print(0.5 * np.sum(np.abs(a - b)))
    exit(0)

    # Unreachable while the exit(0) above is in place.
    return _stepwise_divergence(distributions, actions, n_step)