rl_model = Solver(args.num_procs, args.embedding_size, args.hidden_size, args.num_tasks,
                  use_deadline=False, use_cuda=True)
rl_model.load_state_dict(tmp.state_dict())
if args.use_cuda:
    rl_model.cuda()

# Freeze the weights of the global reinforcement model.
freezing_param_name = ["init_w", "embedding", "mha"]
for name, param in rl_model.named_parameters():
    if name.split(".")[1] in freezing_param_name:
        param.requires_grad = False

# Evaluate the global model before training.
rl_model.eval()
ret = []
for i, batch in eval_loader:
    if use_cuda:
        batch = batch.cuda()
    R, log_prob, actions = rl_model(batch, argmax=True)
    for j, chosen in enumerate(actions.cpu().numpy()):
        order = np.zeros_like(chosen)
        for k in range(args.num_tasks):
            order[chosen[k]] = args.num_tasks - k - 1  # More important tasks get higher numbers.
        if use_cuda:
            ret.append(
                test_module(batch[j].cpu().numpy(), args.num_procs, order, args.use_deadline, False))
        else:
            ret.append(
                test_module(batch[j].numpy(), args.num_procs, order, args.use_deadline, False))
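# A minimal, self-contained sketch (not part of the original script) of what the
# `order` construction above computes: `chosen` lists task indices from most to
# least important, and `order` inverts that permutation so order[task] is the
# task's priority, with larger numbers meaning higher priority.
def _actions_to_priority_sketch(chosen, num_tasks):
    order = np.zeros_like(chosen)
    for k in range(num_tasks):
        order[chosen[k]] = num_tasks - k - 1
    return order
# Example: _actions_to_priority_sketch(np.array([2, 0, 1]), 3) -> array([1, 0, 2]),
# i.e. task 2 gets the highest priority (2) and task 1 the lowest (0).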
temp_fname = "localRL-p%d-t%d-d%d-l[%s, %s].torchmodel" % \
    (args.num_procs, args.num_tasks, int(use_deadline), args.range_l, args.range_r)
model = torch.load("../Pandamodels/localrlmodels/" + temp_fname).cuda()
rl_model = Solver(args.num_procs, args.embedding_size, args.hidden_size, args.num_tasks,
                  use_deadline=False, use_cuda=True)
rl_model.load_state_dict(model.state_dict())
if use_cuda:
    model = model.cuda()
    rl_model = rl_model.cuda()
rl_model = rl_model.eval()

ret = []
# for i, _batch in eval_loader:
#     if use_cuda:
#         _batch = _batch.cuda()
#     R, log_prob, actions = model(_batch, argmax=True)
#     for j, chosen in enumerate(actions.cpu().numpy()):
#         order = np.zeros_like(chosen)
#         for p in range(args.num_tasks):
#             order[chosen[p]] = args.num_tasks - p - 1
#         if use_cuda:
#             ret.append(test_module(_batch[j].cpu().numpy(), args.num_procs, order, use_deadline, False))
#         else:
#             ret.append(test_module(_batch[j].numpy(), args.num_procs, order, use_deadline, False))
    args.hidden_size, args.num_tasks, use_deadline=False, use_cuda=use_cuda)
bl_model = Solver(args.num_procs, args.embedding_size, args.hidden_size, args.num_tasks,
                  use_deadline=False, use_cuda=use_cuda)
bl_model.load_state_dict(model.state_dict())
if use_cuda:
    model = model.cuda()
    bl_model = bl_model.cuda()
bl_model = bl_model.eval()

def wrap(x):
    _sample, num_proc, use_deadline = x
    # No schedulability test is passed to OPA here, but internally it falls
    # back to a DA-based test.
    return heu.OPA(_sample, num_proc, None, use_deadline)

with ProcessPoolExecutor(max_workers=4) as executor:
    inputs = []
    res_opa = np.zeros(len(test_dataset), dtype=int).tolist()
    for i, sample in test_dataset:
        # ret = heu.OPA(sample, args.num_procs, heu.test_DA_LC, use_deadline)
        inputs.append((sample, args.num_procs, use_deadline))
    for i, ret in tqdm(enumerate(executor.map(wrap, inputs))):
        res_opa[i] = ret
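# For reference, a schematic sketch of Audsley's Optimal Priority Assignment (OPA),
# the algorithm heu.OPA implements. `is_schedulable_at_lowest` is a hypothetical
# stand-in for the DA-based per-task test that heu.OPA uses when `test` is None.
def _opa_sketch(tasks, num_procs, is_schedulable_at_lowest):
    unassigned = list(tasks)
    priority_order = []  # filled from lowest priority to highest
    while unassigned:
        for task in unassigned:
            # Can `task` take the lowest remaining priority, assuming every
            # other unassigned task runs at a higher priority?
            others = [t for t in unassigned if t is not task]
            if is_schedulable_at_lowest(task, others, num_procs):
                priority_order.append(task)
                unassigned.remove(task)
                break
        else:
            return None  # no task fits this level: unschedulable under OPA
    return priority_order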
if args.use_cuda:
    model = model.cuda()

# Train loop
moving_avg = torch.zeros(args.num_train_dataset)
if args.use_cuda:
    moving_avg = moving_avg.cuda()

# Generate the first baseline.
cc = 1
for (indices, sample_batch) in tqdm(train_data_loader):
    if args.use_cuda:
        sample_batch = sample_batch.cuda()
    rewards, _, _ = model(sample_batch)
    print(rewards)
    moving_avg[indices] = rewards.float()

model.eval()
ret = []
res_tkc = []
res_rm = []
res_opa = []
for i, sample in tqdm(test_dataset):
    scores = heu.get_DkC_scores(sample, args.num_procs)
    priority = scores_to_priority(scores)
    res_tkc.append(heu.test_DA(sample, args.num_procs, priority, use_deadline))
    scores = heu.get_DM_scores(sample, args.num_procs)
    priority = scores_to_priority(scores)
    res_rm.append(heu.test_DA(sample, args.num_procs, priority, use_deadline))
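# The pass above only seeds moving_avg with each sample's first reward. A minimal
# sketch, under stated assumptions, of how such a per-sample moving-average
# baseline is typically used in a REINFORCE-style update; `beta` is a
# hypothetical smoothing constant and the actual update rule lives elsewhere
# in this repo's training loop.
def _baseline_update_sketch(moving_avg, indices, rewards, log_probs, beta=0.9):
    advantage = rewards - moving_avg[indices]           # center rewards per sample
    loss = -(advantage.detach() * log_probs).mean()     # policy-gradient surrogate
    moving_avg[indices] = beta * moving_avg[indices] + (1.0 - beta) * rewards.detach()
    return loss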
rl_file_name = "RL-p%d-t%d-d%d-l[%s, %s]" % \
    (args.num_procs, args.num_tasks, int(use_deadline), args.range_l, args.range_r)
RLModel = Solver(
    args.num_procs,
    args.embedding_size,
    args.hidden_size,
    args.num_tasks,
    use_deadline=False,
    use_cuda=False,
)
RLModel = RLModel.to("cpu")
with open("../Pandamodels/rlmodels/" + rl_file_name + ".torchmodel", "rb") as f:
    tmp = torch.load(f)
RLModel.load_state_dict(tmp.state_dict())
RLModel.eval()

SAMPLING_NUMBER = 1
ret = []
rlstart = time.time()
for i, batch in eval_loader:
    _ret = []
    for _ in range(SAMPLING_NUMBER):
        store = []
        _, _, actions = RLModel(batch, argmax=True)
        for j, chosen in enumerate(actions.cpu().numpy()):
            order = np.zeros_like(chosen)
            for p in range(args.num_tasks):
                order[chosen[p]] = args.num_tasks - p - 1
            store.append(test_module(batch[j].cpu().numpy(), args.num_procs, order, use_deadline, False))
def kl_div(n_step):
    util_range = get_util_range(args.num_procs)
    trsets = []
    tesets = []
    on = False
    for util in util_range:
        if util == args.range_l:
            on = True
        if on:
            if positive:
                load_file_name = "../Pandadata/tr/%d-%d/positive/%s"
            else:
                load_file_name = "../Pandadata/tr/%d-%d/%s"
            with open(load_file_name % (args.num_procs, args.num_tasks, util), 'rb') as f:
                ts = pickle.load(f)
                trsets.append(ts)
            with open("../Pandadata/te/%d-%d/%s" % (args.num_procs, args.num_tasks, util), 'rb') as f:
                ts = pickle.load(f)
                tesets.append(ts)
        if util == args.range_r:
            break

    train_dataset = Datasets(trsets)
    test_dataset = Datasets(tesets)
    train_dataset.setlen(args.num_train_dataset)
    test_dataset.setlen(args.num_test_dataset)

    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False, pin_memory=True)
    eval_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False, pin_memory=True)

    temp_fname = "localRL-p%d-t%d-d%d-l[%s, %s].torchmodel" % \
        (args.num_procs, args.num_tasks, int(use_deadline), args.range_l, args.range_r)
    model = torch.load("../Pandamodels/localrlmodels/" + temp_fname).cuda()
    rl_model = Solver(
        args.num_procs,
        args.embedding_size,
        args.hidden_size,
        args.num_tasks,
        use_deadline=False,
        use_cuda=True,
        ret_embedded_vector=True,
    )
    rl_model.load_state_dict(model.state_dict())
    if use_cuda:
        model = model.cuda()
        rl_model = rl_model.cuda()
    rl_model = rl_model.eval()
    if use_cuda:
        rl_model = rl_model.to("cuda:0")

    # Two guide orderings: the identity permutation and its reverse.
    ss = np.array(list(range(32)))
    ss2 = np.array(list(reversed(range(32))))
    guide = torch.LongTensor(np.array([ss, ss2], dtype=np.int32)).cuda()

    # Run a single two-sample batch through the model under both guides.
    for epoch in range(args.num_epochs):
        loss_ = 0
        avg_hit = []
        for batch_idx, (_, sample_batch) in enumerate(train_loader):
            sample_batch = sample_batch[:2, :, :]
            _, actions, distributions = rl_model(sample_batch, guide=guide)
            break
        break

    # Debug probe: print the total-variation distance between the two guided
    # distributions at step 5, then stop. The code below is unreachable until
    # this exit(0) is removed.
    a = distributions[0][5].detach().cpu().numpy()
    b = distributions[1][5].detach().cpu().numpy()
    print(0.5 * np.sum(np.abs(a - b)))
    exit(0)

    kl_div = 0
    KL_calc = torch.nn.KLDivLoss(reduction="batchmean")
    actions = actions.squeeze()

    # Timestep +1
    if n_step == 1:
        for t in range(args.num_tasks - 1):
            previous_distribution = distributions[t].squeeze()
            sampled_task = actions[t]
            previous_distribution[sampled_task] = 0
            renormalized_distribution = torch.log(previous_distribution / torch.sum(previous_distribution))
            next_distribution = distributions[t+1]
            kl_div += KL_calc(renormalized_distribution, next_distribution)
            # kl_div += torch.nn.KLDivLoss(size_average=False)(renormalized_distribution, next_distribution)
        return kl_div / (args.num_tasks - 1)

    # Timestep +3
    elif n_step == 3:
        for t in range(args.num_tasks - 3):
            prev_distribution = distributions[t].squeeze()
            first_sampled_mask = actions[t]
            second_sampled_mask = actions[t+1]
            third_sampled_mask = actions[t+2]
            prev_distribution[first_sampled_mask] = 0
            prev_distribution = prev_distribution / torch.sum(prev_distribution)
            prev_distribution[second_sampled_mask] = 0
            prev_distribution = prev_distribution / torch.sum(prev_distribution)
            prev_distribution[third_sampled_mask] = 0
            prev_distribution = prev_distribution / torch.sum(prev_distribution)
            prev_distribution = prev_distribution.detach().cpu().numpy()
            # renormalized_distribution = torch.log(prev_distribution)
            rl_next_distribution = distributions[t+3]
            rl_next_distribution = rl_next_distribution.detach().cpu().numpy()
            # L1 (total-variation-style) distance here instead of KL.
            kl_div += np.sum(np.abs(prev_distribution - rl_next_distribution))
            # kl_div += KL_calc(renormalized_distribution, rl_next_distribution)
        return kl_div / (args.num_tasks - 3)
    # Timestep +5
    else:
        for t in range(args.num_tasks - 5):
            prev_distribution = distributions[t].squeeze()
            # Zero out the five tasks sampled at steps t..t+4, renormalizing
            # after each removal.
            for offset in range(5):
                prev_distribution[actions[t + offset]] = 0
                prev_distribution = prev_distribution / torch.sum(prev_distribution)
            renormalized_distribution = torch.log(prev_distribution)
            rl_next_distribution = distributions[t+5]
            kl_div += KL_calc(renormalized_distribution, rl_next_distribution)
        return kl_div / (args.num_tasks - 5)
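# A standalone sketch of the renormalize-then-KL step used above. PyTorch's
# KLDivLoss expects the *input* as log-probabilities and the *target* as plain
# probabilities, which is why the masked distribution goes through torch.log.
# Unlike the code above, this sketch clamps before the log to avoid -inf on the
# masked entry; the tensors in the usage example are illustrative only.
def _masked_kl_sketch(dist_t, dist_t1, sampled_task):
    p = dist_t.clone()
    p[sampled_task] = 0                                  # drop the task sampled at step t
    log_p = torch.log((p / p.sum()).clamp_min(1e-12))    # renormalize, go to log-space
    return torch.nn.KLDivLoss(reduction="batchmean")(log_p, dist_t1)
# Example: _masked_kl_sketch(torch.tensor([0.5, 0.3, 0.2]),
#                            torch.tensor([0.0, 0.6, 0.4]), 0) is ~0, since the
# renormalized step-t distribution matches the step-t+1 distribution exactly.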