def validate(vali_set, model):
    """Evaluate `model` by greedy rollout on every JSSP instance in `vali_set`.

    Args:
        vali_set: sequence of instances accepted by ``SJSSP.reset``; the job
            and machine counts are read from the first instance's duration
            matrix, assumed shape ``(n_jobs, n_machines)`` — all instances
            are expected to share that size (one env is reused for all).
        model: policy network called as ``model(x=..., graph_pool=...,
            padded_nei=..., adj=..., candidate=..., mask=...)`` and returning
            ``(pi, value)``; only ``pi`` is used here.

    Returns:
        ``np.ndarray`` with one quality score per instance: the accumulated
        episode reward minus ``env.posRewards`` (under the env's reward
        shaping this corresponds to the negative makespan).
    """
    # Local imports keep this helper importable without pulling in the
    # heavy project/torch dependencies at module load time.
    import numpy as np
    import torch

    from JSSP_Env import SJSSP
    from mb_agg import g_pool_cal
    from agent_utils import greedy_select_action
    from Params import configs

    # Instance size inferred from the first duration matrix.
    n_jobs = vali_set[0][0].shape[0]
    n_machines = vali_set[0][0].shape[1]

    env = SJSSP(n_j=n_jobs, n_m=n_machines)
    device = torch.device(configs.device)
    # Graph-pooling operator for a single instance (batch size 1).
    g_pool_step = g_pool_cal(
        graph_pool_type=configs.graph_pool_type,
        batch_size=torch.Size([1, env.number_of_tasks, env.number_of_tasks]),
        n_nodes=env.number_of_tasks,
        device=device)

    make_spans = []
    # Roll out the policy greedily on each instance until the env reports done.
    for data in vali_set:
        adj, fea, candidate, mask = env.reset(data)
        rewards = -env.initQuality
        while True:
            # np.copy guards against the env mutating these arrays in place
            # after torch.from_numpy has aliased their memory.
            fea_tensor = torch.from_numpy(np.copy(fea)).to(device)
            adj_tensor = torch.from_numpy(np.copy(adj)).to(device).to_sparse()
            candidate_tensor = torch.from_numpy(np.copy(candidate)).to(device)
            mask_tensor = torch.from_numpy(np.copy(mask)).to(device)
            with torch.no_grad():
                pi, _ = model(x=fea_tensor,
                              graph_pool=g_pool_step,
                              padded_nei=None,
                              adj=adj_tensor,
                              candidate=candidate_tensor.unsqueeze(0),
                              mask=mask_tensor.unsqueeze(0))
            # Deterministic evaluation: always take the highest-probability op
            # (training uses sampling; validation is greedy).
            action = greedy_select_action(pi, candidate)
            adj, fea, reward, done, candidate, mask = env.step(action.item())
            rewards += reward
            if done:
                break
        make_spans.append(rewards - env.posRewards)
    return np.array(make_spans)
type=int, default=200, help='Seed for validate set generation') params = parser.parse_args() N_JOBS_P = params.Pn_j N_MACHINES_P = params.Pn_m LOW = params.low HIGH = params.high SEED = params.seed N_JOBS_N = params.Nn_j N_MACHINES_N = params.Nn_m from JSSP_Env import SJSSP from PPO_jssp_multiInstances import PPO env = SJSSP(n_j=N_JOBS_P, n_m=N_MACHINES_P) ppo = PPO( configs.lr, configs.gamma, configs.k_epochs, configs.eps_clip, n_j=N_JOBS_P, n_m=N_MACHINES_P, num_layers=configs.num_layers, neighbor_pooling_type=configs.neighbor_pooling_type, input_dim=configs.input_dim, hidden_dim=configs.hidden_dim, num_mlp_layers_feature_extract=configs.num_mlp_layers_feature_extract, num_mlp_layers_actor=configs.num_mlp_layers_actor, hidden_dim_actor=configs.hidden_dim_actor,
import numpy as np from JSSP_Env import SJSSP from uniform_instance_gen import uni_instance_gen from Params import configs import time n_j = 200 n_m = 50 low = 1 high = 99 SEED = 11 np.random.seed(SEED) env = SJSSP(n_j=n_j, n_m=n_m) # rollout env random action t1 = time.time() data = uni_instance_gen(n_j=n_j, n_m=n_m, low=low, high=high) dur = np.array([[83, 65, 3], [69, 42, 64], [27, 27, 18]]) mch = np.array([[3, 2, 1], [1, 2, 3], [2, 1, 3]]) # data = (dur, mch) print('Dur') print(data[0]) print('Mach') print(data[-1]) print() _, _, omega, mask = env.reset(data) # print('Init end time') # print(env.LBs) # print() rewards = [-env.initQuality] while True:
def main(): from JSSP_Env import SJSSP envs = [ SJSSP(n_j=configs.n_j, n_m=configs.n_m) for _ in range(configs.num_envs) ] from uniform_instance_gen import uni_instance_gen data_generator = uni_instance_gen dataLoaded = np.load('./DataGen/generatedData' + str(configs.n_j) + '_' + str(configs.n_m) + '_Seed' + str(configs.np_seed_validation) + '.npy') vali_data = [] for i in range(dataLoaded.shape[0]): vali_data.append((dataLoaded[i][0], dataLoaded[i][1])) torch.manual_seed(configs.torch_seed) if torch.cuda.is_available(): torch.cuda.manual_seed_all(configs.torch_seed) np.random.seed(configs.np_seed_train) memories = [Memory() for _ in range(configs.num_envs)] ppo = PPO( configs.lr, configs.gamma, configs.k_epochs, configs.eps_clip, n_j=configs.n_j, n_m=configs.n_m, num_layers=configs.num_layers, neighbor_pooling_type=configs.neighbor_pooling_type, input_dim=configs.input_dim, hidden_dim=configs.hidden_dim, num_mlp_layers_feature_extract=configs.num_mlp_layers_feature_extract, num_mlp_layers_actor=configs.num_mlp_layers_actor, hidden_dim_actor=configs.hidden_dim_actor, num_mlp_layers_critic=configs.num_mlp_layers_critic, hidden_dim_critic=configs.hidden_dim_critic) g_pool_step = g_pool_cal(graph_pool_type=configs.graph_pool_type, batch_size=torch.Size([ 1, configs.n_j * configs.n_m, configs.n_j * configs.n_m ]), n_nodes=configs.n_j * configs.n_m, device=device) # training loop log = [] validation_log = [] optimal_gaps = [] optimal_gap = 1 record = 100000 for i_update in range(configs.max_updates): t3 = time.time() ep_rewards = [0 for _ in range(configs.num_envs)] adj_envs = [] fea_envs = [] candidate_envs = [] mask_envs = [] for i, env in enumerate(envs): adj, fea, candidate, mask = env.reset( data_generator(n_j=configs.n_j, n_m=configs.n_m, low=configs.low, high=configs.high)) adj_envs.append(adj) fea_envs.append(fea) candidate_envs.append(candidate) mask_envs.append(mask) ep_rewards[i] = -env.initQuality # rollout the env while True: fea_tensor_envs = [ 
torch.from_numpy(np.copy(fea)).to(device) for fea in fea_envs ] adj_tensor_envs = [ torch.from_numpy(np.copy(adj)).to(device).to_sparse() for adj in adj_envs ] candidate_tensor_envs = [ torch.from_numpy(np.copy(candidate)).to(device) for candidate in candidate_envs ] mask_tensor_envs = [ torch.from_numpy(np.copy(mask)).to(device) for mask in mask_envs ] with torch.no_grad(): action_envs = [] a_idx_envs = [] for i in range(configs.num_envs): pi, _ = ppo.policy_old( x=fea_tensor_envs[i], graph_pool=g_pool_step, padded_nei=None, adj=adj_tensor_envs[i], candidate=candidate_tensor_envs[i].unsqueeze(0), mask=mask_tensor_envs[i].unsqueeze(0)) action, a_idx = select_action(pi, candidate_envs[i], memories[i]) action_envs.append(action) a_idx_envs.append(a_idx) adj_envs = [] fea_envs = [] candidate_envs = [] mask_envs = [] # Saving episode data for i in range(configs.num_envs): memories[i].adj_mb.append(adj_tensor_envs[i]) memories[i].fea_mb.append(fea_tensor_envs[i]) memories[i].candidate_mb.append(candidate_tensor_envs[i]) memories[i].mask_mb.append(mask_tensor_envs[i]) memories[i].a_mb.append(a_idx_envs[i]) adj, fea, reward, done, candidate, mask = envs[i].step( action_envs[i].item()) adj_envs.append(adj) fea_envs.append(fea) candidate_envs.append(candidate) mask_envs.append(mask) ep_rewards[i] += reward memories[i].r_mb.append(reward) memories[i].done_mb.append(done) if envs[0].done(): break for j in range(configs.num_envs): ep_rewards[j] -= envs[j].posRewards loss, v_loss = ppo.update(memories, configs.n_j * configs.n_m, configs.graph_pool_type) for memory in memories: memory.clear_memory() mean_rewards_all_env = sum(ep_rewards) / len(ep_rewards) log.append([i_update, mean_rewards_all_env]) if i_update % 100 == 0: file_writing_obj = open( './' + 'log_' + str(configs.n_j) + '_' + str(configs.n_m) + '_' + str(configs.low) + '_' + str(configs.high) + '.txt', 'w') file_writing_obj.write(str(log)) # log results print('Episode {}\t Last reward: {:.2f}\t Mean_Vloss: 
{:.8f}'.format( i_update, mean_rewards_all_env, v_loss)) # validate and save use mean performance t4 = time.time() if i_update % 99 == 0: vali_result = -validate(vali_data, ppo.policy).mean() validation_log.append(vali_result) if vali_result < record: torch.save( ppo.policy.state_dict(), './{}.pth'.format( str(configs.n_j) + '_' + str(configs.n_m) + '_' + str(configs.low) + '_' + str(configs.high))) record = vali_result print('The validation quality is:', vali_result) file_writing_obj1 = open( './' + 'vali_' + str(configs.n_j) + '_' + str(configs.n_m) + '_' + str(configs.low) + '_' + str(configs.high) + '.txt', 'w') file_writing_obj1.write(str(validation_log)) t5 = time.time()
type=str, default='tai', help='Which benchmark to test') params = parser.parse_args() N_JOBS_P = params.Pn_j N_MACHINES_P = params.Pn_m benchmark = params.which_benchmark N_JOBS_N = params.Nn_j N_MACHINES_N = params.Nn_m LOW = configs.low HIGH = configs.high from JSSP_Env import SJSSP from PPO_jssp_multiInstances import PPO env = SJSSP(n_j=N_JOBS_P, n_m=N_MACHINES_P) ppo = PPO( configs.lr, configs.gamma, configs.k_epochs, configs.eps_clip, n_j=N_JOBS_P, n_m=N_MACHINES_P, num_layers=configs.num_layers, neighbor_pooling_type=configs.neighbor_pooling_type, input_dim=configs.input_dim, hidden_dim=configs.hidden_dim, num_mlp_layers_feature_extract=configs.num_mlp_layers_feature_extract, num_mlp_layers_actor=configs.num_mlp_layers_actor, hidden_dim_actor=configs.hidden_dim_actor,