Example #1
def validate(vali_set, model):
    # Greedy rollout of a trained policy on every instance in vali_set.
    # Returns one value per instance (rewards - env.posRewards, i.e. the
    # negated makespan); callers negate the mean to get the validation quality.
    N_JOBS = vali_set[0][0].shape[0]
    N_MACHINES = vali_set[0][0].shape[1]

    from JSSP_Env import SJSSP
    from mb_agg import g_pool_cal
    from agent_utils import sample_select_action
    from agent_utils import greedy_select_action
    import numpy as np
    import torch
    from Params import configs
    env = SJSSP(n_j=N_JOBS, n_m=N_MACHINES)
    device = torch.device(configs.device)
    g_pool_step = g_pool_cal(
        graph_pool_type=configs.graph_pool_type,
        batch_size=torch.Size([1, env.number_of_tasks, env.number_of_tasks]),
        n_nodes=env.number_of_tasks,
        device=device)
    make_spans = []
    # rollout using model
    for data in vali_set:
        adj, fea, candidate, mask = env.reset(data)
        rewards = -env.initQuality
        while True:
            fea_tensor = torch.from_numpy(np.copy(fea)).to(device)
            adj_tensor = torch.from_numpy(np.copy(adj)).to(device).to_sparse()
            candidate_tensor = torch.from_numpy(np.copy(candidate)).to(device)
            mask_tensor = torch.from_numpy(np.copy(mask)).to(device)
            with torch.no_grad():
                pi, _ = model(x=fea_tensor,
                              graph_pool=g_pool_step,
                              padded_nei=None,
                              adj=adj_tensor,
                              candidate=candidate_tensor.unsqueeze(0),
                              mask=mask_tensor.unsqueeze(0))
            # action = sample_select_action(pi, candidate)
            action = greedy_select_action(pi, candidate)
            adj, fea, reward, done, candidate, mask = env.step(action.item())
            rewards += reward
            if done:
                break
        make_spans.append(rewards - env.posRewards)
        # print(rewards - env.posRewards)
    return np.array(make_spans)
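
A minimal usage sketch for validate above, assuming a validation set generated as in Example #4 (the generatedData*.npy path, the PPO constructor arguments, and the sign convention are taken from that example; this is an illustration, not the project's evaluation script):

# Sketch: evaluate a policy with validate(); assumptions noted above.
import numpy as np
import torch
from Params import configs
from PPO_jssp_multiInstances import PPO

dataLoaded = np.load('./DataGen/generatedData' + str(configs.n_j) + '_' +
                     str(configs.n_m) + '_Seed' +
                     str(configs.np_seed_validation) + '.npy')
vali_data = [(dataLoaded[i][0], dataLoaded[i][1])
             for i in range(dataLoaded.shape[0])]

ppo = PPO(configs.lr, configs.gamma, configs.k_epochs, configs.eps_clip,
          n_j=configs.n_j, n_m=configs.n_m,
          num_layers=configs.num_layers,
          neighbor_pooling_type=configs.neighbor_pooling_type,
          input_dim=configs.input_dim, hidden_dim=configs.hidden_dim,
          num_mlp_layers_feature_extract=configs.num_mlp_layers_feature_extract,
          num_mlp_layers_actor=configs.num_mlp_layers_actor,
          hidden_dim_actor=configs.hidden_dim_actor,
          num_mlp_layers_critic=configs.num_mlp_layers_critic,
          hidden_dim_critic=configs.hidden_dim_critic)

# validate() returns negative makespans, so negate the mean for a quality score
print('Mean validation quality:', -validate(vali_data, ppo.policy).mean())
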
Example #2
                    type=int,
                    default=200,
                    help='Seed for validate set generation')
params = parser.parse_args()

N_JOBS_P = params.Pn_j
N_MACHINES_P = params.Pn_m
LOW = params.low
HIGH = params.high
SEED = params.seed
N_JOBS_N = params.Nn_j
N_MACHINES_N = params.Nn_m

from Params import configs
from JSSP_Env import SJSSP
from PPO_jssp_multiInstances import PPO
env = SJSSP(n_j=N_JOBS_P, n_m=N_MACHINES_P)

ppo = PPO(
    configs.lr,
    configs.gamma,
    configs.k_epochs,
    configs.eps_clip,
    n_j=N_JOBS_P,
    n_m=N_MACHINES_P,
    num_layers=configs.num_layers,
    neighbor_pooling_type=configs.neighbor_pooling_type,
    input_dim=configs.input_dim,
    hidden_dim=configs.hidden_dim,
    num_mlp_layers_feature_extract=configs.num_mlp_layers_feature_extract,
    num_mlp_layers_actor=configs.num_mlp_layers_actor,
    hidden_dim_actor=configs.hidden_dim_actor,
    num_mlp_layers_critic=configs.num_mlp_layers_critic,
    hidden_dim_critic=configs.hidden_dim_critic)
Example #3
import numpy as np
from JSSP_Env import SJSSP
from uniform_instance_gen import uni_instance_gen
from Params import configs
import time

n_j = 200
n_m = 50
low = 1
high = 99
SEED = 11
np.random.seed(SEED)
env = SJSSP(n_j=n_j, n_m=n_m)

# rollout env random action
t1 = time.time()
data = uni_instance_gen(n_j=n_j, n_m=n_m, low=low, high=high)
# Optional hand-crafted 3x3 instance (only usable with n_j = n_m = 3 above)
dur = np.array([[83, 65, 3], [69, 42, 64], [27, 27, 18]])
mch = np.array([[3, 2, 1], [1, 2, 3], [2, 1, 3]])
# data = (dur, mch)
print('Dur')
print(data[0])
print('Mach')
print(data[-1])
print()
_, _, omega, mask = env.reset(data)
# print('Init end time')
# print(env.LBs)
# print()
rewards = [-env.initQuality]
while True:
    # Random rollout: dispatch a random feasible operation from the candidate set
    action = np.random.choice(omega[~mask])
    adj, fea, reward, done, omega, mask = env.step(action)
    rewards.append(reward)
    if done:
        break
print('Random rollout time:', time.time() - t1)
Example #4
def main():
    # Note: this snippet relies on module-level imports from the surrounding
    # training script (numpy, torch, time, configs, device, PPO, Memory,
    # g_pool_cal, select_action, validate) that are not shown here.

    from JSSP_Env import SJSSP
    envs = [
        SJSSP(n_j=configs.n_j, n_m=configs.n_m)
        for _ in range(configs.num_envs)
    ]

    from uniform_instance_gen import uni_instance_gen
    data_generator = uni_instance_gen

    dataLoaded = np.load('./DataGen/generatedData' + str(configs.n_j) + '_' +
                         str(configs.n_m) + '_Seed' +
                         str(configs.np_seed_validation) + '.npy')
    vali_data = []
    for i in range(dataLoaded.shape[0]):
        vali_data.append((dataLoaded[i][0], dataLoaded[i][1]))

    torch.manual_seed(configs.torch_seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(configs.torch_seed)
    np.random.seed(configs.np_seed_train)

    memories = [Memory() for _ in range(configs.num_envs)]

    ppo = PPO(
        configs.lr,
        configs.gamma,
        configs.k_epochs,
        configs.eps_clip,
        n_j=configs.n_j,
        n_m=configs.n_m,
        num_layers=configs.num_layers,
        neighbor_pooling_type=configs.neighbor_pooling_type,
        input_dim=configs.input_dim,
        hidden_dim=configs.hidden_dim,
        num_mlp_layers_feature_extract=configs.num_mlp_layers_feature_extract,
        num_mlp_layers_actor=configs.num_mlp_layers_actor,
        hidden_dim_actor=configs.hidden_dim_actor,
        num_mlp_layers_critic=configs.num_mlp_layers_critic,
        hidden_dim_critic=configs.hidden_dim_critic)

    g_pool_step = g_pool_cal(graph_pool_type=configs.graph_pool_type,
                             batch_size=torch.Size([
                                 1, configs.n_j * configs.n_m,
                                 configs.n_j * configs.n_m
                             ]),
                             n_nodes=configs.n_j * configs.n_m,
                             device=device)
    # training loop
    log = []
    validation_log = []
    optimal_gaps = []
    optimal_gap = 1
    record = 100000
    for i_update in range(configs.max_updates):

        t3 = time.time()

        ep_rewards = [0 for _ in range(configs.num_envs)]
        adj_envs = []
        fea_envs = []
        candidate_envs = []
        mask_envs = []

        for i, env in enumerate(envs):
            adj, fea, candidate, mask = env.reset(
                data_generator(n_j=configs.n_j,
                               n_m=configs.n_m,
                               low=configs.low,
                               high=configs.high))
            adj_envs.append(adj)
            fea_envs.append(fea)
            candidate_envs.append(candidate)
            mask_envs.append(mask)
            ep_rewards[i] = -env.initQuality
        # rollout the env
        while True:
            fea_tensor_envs = [
                torch.from_numpy(np.copy(fea)).to(device) for fea in fea_envs
            ]
            adj_tensor_envs = [
                torch.from_numpy(np.copy(adj)).to(device).to_sparse()
                for adj in adj_envs
            ]
            candidate_tensor_envs = [
                torch.from_numpy(np.copy(candidate)).to(device)
                for candidate in candidate_envs
            ]
            mask_tensor_envs = [
                torch.from_numpy(np.copy(mask)).to(device)
                for mask in mask_envs
            ]

            with torch.no_grad():
                action_envs = []
                a_idx_envs = []
                for i in range(configs.num_envs):
                    pi, _ = ppo.policy_old(
                        x=fea_tensor_envs[i],
                        graph_pool=g_pool_step,
                        padded_nei=None,
                        adj=adj_tensor_envs[i],
                        candidate=candidate_tensor_envs[i].unsqueeze(0),
                        mask=mask_tensor_envs[i].unsqueeze(0))
                    action, a_idx = select_action(pi, candidate_envs[i],
                                                  memories[i])
                    action_envs.append(action)
                    a_idx_envs.append(a_idx)

            adj_envs = []
            fea_envs = []
            candidate_envs = []
            mask_envs = []
            # Saving episode data
            for i in range(configs.num_envs):
                memories[i].adj_mb.append(adj_tensor_envs[i])
                memories[i].fea_mb.append(fea_tensor_envs[i])
                memories[i].candidate_mb.append(candidate_tensor_envs[i])
                memories[i].mask_mb.append(mask_tensor_envs[i])
                memories[i].a_mb.append(a_idx_envs[i])

                adj, fea, reward, done, candidate, mask = envs[i].step(
                    action_envs[i].item())

                adj_envs.append(adj)
                fea_envs.append(fea)
                candidate_envs.append(candidate)
                mask_envs.append(mask)
                ep_rewards[i] += reward
                memories[i].r_mb.append(reward)
                memories[i].done_mb.append(done)
            # Every env dispatches one operation per iteration, so all envs
            # finish after the same number of steps; checking the first suffices.
            if envs[0].done():
                break
        for j in range(configs.num_envs):
            ep_rewards[j] -= envs[j].posRewards

        loss, v_loss = ppo.update(memories, configs.n_j * configs.n_m,
                                  configs.graph_pool_type)
        for memory in memories:
            memory.clear_memory()
        mean_rewards_all_env = sum(ep_rewards) / len(ep_rewards)
        log.append([i_update, mean_rewards_all_env])
        if i_update % 100 == 0:
            # Use a context manager so the log file is closed after each dump
            with open(
                    './' + 'log_' + str(configs.n_j) + '_' + str(configs.n_m) +
                    '_' + str(configs.low) + '_' + str(configs.high) + '.txt',
                    'w') as file_writing_obj:
                file_writing_obj.write(str(log))

        # log results
        print('Episode {}\t Last reward: {:.2f}\t Mean_Vloss: {:.8f}'.format(
            i_update, mean_rewards_all_env, v_loss))

        # validate and save use mean performance
        t4 = time.time()
        if i_update % 99 == 0:
            vali_result = -validate(vali_data, ppo.policy).mean()
            validation_log.append(vali_result)
            if vali_result < record:
                torch.save(
                    ppo.policy.state_dict(), './{}.pth'.format(
                        str(configs.n_j) + '_' + str(configs.n_m) + '_' +
                        str(configs.low) + '_' + str(configs.high)))
                record = vali_result
            print('The validation quality is:', vali_result)
            with open(
                    './' + 'vali_' + str(configs.n_j) + '_' + str(configs.n_m) +
                    '_' + str(configs.low) + '_' + str(configs.high) + '.txt',
                    'w') as file_writing_obj1:
                file_writing_obj1.write(str(validation_log))
        t5 = time.time()
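
The loop above checkpoints the best policy with torch.save(ppo.policy.state_dict(), ...). A short sketch of restoring that checkpoint for later evaluation (the path mirrors the save call above; loading uses PyTorch's standard load_state_dict):

# Sketch: reload the best checkpoint written by the training loop above and
# re-run validation. Assumes ppo, vali_data, configs, device and validate()
# are set up exactly as in this example.
ckpt_path = './{}.pth'.format(str(configs.n_j) + '_' + str(configs.n_m) + '_' +
                              str(configs.low) + '_' + str(configs.high))
ppo.policy.load_state_dict(torch.load(ckpt_path, map_location=device))
print('Reloaded validation quality:', -validate(vali_data, ppo.policy).mean())
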
Example #5
                    type=str,
                    default='tai',
                    help='Which benchmark to test')
params = parser.parse_args()

N_JOBS_P = params.Pn_j
N_MACHINES_P = params.Pn_m
benchmark = params.which_benchmark
N_JOBS_N = params.Nn_j
N_MACHINES_N = params.Nn_m
from Params import configs
LOW = configs.low
HIGH = configs.high

from JSSP_Env import SJSSP
from PPO_jssp_multiInstances import PPO
env = SJSSP(n_j=N_JOBS_P, n_m=N_MACHINES_P)

ppo = PPO(
    configs.lr,
    configs.gamma,
    configs.k_epochs,
    configs.eps_clip,
    n_j=N_JOBS_P,
    n_m=N_MACHINES_P,
    num_layers=configs.num_layers,
    neighbor_pooling_type=configs.neighbor_pooling_type,
    input_dim=configs.input_dim,
    hidden_dim=configs.hidden_dim,
    num_mlp_layers_feature_extract=configs.num_mlp_layers_feature_extract,
    num_mlp_layers_actor=configs.num_mlp_layers_actor,
    hidden_dim_actor=configs.hidden_dim_actor,
    num_mlp_layers_critic=configs.num_mlp_layers_critic,
    hidden_dim_critic=configs.hidden_dim_critic)
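
A trained checkpoint would normally be loaded next before rolling the policy out on the chosen benchmark. A hedged sketch, under the assumption that the weights were saved by Example #4's training loop (so the file name encodes the trained size N_JOBS_N x N_MACHINES_N together with LOW and HIGH):

# Sketch: load weights trained on N_JOBS_N x N_MACHINES_N instances
# (assumes the file was written by the training loop shown in Example #4).
import torch
path = './{}.pth'.format(str(N_JOBS_N) + '_' + str(N_MACHINES_N) + '_' +
                         str(LOW) + '_' + str(HIGH))
ppo.policy.load_state_dict(
    torch.load(path, map_location=torch.device(configs.device)))
ppo.policy.eval()
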