def train_tsp(args):
    """Train an actor/critic pair on the TSP task (dict-style ``args``).

    Benchmarks to aim for:
      TSP20,  3.82 (Optimal)   - 3.97 (DRL4VRP)
      TSP50,  5.70 (Optimal)   - 6.08 (DRL4VRP)
      TSP100, 7.77 (OptimalBS) - 8.44 (DRL4VRP)

    ``args`` is a dict with at least: num_nodes, train_size, valid_size,
    seed, hidden_size, num_layers, dropout (plus whatever ``train``
    consumes via **kwargs).
    """

    from tasks import tsp
    from tasks.tsp import TSPDataset

    STATIC_SIZE = 2   # (x, y)
    DYNAMIC_SIZE = 1  # dummy for compatibility

    train_data = TSPDataset(args['num_nodes'], args['train_size'], args['seed'])
    # BUGFIX: offset the seed so the validation pool does not replay the
    # exact training instances (previously both used args['seed']).
    valid_data = TSPDataset(args['num_nodes'], args['valid_size'], args['seed'] + 1)

    args['train_data'] = train_data
    args['valid_data'] = valid_data
    args['reward_fn'] = tsp.reward
    args['render_fn'] = tsp.render

    # TSP has no dynamic state to evolve; masking alone drives decoding.
    mask_fn = tsp.update_mask
    update_fn = None

    actor = DRL4TSP(STATIC_SIZE, DYNAMIC_SIZE, args['hidden_size'], update_fn,
                    mask_fn, args['num_layers'], args['dropout']).to(device)
    critic = StateCritic(STATIC_SIZE, DYNAMIC_SIZE, args['hidden_size']).to(device)

    train(actor, critic, **args)
def train_tsp(args, w1=1, w2=0, checkpoint=None):
    """Train and/or evaluate an actor-critic pair on the multi-objective TSP.

    Paper baselines: TSP20 3.97, TSP50 6.08, TSP100 8.44.
    ``w1``/``w2`` weight the two objectives; ``checkpoint`` optionally names
    a directory holding ``actor.pt``/``critic.pt`` to resume from.
    """
    from tasks import motsp
    from tasks.motsp import TSPDataset

    # Four static input channels; the single dynamic channel is a dummy kept
    # only so the model interface stays uniform across tasks.
    STATIC_SIZE = 4
    DYNAMIC_SIZE = 1
    update_fn = None  # TSP decoding is driven by the mask alone

    actor = DRL4TSP(STATIC_SIZE, DYNAMIC_SIZE, args.hidden_size, update_fn,
                    motsp.update_mask, args.num_layers, args.dropout).to(device)
    critic = StateCritic(STATIC_SIZE, DYNAMIC_SIZE, args.hidden_size).to(device)

    # Distinct seeds keep the training and validation pools disjoint.
    train_data = TSPDataset(args.num_nodes, args.train_size, args.seed)
    valid_data = TSPDataset(args.num_nodes, args.valid_size, args.seed + 1)

    kwargs = vars(args)
    kwargs.update(train_data=train_data,
                  valid_data=valid_data,
                  reward_fn=motsp.reward,
                  render_fn=motsp.render)

    if checkpoint:
        # Resume both networks from a previous run.
        actor.load_state_dict(
            torch.load(os.path.join(checkpoint, 'actor.pt'), device))
        critic.load_state_dict(
            torch.load(os.path.join(checkpoint, 'critic.pt'), device))

    if not args.test:
        train(actor, critic, w1, w2, **kwargs)

    # Final evaluation always runs, on a third, unseen instance pool.
    test_data = TSPDataset(args.num_nodes, args.valid_size, args.seed + 2)
    test_dir = 'test'
    test_loader = DataLoader(test_data, args.valid_size, False, num_workers=0)
    out = validate(test_loader, actor, motsp.reward, w1, w2, motsp.render,
                   test_dir, num_plot=5)
    print('w1=%2.2f,w2=%2.2f. Average tour length: ' % (w1, w2), out)
def __init__(self, args):
    """Build the VRP datasets and models from ``args``, then train/test."""
    # Vehicle capacity grows with the number of nodes in the instance.
    LOAD_DICT = {10: 20, 20: 30, 50: 40, 100: 50}
    MAX_DEMAND = 9
    STATIC_SIZE = 2   # (x, y)
    DYNAMIC_SIZE = 2  # (load, demand)

    max_load = LOAD_DICT[args.num_nodes]

    training_set = VehicleRoutingDataset(args.train_size, args.num_nodes,
                                         max_load, MAX_DEMAND, args.seed)
    validation_set = VehicleRoutingDataset(args.valid_size, args.num_nodes,
                                           max_load, MAX_DEMAND, args.seed + 1)

    # The dataset object itself supplies the dynamic-state and mask hooks.
    actor = DRL4TSP(STATIC_SIZE, DYNAMIC_SIZE, args.hidden_size,
                    training_set.update_dynamic, training_set.update_mask,
                    args.num_layers, args.dropout).to(device)
    critic = StateCritic(STATIC_SIZE, DYNAMIC_SIZE, args.hidden_size).to(device)

    kwargs = vars(args)
    kwargs['train_data'] = training_set
    kwargs['valid_data'] = validation_set
    kwargs['reward_fn'] = vrp.reward
    kwargs['render_fn'] = vrp.render

    if args.checkpoint:
        # Restore both networks from the checkpoint directory.
        for net, fname in ((actor, 'actor.pt'), (critic, 'critic.pt')):
            net.load_state_dict(
                torch.load(os.path.join(args.checkpoint, fname), device))

    if not args.test:
        self.train(actor, critic, **kwargs)
    elif not args.checkpoint:
        # Testing without a checkpoint would evaluate random weights.
        raise TestError()

    test_set = VehicleRoutingDataset(args.valid_size, args.num_nodes,
                                     max_load, MAX_DEMAND, args.seed + 2)
    test_dir = args.test_dir
    print("Saving test results to {}".format(test_dir))
    test_loader = DataLoader(test_set, args.batch_size, False, num_workers=0)
    out = self.validate(test_loader, actor, vrp.reward, vrp.render,
                        test_dir, num_plot=5)
    print('Average tour length: ', out)
def train_tsp(args):
    """Train an actor/critic pair on TSP, then evaluate on a held-out set.

    Goals from paper: TSP20 3.97, TSP50 6.08, TSP100 8.44.
    Loads ``actor.pt``/``critic.pt`` from ``args.checkpoint`` when given;
    skips training when ``args.test`` is set.
    """
    from tasks import tsp
    from tasks.tsp import TSPDataset

    STATIC_SIZE = 2   # (x, y)
    DYNAMIC_SIZE = 1  # dummy for compatibility

    train_data = TSPDataset(args.num_nodes, args.train_size, args.seed)
    valid_data = TSPDataset(args.num_nodes, args.valid_size, args.seed + 1)

    update_fn = None  # TSP has no dynamic state; masking drives decoding

    actor = DRL4TSP(STATIC_SIZE, DYNAMIC_SIZE, args.hidden_size, update_fn,
                    tsp.update_mask, args.num_layers, args.dropout).to(device)
    critic = StateCritic(STATIC_SIZE, DYNAMIC_SIZE, args.hidden_size).to(device)

    kwargs = vars(args)
    kwargs['train_data'] = train_data
    kwargs['valid_data'] = valid_data
    kwargs['reward_fn'] = tsp.reward
    kwargs['render_fn'] = tsp.render

    if args.checkpoint:
        path = os.path.join(args.checkpoint, 'actor.pt')
        actor.load_state_dict(torch.load(path, device))

        path = os.path.join(args.checkpoint, 'critic.pt')
        critic.load_state_dict(torch.load(path, device))

    if not args.test:
        train(actor, critic, **kwargs)

    # BUGFIX: the test set was previously sized with args.train_size, making
    # the final evaluation as expensive as a training epoch and inconsistent
    # with the other tasks; use args.valid_size like validation does.
    test_data = TSPDataset(args.num_nodes, args.valid_size, args.seed + 2)

    test_dir = 'test'
    test_loader = DataLoader(test_data, args.batch_size, False, num_workers=0)
    out = validate(test_loader, actor, tsp.reward, tsp.render, test_dir,
                   num_plot=5)

    print('Average tour length: ', out)
def train_vrp(args):
    # NOTE(review): this block ends with a stray ''' -- it appears to sit
    # inside a triple-quoted (commented-out) region whose opening quote is
    # elsewhere in the file; confirm before treating it as live code.
    # Goals:
    # VRP10,  Capacity 20:  4.65 (BS) - 4.80 (Greedy)
    # VRP20,  Capacity 30:  6.34 (BS) - 6.51 (Greedy)
    # VRP50,  Capacity 40: 11.08 (BS) - 11.32 (Greedy)
    # VRP100, Capacity 50: 16.86 (BS) - 17.12 (Greedy)
    from tasks import vrp
    from tasks.vrp import VehicleRoutingDataset

    # Determines the maximum amount of load for a vehicle based on num nodes
    LOAD_DICT = {10: 20, 20: 30, 50: 40, 100: 50}
    MAX_DEMAND = 9
    STATIC_SIZE = 2  # (x, y)
    DYNAMIC_SIZE = 2  # (load, demand)

    max_load = LOAD_DICT[args['num_nodes']]

    train_data = VehicleRoutingDataset(args['train_size'],
                                       args['num_nodes'],
                                       max_load,
                                       MAX_DEMAND,
                                       args['seed'])

    # NOTE(review): validation reuses args['seed'], so the validation pool
    # duplicates the training pool -- other variants use args['seed'] + 1.
    valid_data = VehicleRoutingDataset(args['valid_size'],
                                       args['num_nodes'],
                                       max_load,
                                       MAX_DEMAND,
                                       args['seed'])

    args['train_data'] = train_data
    args['valid_data'] = valid_data
    args['reward_fn'] = vrp.reward
    args['render_fn'] = vrp.render

    # The dataset supplies both the dynamic-state and the mask update hooks.
    actor = DRL4TSP(STATIC_SIZE, DYNAMIC_SIZE, args['hidden_size'],
                    train_data.update_dynamic, train_data.update_mask,
                    args['num_layers'], args['dropout']).to(device)

    #critic = Critic(args['hidden_size']).to(device)
    critic = StateCritic(STATIC_SIZE, DYNAMIC_SIZE, args['hidden_size']).to(device)

    train(actor, critic, **args)
'''
def train_vrp(args):
    """Train an actor/critic pair on VRP, then evaluate on a held-out set.

    Goals from paper (Greedy decoding):
      VRP10/cap20 4.84, VRP20/cap30 6.59, VRP50/cap40 11.39, VRP100/cap50 17.23.
    """
    from tasks import vrp
    from tasks.vrp import VehicleRoutingDataset

    # Vehicle capacity is chosen from the number of nodes in the instance.
    LOAD_DICT = {10: 20, 20: 30, 50: 40, 100: 50}
    MAX_DEMAND = 9
    STATIC_SIZE = 2   # (x, y)
    DYNAMIC_SIZE = 2  # (load, demand)

    max_load = LOAD_DICT[args.num_nodes]

    training_set = VehicleRoutingDataset(args.train_size, args.num_nodes,
                                         max_load, MAX_DEMAND, args.seed)
    validation_set = VehicleRoutingDataset(args.valid_size, args.num_nodes,
                                           max_load, MAX_DEMAND, args.seed + 1)

    # The dataset object provides the dynamic-state and mask update hooks.
    actor = DRL4TSP(STATIC_SIZE, DYNAMIC_SIZE, args.hidden_size,
                    training_set.update_dynamic, training_set.update_mask,
                    args.num_layers, args.dropout).to(device)
    critic = StateCritic(STATIC_SIZE, DYNAMIC_SIZE, args.hidden_size).to(device)

    kwargs = vars(args)
    kwargs.update(train_data=training_set,
                  valid_data=validation_set,
                  reward_fn=vrp.reward,
                  render_fn=vrp.render)

    if args.checkpoint:
        # Restore both networks from a previous run.
        for net, fname in ((actor, 'actor.pt'), (critic, 'critic.pt')):
            net.load_state_dict(
                torch.load(os.path.join(args.checkpoint, fname), device))

    if not args.test:
        train(actor, critic, **kwargs)

    # Final evaluation always runs, on a third, unseen instance pool.
    test_set = VehicleRoutingDataset(args.valid_size, args.num_nodes,
                                     max_load, MAX_DEMAND, args.seed + 2)
    test_dir = 'test'
    test_loader = DataLoader(test_set, args.batch_size, False, num_workers=0)
    out = validate(test_loader, actor, vrp.reward, vrp.render, test_dir,
                   num_plot=5)
    print('Average tour length: ', out)
# It is convenient to visualize it in matlab device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # "../tsp_transfer_100run_500000_5epoch_20city/20"效果一般。应该再训练一遍 save_dir = "../tsp_transfer_100run_500000_5epoch_40city/40" # save_dir = "../tsp_transfer/100" # param update_fn = None STATIC_SIZE = 4 # (x, y) DYNAMIC_SIZE = 1 # dummy for compatibility # claim model actor = DRL4TSP(STATIC_SIZE, DYNAMIC_SIZE, 128, update_fn, motsp.update_mask, 1, 0.1).to(device) critic = StateCritic(STATIC_SIZE, DYNAMIC_SIZE, 128).to(device) # data 143 from Post_process.convet_kro_dataloader import Kro_dataset kro = 1 D = 200 if kro: D = 200 Test_data = Kro_dataset(D) Test_loader = DataLoader(Test_data, 1, False, num_workers=0) else: # 40city_train: city20 13 city40 143 city70 2523
# Widen a trained (static size 2) checkpoint so it can be reused with a
# 3-channel static input: load the old weights, then expand the static
# encoder's conv weight by duplicating its second input channel.
import argparse
import os

# BUGFIX: torch is used throughout this script (torch.device, torch.load,
# torch.cat) but was never imported.
import torch

from model import DRL4TSP, Encoder
from tasks import motsp
from trainer_motsp import StateCritic

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

STATIC_SIZE_original = 2  # (x, y) -- channel count the checkpoint was trained with
STATIC_SIZE = 3           # (x, y) + one extra static channel for the widened model
DYNAMIC_SIZE = 1          # dummy for compatibility
update_fn = None
hidden_size = 128
num_layers = 1
dropout = 0.1
checkpoint = "tsp20"

# Instantiate the networks with the ORIGINAL dimensions so the saved
# state dicts load cleanly.
actor = DRL4TSP(STATIC_SIZE_original, DYNAMIC_SIZE, hidden_size, update_fn,
                motsp.update_mask, num_layers, dropout).to(device)
critic = StateCritic(STATIC_SIZE_original, DYNAMIC_SIZE, hidden_size).to(device)

# Load the original 128*2*1 model.  (translated from Chinese comment)
path = os.path.join(checkpoint, 'actor.pt')
actor.load_state_dict(torch.load(path, device))
path = os.path.join(checkpoint, 'critic.pt')
critic.load_state_dict(torch.load(path, device))

# The actor's static_encoder and decoder need their dimensions changed, and
# the critic needs its dimensions changed too.  (translated from Chinese)

# static_encoder: grow conv.weight from 2 to 3 input channels by appending a
# copy of the second channel's weights along dim 1.
static_parameter = actor.static_encoder.state_dict()
temp = static_parameter['conv.weight']
temp = torch.cat([temp, temp[:, 1, :].unsqueeze(1)], dim=1)  # append one column in dim 1
static_parameter['conv.weight'] = temp