Code Example #1
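The snippets below are shown without their imports. A plausible common header, assuming the surrounding repository provides PtrNet1/PtrNet2 (the actor and critic pointer networks), Generator (the training dataset), and an Env helper; the repo-local import paths are assumptions:

import os
from datetime import datetime
from time import time

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm

# Repo-local modules; import paths are assumed, adjust to the actual layout:
# from actor import PtrNet1
# from critic import PtrNet2
# from data import Generator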
def sampling(cfg, env, test_input):
    # Tile the single test instance into a batch so the stochastic decoder
    # samples cfg.batch candidate tours in one forward pass.
    test_inputs = test_input.repeat(cfg.batch, 1, 1)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    act_model = PtrNet1(cfg)
    if os.path.exists(cfg.act_model_path):
        act_model.load_state_dict(
            torch.load(cfg.act_model_path, map_location=device))
    act_model = act_model.to(device)
    pred_tours, _ = act_model(test_inputs, device)
    l_batch = env.stack_l(test_inputs, pred_tours)
    # Return the shortest of the sampled tours.
    index_lmin = torch.argmin(l_batch)
    best_tour = pred_tours[index_lmin]
    return best_tour
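env.stack_l itself is not shown in these snippets. For intuition, a minimal sketch of a batched closed-tour length, assuming inputs of shape (batch, city_t, 2) and integer tour permutations; the function name and signature here are hypothetical:

def tour_length(inputs, tours):
    # inputs: (batch, city_t, 2) city coordinates; tours: (batch, city_t) index permutations
    coords = torch.gather(inputs, 1, tours.unsqueeze(-1).expand(-1, -1, 2))
    nxt = torch.roll(coords, shifts=-1, dims=1)  # successor of each city, wrapping to the start
    return (coords - nxt).norm(dim=2).sum(dim=1)  # (batch,) closed-tour lengths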
Code Example #2
def train_model(cfg, env, log_path=None):
    date = datetime.now().strftime('%m%d_%H_%M')
    if cfg.islogger:
        param_path = cfg.log_dir + '%s_%s_param.csv' % (
            date, cfg.task)  # cfg.log_dir = ./Csv/
        print(f'generate {param_path}')
        with open(param_path, 'w') as f:
            f.write(''.join('%s,%s\n' % item for item in vars(cfg).items()))

    act_model = PtrNet1(cfg)
    if cfg.optim == 'Adam':
        act_optim = optim.Adam(act_model.parameters(), lr=cfg.lr)
    if cfg.is_lr_decay:
        act_lr_scheduler = optim.lr_scheduler.StepLR(
            act_optim, step_size=cfg.lr_decay_step, gamma=cfg.lr_decay)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    act_model = act_model.to(device)

    if cfg.mode == 'train':
        cri_model = PtrNet2(cfg)
        if cfg.optim == 'Adam':
            cri_optim = optim.Adam(cri_model.parameters(), lr=cfg.lr)
        if cfg.is_lr_decay:
            cri_lr_scheduler = optim.lr_scheduler.StepLR(
                cri_optim, step_size=cfg.lr_decay_step, gamma=cfg.lr_decay)
        cri_model = cri_model.to(device)
        ave_cri_loss = 0.

    mse_loss = nn.MSELoss()
    dataset = Generator(cfg, env)
    dataloader = DataLoader(dataset, batch_size=cfg.batch, shuffle=True)

    ave_act_loss, ave_L = 0., 0.
    min_L, cnt = 1e7, 0
    t1 = time()
    # for i, inputs in tqdm(enumerate(dataloader)):
    for i, inputs in enumerate(dataloader):
        inputs = inputs.to(device)
        # The actor returns the sampled tour and a log-probability term `ll`.
        pred_tour, ll = act_model(inputs, device)
        real_l = env.stack_l_fast(inputs, pred_tour)
        if cfg.mode == 'train':
            # Critic update: regress the predicted tour length onto the
            # observed one (detached, so no gradient reaches the actor).
            pred_l = cri_model(inputs, device)
            cri_loss = mse_loss(pred_l, real_l.detach())
            cri_optim.zero_grad()
            cri_loss.backward()
            nn.utils.clip_grad_norm_(cri_model.parameters(),
                                     max_norm=1.,
                                     norm_type=2)
            cri_optim.step()
            if cfg.is_lr_decay:
                cri_lr_scheduler.step()
        elif cfg.mode == 'train_emv':
            # Critic-free variant: an exponential moving average of the
            # observed tour lengths serves as the baseline.
            if i == 0:
                L = real_l.detach().mean()
            else:
                L = (L * 0.9) + (0.1 * real_l.detach().mean())
            pred_l = L

        # REINFORCE with baseline: advantage = observed length - baseline.
        adv = real_l.detach() - pred_l.detach()
        act_loss = (adv * ll).mean()
        act_optim.zero_grad()
        act_loss.backward()
        nn.utils.clip_grad_norm_(act_model.parameters(),
                                 max_norm=1.,
                                 norm_type=2)
        act_optim.step()
        if cfg.is_lr_decay:
            act_lr_scheduler.step()

        ave_act_loss += act_loss.item()
        if cfg.mode == 'train':
            ave_cri_loss += cri_loss.item()
        ave_L += real_l.mean().item()

        if i % cfg.log_step == 0:
            t2 = time()
            if cfg.mode == 'train':
                print(
                    'step:%d/%d, actor loss:%1.3f, critic loss:%1.3f, L:%1.3f, %dmin%dsec'
                    % (i, cfg.steps, ave_act_loss / (i + 1), ave_cri_loss /
                       (i + 1), ave_L / (i + 1), (t2 - t1) // 60,
                       (t2 - t1) % 60))
                if cfg.islogger:
                    if log_path is None:
                        log_path = cfg.log_dir + '%s_%s_train.csv' % (
                            date, cfg.task)  # cfg.log_dir = ./Csv/
                        with open(log_path, 'w') as f:
                            f.write(
                                'step,actor loss,critic loss,average distance,time\n'
                            )
                    else:
                        with open(log_path, 'a') as f:
                            f.write('%d,%1.4f,%1.4f,%1.4f,%dmin%dsec\n' %
                                    (i, ave_act_loss / (i + 1), ave_cri_loss /
                                     (i + 1), ave_L / (i + 1), (t2 - t1) // 60,
                                     (t2 - t1) % 60))

            elif cfg.mode == 'train_emv':
                print('step:%d/%d, actor loss:%1.3f, L:%1.3f, %dmin%dsec' %
                      (i, cfg.steps, ave_act_loss / (i + 1), ave_L / (i + 1),
                       (t2 - t1) // 60, (t2 - t1) % 60))
                if cfg.islogger:
                    if log_path is None:
                        log_path = cfg.log_dir + '%s_%s_train_emv.csv' % (
                            date, cfg.task)  # cfg.log_dir = ./Csv/
                        with open(log_path, 'w') as f:
                        f.write('step,actor loss,average distance,time\n')
                    else:
                        with open(log_path, 'a') as f:
                            f.write('%d,%1.4f,%1.4f,%dmin%dsec\n' %
                                    (i, ave_act_loss / (i + 1), ave_L /
                                     (i + 1), (t2 - t1) // 60, (t2 - t1) % 60))
            if ave_L / (i + 1) < min_L:
                min_L = ave_L / (i + 1)
            else:
                cnt += 1
                print(f'cnt: {cnt}/20')
                if cnt >= 20:
                    print("early stop: average cost can't decrease anymore")
                    if log_path is not None:
                        with open(log_path, 'a') as f:
                            f.write('\nearly stop')
                    break
            t1 = time()
    if cfg.issaver:
        torch.save(act_model.state_dict(),
                   cfg.model_dir + '%s_%s_step%d_act.pt' %
                   (cfg.task, date, i))  # cfg.model_dir = ./Pt/
        print('save model...')
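The 'train_emv' branch above replaces the learned critic with an exponential moving average of observed tour lengths. A self-contained sketch of that baseline update, using the same 0.9/0.1 weighting (the random lengths stand in for real_l):

import torch

torch.manual_seed(0)
ema = None
for step in range(5):
    lengths = 3 + torch.rand(128)  # stand-in for real_l: (batch,) tour lengths
    mean_l = lengths.mean()
    ema = mean_l if ema is None else 0.9 * ema + 0.1 * mean_l
    advantage = lengths - ema      # baseline-corrected REINFORCE signal
    print(step, ema.item())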
Code Example #3
def train_model(cfg, env, log_path=None):
    torch.autograd.set_detect_anomaly(True)  # debugging aid: flags NaN/Inf during backward
    date = datetime.now().strftime('%m%d_%H_%M')
    act_model = PtrNet1(cfg)
    if cfg.optim == 'Adam':
        act_optim = optim.Adam(act_model.parameters(), lr=cfg.lr)
    act_lr_scheduler = optim.lr_scheduler.StepLR(act_optim,
                                                 step_size=cfg.lr_decay_step,
                                                 gamma=cfg.lr_decay)

    cri_model = PtrNet2(cfg)
    if cfg.optim == 'Adam':
        cri_optim = optim.Adam(cri_model.parameters(), lr=cfg.lr)
    cri_lr_scheduler = optim.lr_scheduler.StepLR(cri_optim,
                                                 step_size=cfg.lr_decay_step,
                                                 gamma=cfg.lr_decay)
    cri_loss_func = nn.MSELoss()

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    act_model, cri_model = act_model.to(device), cri_model.to(device)

    for i in tqdm(range(cfg.steps)):
        inputs = env.stack_nodes()
        inputs = inputs.to(device)
        pred_tour, neg_log = act_model(inputs, device)
        real_l = env.stack_l(inputs, pred_tour)
        pred_l = cri_model(inputs, device)
        cri_optim.zero_grad()
        # The observed tour length is a constant regression target for the critic.
        cri_loss = cri_loss_func(pred_l, real_l.detach())
        cri_loss.backward()
        # clip_grad_norm_ computes the global L2 norm of all gradients and
        # rescales them so that the norm is at most max_norm.
        nn.utils.clip_grad_norm_(cri_model.parameters(),
                                 max_norm=1,
                                 norm_type=2)
        cri_optim.step()
        cri_lr_scheduler.step()
        # detach() (requires_grad=False) keeps the actor loss's gradient from
        # flowing back into the critic through the advantage.
        adv = pred_l.detach() - real_l
        act_optim.zero_grad()
        act_loss = torch.mean(adv * neg_log)
        act_loss.backward()
        nn.utils.clip_grad_norm_(act_model.parameters(),
                                 max_norm=1,
                                 norm_type=2)
        act_optim.step()
        act_lr_scheduler.step()

        if i % 10 == 0:
            print('step:%d, actor loss:%1.3f' % (i, act_loss.item()))

        if i % cfg.log_step == 0:
            if cfg.islogger:
                if log_path is None:
                    log_path = cfg.log_dir + 'train_%s.csv' % (
                        date)  #cfg.log_dir = ./Csv/
                    with open(log_path, 'w') as f:
                        f.write('step,actor loss,critic loss,distance\n')
                else:
                    with open(log_path, 'a') as f:
                        f.write(
                            '%d,%1.4f,%1.4f, %1.4f\n' %
                            (i, act_loss.item(), cri_loss.item(), real_l[0]))

            if cfg.issaver:
                torch.save(act_model.state_dict(),
                           cfg.model_dir + '%s_step%d_act.pt' %
                           (date, i))  # cfg.model_dir = ./Pt/
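For reference, clip_grad_norm_, used in every loop here, rescales the concatenated gradient so its global L2 norm does not exceed max_norm. A standalone check with toy values (not from the repository):

import torch
import torch.nn as nn

w = nn.Parameter(torch.tensor([3.0, 4.0]))
(10.0 * w).sum().backward()                        # w.grad = [10., 10.], global norm ~14.14
nn.utils.clip_grad_norm_([w], max_norm=1., norm_type=2)
print(w.grad)                                      # rescaled so the global norm is 1.0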
Code Example #4
def active_search(cfg, env, test_input, log_path=None):
    '''
    Active search keeps updating the model parameters during inference
    on a single input.
    test_input: (city_t, xy)
    '''
    date = datetime.now().strftime('%m%d_%H_%M')
    test_inputs = test_input.repeat(cfg.batch, 1, 1)
    random_tours = env.stack_random_tours()
    baseline = env.stack_l_fast(test_inputs, random_tours)
    l_min = baseline[0]
    # Fallback so best_tour is defined even if no sampled tour beats l_min.
    best_tour = random_tours[0]

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    act_model = PtrNet1(cfg)
    if os.path.exists(cfg.act_model_path):
        act_model.load_state_dict(
            torch.load(cfg.act_model_path, map_location=device))

    if cfg.optim == 'Adam':
        act_optim = optim.Adam(act_model.parameters(), lr=cfg.lr)

    act_model = act_model.to(device)
    baseline = baseline.to(device)

    for i in tqdm(range(cfg.steps)):
        '''
        From page 6/15 in the paper: "we randomly shuffle the input sequence
        before feeding it to our pointer network. This increases the
        stochasticity of the sampling procedure and leads to large
        improvements in Active Search."
        '''
        test_inputs = test_inputs.to(device)
        shuffle_inputs = env.shuffle(test_inputs)
        pred_shuffle_tours, neg_log = act_model(shuffle_inputs, device)
        pred_tours = env.back_tours(pred_shuffle_tours, shuffle_inputs,
                                    test_inputs, device)

        l_batch = env.stack_l_fast(test_inputs, pred_tours)

        index_lmin = torch.argmin(l_batch)
        if torch.min(l_batch) != l_batch[index_lmin]:  # sanity check on argmin
            raise RuntimeError
        if l_batch[index_lmin] < l_min:
            best_tour = pred_tours[index_lmin]
            print('update best tour, min l(%1.3f -> %1.3f)' %
                  (l_min, l_batch[index_lmin]))
            l_min = l_batch[index_lmin]

        # adv(batch) = l_batch(batch) - baseline(batch);
        # mean(adv(batch) * neg_log(batch)) -> act_loss (scalar)
        adv = l_batch - baseline
        act_optim.zero_grad()
        act_loss = torch.mean(adv * neg_log)
        act_loss.backward()
        nn.utils.clip_grad_norm_(act_model.parameters(),
                                 max_norm=1.,
                                 norm_type=2)
        act_optim.step()
        # Exponential moving average of sampled tour lengths as the baseline.
        baseline = baseline * cfg.alpha + (1 - cfg.alpha) * torch.mean(l_batch,
                                                                       dim=0)
        print('step:%d/%d, actor loss:%1.3f' % (i, cfg.steps, act_loss.item()))

        if cfg.islogger:
            if i % cfg.log_step == 0:
                if log_path is None:
                    log_path = cfg.log_dir + 'active_search_%s.csv' % (
                        date)  #cfg.log_dir = ./Csv/
                    with open(log_path, 'w') as f:
                        f.write('step,actor loss,minimum distance\n')
                else:
                    with open(log_path, 'a') as f:
                        f.write('%d,%1.4f,%1.4f\n' % (i, act_loss, l_min))
    return best_tour
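A hypothetical invocation, assuming a config namespace carrying the fields referenced above and an env instance from the repository; every name and value below is a placeholder:

from types import SimpleNamespace
import torch

cfg = SimpleNamespace(batch=128, steps=1000, optim='Adam', lr=1e-3,
                      alpha=0.99, act_model_path='./Pt/act.pt',
                      islogger=True, log_dir='./Csv/', log_step=10)
test_input = torch.rand(20, 2)  # one 20-city instance in the unit square
# best_tour = active_search(cfg, env, test_input)  # env: the repo's TSP environment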