def run(opts):
    # Pretty print the run args
    pp.pprint(vars(opts))

    # Set the random seed
    torch.manual_seed(opts.seed)

    # Optionally configure tensorboard
    tb_logger = None
    if not opts.no_tensorboard:
        tb_logger = TbLogger(
            os.path.join(opts.log_dir,
                         "{}_{}".format(opts.problem, opts.graph_size),
                         opts.run_name))

    os.makedirs(opts.save_dir)
    # Save arguments so exact configuration can always be found
    with open(os.path.join(opts.save_dir, "args.json"), 'w') as f:
        json.dump(vars(opts), f, indent=True)

    # Set the device
    opts.device = torch.device("cuda:0" if opts.use_cuda else "cpu")

    # Figure out what's the problem
    problem = load_problem(opts.problem)

    # Load data from load_path
    load_data = {}
    assert opts.load_path is None or opts.resume is None, \
        "Only one of load path and resume can be given"
    load_path = opts.load_path if opts.load_path is not None else opts.resume
    if load_path is not None:
        print(' [*] Loading data from {}'.format(load_path))
        load_data = torch_load_cpu(load_path)

    # Initialize model
    model_class = {
        'attention': AttentionModel,
        'pointer': PointerNetwork
    }.get(opts.model, None)
    assert model_class is not None, "Unknown model: {}".format(opts.model)
    model = model_class(opts.embedding_dim,
                        opts.hidden_dim,
                        problem,
                        n_encode_layers=opts.n_encode_layers,
                        mask_inner=True,
                        mask_logits=True,
                        normalization=opts.normalization,
                        tanh_clipping=opts.tanh_clipping,
                        checkpoint_encoder=opts.checkpoint_encoder,
                        shrink_size=opts.shrink_size,
                        steps=opts.awe_steps,
                        graph_size=opts.graph_size).to(opts.device)

    if opts.use_cuda and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # Overwrite model parameters by parameters to load
    model_ = get_inner_model(model)
    model_.load_state_dict({
        **model_.state_dict(),
        **load_data.get('model', {})
    })

    # Initialize baseline
    if opts.baseline == 'exponential':
        baseline = ExponentialBaseline(opts.exp_beta)
    elif opts.baseline == 'constant':
        baseline = ConstantBaseline()
    elif opts.baseline == 'critic' or opts.baseline == 'critic_lstm':
        assert problem.NAME == 'tsp', "Critic only supported for TSP"
        baseline = CriticBaseline(
            (CriticNetworkLSTM(2, opts.embedding_dim, opts.hidden_dim,
                               opts.n_encode_layers, opts.tanh_clipping)
             if opts.baseline == 'critic_lstm' else
             CriticNetwork(2, opts.embedding_dim, opts.hidden_dim,
                           opts.n_encode_layers,
                           opts.normalization)).to(opts.device))
    elif opts.baseline == 'rollout':
        baseline = RolloutBaseline(model, problem, opts)
    elif opts.baseline == 'critic_lp':
        assert problem.NAME == 'lp'
        dim_vocab = {2: 2, 3: 5, 4: 15, 5: 52, 6: 203, 7: 877, 8: 4140}
        baseline = CriticBaseline(
            (CriticNetworkLP(dim_vocab[opts.awe_steps], opts.embedding_dim,
                             opts.hidden_dim, opts.n_encode_layers,
                             opts.normalization)).to(opts.device))
    else:
        assert opts.baseline is None, "Unknown baseline: {}".format(
            opts.baseline)
        baseline = NoBaseline()

    if opts.bl_warmup_epochs > 0:
        baseline = WarmupBaseline(baseline,
                                  opts.bl_warmup_epochs,
                                  warmup_exp_beta=opts.exp_beta)

    # Load baseline from data, make sure script is called with same type of baseline
    if 'baseline' in load_data:
        baseline.load_state_dict(load_data['baseline'])

    # Initialize optimizer
    optimizer = optim.Adam([{
        'params': model.parameters(),
        'lr': opts.lr_model
    }] + ([{
        'params': baseline.get_learnable_parameters(),
        'lr': opts.lr_critic
    }] if len(baseline.get_learnable_parameters()) > 0 else []))

    # Load optimizer state
    if 'optimizer' in load_data:
        optimizer.load_state_dict(load_data['optimizer'])
        for state in optimizer.state.values():
            for k, v in state.items():
                if torch.is_tensor(v):
                    state[k] = v.to(opts.device)

    # Initialize learning rate scheduler, decay by lr_decay once per epoch!
    lr_scheduler = optim.lr_scheduler.LambdaLR(
        optimizer, lambda epoch: opts.lr_decay**epoch)

    # Start the actual training loop
    val_dataset = problem.make_dataset(num_samples=opts.val_size,
                                       filename=opts.val_dataset,
                                       distribution=opts.data_distribution,
                                       size=opts.graph_size,
                                       degree=opts.degree,
                                       steps=opts.awe_steps,
                                       awe_samples=opts.awe_samples)

    if opts.resume:
        epoch_resume = int(
            os.path.splitext(os.path.split(opts.resume)[-1])[0].split("-")[1])

        # Set the random states
        torch.set_rng_state(load_data['rng_state'])
        if opts.use_cuda:
            torch.cuda.set_rng_state_all(load_data['cuda_rng_state'])
        # Dumping of state was done before epoch callback, so do that now (model is loaded)
        baseline.epoch_callback(model, epoch_resume)
        print("Resuming after {}".format(epoch_resume))
        opts.epoch_start = epoch_resume + 1

    if opts.eval_only:
        validate(model, val_dataset, opts)
    else:
        extra = {'updates': 0, 'avg_reward': 10**8, 'best_epoch': -1}
        start = time.time()
        for epoch in range(opts.epoch_start, opts.epoch_start + opts.n_epochs):
            train_epoch(model, optimizer, baseline, lr_scheduler, epoch,
                        val_dataset, problem, tb_logger, opts, extra)
        finish = time.time()
        with open("experiments.log", "a+") as f:
            f.write("{} {:.4f} {} {:.2f}\n".format(
                '-'.join(opts.train_dataset.split('/')[-2:]),
                extra["avg_reward"], extra["best_epoch"], finish - start))
        print("Took {:.2f} sec for {} epochs".format(finish - start,
                                                     opts.n_epochs))
def _run_rl(opts):
    # Pretty print the run args
    pp.pprint(vars(opts))

    # Set the random seed
    torch.manual_seed(opts.seed)

    # Optionally configure tensorboard
    tb_logger = None
    if not opts.no_tensorboard:
        tb_logger = TbLogger(
            os.path.join(opts.log_dir,
                         "{}_{}".format(opts.problem, opts.graph_size),
                         opts.run_name))

    os.makedirs(opts.save_dir)
    # Save arguments so exact configuration can always be found
    with open(os.path.join(opts.save_dir, "args.json"), 'w') as f:
        json.dump(vars(opts), f, indent=True)

    # Set the device
    opts.device = torch.device("cuda:0" if opts.use_cuda else "cpu")

    # Figure out what's the problem
    problem = load_problem(opts.problem)

    # Load data from load_path
    load_data = {}
    assert opts.load_path is None or opts.resume is None, \
        "Only one of load path and resume can be given"
    load_path = opts.load_path if opts.load_path is not None else opts.resume
    if load_path is not None:
        print(' [*] Loading data from {}'.format(load_path))
        load_data = torch_load_cpu(load_path)

    # Initialize model and encoder
    model_class = {
        'attention': AttentionModel,
        'pointer': PointerNetwork
    }.get(opts.model, None)
    assert model_class is not None, "Unknown model: {}".format(opts.model)
    encoder_class = {
        'gat': GraphAttentionEncoder,
        'gcn': GCNEncoder,
        'mlp': MLPEncoder
    }.get(opts.encoder, None)
    assert encoder_class is not None, "Unknown encoder: {}".format(opts.encoder)
    model = model_class(opts.embedding_dim,
                        opts.hidden_dim,
                        problem,
                        encoder_class,
                        n_encode_layers=opts.n_encode_layers,
                        mask_inner=True,
                        mask_logits=True,
                        normalization=opts.normalization,
                        tanh_clipping=opts.tanh_clipping,
                        checkpoint_encoder=opts.checkpoint_encoder,
                        shrink_size=opts.shrink_size).to(opts.device)

    if opts.use_cuda and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # Compute number of network parameters
    print(model)
    nb_param = 0
    for param in model.parameters():
        nb_param += np.prod(list(param.data.size()))
    print('Number of parameters: ', nb_param)

    # Overwrite model parameters by parameters to load
    model_ = get_inner_model(model)
    model_.load_state_dict({
        **model_.state_dict(),
        **load_data.get('model', {})
    })

    # Initialize baseline
    if opts.baseline == 'exponential':
        baseline = ExponentialBaseline(opts.exp_beta)
    elif opts.baseline == 'critic' or opts.baseline == 'critic_lstm':
        assert problem.NAME == 'tsp', "Critic only supported for TSP"
        baseline = CriticBaseline(
            (CriticNetworkLSTM(2, opts.embedding_dim, opts.hidden_dim,
                               opts.n_encode_layers, opts.tanh_clipping)
             if opts.baseline == 'critic_lstm' else
             CriticNetwork(encoder_class, 2, opts.embedding_dim,
                           opts.hidden_dim, opts.n_encode_layers,
                           opts.normalization)).to(opts.device))
    elif opts.baseline == 'rollout':
        baseline = RolloutBaseline(model, problem, opts)
    else:
        assert opts.baseline is None, "Unknown baseline: {}".format(
            opts.baseline)
        baseline = NoBaseline()

    if opts.bl_warmup_epochs > 0:
        baseline = WarmupBaseline(baseline,
                                  opts.bl_warmup_epochs,
                                  warmup_exp_beta=opts.exp_beta)

    # Load baseline from data, make sure script is called with same type of baseline
    if 'baseline' in load_data:
        baseline.load_state_dict(load_data['baseline'])

    # Initialize optimizer
    optimizer = optim.Adam([{
        'params': model.parameters(),
        'lr': opts.lr_model
    }] + ([{
        'params': baseline.get_learnable_parameters(),
        'lr': opts.lr_critic
    }] if len(baseline.get_learnable_parameters()) > 0 else []))

    # Load optimizer state
    if 'optimizer' in load_data:
        optimizer.load_state_dict(load_data['optimizer'])
        for state in optimizer.state.values():
            for k, v in state.items():
                if torch.is_tensor(v):
                    state[k] = v.to(opts.device)

    # Initialize learning rate scheduler, decay by lr_decay once per epoch!
    lr_scheduler = optim.lr_scheduler.LambdaLR(
        optimizer, lambda epoch: opts.lr_decay**epoch)

    # Start the actual training loop
    val_dataset = problem.make_dataset(size=opts.graph_size,
                                       num_samples=opts.val_size,
                                       filename=opts.val_dataset)
    opts.val_size = val_dataset.size

    if opts.resume:
        epoch_resume = int(
            os.path.splitext(os.path.split(opts.resume)[-1])[0].split("-")[1])

        # Set the random states
        torch.set_rng_state(load_data['rng_state'])
        if opts.use_cuda:
            torch.cuda.set_rng_state_all(load_data['cuda_rng_state'])
        # Dumping of state was done before epoch callback, so do that now (model is loaded)
        baseline.epoch_callback(model, epoch_resume)
        print("Resuming after {}".format(epoch_resume))
        opts.epoch_start = epoch_resume + 1

    if opts.eval_only:
        validate(model, val_dataset, opts)
    else:
        for epoch in range(opts.epoch_start, opts.epoch_start + opts.n_epochs):
            train_epoch(model, optimizer, baseline, lr_scheduler, epoch,
                        val_dataset, problem, tb_logger, opts)
def run(opts):
    rank = opts.local_rank if torch.cuda.device_count() > 1 else 0

    # Set the random seed
    torch.manual_seed(opts.seed + rank)
    random.seed(opts.seed + rank)
    np.random.seed(opts.seed + rank)

    if not os.path.exists(opts.save_dir) and rank == 0:
        os.makedirs(opts.save_dir)

    # Optionally configure wandb
    if not opts.no_wandb and rank == 0:
        wandb.login('never', '31ce01e4120061694da54a54ab0dafbee1262420')
        wandb.init(dir=opts.save_dir,
                   config=opts,
                   project='large_scale_tsp',
                   name=opts.run_name,
                   sync_tensorboard=True,
                   save_code=True)

    # Set the device
    if opts.use_cuda:
        torch.cuda.set_device(rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        opts.device = torch.device("cuda", rank)
    else:
        opts.device = torch.device("cpu")

    # Figure out what's the problem
    problem = load_problem(opts.problem)

    # Load data from load_path
    load_data = {}
    assert opts.load_path is None or opts.resume is None, \
        "Only one of load path and resume can be given"
    load_path = opts.load_path if opts.load_path is not None else opts.resume
    if load_path is not None:
        if rank == 0:
            print(' [*] Loading data from {}'.format(load_path))
        load_data = torch_load_cpu(load_path)

    # Initialize model
    model_class = {
        'attention': AttentionModel,
        'pointer': PointerNetwork
    }.get(opts.model, None)
    assert model_class is not None, "Unknown model: {}".format(opts.model)
    model: torch.nn.Module = model_class(
        opts.embedding_dim,
        opts.hidden_dim,
        problem,
        attention_type=opts.attention_type,
        n_encode_layers=opts.n_encode_layers,
        n_heads=opts.n_heads,
        feed_forward_dim=opts.feed_forward_dim,
        encoding_knn_size=opts.encoding_knn_size,
        decoding_knn_size=opts.decoding_knn_size,
        mask_inner=True,
        mask_logits=True,
        normalization=opts.normalization,
        tanh_clipping=opts.tanh_clipping,
        checkpoint_encoder=opts.checkpoint_encoder,
        shrink_size=opts.shrink_size).to(opts.device)

    if opts.init_normalization_parameters:
        for m in model.modules():
            if isinstance(m, Normalization):
                m.init_parameters()

    if opts.use_cuda:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(
            opts.device)
        model = DDP(model, device_ids=[rank])

    # Overwrite model parameters by parameters to load
    model_ = get_inner_model(model)
    model_.load_state_dict({
        **model_.state_dict(),
        **load_data.get('model', {})
    })

    # Initialize baseline
    if opts.baseline == 'exponential':
        baseline = ExponentialBaseline(opts.exp_beta)
    elif opts.baseline == 'critic' or opts.baseline == 'critic_lstm':
        assert problem.NAME == 'tsp', "Critic only supported for TSP"
        baseline = CriticBaseline(
            (CriticNetworkLSTM(2, opts.embedding_dim, opts.hidden_dim,
                               opts.n_encode_layers, opts.tanh_clipping)
             if opts.baseline == 'critic_lstm' else
             CriticNetwork(2, opts.embedding_dim, opts.hidden_dim,
                           opts.n_encode_layers,
                           opts.normalization)).to(opts.device))
    elif opts.baseline == 'rollout':
        baseline = RolloutBaseline(model, problem, opts)
    else:
        assert opts.baseline is None, "Unknown baseline: {}".format(
            opts.baseline)
        baseline = NoBaseline()

    if opts.bl_warmup_epochs > 0:
        baseline = WarmupBaseline(baseline,
                                  opts.bl_warmup_epochs,
                                  warmup_exp_beta=opts.exp_beta)

    # Load baseline from data, make sure script is called with same type of baseline
    if 'baseline' in load_data:
        baseline.load_state_dict(load_data['baseline'])

    # Initialize optimizer
    optimizer = optim.Adam([{
        'params': model.parameters(),
        'lr': opts.lr_model
    }] + ([{
        'params': baseline.get_learnable_parameters(),
        'lr': opts.lr_critic
    }] if len(baseline.get_learnable_parameters()) > 0 else []))

    # Gradient scaler for mixed-precision (fp16) training
    scaler = torch.cuda.amp.GradScaler() if opts.precision == 16 else None

    # Load optimizer state
    if 'optimizer' in load_data:
        optimizer.load_state_dict(load_data['optimizer'])
        for state in optimizer.state.values():
            for k, v in state.items():
                if torch.is_tensor(v):
                    state[k] = v.to(opts.device)

    # Initialize learning rate scheduler, decay by lr_decay once per epoch!
    lr_scheduler = optim.lr_scheduler.LambdaLR(
        optimizer, lambda epoch: opts.lr_decay**epoch)

    # Start the actual training loop
    val_dataset = problem.make_dataset(size=opts.graph_size,
                                       num_samples=opts.val_size,
                                       filename=opts.val_dataset,
                                       distribution=opts.data_distribution)

    if opts.resume:
        epoch_resume = int(
            os.path.splitext(os.path.split(opts.resume)[-1])[0].split("-")[1])

        # Set the random states
        torch.set_rng_state(load_data['rng_state'])
        if opts.use_cuda:
            torch.cuda.set_rng_state_all(load_data['cuda_rng_state'])
        # Dumping of state was done before epoch callback, so do that now (model is loaded)
        baseline.epoch_callback(model, epoch_resume)
        if rank == 0:
            print("Resuming after {}".format(epoch_resume))
        opts.epoch_start = epoch_resume + 1

    if opts.eval_only:
        validate(model, val_dataset, opts)
    else:
        for epoch in range(opts.epoch_start, opts.epoch_start + opts.n_epochs):
            train_epoch(model, optimizer, scaler, baseline, lr_scheduler,
                        epoch, val_dataset, problem, opts)
def run(opts):
    # Start time
    start_time = time()
    train_run = []
    opts.save_hrs.sort()
    run_name = opts.run_name

    # Pretty print the run args
    pp.pprint(vars(opts))

    # Set the random seed
    torch.manual_seed(opts.seed)

    # Optionally configure tensorboard
    tb_logger = None
    if not opts.no_tensorboard:
        tb_logger = TbLogger(
            os.path.join(opts.log_dir,
                         "{}_{}".format(opts.problem, opts.graph_size),
                         opts.run_name))

    os.makedirs(opts.save_dir)
    # Save arguments so exact configuration can always be found
    with open(os.path.join(opts.save_dir, "args.json"), 'w') as f:
        json.dump(vars(opts), f, indent=True)

    # Set the device
    opts.device = torch.device("cuda:0" if opts.use_cuda else "cpu")

    # Figure out what's the problem
    problem = load_problem(opts.problem)

    # Load data from load_path
    load_data = {}
    assert opts.load_path is None or opts.resume is None, \
        "Only one of load path and resume can be given"
    load_path = opts.load_path if opts.load_path is not None else opts.resume
    if load_path is not None:
        print(' [*] Loading data from {}'.format(load_path))
        load_data = torch_load_cpu(load_path)

    # Initialize model
    model_class = {
        'attention': AttentionModel,
        'pointer': PointerNetwork
    }.get(opts.model, None)
    assert model_class is not None, "Unknown model: {}".format(opts.model)
    model = model_class(opts.embedding_dim,
                        opts.hidden_dim,
                        problem,
                        n_encode_layers=opts.n_encode_layers,
                        mask_inner=True,
                        mask_logits=True,
                        normalization=opts.normalization,
                        tanh_clipping=opts.tanh_clipping,
                        checkpoint_encoder=opts.checkpoint_encoder,
                        shrink_size=opts.shrink_size).to(opts.device)

    if opts.use_cuda and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # Overwrite model parameters by parameters to load
    model_ = get_inner_model(model)
    model_.load_state_dict({
        **model_.state_dict(),
        **load_data.get('model', {})
    })

    # Initialize baseline
    if opts.baseline == 'exponential':
        baseline = ExponentialBaseline(opts.exp_beta)
    elif opts.baseline == 'critic' or opts.baseline == 'critic_lstm':
        assert problem.NAME == 'tsp', "Critic only supported for TSP"
        baseline = CriticBaseline(
            (CriticNetworkLSTM(2, opts.embedding_dim, opts.hidden_dim,
                               opts.n_encode_layers, opts.tanh_clipping)
             if opts.baseline == 'critic_lstm' else
             CriticNetwork(2, opts.embedding_dim, opts.hidden_dim,
                           opts.n_encode_layers,
                           opts.normalization)).to(opts.device))
    elif opts.baseline == 'rollout':
        baseline = RolloutBaseline(model, problem, opts)
    else:
        assert opts.baseline is None, "Unknown baseline: {}".format(
            opts.baseline)
        baseline = NoBaseline()

    if opts.bl_warmup_epochs > 0:
        baseline = WarmupBaseline(baseline,
                                  opts.bl_warmup_epochs,
                                  warmup_exp_beta=opts.exp_beta)

    # Load baseline from data, make sure script is called with same type of baseline
    if 'baseline' in load_data:
        baseline.load_state_dict(load_data['baseline'])

    # Initialize optimizer
    optimizer = optim.Adam([{
        'params': model.parameters(),
        'lr': opts.lr_model
    }] + ([{
        'params': baseline.get_learnable_parameters(),
        'lr': opts.lr_critic
    }] if len(baseline.get_learnable_parameters()) > 0 else []))

    # Load optimizer state
    if 'optimizer' in load_data:
        optimizer.load_state_dict(load_data['optimizer'])
        for state in optimizer.state.values():
            for k, v in state.items():
                if torch.is_tensor(v):
                    state[k] = v.to(opts.device)

    # Initialize learning rate scheduler, decay by lr_decay once per epoch!
    lr_scheduler = optim.lr_scheduler.LambdaLR(
        optimizer, lambda epoch: opts.lr_decay**epoch)

    # Start the actual training loop
    val_dataset = problem.make_dataset(size=opts.graph_size,
                                       num_samples=opts.val_size,
                                       filename=opts.val_dataset,
                                       distribution=opts.data_distribution)

    if opts.resume:
        epoch_resume = int(
            os.path.splitext(os.path.split(opts.resume)[-1])[0].split("-")[1])

        # Set the random states
        torch.set_rng_state(load_data['rng_state'])
        if opts.use_cuda:
            torch.cuda.set_rng_state_all(load_data['cuda_rng_state'])
        # Dumping of state was done before epoch callback, so do that now (model is loaded)
        baseline.epoch_callback(model, epoch_resume)
        print("Resuming after {}".format(epoch_resume))
        opts.epoch_start = epoch_resume + 1

    torch.save(model, os.path.join('.', 'empty.pt'))

    if opts.eval_only:
        validate(model, val_dataset, opts)
    else:
        for epoch in range(opts.epoch_start, opts.epoch_start + opts.n_epochs):
            avg_time = train_epoch(model, optimizer, baseline, lr_scheduler,
                                   epoch, val_dataset, problem, tb_logger,
                                   opts, start_time)
            train_run.append(avg_time)
            # Iterate over a copy since entries are removed from the list
            for hr in opts.save_hrs[:]:
                if (time() - start_time) > hr * 3600:
                    opts.save_hrs.remove(hr)
                    print('Saving model and state...')
                    hr_time = int(round((time() - start_time) / 3600))
                    with open(
                            '../models/att/hist_{}_{}hr.pickle'.format(
                                run_name, hr_time), 'wb') as handle:
                        pickle.dump(train_run,
                                    handle,
                                    protocol=pickle.HIGHEST_PROTOCOL)
                    torch.save(
                        {
                            'model': get_inner_model(model).state_dict(),
                            'optimizer': optimizer.state_dict(),
                            'rng_state': torch.get_rng_state(),
                            'cuda_rng_state': torch.cuda.get_rng_state_all(),
                            'baseline': baseline.state_dict()
                        },
                        os.path.join(
                            '../models/att',
                            '{}_{}hr-model-att-only.pt'.format(
                                run_name, hr_time)))
                    torch.save(
                        model,
                        os.path.join(
                            '../models/att',
                            '{}_{}hr-model.pt'.format(run_name, hr_time)))
def run(opts):
    # Pretty print the run args
    pp.pprint(vars(opts))

    # Set the random seed
    torch.manual_seed(opts.seed)

    # Optionally configure tensorboard
    tb_logger = None
    if not opts.no_tensorboard and opts.no_dirpg:
        tb_logger = TbLogger(
            os.path.join(opts.log_dir,
                         "{}_{}".format(opts.problem, opts.graph_size),
                         opts.run_name))
    if not opts.no_dirpg:
        task = Task.init(project_name='DirPG-TSP', task_name=opts.run_name)
        tb_logger = SummaryWriter(
            os.path.join(opts.log_dir,
                         "{}_{}".format(opts.problem, opts.graph_size),
                         opts.run_name))
        tb_logger.add_text('Comment', opts.comment, 0)

    os.makedirs(opts.save_dir)
    # Save arguments so exact configuration can always be found
    with open(os.path.join(opts.save_dir, "args.json"), 'w') as f:
        json.dump(vars(opts), f, indent=True)

    # Set the device
    opts.device = torch.device("cuda:0" if opts.use_cuda else "cpu")

    # Figure out what's the problem
    problem = load_problem(opts.problem)

    # Load data from load_path
    load_data = {}
    assert opts.load_path is None or opts.resume is None, \
        "Only one of load path and resume can be given"
    load_path = opts.load_path if opts.load_path is not None else opts.resume
    if load_path is not None:
        print(' [*] Loading data from {}'.format(load_path))
        load_data = torch_load_cpu(load_path)

    # Initialize model
    model_class = {
        'attention': AttentionModel,
        'pointer': PointerNetwork
    }.get(opts.model, None)
    assert model_class is not None, "Unknown model: {}".format(opts.model)
    model = model_class(opts.embedding_dim,
                        opts.hidden_dim,
                        problem,
                        n_encode_layers=opts.n_encode_layers,
                        mask_inner=True,
                        mask_logits=True,
                        normalization=opts.normalization,
                        tanh_clipping=opts.tanh_clipping,
                        checkpoint_encoder=opts.checkpoint_encoder,
                        shrink_size=opts.shrink_size).to(opts.device)

    if opts.use_cuda and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # Overwrite model parameters by parameters to load
    model_ = get_inner_model(model)
    model_.load_state_dict({
        **model_.state_dict(),
        **load_data.get('model', {})
    })
    print(model_)

    # Initialize baseline
    if opts.baseline == 'exponential':
        baseline = ExponentialBaseline(opts.exp_beta)
    elif opts.baseline == 'critic' or opts.baseline == 'critic_lstm':
        assert problem.NAME == 'tsp', "Critic only supported for TSP"
        baseline = CriticBaseline(
            (CriticNetworkLSTM(2, opts.embedding_dim, opts.hidden_dim,
                               opts.n_encode_layers, opts.tanh_clipping)
             if opts.baseline == 'critic_lstm' else
             CriticNetwork(2, opts.embedding_dim, opts.hidden_dim,
                           opts.n_encode_layers,
                           opts.normalization)).to(opts.device))
    elif opts.baseline == 'rollout':
        baseline = RolloutBaseline(model, problem, opts)
        print(" rollout" * 30)
    else:
        assert opts.baseline is None, "Unknown baseline: {}".format(
            opts.baseline)
        baseline = NoBaseline()

    if opts.bl_warmup_epochs > 0:
        print(opts.bl_warmup_epochs)
        baseline = WarmupBaseline(baseline,
                                  opts.bl_warmup_epochs,
                                  warmup_exp_beta=opts.exp_beta)
        print(" WarmupBaseline" * 30)

    # Load baseline from data, make sure script is called with same type of baseline
    if 'baseline' in load_data:
        baseline.load_state_dict(load_data['baseline'])

    # Initialize optimizer
    optimizer = optim.Adam([{
        'params': model.parameters(),
        'lr': opts.lr_model
    }] + ([{
        'params': baseline.get_learnable_parameters(),
        'lr': opts.lr_critic
    }] if len(baseline.get_learnable_parameters()) > 0 else []))

    # Load optimizer state
    if 'optimizer' in load_data:
        optimizer.load_state_dict(load_data['optimizer'])
        for state in optimizer.state.values():
            for k, v in state.items():
                if torch.is_tensor(v):
                    state[k] = v.to(opts.device)

    # Initialize learning rate scheduler, decay by lr_decay once per epoch!
    lr_scheduler = optim.lr_scheduler.LambdaLR(
        optimizer, lambda epoch: opts.lr_decay**epoch)

    # Start the actual training loop
    val_dataset = problem.make_dataset(size=opts.graph_size,
                                       num_samples=opts.val_size,
                                       filename=opts.val_dataset,
                                       distribution=opts.data_distribution)

    if opts.resume:
        epoch_resume = int(
            os.path.splitext(os.path.split(opts.resume)[-1])[0].split("-")[1])

        # Set the random states
        torch.set_rng_state(load_data['rng_state'])
        if opts.use_cuda:
            torch.cuda.set_rng_state_all(load_data['cuda_rng_state'])
        # Dumping of state was done before epoch callback, so do that now (model is loaded)
        baseline.epoch_callback(model, epoch_resume)
        print("Resuming after {}".format(epoch_resume))
        opts.epoch_start = epoch_resume + 1

    # Wrap the model for direct policy gradient training unless disabled
    model = dirpg.DirPG(model, opts) if not opts.no_dirpg else model

    if opts.eval_only:
        validate(model, val_dataset, opts)
    else:
        interactions_count = opts.epoch_start * opts.epoch_size * opts.max_interactions
        epoch = opts.epoch_start
        while interactions_count < opts.total_interactions:
            train_epoch(model, optimizer, baseline, lr_scheduler, epoch,
                        interactions_count, val_dataset, problem, tb_logger,
                        opts)
            print("interactions_count model so far ", interactions_count)
            n_interactions = get_inner_model(model).get_and_reset_interactions(opts.use_cuda, opts.no_dirpg) \
                if opts.no_dirpg else model.model.get_and_reset_interactions(opts.use_cuda, opts.no_dirpg)
            interactions_count += n_interactions
            print("interactions_count model new", n_interactions)
            interactions_count += get_inner_model(baseline.baseline.model).get_and_reset_interactions(opts.use_cuda, opts.no_dirpg) \
                if baseline.__class__.__name__ != "NoBaseline" else 0
            print("interactions_count baseline ", interactions_count)
            print("interactions_count: {} out of {} ".format(
                interactions_count, opts.total_interactions))
            epoch += 1
def run(opts):
    # Pretty print the run args
    pprint.pprint(vars(opts))

    # Set the random seed
    torch.manual_seed(opts.seed)

    # Optionally configure tensorboard
    tb_logger = None
    if not opts.no_tensorboard:
        tb_logger = TbLogger(
            os.path.join(opts.log_dir,
                         "{}_{}".format(opts.problem, opts.graph_size),
                         opts.run_name))

    if not os.path.exists(opts.save_dir):
        os.makedirs(opts.save_dir)
    # Save arguments so exact configuration can always be found
    with open(os.path.join(opts.save_dir, "args.json"), 'w') as f:
        json.dump(vars(opts), f, indent=True)

    # Set the device
    opts.device = torch.device("cuda" if opts.use_cuda else "cpu")

    # Figure out what's the problem
    problem = load_problem(opts.problem)(p_size=opts.graph_size,
                                         with_assert=not opts.no_assert)

    # Load data from load_path
    load_data = {}
    assert opts.load_path is None or opts.resume is None, \
        "Only one of load path and resume can be given"
    load_path = opts.load_path if opts.load_path is not None else opts.resume
    if load_path is not None:
        print(' [*] Loading data from {}'.format(load_path))
        load_data = torch_load_cpu(load_path)

    # Initialize model
    model_class = {
        'attention': AttentionModel,
    }.get(opts.model, None)
    assert model_class is not None, "Unknown model: {}".format(opts.model)
    model = model_class(problem=problem,
                        embedding_dim=opts.embedding_dim,
                        hidden_dim=opts.hidden_dim,
                        n_heads=opts.n_heads_encoder,
                        n_layers=opts.n_encode_layers,
                        normalization=opts.normalization,
                        device=opts.device).to(opts.device)

    if opts.use_cuda and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # Overwrite model parameters by parameters to load
    model_ = get_inner_model(model)
    model_.load_state_dict({
        **model_.state_dict(),
        **load_data.get('model', {})
    })

    # Load the validation datasets
    val_dataset = problem.make_dataset(size=opts.graph_size,
                                       num_samples=opts.val_size,
                                       filename=opts.val_dataset)

    # Do validation only
    if opts.eval_only:
        validate(problem, model, val_dataset, tb_logger, opts, _id=0)
    else:
        # Initialize baseline
        baseline = CriticBaseline(
            CriticNetwork(problem=problem,
                          embedding_dim=opts.embedding_dim,
                          hidden_dim=opts.hidden_dim,
                          n_heads=opts.n_heads_decoder,
                          n_layers=opts.n_encode_layers,
                          normalization=opts.normalization,
                          device=opts.device).to(opts.device))

        # Load baseline from data, make sure script is called with same type of baseline
        if 'baseline' in load_data:
            baseline.load_state_dict(load_data['baseline'])

        # Initialize optimizer
        optimizer = optim.Adam([{
            'params': model.parameters(),
            'lr': opts.lr_model
        }] + ([{
            'params': baseline.get_learnable_parameters(),
            'lr': opts.lr_critic
        }] if len(baseline.get_learnable_parameters()) > 0 else []))

        # Load optimizer state
        if 'optimizer' in load_data:
            optimizer.load_state_dict(load_data['optimizer'])
            for state in optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.to(opts.device)

        # Initialize learning rate scheduler, decay by lr_decay once per epoch!
        lr_scheduler = optim.lr_scheduler.LambdaLR(
            optimizer, lambda epoch: opts.lr_decay**epoch)

        if opts.resume:
            epoch_resume = int(
                os.path.splitext(os.path.split(
                    opts.resume)[-1])[0].split("-")[1])

            # Set the random states
            torch.set_rng_state(load_data['rng_state'])
            if opts.use_cuda:
                torch.cuda.set_rng_state_all(load_data['cuda_rng_state'])
            # Dumping of state was done before epoch callback, so do that now (model is loaded)
            print("Resuming after {}".format(epoch_resume))
            opts.epoch_start = epoch_resume + 1

        # Start the actual training loop
        for epoch in range(opts.epoch_start, opts.epoch_start + opts.n_epochs):
            train_epoch(problem, model, optimizer, baseline, lr_scheduler,
                        epoch, val_dataset, tb_logger, opts)
def run(opts):
    # Pretty print the run args
    pp.pprint(vars(opts))

    # Set the random seed
    torch.manual_seed(opts.seed)

    # Optionally configure tensorboard
    tb_logger = None
    if not opts.no_tensorboard:
        tb_logger = TbLogger(
            os.path.join(opts.log_dir,
                         "{}_{}".format(opts.problem, opts.graph_size),
                         opts.run_name))

    os.makedirs(opts.save_dir)
    # Save arguments so exact configuration can always be found
    with open(os.path.join(opts.save_dir, "args.json"), 'w') as f:
        json.dump(vars(opts), f, indent=True)

    # Set the device
    opts.device = torch.device("cuda:0" if opts.use_cuda else "cpu")

    # Figure out what's the problem
    problem = load_problem(opts.problem)

    # Load data from load_path
    load_data = {}
    assert opts.load_path is None or opts.resume is None, \
        "Only one of load path and resume can be given"
    load_path = opts.load_path if opts.load_path is not None else opts.resume
    if load_path is not None:
        print(' [*] Loading data from {}'.format(load_path))
        load_data = torch_load_cpu(load_path)

    # Initialize model
    model = AttentionModel(opts.embedding_dim,
                           opts.hidden_dim,
                           problem,
                           n_encode_layers=opts.n_encode_layers,
                           mask_inner=True,
                           mask_logits=True,
                           normalization=opts.normalization,
                           tanh_clipping=opts.tanh_clipping,
                           checkpoint_encoder=opts.checkpoint_encoder,
                           shrink_size=opts.shrink_size).to(opts.device)

    if opts.use_cuda and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # Overwrite model parameters by parameters to load
    model_ = get_inner_model(model)
    model_.load_state_dict({
        **model_.state_dict(),
        **load_data.get('model', {})
    })

    # Initialize baseline
    if opts.baseline == 'critic':
        baseline = CriticBaseline(
            (CriticNetwork(2, opts.embedding_dim, opts.hidden_dim,
                           opts.n_encode_layers,
                           opts.normalization)).to(opts.device))
    elif opts.baseline == 'rollout':
        baseline = RolloutBaseline(model, problem, opts)
    elif opts.baseline == 'oracle':
        baseline = OracleBaseline()
    else:
        assert opts.baseline is None, "Unknown baseline: {}".format(
            opts.baseline)
        baseline = NoBaseline()

    # Load baseline from data, make sure script is called with same type of baseline
    if 'baseline' in load_data:
        baseline.load_state_dict(load_data['baseline'])

    # Initialize optimizer
    optimizer = optim.Adam([{
        'params': model.parameters(),
        'lr': opts.lr_model
    }] + ([{
        'params': baseline.get_learnable_parameters(),
        'lr': opts.lr_critic
    }] if len(baseline.get_learnable_parameters()) > 0 else []))

    # Load optimizer state
    if 'optimizer' in load_data:
        optimizer.load_state_dict(load_data['optimizer'])
        for state in optimizer.state.values():
            for k, v in state.items():
                if torch.is_tensor(v):
                    state[k] = v.to(opts.device)

    # Initialize learning rate scheduler, decay by lr_decay once per epoch!
    lr_scheduler = optim.lr_scheduler.LambdaLR(
        optimizer, lambda epoch: opts.lr_decay**epoch)

    # Start the actual training loop
    val_dataset = problem.make_dataset(size=opts.graph_size,
                                       num_samples=opts.val_size,
                                       filename=opts.val_dataset,
                                       distribution=opts.data_distribution)

    # Compute the exact (Held-Karp) validation cost from pairwise distances
    val_dataset_tensor = torch.stack(val_dataset.data)
    dist = (val_dataset_tensor.transpose(1, 2).repeat_interleave(
        opts.graph_size, 2).transpose(1, 2).float() -
            val_dataset_tensor.repeat(1, opts.graph_size, 1).float()).norm(
                p=2, dim=2).view(opts.val_size, opts.graph_size,
                                 opts.graph_size)
    DP_val_solution = [held_karp(dist[i])[0] for i in range(opts.val_size)]
    DP_val_solution = torch.tensor(DP_val_solution)
    DP_val_solution = DP_val_solution.mean()
    problem.DP_cost = DP_val_solution
    print('problem_DPCost = ', DP_val_solution)

    if opts.resume:
        epoch_resume = int(
            os.path.splitext(os.path.split(opts.resume)[-1])[0].split("-")[1])

        # Set the random states
        torch.set_rng_state(load_data['rng_state'])
        if opts.use_cuda:
            torch.cuda.set_rng_state_all(load_data['cuda_rng_state'])
        # Dumping of state was done before epoch callback, so do that now (model is loaded)
        baseline.epoch_callback(model, epoch_resume)
        print("Resuming after {}".format(epoch_resume))
        opts.epoch_start = epoch_resume + 1

    if opts.eval_only:
        validate(model, val_dataset, opts)
    else:
        for epoch in range(opts.epoch_start, opts.epoch_start + opts.n_epochs):
            train_epoch(model, optimizer, baseline, lr_scheduler, epoch,
                        val_dataset, problem, tb_logger, opts)
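# Usage sketch (not part of the listings above): each run() variant is typically
# driven by a small command-line entry point. get_options() stands for the
# repository's argparse-based option parser and is an assumed name here; the
# exact module and signature may differ between variants.
if __name__ == "__main__":
    run(get_options())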