def create_model(args, model=None):
    """Create an MVCNN model based on the given architecture."""
    if model is None:
        model = SVCNN(nclasses=args.num_classes,
                      pretraining=args.pretrained,
                      cnn_name=args.arch,
                      feature_extraction=args.feature_extraction)
    else:
        model = MVCNN(model, num_views=args.nview)

    # Multiple GPUs
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # Send the model to the GPU, or keep it on the CPU
    model = model.to(device=args.device)

    if args.optimizer == "ADAM":
        optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()),
                         args.learning_rate, weight_decay=args.weight_decay)
    elif args.optimizer == "ADAGRAD":
        optimizer = Adagrad(filter(lambda p: p.requires_grad, model.parameters()),
                            args.learning_rate, weight_decay=args.weight_decay)
    else:
        # With feature extraction (feature weights frozen), keep only the
        # trainable parameters
        optimizer = SGD(filter(lambda p: p.requires_grad, model.parameters()),
                        args.learning_rate, momentum=args.momentum,
                        weight_decay=args.weight_decay)

    return model, optimizer

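# An illustrative two-stage call to create_model with a hand-built Namespace;
# every field value here is a placeholder, not a default from the source project.
from argparse import Namespace

example_args = Namespace(num_classes=40, pretrained=True, arch='resnet18',
                         feature_extraction=False, nview=12,
                         device=torch.device('cpu'), optimizer='ADAGRAD',
                         learning_rate=0.01, weight_decay=1e-4, momentum=0.9)
svcnn, svcnn_optimizer = create_model(example_args)               # single-view CNN
mvcnn, mvcnn_optimizer = create_model(example_args, model=svcnn)  # multi-view CNN
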
def get_existing_model(model_name, start_episode_counter=0, fetch_losses=False):
    model_state_dict = torch.load(PATH, map_location='cpu')
    auto_encoder_model = AutoEncoderModel()
    auto_encoder_model = auto_encoder_model.float()
    auto_encoder_model.load_state_dict(model_state_dict['model'])
    auto_encoder_model = auto_encoder_model.to(DEVICE)
    optimizer = Adagrad(auto_encoder_model.parameters(), lr=LEARNING_RATE,
                        weight_decay=0.0005)
    optimizer.load_state_dict(model_state_dict['optimizer'])
    losses = None
    if fetch_losses:
        losses = np.load("{}-{}-{}.npy".format(LOSSES_FILE_PATH, model_name,
                                               str(start_episode_counter)))
    return auto_encoder_model, optimizer, losses

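# A hedged sketch of the save side this loader expects: a dict holding 'model'
# and 'optimizer' state dicts (the same layout the AutoEncoder.train_batches
# snapshot code further down writes). save_checkpoint is a hypothetical helper
# name, not from the source.
def save_checkpoint(model, optimizer, path):
    torch.save({'model': model.state_dict(),
                'optimizer': optimizer.state_dict()}, path)
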
def set_parameters(self, params):
    self.params = list(params)  # careful: params may be a generator
    if self.method == 'sgd':
        self.optimizer = SGD(self.params, lr=self.lr)
    elif self.method == 'adagrad':
        self.optimizer = Adagrad(self.params, lr=self.lr)
    elif self.method == 'adadelta':
        self.optimizer = Adadelta(self.params, lr=self.lr)
    elif self.method == 'adam':
        self.optimizer = Adam(self.params, lr=self.lr)
    else:
        raise RuntimeError("Invalid optim method: " + self.method)

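# A tiny illustration of the "params may be a generator" caveat above:
# Module.parameters() yields a one-shot generator, so it must be materialized
# with list() before it can be reused. The linear layer is illustrative.
gen = nn.Linear(4, 4).parameters()
params = list(gen)       # safe to hand to an optimizer and inspect again
assert list(gen) == []   # the generator is now exhausted
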
def test_train_lcwa(self) -> None:
    """Test that LCWA training does not fail."""
    loop = LCWATrainingLoop(
        model=self.model,
        optimizer=Adagrad(params=self.model.get_grad_params(), lr=0.001),
    )
    losses = self._safe_train_loop(
        loop,
        num_epochs=self.train_num_epochs,
        batch_size=self.train_batch_size,
        sampler='default',
    )
    self.assertIsInstance(losses, list)

def get_optimizer(model: nn.Module, optim: str, lr: float) -> Optimizer:
    """
    Return the optimizer that corresponds to the string optim.
    Adds the parameters from model and sets the learning rate to lr.

    :param model: model to get the parameters from
    :param optim: name of the optimizer
    :param lr: learning rate to use in the optimizer
    :return: the configured Optimizer instance
    """
    if optim == "adagrad":
        return Adagrad(model.parameters(), lr=lr)
    elif optim == "sgd":
        return SGD(model.parameters(), lr=lr)
    elif optim == "rmsprop":
        return RMSprop(model.parameters(), lr=lr)
    elif optim == "adam":
        return Adam(model.parameters(), lr=lr)
    else:
        raise ValueError("Invalid optimizer")

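# A minimal usage sketch for get_optimizer above; the toy linear model and the
# "adagrad" choice are illustrative, not taken from the source project.
toy_model = nn.Linear(10, 2)
toy_optimizer = get_optimizer(toy_model, optim="adagrad", lr=0.01)
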
def get_optimizer(params, settings):
    # lrs is returned to the caller (True for SGD and Adam), presumably to
    # gate attaching a learning-rate scheduler
    lrs = False
    if settings['optimizer'] == 'SGD':
        optimizer = torch.optim.SGD(params, lr=settings['lr'],
                                    momentum=settings['momentum'],
                                    weight_decay=settings['wd'])
        lrs = True
    elif settings['optimizer'] == 'Adagrad':
        optimizer = Adagrad(params, lr=settings['lr'], lr_decay=0,
                            weight_decay=settings['wd'],
                            initial_accumulator_value=0, eps=1e-10)
    elif settings['optimizer'] == 'Adadelta':
        optimizer = Adadelta(params, lr=1.0, rho=0.9, eps=1e-06,
                             weight_decay=settings['wd'])
    elif settings['optimizer'] == 'Adam':
        optimizer = Adam(params, lr=settings['lr'], betas=(0.9, 0.999),
                         eps=1e-08, weight_decay=0, amsgrad=False)
        lrs = True
    else:
        print('optimizer name invalid, using default SGD')
        optimizer = torch.optim.SGD(params, 0.005, momentum=0.9,
                                    weight_decay=0.0005)
    return optimizer, lrs

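# Illustrative call with a hand-built settings dict using the keys read above;
# the concrete values are placeholders.
settings = {'optimizer': 'Adagrad', 'lr': 0.01, 'momentum': 0.9, 'wd': 5e-4}
optimizer, lrs = get_optimizer(nn.Linear(8, 8).parameters(), settings)
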
def main():
    args = parser.parse_args()

    # REPRODUCIBILITY
    torch.manual_seed(0)
    np.random.seed(0)

    if args.debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    # Retrieve the view candidates and the right number of views
    if args.case == '1':
        args.vcand = np.load('view_candidates/vcand_case1.npy')
        args.nview = 12
    elif args.case == '2':
        args.vcand = np.load('view_candidates/vcand_case2.npy')
        args.nview = 20
    elif args.case == '3':
        args.vcand = np.load('view_candidates/vcand_case3.npy')
        args.nview = 160

    # Names for the saved checkpoints
    args.fname_best = 'rotationnet{}_model_best{}.pth.tar'.format(
        args.nview, datetime.now().strftime("%d_%b_%Y_%H_%M_%S"))
    args.fname = 'rotationnet{}_model{}.pth.tar'.format(
        args.nview, datetime.now().strftime("%d_%b_%Y_%H_%M_%S"))

    logger.debug("Number of view candidates: {}".format(np.shape(args.vcand)[0]))
    logger.debug("Number of views: {}".format(args.nview))

    if torch.cuda.is_available():
        args.device = torch.device('cuda')
    else:
        args.device = torch.device('cpu')
    logger.debug("PyTorch is using {}".format(args.device))

    # The mini-batch is used for one gradient update, so its size needs to be
    # divisible by the number of views; otherwise one or more classifications
    # would be incomplete.
    if args.batch_size % args.nview != 0:
        logger.error('Batch size should be a multiple of the number of views, {}'.format(args.nview))
        exit(1)

    # Get the number of classes
    logger.debug("Number of classes: {}".format(args.num_classes))

    # Create the RotationNet model based on the given architecture.
    # The output size is (num_classes + wrong_view class) * the number of views.
    model = RotationNet(args.arch, args.pretrained,
                        (args.num_classes + 1) * args.nview,
                        args.feature_extraction, args.depth)

    # Multiple GPUs
    if torch.cuda.device_count() > 1:
        logger.debug("Using multiple GPUs")
        model = torch.nn.DataParallel(model)

    # Send the model to the GPU, or keep it on the CPU
    model = model.to(device=args.device)

    # Define the loss function (criterion) and optimizer.
    # Sending the loss to CUDA is unnecessary because the loss function is not stateful.
    # TODO test if it works without sending the loss to the GPU
    criterion = nn.CrossEntropyLoss().to(device=args.device)

    if args.optimizer == "ADAM":
        optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()),
                         args.learning_rate, weight_decay=args.weight_decay)
    elif args.optimizer == "ADAGRAD":
        optimizer = Adagrad(filter(lambda p: p.requires_grad, model.parameters()),
                            args.learning_rate, weight_decay=args.weight_decay)
    else:
        # With feature extraction (feature weights frozen), keep only the
        # trainable parameters
        optimizer = SGD(filter(lambda p: p.requires_grad, model.parameters()),
                        args.learning_rate, momentum=args.momentum,
                        weight_decay=args.weight_decay)

    # https://stackoverflow.com/questions/58961768/set-torch-backends-cudnn-benchmark-true-or-not
    # Gives some speedup when the network input size does not change,
    # but is useless here because the cluster does not have cuDNN.
    # cudnn.benchmark = True

    logger.info("Model args: {}".format(args))

    if args.train_type == 'k-fold':
        logger.debug("K-fold training")
        train_k_fold(model, criterion, optimizer, args)
    elif args.train_type == 'hold-out':
        logger.debug("Hold-out training")
        train_hold_out(model, criterion, optimizer, args)
    elif args.train_type == 'full':
        logger.debug("Full training")
        train_all(model, criterion, optimizer, args)
    elif args.train_type == 'evaluate':
        logger.debug("Start evaluation on test set")
        test_model(model, criterion, args)
    elif args.train_type == 'aligned':
        logger.debug("Hold-out training on aligned set")
        train_hold_out_aligned(model, criterion, optimizer, args)
    elif args.train_type == "test":
        logger.debug("Start real-time test")
        threshold_evaluation(model, args)

class Train(object):
    def __init__(self):
        self.vocab = Vocab(config.vocab_path, config.vocab_size)
        self.batcher = Batcher(config.train_data_path, self.vocab, mode='train',
                               batch_size=config.batch_size, single_pass=False)
        time.sleep(15)

        train_dir = os.path.join(config.log_root, 'train_{}'.format(int(time.time())))
        if not os.path.exists(train_dir):
            os.mkdir(train_dir)

        self.model_dir = os.path.join(train_dir, 'model')
        if not os.path.exists(self.model_dir):
            os.mkdir(self.model_dir)

        self.summary_writer = tf.summary.FileWriter(train_dir)

    def save_model(self, running_avg_loss, iters):
        state = {
            'iter': iters,
            'encoder_state_dict': self.model.encoder.state_dict(),
            'decoder_state_dict': self.model.decoder.state_dict(),
            'reduce_state_dict': self.model.reduce_state.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'current_loss': running_avg_loss
        }
        model_save_path = os.path.join(
            self.model_dir, 'model_{}_{}'.format(iters, int(time.time())))
        torch.save(state, model_save_path)

    def setup_train(self, model_file_path=None):
        self.model = Model(model_file_path)

        params = list(self.model.encoder.parameters()) + \
            list(self.model.decoder.parameters()) + \
            list(self.model.reduce_state.parameters())
        initial_lr = config.lr_coverage if config.is_coverage else config.lr
        self.optimizer = Adagrad(
            params, lr=initial_lr,
            initial_accumulator_value=config.adagrad_init_acc)

        start_iter, start_loss = 0, 0
        if model_file_path is not None:
            state = torch.load(model_file_path,
                               map_location=lambda storage, location: storage)
            start_iter = state['iter']
            start_loss = state['current_loss']

            if not config.is_coverage:
                self.optimizer.load_state_dict(state['optimizer'])
                if use_cuda:
                    # Move the restored Adagrad accumulators to the GPU
                    for state in self.optimizer.state.values():
                        for k, v in state.items():
                            if torch.is_tensor(v):
                                state[k] = v.cuda()

        return start_iter, start_loss

    def train_one_batch(self, batch):
        enc_batch, enc_padding_mask, enc_lens, enc_batch_extend_vocab, \
            extra_zeros, c_t_1, coverage = get_input_from_batch(batch)
        dec_batch, dec_padding_mask, max_dec_len, dec_lens_var, target_batch = \
            get_output_from_batch(batch)

        self.optimizer.zero_grad()

        encoder_outputs, encoder_feature, encoder_hidden = self.model.encoder(
            enc_batch, enc_lens)
        s_t_1 = self.model.reduce_state(encoder_hidden)

        step_losses = []
        for di in range(min(max_dec_len, config.max_dec_steps)):
            y_t_1 = dec_batch[:, di]  # Teacher forcing
            final_dist, s_t_1, c_t_1, attn_dist, p_gen, next_coverage = self.model.decoder(
                y_t_1, s_t_1, encoder_outputs, encoder_feature, enc_padding_mask,
                c_t_1, extra_zeros, enc_batch_extend_vocab, coverage, di)
            target = target_batch[:, di]
            gold_probs = torch.gather(final_dist, 1, target.unsqueeze(1)).squeeze()
            step_loss = -torch.log(gold_probs + config.eps)
            if config.is_coverage:
                step_coverage_loss = torch.sum(torch.min(attn_dist, coverage), 1)
                step_loss = step_loss + config.cov_loss_wt * step_coverage_loss
                coverage = next_coverage

            step_mask = dec_padding_mask[:, di]
            step_loss = step_loss * step_mask
            step_losses.append(step_loss)

        sum_losses = torch.sum(torch.stack(step_losses, 1), 1)
        batch_avg_loss = sum_losses / dec_lens_var
        loss = torch.mean(batch_avg_loss)

        loss.backward()

        self.norm = clip_grad_norm_(self.model.encoder.parameters(), config.max_grad_norm)
        clip_grad_norm_(self.model.decoder.parameters(), config.max_grad_norm)
        clip_grad_norm_(self.model.reduce_state.parameters(), config.max_grad_norm)

        self.optimizer.step()

        return loss.item()

    def trainIters(self, n_iters, model_file_path=None):
        iter, running_avg_loss = self.setup_train(model_file_path)
        start = time.time()
        while iter < n_iters:
            batch = self.batcher.next_batch()
            loss = self.train_one_batch(batch)

            running_avg_loss = calc_running_avg_loss(loss, running_avg_loss,
                                                     self.summary_writer, iter)
            iter += 1

            if iter % 100 == 0:
                self.summary_writer.flush()
            print_interval = 1000
            if iter % print_interval == 0:
                print('steps %d, seconds for %d batch: %.2f , loss: %f' % (
                    iter, print_interval, time.time() - start, loss))
                start = time.time()
            if iter % 5000 == 0:
                self.save_model(running_avg_loss, iter)

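# A minimal driver sketch for the Train class above; the iteration count is a
# placeholder, and config/use_cuda are assumed to come from the project's own
# modules.
if __name__ == '__main__':
    trainer = Train()
    trainer.trainIters(n_iters=500000)  # pass model_file_path=... to resume
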
class AutoEncoder:
    BATCH_SIZE = 32

    def __init__(self, model):
        # if torch.cuda.device_count() > 1:
        #     self.model = nn.DataParallel(model)
        # else:
        self.model = model  # Default is the same model
        self.optimizer = None
        self.scheduler = None
        self.criterion = nn.MSELoss()
        self.losses = []

    def post_setup(self):
        self.optimizer = Adagrad(self.model.parameters(), lr=0.001, weight_decay=0.0005)
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, factor=0.5, patience=20, threshold=0.0001)

    def train_batches(self, episodes_count, files_list):
        episode_counter = 0
        prev_loss = 0.0

        if LOAD_ALL_MEMORY:
            dataset_loader = TrainingDataSampler(EPISODES_COUNT)
            dataset_loader.load_all_training_data(files_list)
        else:
            dataset_loader = TrainingFileManager(PARENT_DIR_LIST, EPISODES_COUNT)
            dataset_loader.start()

        while episode_counter < episodes_count:
            cuboids = dataset_loader.get_training_data()
            print("{} : Running episode {} and prev loss {}".format(
                datetime.datetime.now(), str(episode_counter), prev_loss))
            cuboids = cuboids.to(DEVICE)
            output = self.model(cuboids)
            output = output.to(DEVICE)

            self.optimizer.zero_grad()  # zero the gradient buffers
            loss = self.criterion(output, cuboids)
            loss.backward()
            self.optimizer.step()  # does the update
            self.losses.append(loss.item())

            if episode_counter > 0 and episode_counter % SNAPSHOT_DURATION == 0:
                np.save("{}-{}".format(LOSSES_FILE_PATH, str(episode_counter)),
                        np.array(self.losses))
                # note: saves the module-level auto_encoder instance, not self.model
                torch.save(auto_encoder,
                           "{}-{}".format(MODEL_FILE_PATH, str(episode_counter)))
                cv2.imwrite("test1.png", cuboids.cpu().detach().numpy()[0][0] * 255)
                cv2.imwrite("test2_1.png", output.cpu().detach().numpy()[0][0] * 255)

            print("Loss for episode {} is {}".format(episode_counter, loss))
            prev_loss = loss.item()
            episode_counter += 1

        np.save("{}-{}".format(LOSSES_FILE_PATH, str(episode_counter)),
                np.array(self.losses))
        torch.save(auto_encoder, "{}-{}".format(MODEL_FILE_PATH, str(episode_counter)))

def evaluate(individual):
    torch.cuda.empty_cache()
    decoded_chromosome = individual.decode_chromosome()
    try:
        model = ConvNet(decoded_chromosome[1:])
        summary(model, input_size=(3, 64, 64), device="cpu")
    except ValueError as e:
        if str(e) == "Bad Network":
            return None, None
        raise  # unexpected ValueError: model would otherwise be unbound below

    transformations = {
        'train': transforms.Compose([
            transforms.RandomHorizontalFlip(),
            # transforms.RandomCrop(32, padding=4),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]),
        'val': transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]),
        'test': transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
    }

    data_dir = "data"
    image_datasets = {
        x: datasets.ImageFolder(os.path.join(data_dir, x), transformations[x])
        for x in ['train', 'val', 'test']
    }
    dataloaders = {
        x: DataLoader(image_datasets[x], batch_size=32, shuffle=True)
        for x in ['train', 'val', 'test']
    }
    dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    optimizer_name = decoded_chromosome[0]
    optimizer = None
    if optimizer_name == "adam":
        optimizer = optim.Adam(model.parameters())
    elif optimizer_name == "rmsprop":
        optimizer = RMSprop(model.parameters())
    elif optimizer_name == "adagrad":
        optimizer = Adagrad(model.parameters())
    elif optimizer_name == "adadelta":
        optimizer = Adadelta(model.parameters())

    criterion = nn.CrossEntropyLoss()
    now = datetime.now()
    model_name = now.strftime("%d%m%Y%H%M%S")
    # hl.build_graph(model, torch.zeros([1, 3, 64, 64]).to(device))
    return model_name, 1 / train_model(model_name, model, dataloaders, dataset_sizes,
                                       criterion, optimizer, num_epochs=10)

class AutoEncoder:
    def __init__(self, model_name, model, optimizer=None, device=None):
        # if torch.cuda.device_count() > 1:
        #     self.model = nn.DataParallel(model)
        # else:
        self.model = model
        self.model_name = model_name
        self.optimizer = optimizer
        self.scheduler = None
        self.criterion = nn.MSELoss()
        self.losses = []

        fh = logging.FileHandler('{}/{}.log'.format(LOGS_PATH, self.model_name))
        fh.setLevel(logging.DEBUG)
        self.logger = logging.getLogger(self.model_name)
        self.logger.addHandler(fh)
        self.device = DEVICE if not device else device

    def post_setup(self, optimizer=None):
        if optimizer is None:
            print("None opt")
            self.optimizer = Adagrad(self.model.parameters(), lr=LEARNING_RATE,
                                     weight_decay=0.0005)
        else:
            print("Opt filled")
            self.optimizer = optimizer
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, factor=0.5, patience=20, threshold=0.0001)

    def train_batches(self, episodes_count, training_data_sampler,
                      start_episodes_counter=0):
        episode_counter = start_episodes_counter
        prev_loss = 0.0
        # self.logger.warning("Available keys - %s", training_data_sampler.available_keys)
        # self.logger.warning("Probability map - %s", training_data_sampler.prob_map)
        training_data_sampler.start()

        while episode_counter < episodes_count:
            cuboids = training_data_sampler.get_training_data()
            self.logger.warning(
                "{} : Running episode {} and prev loss {}".format(
                    datetime.datetime.now(), str(episode_counter), prev_loss))
            cuboids = cuboids.to(self.device)
            output = self.model(cuboids)
            output = output.to(self.device)

            self.optimizer.zero_grad()  # zero the gradient buffers
            loss = self.criterion(output, cuboids)
            loss.backward()
            self.optimizer.step()  # does the update
            self.losses.append(loss.item())

            if episode_counter > 0 and episode_counter % SNAPSHOT_DURATION == 0:
                np.save(
                    "{}-{}-{}".format(LOSSES_FILE_PATH, self.model_name,
                                      str(episode_counter)),
                    np.array(self.losses))
                torch.save(
                    {
                        'optimizer': self.optimizer.state_dict(),
                        'model': self.model.state_dict()
                    },
                    "{}-{}-state-{}".format(MODEL_FILE_PATH, self.model_name,
                                            episode_counter))
                cv2.imwrite(
                    "{}/{}-{}-test1.png".format(LOGS_PATH, self.model_name,
                                                episode_counter),
                    cuboids.cpu().detach().numpy()[0][0] * 255)
                cv2.imwrite(
                    "{}/{}-{}-test2_1.png".format(LOGS_PATH, self.model_name,
                                                  episode_counter),
                    output.cpu().detach().numpy()[0][0] * 255)

            self.logger.warning("Loss for episode {} is {}".format(episode_counter, loss))
            prev_loss = loss.item()
            episode_counter += 1

        np.save(
            "{}-{}-{}".format(LOSSES_FILE_PATH, self.model_name, str(episode_counter)),
            np.array(self.losses))
        torch.save(
            {
                'optimizer': self.optimizer.state_dict(),
                'model': self.model.state_dict()
            },
            "{}-{}-state-{}".format(MODEL_FILE_PATH, self.model_name, episode_counter))

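# A hedged resume sketch tying get_existing_model above to this class: reload
# the weights and Adagrad accumulators, then hand the restored optimizer to
# post_setup so the scheduler wraps it. The model name and episode counter are
# placeholders.
model, optimizer, losses = get_existing_model('ae-run-1',
                                              start_episode_counter=5000,
                                              fetch_losses=True)
auto_encoder = AutoEncoder('ae-run-1', model, device=DEVICE)
auto_encoder.post_setup(optimizer=optimizer)
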
def set_optimizer(model):
    optimizer = Adagrad(model.parameters(recurse=True), lr=conf.lr,
                        initial_accumulator_value=conf.adagrad_init_acc)
    model.set_optimizer(optimizer)
    model.set_max_grad_norm(conf.max_grad_norm)

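# For reference, a minimal NumPy sketch of the update rule torch.optim.Adagrad
# applies (lr_decay and weight_decay omitted). initial_accumulator_value, used
# in several snippets above, seeds the squared-gradient accumulator `accum`.
import numpy as np

def adagrad_step(param, grad, accum, lr=0.01, eps=1e-10):
    accum += grad ** 2                            # running sum of squared grads
    param -= lr * grad / (np.sqrt(accum) + eps)   # per-coordinate scaled step
    return param, accum

w = np.zeros(3)
acc = np.full(3, 0.1)  # e.g. initial_accumulator_value = 0.1
w, acc = adagrad_step(w, np.array([0.5, -0.2, 0.1]), acc)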