def loss_fn(self, out, annot):
    tar_vector = Losses.get_tar_vector(annot)
    loss_loc = Losses.get_loc_error(out, tar_vector)
    loss_wh = Losses.get_w_h_error(out, tar_vector)
    loss_conf = Losses.get_confidence_error(out, tar_vector)
    loss_cls = Losses.get_class_error(out, tar_vector)
    return loss_loc, loss_wh, loss_conf, loss_cls
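The four components are typically combined into a single scalar before backpropagation. A hedged sketch of such a combination; `model`, `out`, and `annot` are placeholders, and the 5.0 coordinate weight is illustrative (in the spirit of YOLO), not taken from the source:

# Hypothetical combination of the four loss terms into one training loss.
loss_loc, loss_wh, loss_conf, loss_cls = model.loss_fn(out, annot)
total_loss = 5.0 * (loss_loc + loss_wh) + loss_conf + loss_cls
total_loss.backward()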
def transfer(model, decoder, sess, args, vocab, data0, data1, out_path):
    batches, order0, order1 = get_batches(data0, data1, vocab.word2id,
                                          args.batch_size)

    data0_tsf, data1_tsf = [], []
    losses = Losses(len(batches))
    for batch in batches:
        ori, tsf = decoder.rewrite(batch)
        half = batch['size'] // 2  # integer division so the slices stay valid in Python 3
        data0_tsf += tsf[:half]
        data1_tsf += tsf[half:]

        loss, loss_g, loss_d, loss_d0, loss_d1 = sess.run(
            [model.loss, model.loss_g, model.loss_d, model.loss_d0,
             model.loss_d1],
            feed_dict=feed_dictionary(model, batch, args.rho, args.gamma_min))
        losses.add(loss, loss_g, loss_d, loss_d0, loss_d1)

    n0, n1 = len(data0), len(data1)
    data0_tsf = reorder(order0, data0_tsf)[:n0]
    data1_tsf = reorder(order1, data1_tsf)[:n1]

    if out_path:
        write_sent(data0_tsf, out_path + '.0.tsf')
        write_sent(data1_tsf, out_path + '.1.tsf')

    return losses
def transfer(model, sess, args, vocab, data0, data1, out_path):
    batches, order0, order1 = get_batches(data0, data1, vocab.word2id,
                                          args.batch_size)

    data0_tsf, data1_tsf = [], []
    losses = Losses(len(batches))
    for batch in batches:
        ori, tsf, loss, loss_g, loss_d, loss_d0, loss_d1 = rewrite(
            model, sess, args, vocab, batch)
        half = batch['size'] // 2  # integer division so the slices stay valid in Python 3
        data0_tsf += tsf[:half]
        data1_tsf += tsf[half:]
        losses.add(loss, loss_g, loss_d, loss_d0, loss_d1)

    n0, n1 = len(data0), len(data1)
    data0_tsf = reorder(order0, data0_tsf)[:n0]
    data1_tsf = reorder(order1, data1_tsf)[:n1]

    if out_path:
        write_sent(data0_tsf, out_path + '.0.tsf')
        write_sent(data1_tsf, out_path + '.1.tsf')

    return losses
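Both transfer variants follow the same accumulate, reorder, write pattern. A minimal usage sketch, assuming held-out sets `dev0`/`dev1` and an output prefix; these names and the output directory are illustrative, not from the source:

# Hypothetical evaluation call: writes dev.0.tsf and dev.1.tsf under
# args.output_dir and returns the accumulated Losses object.
dev_losses = transfer(model, sess, args, vocab, dev0, dev1,
                      out_path=os.path.join(args.output_dir, 'dev'))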
class Trainer:
    """
    Class implementing the trainer for the project
    """

    def __init__(self, model, optimizer, train_loader, test_loader, args,
                 epoch=-1, global_step=0, test_mode=False):
        if args.fp16:
            try:
                global amp
                from apex import amp
            except ImportError:
                raise ImportError(
                    "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
                )
        self.model = model
        self.args = args
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.epoch = epoch
        self.module = model.module if hasattr(
            model, 'module') else model  # for data parallel
        self.masking_policies = [
            'random', 'seen_noun', 'seen_verb',
            'seen_combo_seen_noun_seen_verb', 'new_noun', 'new_verb',
            'new_combo_seen_noun_seen_verb', 'new_combo_new_noun_new_verb',
            'seen_combo_seen_noun_seen_verb_merge',
            'new_combo_seen_noun_seen_verb_merge',
            'new_combo_new_noun_new_verb_merge'
        ]
        if test_mode and not args.pointing:
            self.masker = TestMasker(annotation_root=args.annotation_root,
                                     masking_policy=args.test_masking_policy,
                                     tok=self.train_loader.dataset.tokenizer,
                                     p_mask_img=args.p_mask_img,
                                     p_mask_txt=args.p_mask_txt)
        else:
            self.masker = Masker(self.train_loader.dataset.tokenizer,
                                 **vars(args))
        self.losses = Losses(self.module.cfg, args, **vars(args))
        self.global_step = global_step

    def train(self):
        best_eval = 0
        try:
            for epoch in trange(self.epoch + 1, self.args.num_train_epochs,
                                desc='Training model'):
                if self.args.local_rank != -1:
                    self.train_loader.sampler.set_epoch(epoch)
                self.run_epoch(epoch)

                # Evaluate on the validation set. This score is the one
                # taken into account for the checkpoints.
                val_score = self.run_epoch(epoch, train=False)

                # Remember best eval score and save checkpoint
                is_best = val_score > best_eval
                best_eval = max(val_score, best_eval)
                if self.args.local_rank <= 0 and not self.args.debug:
                    print('Saving checkpoint')
                    utils.save_checkpoint(self.model, self.optimizer,
                                          self.train_loader.dataset.tokenizer,
                                          is_best, epoch,
                                          self.args.checkpoint_dir, amp=amp,
                                          global_step=self.global_step,
                                          args=self.args)
        except KeyboardInterrupt:
            if self.args.local_rank <= 0:
                print(f'You decided to finish the training at epoch {epoch}')

    def run_epoch(self, epoch, train=True):
        """
        During the training loop, we find the following arrays:
        - text_mask_locs: Tensor of size B x T, T being the maximum of all
          the B T's. Each element contains a boolean tensor that is True if
          the token at that position MUST be masked. This depends on the
          `target_token_ids` and on whether the token at that position
          belongs to the target sequence. Such a token will be predicted in
          all the text losses (language model, pointing, episodic), but it
          will not necessarily be substituted by a [MASK] token: the
          substitution is random, and sometimes the token stays the same or
          is replaced by a random word.
        - text_no_mask_locs: Tensor of size B x T, each element containing a
          boolean tensor that is True if the token at that position CANNOT
          be masked.
        - img_no_mask_locs: same as above, for image positions.
        """
        torch.cuda.synchronize()

        # Initialize meters
        avg_batch_time = utils.AverageMeter()
        avg_data_time = utils.AverageMeter()
        list_losses = ['total', 'lm', 'vm']
        list_losses.extend(['pointing'] if self.args.pointing else [])
        list_losses.extend(
            ['input_pointing'] if self.args.input_pointing else [])
        average_meters = defaultdict(lambda: utils.AverageMeter())
        if not train:
            avg_lm_top1 = utils.AverageMeter()
            avg_lm_top5 = utils.AverageMeter()
            avg_pointing_acc = utils.AverageMeter()
            avg_input_pointing_acc = utils.AverageMeter()

        # Switch to train mode
        if train:
            self.model.train()
        else:
            self.model.eval()

        end = time.time()
        with torch.set_grad_enabled(train), \
                tqdm(self.train_loader if train else self.test_loader,
                     desc=f'Training epoch {epoch}' if train else
                          f'Validating {f"epoch {epoch}" if epoch else ""}',
                     disable=self.args.local_rank > 0) as t:
            for batch_idx, data in enumerate(t):
                # Measure data loading time
                avg_data_time.update(time.time() - end)

                # -------------- Organize inputs ------------- #
                img_no_mask_locs = None
                text_no_mask_locs = None
                text_mask_locs = None
                with torch.no_grad():
                    if self.args.pointing:
                        text_mask_locs, text_no_mask_locs = \
                            self.masker.gen_pointing_text_mask_locs(data)
                    imgs, vm_labels, neg_vm_labels = self.masker.mask_imgs(
                        data['imgs'].cuda(), no_mask_locs=img_no_mask_locs)
                    # Note that this does not mask sep tokens
                    text, lm_labels, input_pointing_labels = \
                        self.masker.mask_text(data['text'].cuda(),
                                              self.args.input_pointing,
                                              no_mask_locs=text_no_mask_locs,
                                              mask_locs=text_mask_locs,
                                              **data)
                    img_bboxes = data['img_bboxes'].cuda()
                    imgs_len = data['imgs_len'].cuda()
                    text_len = data['text_len'].cuda()

                    img_locs = txt_locs = None
                    if self.args.pointing:
                        attn_mask, img_locs, txt_locs = \
                            self.masker.attn_mask_pointing(
                                imgs_len, text_len, data['seq_type'],
                                data['num_seqs'].cuda(),
                                self.args.attn_masking)
                        # The input to the model is:
                        # imgs = [[img0, img1, ..., imgN1, PAD, ..., PAD], ...,
                        #         [img0, img1, ..., imgNk, PAD, ..., PAD]]
                        # where the padding is such that all K in the batch
                        # have the same total length (minimal padding). The N
                        # images include all the images from all the
                        # sequences, concatenated. Only padding at the end.
                    else:
                        img_attn_mask = \
                            torch.arange(self.args.max_img_seq_len,
                                         device=imgs.device)[None, :] < imgs_len[:, None]
                        text_attn_mask = \
                            torch.arange(self.args.max_txt_seq_len,
                                         device=imgs.device)[None, :] < text_len[:, None]
                        # text starts with an [IMG] token that gets moved to
                        # the beginning of the input in the forward pass
                        attn_mask = torch.cat(
                            (text_attn_mask[:, :1], img_attn_mask,
                             text_attn_mask[:, 1:]), dim=1)

                # -------------- Forward pass ---------------- #
                lm_preds, vm_preds, input_pointing_pred, hidden_states, *_ = \
                    self.model(imgs, text, img_bboxes,
                               attention_mask=attn_mask, img_lens=imgs_len,
                               txt_lens=text_len, img_locs=img_locs,
                               txt_locs=txt_locs)

                # -------------- Compute losses -------------- #
                loss_values = {}
                if self.args.pointing:
                    non_padding_text = (
                        torch.arange(text.shape[1], device=text.device)[None, :]
                        < text_len.cumsum(dim=1)[:, -1][:, None])
                    non_padding_imgs = (
                        torch.arange(imgs.shape[1], device=imgs.device)[None, :]
                        < imgs_len.cumsum(dim=1)[:, -1][:, None])
                    loss_values['lm'] = self.losses.lm_loss(
                        lm_preds, lm_labels[non_padding_text])
                    loss_values['vm'] = self.losses.vm_loss(
                        vm_preds, vm_labels[non_padding_imgs],
                        neg_vm_labels[non_padding_imgs],
                        embedder=self.module.embeddings.img_embeddings)
                else:
                    loss_values['lm'] = self.losses.lm_loss(lm_preds, lm_labels)
                    loss_values['vm'] = self.losses.vm_loss(
                        vm_preds, vm_labels, neg_vm_labels,
                        embedder=self.module.embeddings.img_embeddings)
                loss = (self.args.lm_loss_lambda * loss_values['lm'] +
                        self.args.vm_loss_lambda * loss_values['vm'])

                if self.args.pointing:
                    pointing_loss, (pointing_acc, pointing_cnt) = \
                        self.losses.pointing_loss(data, hidden_states,
                                                  lm_labels, text, text_len,
                                                  txt_locs)
                    loss_values['pointing'] = pointing_loss
                    loss += self.args.pointing_loss_lambda * loss_values['pointing']

                if self.args.input_pointing:
                    input_pointing_loss, (input_pointing_acc, input_pointing_cnt), *_ = \
                        self.losses.input_pointing_pointing_loss(
                            input_pointing_pred[0], input_pointing_pred[1],
                            input_pointing_labels, txt_locs, lm_labels,
                            data=data, log=True)
                    loss_values['input_pointing'] = input_pointing_loss
                    loss += self.args.input_pointing_loss_lambda * loss_values['input_pointing']

                if self.args.n_gpu > 1:
                    loss = loss.mean()
                loss_values['total'] = loss

                # --------------- Update model -------------- #
                if train:
                    if self.args.fp16:
                        with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                            scaled_loss.backward()
                    else:
                        (loss / self.args.gradient_accumulation_steps).backward()

                if (batch_idx + 1) % self.args.gradient_accumulation_steps == 0:
                    # Record losses
                    for loss_name in list_losses:
                        average_meters[loss_name].update(
                            loss_values[loss_name].item() /
                            self.args.gradient_accumulation_steps,
                            imgs.size(0))
                    if train:
                        if self.args.fp16:
                            torch.nn.utils.clip_grad_norm_(
                                amp.master_params(self.optimizer),
                                self.args.max_grad_norm)
                        else:
                            torch.nn.utils.clip_grad_norm_(
                                self.model.parameters(),
                                self.args.max_grad_norm)
                        self.optimizer.step()
                        # scheduler.step()  # no scheduler for now
                        self.model.zero_grad()

                # Measure elapsed time
                avg_batch_time.update(time.time() - end)
                end = time.time()

                # ------------- Show information ------------ #
                postfix_kwargs = {}
                if not train:
                    if self.args.pointing:
                        lm_labels = lm_labels[non_padding_text]
                        avg_pointing_acc.update(pointing_acc, pointing_cnt)
                        postfix_kwargs['PointingAcc'] = avg_pointing_acc.avg
                    if self.args.input_pointing:
                        avg_input_pointing_acc.update(input_pointing_acc,
                                                      input_pointing_cnt)
                        postfix_kwargs['input_pointingAcc'] = \
                            avg_input_pointing_acc.avg
                    results = tests.accuracy(lm_preds, lm_labels, topk=(1, 5))
                    avg_lm_top1.update(*results['top1'])
                    avg_lm_top5.update(*results['top5'])
                    postfix_kwargs['LMTop1'] = avg_lm_top1.avg
                    postfix_kwargs['LMTop5'] = avg_lm_top5.avg

                for loss_name in list_losses:
                    postfix_kwargs[loss_name] = average_meters[loss_name].avg

                t.set_postfix(DataTime=avg_data_time.avg,
                              BatchTime=avg_batch_time.avg,
                              **postfix_kwargs)

                if train:
                    if (self.global_step % self.args.print_freq == 0
                            and self.args.writer and not self.args.debug):
                        self.args.writer.add_scalars(
                            'train/loss', {**postfix_kwargs},
                            self.global_step * self.args.train_batch_size *
                            self.args.step_n_gpus)
                    self.global_step += 1

        if not train:
            cnt = average_meters['total'].count
            if epoch is not None:
                loss_scalars = {}
                for loss_name in list_losses:
                    loss_scalars[loss_name] = utils.gather_score(
                        average_meters[loss_name].avg, cnt)
                acc_scalars = {
                    'lm_top1': utils.gather_score(avg_lm_top1.avg, cnt),
                    'lm_top5': utils.gather_score(avg_lm_top5.avg, cnt)
                }
                if self.args.pointing:
                    acc_scalars['pointing_acc'] = utils.gather_score(
                        avg_pointing_acc.avg, cnt)
                if self.args.input_pointing:
                    acc_scalars['input_pointing_acc'] = utils.gather_score(
                        avg_input_pointing_acc.avg, cnt)
                if self.args.writer and not self.args.debug:
                    self.args.writer.add_scalars('val/loss', loss_scalars, epoch)
                    self.args.writer.add_scalars('val/acc', acc_scalars, epoch)
            return utils.gather_score(avg_lm_top5.avg, cnt)

    def test(self, masking_policy=None):
        torch.cuda.synchronize()
        if masking_policy == 'all_acc_tests':
            for p in self.masking_policies:
                self.test(p)
        else:
            tests.test_accuracy(self, masking_policy)
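A minimal sketch of driving the Trainer end to end; the construction of `model`, `optimizer`, the loaders, and `args` is assumed here, not shown in the source:

# Hypothetical driver code for the Trainer above.
trainer = Trainer(model, optimizer, train_loader, test_loader, args)
trainer.train()  # alternates run_epoch(train=True) and validation, checkpointing on LM top-5
trainer.test(masking_policy='all_acc_tests')  # sweeps every policy in masking_policies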
with tf.Session(config=config) as sess:
    model = create_model(sess, args, vocab)
    if args.beam > 1:
        decoder = beam_search.Decoder(sess, args, vocab, model)
    else:
        decoder = greedy_decoding.Decoder(sess, args, vocab, model)

    if args.train:
        batches, _, _ = get_batches(train0, train1, vocab.word2id,
                                    args.batch_size)
        random.shuffle(batches)

        start_time = time.time()
        step = 0
        losses = Losses(args.steps_per_checkpoint)
        best_dev = float('inf')
        learning_rate = args.learning_rate
        rho = args.rho
        gamma = args.gamma_init
        dropout = args.dropout_keep_prob

        for epoch in range(1, 1 + args.max_epochs):
            print('--------------------epoch %d--------------------' % epoch)
            print('learning_rate:', learning_rate, ' gamma:', gamma)

            for batch in batches:
                feed_dict = feed_dictionary(model, batch, rho, gamma,
                                            dropout, learning_rate)
                loss_d0, _ = sess.run([model.loss_d0, model.optimizer_d0],
                                      feed_dict=feed_dict)
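                # Hedged continuation sketch (not in the source): a symmetric
                # update for the second discriminator, then periodic progress
                # reporting every steps_per_checkpoint batches. optimizer_d1
                # is an assumption, mirroring the optimizer_d0 used above.
                loss_d1, _ = sess.run([model.loss_d1, model.optimizer_d1],
                                      feed_dict=feed_dict)
                step += 1
                if step % args.steps_per_checkpoint == 0:
                    print('step %d, time %.0fs, loss_d0 %.2f, loss_d1 %.2f' %
                          (step, time.time() - start_time, loss_d0, loss_d1))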
# cell_num and the 30-channel cell layout are defined elsewhere in the test
# module; the layout appears to be YOLO-style: two boxes of
# (x, y, w, h, conf) followed by 20 class scores.
some_target_for_loc_error = {
    (3, 3): [[0, [1, 1],
              [torch.Tensor([0.1]).cuda(), torch.Tensor([0.1]).cuda()]]]
}
some_predictions_for_loc_error = torch.zeros((1, cell_num, cell_num, 30))
for y in range(cell_num):
    for x in range(cell_num):
        some_predictions_for_loc_error[0, y, x, :] = torch.Tensor([
            0.2, 0.2, 0, 0, 0, 0.2, 0.2, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        ])
unittest.TestCase().assertAlmostEqual(
    0.01,
    Losses.get_loc_error(some_predictions_for_loc_error,
                         some_target_for_loc_error).cpu().detach().numpy()[0],
    2)

some_targets_class_prob_exist = {
    (3, 3): [[1, [0, 0],
              [torch.Tensor([0]).cuda(), torch.Tensor([0]).cuda()]]]
}
some_predictions_class_prob_exist = torch.zeros((1, cell_num, cell_num, 30))
for y in range(cell_num):
    for x in range(cell_num):
        if y == 3 and x == 3:
            some_predictions_class_prob_exist[0, y, x, :] = torch.Tensor([
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.2, 0.2, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
            ])
        else:
            pass  # (excerpt ends here)
def train(**args):
    """
    Train the selected model
    Args:
        rerun        (Int):        Number of repetitions for the selected experiment
        seed         (Int):        Seed for the random state
        save_dir     (String):     Top level directory to generate results folder
        model        (String):     Name of selected model
        dataset      (String):     Name of selected dataset
        exp          (String):     Name of experiment
        debug        (Int):        Debug state to avoid saving variables
        load_type    (String):     Keyword indicator to evaluate the testing or validation set
        pretrained   (Int/String): Int/String indicating loading of random, pretrained or saved weights
        opt          (String):     Name of the optimizer to use ('sgd' or 'adam')
        lr           (Float):      Learning rate
        momentum     (Float):      Momentum in optimizer
        weight_decay (Float):      Weight decay value
        final_shape  ([Int, Int]): Shape of data when passed into network

    Return:
        None
    """

    print("\n############################################################################\n")
    print("Experimental Setup: ", args)
    print("\n############################################################################\n")

    for total_iteration in range(args['rerun']):

        # Generate Results Directory
        d = datetime.datetime.today()
        date = d.strftime('%Y%m%d-%H%M%S')
        result_dir = os.path.join(
            args['save_dir'], args['model'],
            '_'.join((args['dataset'], args['exp'], date)))
        log_dir = os.path.join(result_dir, 'logs')
        save_dir = os.path.join(result_dir, 'checkpoints')

        if not args['debug']:
            os.makedirs(result_dir, exist_ok=True)
            os.makedirs(log_dir, exist_ok=True)
            os.makedirs(save_dir, exist_ok=True)

            # Save copy of config file
            with open(os.path.join(result_dir, 'config.yaml'), 'w') as outfile:
                yaml.dump(args, outfile, default_flow_style=False)

            # Tensorboard Element
            writer = SummaryWriter(log_dir)

        # Check if GPU is available (CUDA)
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        # Load Network
        model = create_model_object(**args).to(device)

        # Load Data
        loader = data_loader(model_obj=model, **args)

        if args['load_type'] == 'train':
            train_loader = loader['train']
            valid_loader = loader['train']  # Run accuracy on train data if only `train` selected
        elif args['load_type'] == 'train_val':
            train_loader = loader['train']
            valid_loader = loader['valid']
        else:
            sys.exit('Invalid environment selection for training, exiting')
        # END IF

        # Training Setup
        params = [p for p in model.parameters() if p.requires_grad]

        if args['opt'] == 'sgd':
            optimizer = optim.SGD(params, lr=args['lr'],
                                  momentum=args['momentum'],
                                  weight_decay=args['weight_decay'])
        elif args['opt'] == 'adam':
            optimizer = optim.Adam(params, lr=args['lr'],
                                   weight_decay=args['weight_decay'])
        else:
            sys.exit('Unsupported optimizer selected. Exiting')
        # END IF

        scheduler = MultiStepLR(optimizer, milestones=args['milestones'],
                                gamma=args['gamma'])

        if isinstance(args['pretrained'], str):
            ckpt = load_checkpoint(args['pretrained'])
            model.load_state_dict(ckpt)
            start_epoch = load_checkpoint(args['pretrained'],
                                          key_name='epoch') + 1
            optimizer.load_state_dict(
                load_checkpoint(args['pretrained'], key_name='optimizer'))

            # Advance the scheduler to the resumed epoch
            for quick_looper in range(start_epoch):
                scheduler.step()
            # END FOR
        else:
            start_epoch = 0
        # END IF

        model_loss = Losses(device=device, **args)
        acc_metric = Metrics(**args)
        best_val_acc = 0.0

        # Start: Training Loop
        for epoch in range(start_epoch, args['epoch']):
            running_loss = 0.0
            print('Epoch: ', epoch)

            # Setup Model To Train
            model.train()

            # Start: Epoch
            for step, data in enumerate(train_loader):
                if step % args['pseudo_batch_loop'] == 0:
                    loss = 0.0
                    optimizer.zero_grad()
                # END IF

                x_input = data['data'].to(device)
                annotations = data['annots']

                assert args['final_shape'] == list(x_input.size()[-2:]), \
                    "Input to model does not match final_shape argument"
                outputs = model(x_input)
                loss = model_loss.loss(outputs, annotations)
                loss = loss * args['batch_size']
                loss.backward()

                running_loss += loss.item()

                if np.isnan(running_loss):
                    import pdb
                    pdb.set_trace()
                # END IF

                if not args['debug']:
                    # Add Learning Rate Element
                    for param_group in optimizer.param_groups:
                        writer.add_scalar(
                            args['dataset'] + '/' + args['model'] + '/learning_rate',
                            param_group['lr'],
                            epoch * len(train_loader) + step)
                    # END FOR

                    # Add Loss Element
                    writer.add_scalar(
                        args['dataset'] + '/' + args['model'] + '/minibatch_loss',
                        loss.item() / args['batch_size'],
                        epoch * len(train_loader) + step)
                # END IF

                if (epoch * len(train_loader) + step + 1) % 100 == 0:
                    print('Epoch: {}/{}, step: {}/{} | train loss: {:.4f}'.format(
                        epoch, args['epoch'], step + 1, len(train_loader),
                        running_loss / float(step + 1) / args['batch_size']))
                # END IF

                if (epoch * len(train_loader) + (step + 1)) % args['pseudo_batch_loop'] == 0 and step > 0:
                    # Apply large mini-batch normalization
                    for param in model.parameters():
                        param.grad *= 1. / float(
                            args['pseudo_batch_loop'] * args['batch_size'])
                    optimizer.step()
                # END IF
            # END FOR: Epoch

            if not args['debug']:
                # Save Current Model
                save_path = os.path.join(
                    save_dir, args['dataset'] + '_epoch' + str(epoch) + '.pkl')
                save_checkpoint(epoch, step, model, optimizer, save_path)
            # END IF: Debug

            scheduler.step(epoch=epoch)
            print('Scheduler lr: %f' % scheduler.get_lr()[0])

            # START: Validation Accuracy
            running_acc = []
            running_acc = valid(valid_loader, running_acc, model, device,
                                acc_metric)

            if not args['debug']:
                writer.add_scalar(
                    args['dataset'] + '/' + args['model'] + '/validation_accuracy',
                    100. * running_acc[-1],
                    epoch * len(valid_loader) + step)

            print('Accuracy of the network on the validation set: %f %%\n' %
                  (100. * running_acc[-1]))

            # Save Best Validation Accuracy Model Separately
            if best_val_acc < running_acc[-1]:
                best_val_acc = running_acc[-1]

                if not args['debug']:
                    # Save Current Model
                    save_path = os.path.join(
                        save_dir, args['dataset'] + '_best_model.pkl')
                    save_checkpoint(epoch, step, model, optimizer, save_path)
                # END IF
            # END IF
        # END FOR: Training Loop

        if not args['debug']:
            # Close Tensorboard Element
            writer.close()
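A hypothetical invocation of train(); the values below are illustrative and assume a config exposing the keys read in the function body (including milestones, gamma, batch_size, pseudo_batch_loop, and epoch):

# Placeholder experiment config; model and dataset names are examples only.
train(rerun=1, seed=999, save_dir='./results', model='yolo',
      dataset='VOC2007', exp='baseline', debug=1, load_type='train_val',
      pretrained=0, opt='sgd', lr=1e-3, momentum=0.9, weight_decay=5e-4,
      final_shape=[448, 448], batch_size=8, pseudo_batch_loop=4,
      epoch=30, milestones=[20, 25], gamma=0.1)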
class ImageClassifier(ClassifierBase):
    """
    model for morphological classification of galaxy images

    Usage
    -----
    to use the pretrained model, do

    >>> classifier = ImageClassifier()
    >>> classifier.load()
    >>> classifier.eval()
    >>> classifier.use_label_hierarchy()
    >>> labels = classifier(images)
    """

    def __init__(
        self,
        seed=None,
        optimizer=Adam,
        optimizer_kwargs={},
        learning_rate_init=0.04,
        gamma=0.995,  # learning rate decay factor
        considered_groups=list(range(12)),  # question groups considered from the start
        sample_variance_threshold=0.002,
        weight_loss_sample_variance=0,  # 10.
        evaluation_steps=250,  # number of batches between loss tracking
        N_batches_test=1,  # number of batches considered for evaluation
    ):
        super(ImageClassifier, self).__init__(considered_groups=considered_groups)

        if seed is not None:
            torch.manual_seed(seed)

        #'''
        resnet = models.resnet18(pretrained=False)
        self.conv = Sequential(
            *(list(resnet.children())[:-1]),
            Flatten(),
        )
        ''' architecture used by Dielemann et al 2015
        self.conv = Sequential(
            # Conv2dUntiedBias(41, 41, 3, 32, kernel_size=6),
            Conv2d(3, 32, kernel_size=6),
            ReLU(),
            MaxPool2d(2),
            # Conv2dUntiedBias(16, 16, 32, 64, kernel_size=5),
            Conv2d(32, 64, kernel_size=5),
            ReLU(),
            MaxPool2d(2),
            # Conv2dUntiedBias(6, 6, 64, 128, kernel_size=3),
            Conv2d(64, 128, kernel_size=3),
            ReLU(),
            # Conv2dUntiedBias(4, 4, 128, 128, kernel_size=3),  # weight_std=0.1
            Conv2d(128, 128, kernel_size=3),
            ReLU(),
            MaxPool2d(2),
            Flatten(),
        )
        #'''
        self.dense1 = MaxOut(8192, 2048, bias=0.01)
        self.dense2 = MaxOut(2048, 2048, bias=0.01)
        self.dense3 = Sequential(
            MaxOut(2048, 37, bias=0.1),
            # LeakyReLU(negative_slope=1e-7),
            ALReLU(negative_slope=1e-2),
        )
        self.dropout = Dropout(p=0.5)
        self.augment = Compose([
            Lambda(lambda img: torch.cat([img, hflip(img)], 0)),
            Lambda(lambda img: torch.cat([img, rotate(img, 45)], 0)),
            FiveCrop(45),
            Lambda(lambda crops: torch.cat([
                rotate(crop, ang)
                for crop, ang in zip(crops, (0, 90, 270, 180))
            ], 0)),
        ])
        self.N_augmentations = 16
        self.N_conv_outputs = 512

        self.set_optimizer(optimizer, lr=learning_rate_init, **optimizer_kwargs)
        # self.scheduler = ExponentialLR(self.optimizer, gamma=gamma)
        self.scheduler = MultiStepLR(self.optimizer, milestones=[292, 373],
                                     gamma=gamma)

        # if True, output probabilities are renormalized to fit the
        # hierarchical label structure
        self.make_labels_hierarchical = False

        self.N_batches_test = N_batches_test
        self.evaluation_steps = evaluation_steps  # number of batches between loss tracking
        self.weight_loss_sample_variance = weight_loss_sample_variance
        self.sample_variance_threshold = sample_variance_threshold

        self.iteration = 0
        self.epoch = 0
        self.losses_train = Losses("loss", "train")
        self.losses_valid = Losses("loss", "valid")
        self.sample_variances_train = Losses("sample variance", "train")
        self.sample_variances_valid = Losses("sample variance", "valid")
        for g in range(1, 12):
            setattr(self, f"accuracies_Q{g}_train",
                    Accuracies("accuracy train", f"Q{g}"))
            setattr(self, f"accuracies_Q{g}_valid",
                    Accuracies("accuracy valid", f"Q{g}"))
        self.losses_regression = Losses("loss", "regression")
        self.losses_variance = Losses("loss", "sample variance")

        ## return to random seed
        if seed is not None:
            sd = np.random.random() * 10000
            torch.manual_seed(sd)

    def update_optimizer(self, **kwargs) -> None:
        # reuse the class of the current optimizer with the new settings
        self.set_optimizer(type(self.optimizer), **kwargs)

    def update_optimizer_learningrate(self, learning_rate) -> None:
        print("update lr", learning_rate)
        for i in range(len(self.optimizer.param_groups)):
            self.optimizer.param_groups[i]['lr'] = learning_rate

    def use_label_hierarchy(self) -> None:
        self.make_labels_hierarchical = True

    def forward(self, x: torch.Tensor, train=False) -> torch.Tensor:
        x = self.augment(x)
        x = self.conv(x)
        x = self.recombine_augmentation(x)
        x = self.dropout(x)
        x = self.dense1(x)
        x = self.dropout(x)
        x = self.dense2(x)
        x = self.dropout(x)
        x = self.dense3(x)
        # x += 1e-4  ## use only with LeakyReLU to prevent values < 0
        if self.make_labels_hierarchical:
            x = make_galaxy_labels_hierarchical(x)
        return x

    def recombine_augmentation(self, x) -> torch.Tensor:
        """ recombine results of augmented views to a single vector """
        batch_size = x.size(0) // self.N_augmentations
        x = x.reshape(self.N_augmentations, batch_size, self.N_conv_outputs)
        x = x.permute(1, 0, 2)
        x = x.reshape(batch_size, self.N_augmentations * self.N_conv_outputs)
        return x

    def train_step(self, images: torch.Tensor, labels: torch.Tensor) -> float:
        self.train()
        labels_pred = self.forward(images, train=True)
        loss_regression = mse(labels_pred[:, self.considered_label_indices],
                              labels[:, self.considered_label_indices])
        loss_variance = self.weight_loss_sample_variance * \
            loss_sample_variance(labels_pred[:, self.considered_label_indices],
                                 threshold=self.sample_variance_threshold)
        loss = loss_regression + loss_variance
        self.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm=1)
        self.optimizer.step()
        self.iteration += 1
        return loss.item()

    def train_epoch(
        self,
        data_loader_train: torch.utils.data.DataLoader,
        data_loader_valid: torch.utils.data.DataLoader,
        track: bool = False,
    ) -> None:
        for images, labels in tqdm(data_loader_train,
                                   desc=f"epoch {self.epoch}"):
            images = images.to(device)
            labels = labels.to(device)
            loss = self.train_step(images, labels)
            if np.isnan(loss):
                from pdb import set_trace
                set_trace()
                loss = self.train_step(images, labels)
                raise Exception("loss is NaN")

            # track losses once every `evaluation_steps` batches
            if not self.iteration % self.evaluation_steps - 1:
                loss_regression_train, loss_variance_train, accs_train, variance_train = \
                    self.evaluate_batch(images, labels, print_labels=False)
                loss_train = loss_regression_train + \
                    loss_variance_train * self.weight_loss_sample_variance
                self.losses_regression.append(self.iteration, loss_regression_train)
                self.losses_variance.append(self.iteration, loss_variance_train)
                self.losses_train.append(self.iteration, loss_train)
                self.sample_variances_train.append(self.iteration, variance_train)
                for group, acc in accs_train.items():
                    getattr(self, f"accuracies_Q{group}_train").append(
                        self.iteration, acc)

                # evaluate on a single validation batch
                for images, labels in data_loader_valid:
                    images = images.to(device)
                    labels = labels.to(device)
                    break
                loss_regression_valid, loss_variance_valid, accs_valid, variance_valid = \
                    self.evaluate_batch(images, labels)
                loss_valid = loss_regression_valid + \
                    loss_variance_valid * self.weight_loss_sample_variance
                self.losses_valid.append(self.iteration, loss_valid)
                self.sample_variances_valid.append(self.iteration, variance_valid)
                for group, acc in accs_valid.items():
                    getattr(self, f"accuracies_Q{group}_valid").append(
                        self.iteration, acc)

                if track:
                    import wandb
                    logs = {
                        "loss_regression_train": loss_regression_train,
                        "loss_variance_train": loss_variance_train,
                        "loss_train": loss_train,
                        "variance_train": variance_train,
                        "loss_regression_valid": loss_regression_valid,
                        "loss_variance_valid": loss_variance_valid,
                        "loss_valid": loss_valid,
                        "variance_valid": variance_valid,
                    }
                    logs.update({
                        f"accuracy_Q{group}_train": acc
                        for group, acc in accs_train.items()
                    })
                    logs.update({
                        f"accuracy_Q{group}_valid": acc
                        for group, acc in accs_valid.items()
                    })
                    wandb.log(logs)

        self.epoch += 1
        self.scheduler.step()
        self.save()

    def predict(self, images: torch.Tensor) -> torch.Tensor:
        self.eval()
        return self(images)

    def evaluate_batches(self, data_loader: torch.utils.data.DataLoader) -> list:
        with torch.no_grad():
            loss = 0
            accs = Counter({group: 0 for group in range(1, 12)})
            variance = 0
            for N_test, (images, labels) in enumerate(data_loader):
                images = images.to(device)
                labels = labels.to(device)
                if N_test >= self.N_batches_test:
                    break
                # evaluate_batch returns (loss_regression, loss_variance, accs, variance)
                loss_regression_, loss_variance_, accs_, variance_ = \
                    self.evaluate_batch(images, labels)
                loss += loss_regression_ + loss_variance_
                accs.update(accs_)
                variance += variance_
            loss /= N_test + 1
            variance /= N_test + 1
            for group in accs.keys():
                accs[group] /= N_test + 1
        return loss, accs, variance

    def evaluate_batch(self, images: torch.Tensor, labels: torch.Tensor,
                       print_labels=False) -> tuple:
        """ evaluations for batch """
        self.eval()
        with torch.no_grad():
            labels_pred = self.forward(images)
            if print_labels:
                for i, (prediction, target) in enumerate(zip(labels_pred, labels)):
                    print("target\t\t",
                          np.around(target[self.considered_label_indices].cpu(), 3))
                    print("\033[1mprediction\t",
                          np.around(prediction[self.considered_label_indices].cpu(), 3),
                          end="\033[0m\n")
                    if i >= 2:
                        break
                print("<target>\t",
                      np.around(torch.mean(labels[:, self.considered_label_indices],
                                           dim=0).cpu(), 3))
                print("<target>\t",
                      np.around(torch.std(labels[:, self.considered_label_indices],
                                          dim=0).cpu(), 3))
                print("\033[1m<prediction>\t",
                      np.around(torch.mean(labels_pred[:, self.considered_label_indices],
                                           dim=0).cpu(), 3),
                      end="\033[0m\n")
                print("\033[1m<prediction>\t",
                      np.around(torch.std(labels_pred[:, self.considered_label_indices],
                                          dim=0).cpu(), 3),
                      end="\033[0m\n")

            loss_regression = torch.sqrt(
                mse(labels_pred[:, self.considered_label_indices],
                    labels[:, self.considered_label_indices])).item()
            loss_variance = self.weight_loss_sample_variance * \
                loss_sample_variance(labels_pred[:, self.considered_label_indices],
                                     threshold=self.sample_variance_threshold).item()
            accs = measure_accuracy_classifier(
                labels_pred, labels,
                considered_groups=self.considered_groups.considered_groups)
            variance = get_sample_variance(
                labels_pred[:, self.considered_label_indices]).item()
        return loss_regression, loss_variance, accs, variance

    def plot_losses(self, save=False):
        self.losses_train.plot()
        self.losses_valid.plot()
        self.losses_regression.plot(linestyle=":")
        self.losses_variance.plot(linestyle=":")
        if save:
            plt.savefig(folder_results + "loss.png")
            plt.close()
        else:
            plt.show()

    def plot_sample_variances(self, save=False):
        self.sample_variances_train.plot()
        self.sample_variances_valid.plot()
        if save:
            plt.savefig(folder_results + "variances.png")
            plt.close()
        else:
            plt.show()

    def plot_accuracy(self, save=False):
        for group in range(1, 12):
            if group not in self.considered_groups.considered_groups:
                continue
            getattr(self, f"accuracies_Q{group}_train").plot()
        if save:
            plt.savefig(folder_results + "accuracy_train.png")
            plt.close()
        else:
            plt.show()

    def plot_test_accuracy(self, save=False):
        for group in range(1, 12):
            if group not in self.considered_groups.considered_groups:
                continue
            getattr(self, f"accuracies_Q{group}_valid").plot()
        if save:
            plt.savefig(folder_results + "accuracy_valid.png")
            plt.close()
        else:
            plt.show()
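A minimal training-run sketch to complement the inference usage in the class docstring; the data loaders, device, and epoch count are assumptions, not code from the source:

# Hypothetical training driver for ImageClassifier.
classifier = ImageClassifier(seed=42).to(device)
for _ in range(n_epochs):
    classifier.train_epoch(data_loader_train, data_loader_valid)
classifier.plot_losses(save=True)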
def unet_model_fn(features, labels, mode, params):
    tf.local_variables_initializer()
    loss, train_op = None, None
    eval_metric_ops, training_hooks, evaluation_hooks = None, None, None
    predictions_dict = None

    unet = Unet(params=params)
    logits = unet.model(input_tensor=features['image'])
    y_pred = tf.math.softmax(logits, axis=-1)
    output_img = tf.expand_dims(
        tf.cast(tf.math.argmax(y_pred, axis=-1) * 255, dtype=tf.uint8),
        axis=-1)

    if mode in (estimator.ModeKeys.TRAIN, estimator.ModeKeys.EVAL):
        with tf.name_scope('Loss_Calculation'):
            loss = Losses(logits=logits, labels=labels['label'])
            loss = loss.custom_loss()
        with tf.name_scope('Dice_Score_Calculation'):
            dice = f1(labels=labels['label'], predictions=y_pred)
        with tf.name_scope('Images_{}'.format(mode)):
            with tf.name_scope('Reformat_Outputs'):
                label = tf.expand_dims(
                    tf.cast(tf.argmax(labels['label'], -1) * 255,
                            dtype=tf.uint8), axis=-1)
                image = tf.math.divide(
                    features['image'] - tf.reduce_max(features['image'], [0, 1, 2]),
                    tf.reduce_max(features['image'], [0, 1, 2]) -
                    tf.reduce_min(features['image'], [0, 1, 2]))
            summary.image('1_Medical_Image', image, max_outputs=1)
            summary.image('2_Output', output_img, max_outputs=1)
            summary.image('3_Output_pred',
                          tf.expand_dims(y_pred[:, :, :, 1], -1),
                          max_outputs=1)
            summary.image('4_Output_label', label, max_outputs=1)

    if mode == estimator.ModeKeys.TRAIN:
        with tf.name_scope('Learning_Rate'):
            global_step = tf.compat.v1.train.get_or_create_global_step()
            learning_rate = tf.compat.v1.train.exponential_decay(
                params['lr'], global_step=global_step,
                decay_steps=params['decay_steps'],
                decay_rate=params['decay_rate'], staircase=False)
        with tf.name_scope('Optimizer_conf'):
            train_op = Adam(learning_rate=learning_rate).minimize(
                loss=loss, global_step=global_step)
        with tf.name_scope('Metrics'):
            summary.scalar('Output_DSC', dice[1])
            summary.scalar('Learning_Rate', learning_rate)

    if mode == estimator.ModeKeys.EVAL:
        eval_metric_ops = {'Metrics/Output_DSC': dice}
        eval_summary_hook = tf.estimator.SummarySaverHook(
            output_dir=params['eval_path'],
            summary_op=summary.merge_all(),
            save_steps=params['eval_steps'])
        evaluation_hooks = [eval_summary_hook]

    if mode == estimator.ModeKeys.PREDICT:
        predictions_dict = {'image': features['image'],
                            'y_preds': y_pred[:, :, :, 1],
                            'output_img': output_img,
                            'path': features['path']}

    return estimator.EstimatorSpec(mode,
                                   predictions=predictions_dict,
                                   loss=loss,
                                   train_op=train_op,
                                   eval_metric_ops=eval_metric_ops,
                                   training_hooks=training_hooks,
                                   evaluation_hooks=evaluation_hooks)
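A model_fn with this signature can be handed to a tf.estimator.Estimator. A minimal wiring sketch; the params values are illustrative (they mirror the keys read above) and train_input_fn is a placeholder input pipeline:

# Hypothetical Estimator setup for the model_fn above.
params = {'lr': 1e-4, 'decay_steps': 10000, 'decay_rate': 0.9,
          'eval_path': './eval', 'eval_steps': 100}
unet_estimator = tf.estimator.Estimator(model_fn=unet_model_fn,
                                        model_dir='./unet_ckpts',
                                        params=params)
unet_estimator.train(input_fn=train_input_fn, steps=20000)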