def train(train_loader, model, classifier, criterion, optimizer, epoch, opt): """one epoch training""" model.eval() classifier.train() batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() end = time.time() for idx, (images, labels) in enumerate(train_loader): data_time.update(time.time() - end) images = images.cuda(non_blocking=True) labels = labels.cuda(non_blocking=True) bsz = labels.shape[0] # warm-up learning rate warmup_learning_rate(opt, epoch, idx, len(train_loader), optimizer) # compute loss with torch.no_grad(): features = model.encoder(images) output = classifier(features.detach()) loss = criterion(output, labels) # update metric losses.update(loss.item(), bsz) acc1, acc5 = accuracy(output, labels, topk=(1, 5)) top1.update(acc1[0], bsz) top5.update(acc5[0], bsz) # SGD optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() # print info if (idx + 1) % opt.print_freq == 0: logging.info('Train: [{0}][{1}/{2}]\t' 'BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'DT {data_time.val:.3f} ({data_time.avg:.3f})\t' 'loss {loss.val:.3f} ({loss.avg:.3f})\t' 'Acc@1 {top1.val:.3f} ({top1.avg:.3f})'.format( epoch, idx + 1, len(train_loader), batch_time=batch_time, data_time=data_time, loss=losses, top1=top1)) sys.stdout.flush() return losses.avg, top1.avg, top5.avg
def train(train_loader, model, criterion, optimizer, epoch, opt): """one epoch training""" model.train() batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() end = time.time() for idx, (images, labels) in enumerate(train_loader): data_time.update(time.time() - end) images = torch.cat([images[0], images[1]], dim=0) if torch.cuda.is_available(): images = images.cuda(non_blocking=True) labels = labels.cuda(non_blocking=True) bsz = labels.shape[0] # warm-up learning rate warmup_learning_rate(opt, epoch, idx, len(train_loader), optimizer) # compute loss features = model(images) f1, f2 = torch.split(features, [bsz, bsz], dim=0) features = torch.cat([f1.unsqueeze(1), f2.unsqueeze(1)], dim=1) if opt.method == 'SupCon': loss = criterion(features, labels) elif opt.method == 'SimCLR': loss = criterion(features) else: raise ValueError('contrastive method not supported: {}'.format( opt.method)) # update metric losses.update(loss.item(), bsz) # SGD optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() # print info if (idx + 1) % opt.print_freq == 0: print('Train: [{0}][{1}/{2}]\t' 'BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'DT {data_time.val:.3f} ({data_time.avg:.3f})\t' 'loss {loss.val:.3f} ({loss.avg:.3f})'.format( epoch, idx + 1, len(train_loader), batch_time=batch_time, data_time=data_time, loss=losses)) sys.stdout.flush() return losses.avg
def train(train_loader, model, criterion, optimizer, epoch, opt): """one epoch training""" model.train() batch_time = AverageMeter() data_time = AverageMeter() contrast_meter = AverageMeter() mi_meters = AverageMeter() end = time.time() batch_num = len(train_loader) iid_criterion = IIDLoss(lamb=1) for idx, (images, labels) in zip(range(batch_num), train_loader): data_time.update(time.time() - end) images = torch.cat([images[0], images[1]], dim=0) images = images.cuda(non_blocking=True) labels = labels.cuda(non_blocking=True) bsz = labels.shape[0] # warm-up learning rate warmup_learning_rate(opt, epoch, idx, len(train_loader), optimizer) # compute loss features, cluster_probs_list = model(images) f1, f2 = torch.split(features, [bsz, bsz], dim=0) features = torch.cat([f1.unsqueeze(1), f2.unsqueeze(1)], dim=1) if opt.method == 'SupCon': loss = criterion(features, labels) elif opt.method == 'SimCLR': loss = criterion(features) else: raise ValueError('contrastive method not supported: {}'.format( opt.method)) # update metric contrast_meter.update(loss.item(), bsz) iid_loss = torch.tensor(0, device=loss.device, dtype=torch.float) if opt.train_cluster: for cluster_probs in cluster_probs_list: cluster_pred, cluster_tf_pred = torch.split(cluster_probs, [bsz, bsz], dim=0) iid_sub_loss, p_i_j = iid_criterion(cluster_pred, cluster_tf_pred) iid_loss += iid_sub_loss iid_loss /= len(cluster_probs_list) mi_meters.update(-iid_loss.item(), bsz) total_loss = loss + opt.cluster_regweigt * iid_loss # SGD optimizer.zero_grad() total_loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() # print info if (idx + 1) % opt.print_freq == 0: print('Train: [{0}][{1}/{2}]\t' 'BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'DT {data_time.val:.3f} ({data_time.avg:.3f})\t' 'loss {loss.val:.3f} ({loss.avg:.3f})\t ' 'mi {mi_meters.val:.3f} ({mi_meters.avg:.3f})'.format( epoch, idx + 1, len(train_loader), batch_time=batch_time, data_time=data_time, loss=contrast_meter, mi_meters=mi_meters)) sys.stdout.flush() return contrast_meter.avg
def train(train_loader, models, criterion, optimizer, epoch, opt): """one epoch training""" for model in models.values(): model.train() batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() end = time.time() for idx, (images, _) in enumerate(train_loader): data_time.update(time.time() - end) bsz = images[0].shape[0] views = len(images) images = torch.cat(images, dim=0) images = images.cuda(non_blocking=True) # warm-up learning rate warmup_learning_rate(opt, epoch, idx, len(train_loader), optimizer) # compute loss online_projs = models['online_encoder'](images) online_preds = models['online_predictor'](online_projs) with torch.no_grad(): target_projs = models['target_encoder'](images).detach() online_preds = torch.split(online_preds, [bsz]*views, dim=0) online_preds = [f.unsqueeze(1) for f in online_preds] online_preds = torch.cat(online_preds, dim=1) target_projs = torch.split(target_projs, [bsz]*views, dim=0) target_projs = [f.unsqueeze(1) for f in target_projs] target_projs = torch.cat(target_projs, dim=1) loss = criterion(online_preds, target_projs) # update metric losses.update(loss.item(), bsz) # SGD optimizer.zero_grad() loss.backward() optimizer.step() # update target model update_moving_average(models['target_encoder'], models['online_encoder']) # measure elapsed time batch_time.update(time.time() - end) end = time.time() # print info if (idx + 1) % opt.print_freq == 0: print('Train: [{0}][{1}/{2}]\t' 'BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'DT {data_time.val:.3f} ({data_time.avg:.3f})\t' 'loss {loss.val:.3f} ({loss.avg:.3f})'.format( epoch, idx + 1, len(train_loader), batch_time=batch_time, data_time=data_time, loss=losses)) sys.stdout.flush() return losses.avg
def train(train_loader, model, classifier, criterion, optimizer, epoch, opt): """one epoch training""" model.eval() classifier.train() batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() end = time.time() for idx, (images, labels) in enumerate(train_loader): print(images.shape) data_time.update(time.time() - end) images = images.cuda(non_blocking=True) labels = labels.cuda(non_blocking=True) bsz = labels.shape[0] # warm-up learning rate warmup_learning_rate(opt, epoch, idx, len(train_loader), optimizer) # compute loss with torch.no_grad(): features = model.encoder(images) print(f"post-encoder: {features.shape}") features = torch.flatten(features, 0) # print(f"flattened: {features.shape}") output = classifier(features.detach()) print(f"output shape: {output.shape}") print(f"labels: {labels}, output: {output.data}") #ADDED m = torch.nn.Sigmoid() out_trans = m(output.data) labels = labels.type(torch.FloatTensor) out_grad = out_trans.clone().detach().requires_grad_(True) # loss = criterion(output, labels) loss = criterion(out_grad, labels) # update metric losses.update(loss.item(), bsz) # acc1, acc5 = accuracy(output, labels, topk=(1, 5)) # CHANGE FOR BATCH SIZE acc1, acc5 = accuracy(output, labels, topk=(1, 1)) top1.update(acc1[0], bsz) # SGD optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() # print info if (idx + 1) % opt.print_freq == 0: print('Train: [{0}][{1}/{2}]\t' 'BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'DT {data_time.val:.3f} ({data_time.avg:.3f})\t' 'loss {loss.val:.3f} ({loss.avg:.3f})\t' 'Acc@1 {top1.val:.3f} ({top1.avg:.3f})'.format( epoch, idx + 1, len(train_loader), batch_time=batch_time, data_time=data_time, loss=losses, top1=top1)) sys.stdout.flush() return losses.avg, top1.avg
def train(train_loader, model, head, criterion, optimizer, epoch, opt): """one epoch training""" model.train() batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() end = time.time() for idx, (images, labels, aug_labels) in enumerate(train_loader): data_time.update(time.time() - end) for i, grp in enumerate(images): images[i] = grp.cuda(non_blocking=True) for i in range(2): for j, aug_l in enumerate(aug_labels[i]): if j == 0: aug_labels[i][j] = aug_l.cuda(non_blocking=True) bsz = labels.shape[0] # warm-up learning rate warmup_learning_rate(opt, epoch, idx, len(train_loader), optimizer) # compute loss features = [] for grp in images: features.append(model.encoder(grp)) f1, f2 = F.normalize(model.head(features[1])), F.normalize( model.head(features[2])) features_ss = torch.cat([f1.unsqueeze(1), f2.unsqueeze(1)], dim=1) aug_pred_1 = head(features[0], features[1]) aug_pred_2 = head(features[0], features[2]) aug_pred = [aug_pred_1, aug_pred_2] if opt.method == 'SupCon': loss = criterion[0](features_ss, labels) + criterion[1](aug_pred, aug_labels) pass elif opt.method == 'SimCLR': loss = criterion[0](features_ss) + criterion[1](aug_pred, aug_labels) else: raise ValueError('contrastive method not supported: {}'.format( opt.method)) # update metric losses.update(loss.item(), bsz) # SGD optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() # print info if (idx + 1) % opt.print_freq == 0: print('Train: [{0}][{1}/{2}]\t' 'BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'DT {data_time.val:.3f} ({data_time.avg:.3f})\t' 'loss {loss.val:.3f} ({loss.avg:.3f})'.format( epoch, idx + 1, len(train_loader), batch_time=batch_time, data_time=data_time, loss=losses)) sys.stdout.flush() return losses.avg