import time

import torch
import torch.nn as nn
import molgrid
import wandb


# Contrastive pre-training epoch (query/key grid views) with an optional
# semi-supervised affinity loss on labeled complexes.
def train(train_loader, model, criterion, optimizer, gmaker, tensorshape, epoch, args):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    slosses = AverageMeter('SuperLoss', ':.4e')
    # top1 = AverageMeter('Acc@1', ':6.2f')
    # top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader),
                             [batch_time, data_time, losses, slosses],
                             prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    total_loss = 0
    super_loss = 0
    for i, (lengths, center, coords, types, radii, afflabel) in enumerate(train_loader):
        deltaG = afflabel.cuda(args.gpu, non_blocking=True)
        types = types.cuda(args.gpu, non_blocking=True)
        radii = radii.squeeze().cuda(args.gpu, non_blocking=True)
        coords = coords.cuda(args.gpu, non_blocking=True)
        coords_q = torch.empty(*coords.shape, device=coords.device, dtype=coords.dtype)
        batch_size = coords.shape[0]
        if i == 0:
            print(batch_size)
        if batch_size != types.shape[0] or batch_size != radii.shape[0]:
            raise RuntimeError("Inconsistent batch sizes in dataset outputs")

        output1 = torch.empty(batch_size, *tensorshape, dtype=coords.dtype, device=coords.device)
        output2 = torch.empty(batch_size, *tensorshape, dtype=coords.dtype, device=coords.device)
        # apply the random rotation/translation and voxelize into the
        # query (output1) and key (output2) grids
        for idx in range(batch_size):
            t = molgrid.Transform(
                molgrid.float3(*(center[idx].numpy().tolist())),
                random_translate=2, random_rotation=True)
            t.forward(coords[idx][:lengths[idx]], coords_q[idx][:lengths[idx]])
            gmaker.forward(t.get_rotation_center(), coords_q[idx][:lengths[idx]],
                           types[idx][:lengths[idx]], radii[idx][:lengths[idx]],
                           molgrid.tensor_as_grid(output1[idx]))
            t.forward(coords[idx][:lengths[idx]], coords[idx][:lengths[idx]])
            gmaker.forward(t.get_rotation_center(), coords[idx][:lengths[idx]],
                           types[idx][:lengths[idx]], radii[idx][:lengths[idx]],
                           molgrid.tensor_as_grid(output2[idx]))

        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        output, target, preds = model(im_q=output1, im_k=output2)
        loss = criterion(output, target)

        if args.semi_super:
            if i == 0:
                print(preds[:10])
                print(deltaG[:10])
            # only complexes with a known affinity (deltaG > 0) contribute to the
            # supervised loss; skip batches with no labeled examples
            lossmask = deltaG.gt(0)
            if lossmask.any():
                sloss = torch.sum(lossmask * nn.functional.mse_loss(
                    preds, deltaG, reduction='none')) / lossmask.sum()
                super_loss += sloss.item()
                loss += sloss
                slosses.update(sloss.item(), lossmask.sum().item())
        total_loss += loss.item()

        # acc1/acc5 are (K+1)-way contrast classifier accuracy
        # measure accuracy and record loss
        # acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), output1.size(0))
        # top1.update(acc1[0], images[0].size(0))
        # top5.update(acc5[0], images[0].size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        if args.semi_super:
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)

    if args.semi_super:
        wandb.log({"Supervised Loss": super_loss / len(train_loader)}, commit=False)
    wandb.log({"Total Loss": total_loss / len(train_loader)})
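
# AverageMeter and ProgressMeter are used above but not defined in this excerpt.
# A minimal sketch of what they are assumed to look like, following the standard
# PyTorch ImageNet-example helpers, is given below.
class AverageMeter(object):
    """Tracks the current value, running sum, count, and average of a metric."""

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)


class ProgressMeter(object):
    """Prints a one-line summary of several AverageMeters for a given batch index."""

    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    def _get_batch_fmtstr(self, num_batches):
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'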
# Evaluate the affinity regressor on the validation set; returns (Pearson R, RMSE).
def validate(val_loader, model, criterion, gmaker, tensorshape, args):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    r = AverageMeter('Pearson R', ':6.2f')
    rmse = AverageMeter('RMSE', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, r, rmse],
        prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    predictions = []
    targets = []
    with torch.no_grad():
        end = time.time()
        for i, (lengths, center, coords, types, radii, labels) in enumerate(val_loader):
            types = types.cuda(args.gpu, non_blocking=True)
            radii = radii.squeeze().cuda(args.gpu, non_blocking=True)
            coords = coords.cuda(args.gpu, non_blocking=True)
            coords_q = torch.empty(*coords.shape, device=coords.device, dtype=coords.dtype)
            batch_size = coords.shape[0]
            if batch_size != types.shape[0] or batch_size != radii.shape[0]:
                raise RuntimeError("Inconsistent batch sizes in dataset outputs")

            output1 = torch.empty(batch_size, *tensorshape, dtype=coords.dtype, device=coords.device)
            # apply a random rotation/translation and voxelize into the input grid
            for idx in range(batch_size):
                t = molgrid.Transform(
                    molgrid.float3(*(center[idx].numpy().tolist())),
                    random_translate=2, random_rotation=True)
                t.forward(coords[idx][:lengths[idx]], coords_q[idx][:lengths[idx]])
                gmaker.forward(t.get_rotation_center(), coords_q[idx][:lengths[idx]],
                               types[idx][:lengths[idx]], radii[idx][:lengths[idx]],
                               molgrid.tensor_as_grid(output1[idx]))
            del lengths, center, coords, types, radii
            torch.cuda.empty_cache()
            target = labels.cuda(args.gpu, non_blocking=True)

            # compute output
            prediction = model(output1)
            loss = criterion(prediction, target)

            # measure accuracy and record loss
            r_val, rmse_val = accuracy(prediction, target)
            losses.update(loss.item(), output1.size(0))
            r.update(r_val, output1.size(0))
            rmse.update(rmse_val, output1.size(0))

            predictions += prediction.detach().flatten().tolist()
            targets += target.detach().flatten().tolist()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

    r_avg, rmse_avg = accuracy(predictions, targets)
    return r_avg, rmse_avg
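
# `accuracy` is called above both on per-batch tensors and on Python lists at the
# end of the epoch, and is expected to return a (Pearson R, RMSE) pair.  It is not
# defined in this excerpt; the function below is an assumed, plausible
# implementation consistent with that usage, not the original helper.
def accuracy(predictions, targets):
    """Return (Pearson correlation, RMSE) between predictions and targets."""
    preds = torch.as_tensor(predictions).detach().double().flatten().cpu()
    targs = torch.as_tensor(targets).detach().double().flatten().cpu()
    rmse_val = torch.sqrt(torch.mean((preds - targs) ** 2))
    # Pearson R as a centered dot product, avoiding a SciPy dependency
    pc, tc = preds - preds.mean(), targs - targs.mean()
    r_val = (pc * tc).sum() / (pc.norm() * tc.norm() + 1e-12)
    return r_val.item(), rmse_val.item()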
# Contrastive pre-training epoch without the supervised affinity term.
def train(train_loader, model, criterion, optimizer, gmaker, tensorshape, epoch, args):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    # top1 = AverageMeter('Acc@1', ':6.2f')
    # top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader),
                             [batch_time, data_time, losses],
                             prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    total_loss = 0
    end = time.time()
    for i, (lengths, center, coords, types, radii, _) in enumerate(train_loader):
        types = types.cuda(args.gpu, non_blocking=True)
        radii = radii.squeeze().cuda(args.gpu, non_blocking=True)
        coords = coords.cuda(args.gpu, non_blocking=True)
        coords_q = torch.empty(*coords.shape, device=coords.device, dtype=coords.dtype)
        batch_size = coords.shape[0]
        if batch_size != types.shape[0] or batch_size != radii.shape[0]:
            raise RuntimeError("Inconsistent batch sizes in dataset outputs")

        output1 = torch.empty(batch_size, *tensorshape, dtype=coords.dtype, device=coords.device)
        output2 = torch.empty(batch_size, *tensorshape, dtype=coords.dtype, device=coords.device)
        # apply the random rotation/translation and voxelize into the
        # query (output1) and key (output2) grids
        for idx in range(batch_size):
            t = molgrid.Transform(
                molgrid.float3(*(center[idx].numpy().tolist())),
                random_translate=2, random_rotation=True)
            t.forward(coords[idx][:lengths[idx]], coords_q[idx][:lengths[idx]])
            gmaker.forward(t.get_rotation_center(), coords_q[idx][:lengths[idx]],
                           types[idx][:lengths[idx]], radii[idx][:lengths[idx]],
                           molgrid.tensor_as_grid(output1[idx]))
            t.forward(coords[idx][:lengths[idx]], coords[idx][:lengths[idx]])
            gmaker.forward(t.get_rotation_center(), coords[idx][:lengths[idx]],
                           types[idx][:lengths[idx]], radii[idx][:lengths[idx]],
                           molgrid.tensor_as_grid(output2[idx]))
        del lengths, center, coords, types, radii
        torch.cuda.empty_cache()

        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        output, target = model(im_q=output1, im_k=output2)
        loss = criterion(output, target)
        total_loss += float(loss.item())

        # acc1/acc5 are (K+1)-way contrast classifier accuracy
        # measure accuracy and record loss
        # acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), output1.size(0))
        # top1.update(acc1[0], images[0].size(0))
        # top5.update(acc5[0], images[0].size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)

    return total_loss / len(train_loader.dataset)
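
# A minimal, hypothetical driver for the pre-training loop above, showing how the
# `gmaker` and `tensorshape` arguments might be constructed with molgrid.  The
# dataset, loader settings, loss, optimizer, and every `args.*` field used here
# (num_types, batch_size, workers, lr, momentum, weight_decay, epochs, gpu) are
# illustrative assumptions, not definitions from this module.
def example_pretraining_driver(model, dataset, args):
    gmaker = molgrid.GridMaker()  # default resolution/dimension settings
    # grid_dimensions(ntypes) gives the per-example voxel grid shape
    tensorshape = tuple(gmaker.grid_dimensions(args.num_types))

    train_loader = torch.utils.data.DataLoader(
        dataset, batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True, drop_last=True)

    # MoCo-style (K+1)-way contrastive objective over the (output, target)
    # pair returned by the model
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    for epoch in range(args.epochs):
        avg_loss = train(train_loader, model, criterion, optimizer,
                         gmaker, tensorshape, epoch, args)
        print('epoch {}: avg contrastive loss {:.4f}'.format(epoch, avg_loss))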
# Train the affinity regression head on top of the pre-trained encoder, which is
# kept in eval mode (frozen-feature protocol); returns (Pearson R, RMSE).
def train(train_loader, model, criterion, optimizer, gmaker, tensorshape, epoch, args):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    r = AverageMeter('Pearson R', ':6.2f')
    rmse = AverageMeter('RMSE', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses],
        prefix="Epoch: [{}]".format(epoch))

    targets = []
    predictions = []

    """
    Switch to eval mode:
    Under the protocol of linear classification on frozen features/models,
    it is not legitimate to change any part of the pre-trained model.
    BatchNorm in train mode may revise running mean/std (even if it receives
    no gradient), which are part of the model parameters too.
    """
    model.eval()

    total_loss = 0
    end = time.time()
    for i, (lengths, center, coords, types, radii, labels) in enumerate(train_loader):
        types = types.cuda(args.gpu, non_blocking=True)
        radii = radii.squeeze().cuda(args.gpu, non_blocking=True)
        coords = coords.cuda(args.gpu, non_blocking=True)
        coords_q = torch.empty(*coords.shape, device=coords.device, dtype=coords.dtype)
        batch_size = coords.shape[0]
        if batch_size != types.shape[0] or batch_size != radii.shape[0]:
            raise RuntimeError("Inconsistent batch sizes in dataset outputs")

        output1 = torch.empty(batch_size, *tensorshape, dtype=coords.dtype, device=coords.device)
        # apply a random rotation/translation and voxelize into the input grid
        for idx in range(batch_size):
            t = molgrid.Transform(
                molgrid.float3(*(center[idx].numpy().tolist())),
                random_translate=2, random_rotation=True)
            t.forward(coords[idx][:lengths[idx]], coords_q[idx][:lengths[idx]])
            gmaker.forward(t.get_rotation_center(), coords_q[idx][:lengths[idx]],
                           types[idx][:lengths[idx]], radii[idx][:lengths[idx]],
                           molgrid.tensor_as_grid(output1[idx]))
        del lengths, center, coords, types, radii
        torch.cuda.empty_cache()
        target = labels.cuda(args.gpu, non_blocking=True)

        # compute output
        prediction = model(output1)
        loss = criterion(prediction, target)

        # measure accuracy and record loss
        r_val, rmse_val = accuracy(prediction, target)
        losses.update(loss.item(), output1.size(0))
        r.update(r_val, output1.size(0))
        rmse.update(rmse_val, output1.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        predictions += prediction.detach().flatten().tolist()
        targets += target.detach().flatten().tolist()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)

    r_avg, rmse_avg = accuracy(predictions, targets)
    return r_avg, rmse_avg
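
# The docstring above describes the frozen-feature protocol; in that setting the
# encoder weights would also be frozen so that only the regression head is
# trained.  A minimal sketch, assuming the head is a final nn.Linear attribute
# named `fc` (an illustrative name, not necessarily this model's attribute):
def freeze_encoder_for_linear_probe(model):
    for name, param in model.named_parameters():
        if not name.startswith('fc'):
            param.requires_grad = False
    # reinitialize the trainable head
    model.fc.weight.data.normal_(mean=0.0, std=0.01)
    model.fc.bias.data.zero_()
    # the optimizer should then be built only over the parameters that still
    # require gradients, e.g. filter(lambda p: p.requires_grad, model.parameters())
    return model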