def train(train_loader, net, criterion, optimizer, epoch):
    """Train for one epoch on the training set."""
    batch_time = meter.TimeMeter(True)
    data_time = meter.TimeMeter(True)
    losses = meter.AverageValueMeter()
    prec = meter.ClassErrorMeter(topk=[1], accuracy=True)

    # training mode
    net.train()

    for i, (views, pcs, labels) in enumerate(train_loader):
        batch_time.reset()

        # move both modalities and the labels to the configured device
        views = views.to(device=config.device)
        pcs = pcs.to(device=config.device)
        labels = labels.to(device=config.device)

        preds = net(pcs, views)  # bz x C class logits
        loss = criterion(preds, labels)

        prec.add(preds.detach(), labels.detach())
        losses.add(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % config.print_freq == 0:
            print(f'Epoch: [{epoch}][{i}/{len(train_loader)}]\t'
                  f'Batch Time {batch_time.value():.3f}\t'
                  f'Epoch Time {data_time.value():.3f}\t'
                  f'Loss {losses.value()[0]:.4f}\t'
                  f'Prec@1 {prec.value(1):.3f}')

    print(f'prec at epoch {epoch}: {prec.value(1)}')
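
# A minimal smoke test for the train() loop above -- a sketch, not part of
# the repo. It assumes `meter` is torchnet.meter and that `config` only needs
# a `device` and a `print_freq`; TinyNet and the random tensors are stand-ins
# invented here for illustration.
import types

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torchnet import meter

config = types.SimpleNamespace(device='cpu', print_freq=1)


class TinyNet(nn.Module):
    """Toy two-modality classifier: one linear head per modality, summed."""

    def __init__(self, n_classes=4):
        super().__init__()
        self.pc_fc = nn.Linear(32, n_classes)
        self.view_fc = nn.Linear(32, n_classes)

    def forward(self, pcs, views):
        return self.pc_fc(pcs) + self.view_fc(views)


if __name__ == '__main__':
    views = torch.randn(64, 32)
    pcs = torch.randn(64, 32)
    labels = torch.randint(0, 4, (64,))
    loader = DataLoader(TensorDataset(views, pcs, labels), batch_size=16)

    net = TinyNet()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
    train(loader, net, criterion, optimizer, epoch=0)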
def baselinetrain(dataset):
    print('---------------------------------------------')
    print('baseline training: %s' % opt.dataset)
    begin = datetime.datetime.now()

    opt.train = True
    opt.supervise = True
    opt.multimodal = False

    data_set = dataset
    dataloader = data.DataLoader(data_set,
                                 batch_size=32,
                                 shuffle=True,
                                 num_workers=opt.num_workers)
    loss_meter = meter.AverageValueMeter()
    previous_loss = 1e10

    # modal 0 trains the ResNet (image) branch, any other value the TextNet (text) branch
    if opt.baseline_modal == 0:
        net = ResNet().cuda()
    else:
        net = TextNet(opt.text_cfg).cuda()
    optimizer = net.optimizer
    lr = opt.lr

    test_result = []
    max_ap = 0
    max_epoch = 0
    for epoch in range(opt.epoch):
        print('train epoch', epoch, '.............................')
        loss_meter.reset()
        start = time.time()
        net.train()
        for ii, (train_x_1, train_x_2, train_y) in enumerate(dataloader):
            # pick the input of the chosen modality
            train_x = (train_x_1, train_x_2)[opt.baseline_modal]
            train_x, train_y = train_x.cuda(), train_y.cuda()
            loss = net.train_step(train_x, train_y)
            loss_meter.add(loss.item())
        end = time.time()
        print('epoch training duration: %d s' % (end - start))
        lr = optimizer.param_groups[0]['lr']  # reflect any decay in the log
        print('epoch:{epoch},lr:{lr},loss:{loss}'.format(
            epoch=epoch, loss=loss_meter.value()[0], lr=lr))

        result = baseline_eval(data_set, net)
        if result[-1] > max_ap:
            max_ap = result[-1]
            max_epoch = epoch
        test_result.append(result)

        # decay the learning rate when the epoch loss stops improving
        if loss_meter.value()[0] > previous_loss:
            scale_lr(optimizer, opt.lr_decay)
        previous_loss = loss_meter.value()[0]

        t.save(net.state_dict(),
               'checkpoint/baseline_%s_modal%d_epoch%d.pth' %
               (opt.dataset, opt.baseline_modal, epoch))

    print('total training duration:', datetime.datetime.now() - begin)
    print('---------------------------------------------')
    np.save('result/%s_modal%d_baseline' % (opt.dataset, opt.baseline_modal),
            np.array(test_result))
    return max_epoch
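
# scale_lr() is called above (and in semi_baseline_train below) but is not
# defined in this section. A plausible sketch of the helper, assuming it
# simply multiplies every parameter group's learning rate by the decay
# factor -- a guess at its behavior, not the repo's actual code:
def scale_lr(optimizer, decay):
    for param_group in optimizer.param_groups:
        param_group['lr'] *= decay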
def train(train_loader, model, criterion, optimizer, epoch, cfg):
    """Train for one epoch on the training set."""
    batch_time = meter.TimeMeter(True)
    data_time = meter.TimeMeter(True)
    losses = meter.AverageValueMeter()
    prec = meter.ClassErrorMeter(topk=[1], accuracy=True)

    # training mode
    model.train()

    for i, (shapes, labels) in enumerate(train_loader):
        batch_time.reset()
        # shapes: bz x 12 x 3 x 224 x 224 (12 rendered views per shape)
        labels = labels.long().view(-1)

        if cfg.cuda:
            shapes = shapes.cuda()
            labels = labels.cuda()

        preds = model(shapes)  # bz x C class logits

        if cfg.have_aux:
            # auxiliary classifier head, weighted 0.3 as in GoogLeNet-style training
            preds, aux = preds
            loss_main = criterion(preds, labels)
            loss_aux = criterion(aux, labels)
            loss = loss_main + 0.3 * loss_aux
        else:
            loss = criterion(preds, labels)

        prec.add(preds.detach(), labels.detach())
        losses.add(loss.item(), preds.size(0))  # weight by batch size

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % cfg.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Batch Time {batch_time:.3f}\t'
                  'Epoch Time {data_time:.3f}\t'
                  'Loss {loss:.4f}\t'
                  'Prec@1 {top1:.3f}'.format(
                      epoch, i, len(train_loader),
                      batch_time=batch_time.value(),
                      data_time=data_time.value(),
                      loss=losses.value()[0],
                      top1=prec.value(1)))

    print('prec at epoch {0}: {1}'.format(epoch, prec.value(1)))
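
# The cfg.have_aux branch above matches models that return an auxiliary head
# during training, e.g. torchvision's Inception v3. A minimal sketch of that
# pairing, reusing the 0.3 auxiliary weight from the loop above; whether the
# repo's model is actually Inception v3 is an assumption:
import torch
import torch.nn as nn
import torchvision

model = torchvision.models.inception_v3(aux_logits=True)
model.train()
criterion = nn.CrossEntropyLoss()

x = torch.randn(2, 3, 299, 299)
labels = torch.tensor([0, 1])
preds, aux = model(x)  # InceptionOutputs(logits, aux_logits) in train mode
loss = criterion(preds, labels) + 0.3 * criterion(aux, labels)
loss.backward()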
def train(train_loader, model, criterion, optimizer, epoch, cfg):
    """Train for one epoch on the training set."""
    batch_time = meter.TimeMeter(True)
    data_time = meter.TimeMeter(True)
    losses = meter.AverageValueMeter()
    prec = meter.ClassErrorMeter(topk=[1], accuracy=True)

    # training mode
    model.train()

    for i, (meshes, adjs, labels) in enumerate(train_loader):
        batch_time.reset()
        # meshes: bz x n x 3 -> bz x 3 x n (coordinates channels-first)
        meshes = meshes.transpose(1, 2)
        labels = labels.long().view(-1)

        if cfg.cuda:
            meshes = meshes.cuda()
            adjs = adjs.cuda()  # note: adjacency is loaded but not passed to the model below
            labels = labels.cuda()

        preds, _ = model(meshes)  # bz x C class logits

        loss = criterion(preds, labels)

        prec.add(preds.detach(), labels.detach())
        losses.add(loss.item(), preds.size(0))  # weight by batch size

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % cfg.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Batch Time {batch_time:.3f}\t'
                  'Epoch Time {data_time:.3f}\t'
                  'Loss {loss:.4f}\t'
                  'Prec@1 {top1:.3f}'.format(
                      epoch, i, len(train_loader),
                      batch_time=batch_time.value(),
                      data_time=data_time.value(),
                      loss=losses.value()[0],
                      top1=prec.value(1)))

    print('prec at epoch {0}: {1}'.format(epoch, prec.value(1)))
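
# The transpose above puts coordinates channels-first, which is what
# nn.Conv1d-based point/mesh encoders (PointNet-style) expect. A tiny
# illustration; the encoder here is a stand-in, not the repo's model:
import torch
import torch.nn as nn

meshes = torch.randn(8, 1024, 3)           # bz x n x 3
encoder = nn.Conv1d(3, 64, kernel_size=1)  # per-point feature lift
feats = encoder(meshes.transpose(1, 2))    # bz x 64 x 1024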
def train(train_loader, net, criterion, optimizer, lr_scheduler, epoch):
    """Train for one epoch on the training set."""
    batch_time = meter.TimeMeter(True)
    data_time = meter.TimeMeter(True)
    losses = meter.AverageValueMeter()
    prec = meter.ClassErrorMeter(topk=[1], accuracy=True)

    # training mode
    net.train()

    # weight of the adversarial term against the reconstruction term
    alpha = 0.01

    for i, (views, dps, pcs, labels) in enumerate(train_loader):
        batch_time.reset()

        # Data: multi-views, depth images, point clouds, labels
        views = views.to(device=config.device)
        pcs = pcs.to(device=config.device)
        dps = dps.to(device=config.device)
        labels = labels.to(device=config.device)

        # Network: per-modality features, decoder, discriminator and
        # classifier outputs, fused feature, and final predictions
        (f_pc, f_mv, f_dp, _, _, _,
         de_p, de_v, de_d,
         dis_p, dis_v, dis_d,
         cls_p, cls_v, cls_d,
         fea, preds) = net(pcs, views, dps)

        # real/fake targets sized to the actual batch
        bz = labels.size(0)
        valid = torch.full((bz, 1), 1.0, device=config.device)
        fake = torch.full((bz, 1), 0.0, device=config.device)

        # Generator: reconstruction loss plus adversarial loss pushing the
        # point-cloud and view branches toward the depth branch
        optimizer[0].zero_grad()
        rl1 = (criterion[1](de_d, f_dp) + criterion[1](de_p, f_pc)
               + criterion[1](de_v, f_mv))
        g_loss = (alpha * (criterion[0](dis_v, valid) + criterion[0](dis_p, valid))
                  + (1 - alpha) * rl1)
        g_loss.backward(retain_graph=True)
        optimizer[0].step()
        lr_scheduler[0].step(epoch=epoch)

        # Classifier
        optimizer[2].zero_grad()
        c_loss = (criterion[2](cls_p, f_pc) + criterion[2](cls_v, f_pc)
                  + criterion[2](cls_d, f_pc))  # Different from ARGF
        c_loss.backward(retain_graph=True)
        optimizer[2].step()
        lr_scheduler[2].step(epoch=epoch)

        # Discriminator: depth features are "real", the other two are "fake"
        optimizer[1].zero_grad()
        real_loss = criterion[0](dis_d, valid)
        fake_loss = criterion[0](dis_p, fake) + criterion[0](dis_v, fake)
        d_loss = 0.5 * (real_loss + fake_loss)
        d_loss.backward(retain_graph=True)
        optimizer[1].step()
        lr_scheduler[1].step(epoch=epoch)

        # Fusion: cross-entropy on the fused predictions
        optimizer[3].zero_grad()
        loss = criterion[3](preds, labels)
        loss.backward()
        optimizer[3].step()
        lr_scheduler[3].step(epoch=epoch)

        prec.add(preds.detach(), labels.detach())
        losses.add(loss.item())

        if i % config.print_freq == 0:
            print(f'Epoch: [{epoch}][{i}/{len(train_loader)}]\t'
                  f'Batch Time {batch_time.value():.3f}\t'
                  f'Epoch Time {data_time.value():.3f}\t'
                  f'Loss {losses.value()[0]:.4f}\t'
                  f'Prec@1 {prec.value(1):.3f}')

    print(f'prec at epoch {epoch}: {prec.value(1)}')
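
# The loop above indexes criterion, optimizer, and lr_scheduler as parallel
# lists: 0 = generator, 1 = discriminator, 2 = classifier, 3 = fusion. One
# way such lists might be assembled; the loss choices and the per-component
# parameter accessors (generator_params() etc.) are assumptions, not the
# repo's actual API:
import torch
import torch.nn as nn

criterion = [
    nn.BCELoss(),           # [0] adversarial real/fake loss (assumes sigmoid outputs)
    nn.MSELoss(),           # [1] feature reconstruction loss
    nn.MSELoss(),           # [2] cross-modal feature alignment loss
    nn.CrossEntropyLoss(),  # [3] fused classification loss
]
optimizer = [
    torch.optim.Adam(params, lr=1e-3)
    for params in (net.generator_params(), net.discriminator_params(),
                   net.classifier_params(), net.fusion_params())
]
lr_scheduler = [
    torch.optim.lr_scheduler.StepLR(opt, step_size=20, gamma=0.5)
    for opt in optimizer
]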
def semi_baseline_train(dataset):
    print('---------------------------------------------')
    print('semi baseline training: %s' % opt.dataset)
    begin = datetime.datetime.now()

    opt.train = True
    opt.supervise = False
    opt.multimodal = False

    data_set = dataset
    dataloader = data.DataLoader(data_set,
                                 batch_size=4,
                                 shuffle=True,
                                 num_workers=opt.num_workers)
    loss_meter = meter.AverageValueMeter()
    previous_loss = 1e10

    model = ClusterLossModel(opt.baseline_modal).cuda()
    optimizer = model.optimizer
    lr = opt.lr

    test_result = []
    first_epoch_loss = []
    epoch_loss = []
    max_ap = 0
    max_epoch = 0
    for epoch in range(opt.epoch):
        print('train epoch', epoch, '.............................')
        loss_meter.reset()
        start = time.time()
        model.train()
        for ii, (train_left_x, train_right_x, train_y,
                 unlabel_left_x, unlabel_right_x) in enumerate(dataloader):
            # pick the chosen modality, then flatten the nested batches
            train_x = (train_left_x, train_right_x)[opt.baseline_modal]
            train_x = t.cat(tuple(train_x))
            train_y = t.cat(tuple(train_y))
            unlabel_x = (unlabel_left_x, unlabel_right_x)[opt.baseline_modal]
            unlabel_x = t.cat(tuple(unlabel_x))
            train_x, train_y, unlabel_x = (train_x.cuda(), train_y.cuda(),
                                           unlabel_x.cuda())

            loss, class_loss, ent_loss = model.train_step(
                train_x, train_y, unlabel_x)
            loss_meter.add(loss.item())

            if epoch == 0:
                # per-batch loss trace for the first epoch only
                first_epoch_loss.append([class_loss.item(), ent_loss.item()])
                print('class_loss:', class_loss.item())
                print('ent_loss:', ent_loss.item())
        # record the last batch's losses for this epoch
        epoch_loss.append([class_loss.item(), ent_loss.item()])

        end = time.time()
        print('epoch training duration: %d s' % (end - start))
        lr = optimizer.param_groups[0]['lr']  # reflect any decay in the log
        print('epoch:{epoch},lr:{lr},loss:{loss}'.format(
            epoch=epoch, loss=loss_meter.value()[0], lr=lr))

        result = semi_eval(data_set, model)
        if result[-1] > max_ap:
            max_ap = result[-1]
            max_epoch = epoch
        test_result.append(result)

        # decay the learning rate when the epoch loss stops improving
        if loss_meter.value()[0] > previous_loss:
            scale_lr(optimizer, opt.lr_decay)
        previous_loss = loss_meter.value()[0]

        t.save(model.state_dict(),
               'checkpoint/semi_baseline_%s_modal%d_epoch%d.pth' %
               (opt.dataset, opt.baseline_modal, epoch))

    print('total training duration:', datetime.datetime.now() - begin)
    print('---------------------------------------------')
    np.save('result/%s_modal%d_cluster_baseline' %
            (opt.dataset, opt.baseline_modal), np.array(test_result))
    np.save('result/%s_cluster_baseline_first_epoch_loss' % opt.dataset,
            np.array(first_epoch_loss))
    np.save('result/%s_cluster_baseline_epoch_loss' % opt.dataset,
            np.array(epoch_loss))
    return max_epoch
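
# ClusterLossModel.train_step() is not shown in this section. A sketch of the
# contract the loop above relies on -- supervised cross-entropy on the labeled
# batch plus an entropy-minimization term on the unlabeled batch -- assuming a
# standard entropy regularizer; ent_weight is a hypothetical hyperparameter:
import torch
import torch.nn.functional as F


def train_step(self, train_x, train_y, unlabel_x):
    class_loss = F.cross_entropy(self.forward(train_x), train_y)

    # push unlabeled predictions toward confident (low-entropy) clusters
    probs = F.softmax(self.forward(unlabel_x), dim=1)
    ent_loss = -(probs * torch.log(probs + 1e-8)).sum(dim=1).mean()

    loss = class_loss + self.ent_weight * ent_loss
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
    return loss, class_loss, ent_loss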