    def __train_epoch(self):
        self.model.train()
        losses = []
        progress = tqdm(enumerate(self.train_loader),
                        total=len(self.train_loader),
                        desc='Training',
                        file=sys.stdout)
        for batch_idx, data in progress:
            samples, targets = data
            samples1, samples2 = samples
            if self.cuda:
                samples1 = samples1.cuda()
                samples2 = samples2.cuda()
                targets = targets.cuda()

            self.optimizer.zero_grad()
            outputs = self.model((samples1, samples2))
            #output1, output2 = self.model((samples1, samples2))
            loss = self.criterion(outputs, targets.float())
            loss.backward()
            self.optimizer.step()

            losses.append(loss.item())
            progress.set_description('Mean Training Loss: {:.4f}'.format(
                np.mean(losses)))

        return np.mean(losses)
    def run_epoch(self, phase, data_loader, criterion):
        if phase == 'train':
            self.model.train()
        else:
            self.model.eval()
        running_loss = 0.
        for data_dict in data_loader:
            for name in data_dict:
                data_dict[name] = data_dict[name].to(device=self.device,
                                                     non_blocking=True)
            if phase == 'train':
                self.optimizer.zero_grad()
                with torch.enable_grad():
                    pr_decs = self.model(data_dict['input'])
                    loss = criterion(pr_decs, data_dict)
                    loss.backward()
                    self.optimizer.step()
            else:
                with torch.no_grad():
                    pr_decs = self.model(data_dict['input'])
                    loss = criterion(pr_decs, data_dict)

            running_loss += loss.item()
        epoch_loss = running_loss / len(data_loader)
        print('{} loss: {}'.format(phase, epoch_loss))
        return epoch_loss
Example 3
def train(train_loader, model, criterion, optimizer, args):
    # switch to train mode

    model.train()
    if args.freeze_BN:
        for m in model.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()

    run_loss = 0
    for i, (input, target) in enumerate(train_loader):
        if args.gpu is not None:
            input = input.cuda(args.gpu, non_blocking=True)
        target = target.cuda(args.gpu, non_blocking=True)

        # compute output
        output = model(input)
        loss = criterion(output, target)
        run_loss += loss.item()
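        # NOTE: num_avg_iter is assumed to be defined at module level (e.g. num_avg_iter = 100)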
        if i % num_avg_iter == 0:
            print('Training loss running avg', run_loss / float(num_avg_iter))
            run_loss = 0

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Example 4
    def train(self):
        max_iter=80000
        lr=0.00002
        decay_ratio=0.333
        display=20
        snapshot=20000
        step_index=0
        stepvalues=[40000,60000,80000]
        g_steps=stepvalues[0]
        
        param_groups=[]
        for key, value in self.siamese.named_parameters():
            if value.requires_grad:
                param_groups.append({'params': value, 'lr': lr})
            
        optimizer = optim.SGD(param_groups, lr=lr, momentum=0.9)

        step_index=0
        step=0
        for i in range(max_iter):
            pair_samples, y_np=self.pair_selector.get_data()
            
            pos_samples=Variable(torch.FloatTensor(pair_samples[:,0,:,:,:]).cuda())   #[N,C,H,W]
            neg_samples=Variable(torch.FloatTensor(pair_samples[:,1,:,:,:]).cuda())
            y=Variable(torch.FloatTensor(y_np).cuda())
#            y=torch.FloatTensor(y).contiguous().cuda(async=True)
            
            pos_feat, neg_feat=self.siamese(pos_samples, neg_samples)
            
            loss, dist=self.siamese_loss(pos_feat,neg_feat,y)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            rate=lr*np.power(decay_ratio,step/g_steps)
#            for param_group in optimizer.param_groups:
#                param_group['lr']=rate        
            if i%display==0:
                print('[Info][%d/%d] loss: %f, learn rate: %e' % (i, max_iter, loss.item(), lr))
                dist=dist.data.cpu().numpy()
                pos_labels=(y_np==1)
                neg_labels=(y_np==0)
                
                pos_dist=np.mean(dist[pos_labels],axis=0) if len(np.nonzero(pos_labels)[0])>0 else 0 
                neg_dist=np.mean(dist[neg_labels],axis=0) if len(np.nonzero(neg_labels)[0])>0 else 0 
                print('pos pair dist: %f\nneg pair dist: %f'%(pos_dist,neg_dist))
            if i==stepvalues[step_index]:
                for param_group in optimizer.param_groups:
                    param_group['lr']=rate   
                print('learn rate decay: %e'%rate)
                step=0
                lr=rate
                g_steps=stepvalues[step_index+1]-stepvalues[step_index]
                step_index+=1
            if i>0 and i%snapshot==0:
                torch.save(self.siamese.state_dict(), 'models_siamese/model_iter_%d.pkl'%i)
                print('Snapshot to models_siamese/model_iter_%d.pkl'%i)
            step+=1
        torch.save(self.siamese.state_dict(), 'models_siamese/model_iter_%d.pkl'%max_iter)
Example 5
def train(args, model, device, train_loader, optimizer, loss_func, epoch):
    model.train()
    train_loss = 0
    running_loss = 0
    running_datasize = 0
    for batch_idx, (data, targets) in enumerate(train_loader):
        data = data.to(device)
        targets = [target.to(device) for target in targets]
        optimizer.zero_grad()
        predictions = model(data)
        location_loss, confidence_loss = loss_func(predictions, targets)

        loss = location_loss + confidence_loss
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        running_loss += loss.item()
        running_datasize += 1

        if (batch_idx + 1) % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.4e}'.format(
                epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
                100. * (batch_idx + 1) / len(train_loader),
                running_loss / running_datasize))
            running_loss = 0
            running_datasize = 0

    return train_loss
    def run_epoch(self, phase, data_loader, criterion):
        """封装一个epoch中的forward、loss、backward过程"""
        if phase == 'train':
            self.model.train()
        else:
            self.model.eval()
        running_loss = 0.
        # visualize the training process
        data_loader = iter(data_loader)
        for i in tqdm(range(len(data_loader))):
            data_dict = next(data_loader)
            for name in data_dict:
                data_dict[name] = data_dict[name].to(device=self.device, non_blocking=True)
            if phase == 'train':
                self.optimizer.zero_grad()
                with torch.enable_grad():
                    # the forward pass only calls the model; need to check where the decoder is invoked
                    pr_decs = self.model(data_dict['input'])
                    loss = criterion(pr_decs, data_dict)
                    loss.backward()
                    self.optimizer.step()
            else:
                with torch.no_grad():
                    pr_decs = self.model(data_dict['input'])
                    loss = criterion(pr_decs, data_dict)

            running_loss += loss.item()
        epoch_loss = running_loss / len(data_loader)
        print('{} loss: {}'.format(phase, epoch_loss))
        return epoch_loss
Example 7
def train_loop(dataloader, model, loss_fn, optimizer, scheduler=None):
    size = len(dataloader.dataset)
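    # NOTE: device is assumed to be defined at module level, e.g. device = torch.device("cuda" if torch.cuda.is_available() else "cpu")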

    for batch, dic in enumerate(dataloader):
        x = dic['x'].to(device)
        z = dic['z'].to(device)
        label = dic['label'].to(device)
        pred = model(x, z)
        # print(x)
        # print(z)
        # print(label)
        # print(pred)
        loss = loss_fn(pred, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 10 == 0:
            loss, current = loss.item(), batch * len(x)
            print("loss : {:>7f}  [{:>5d}/{:>5d}]".format(loss, current, size))
        #break
    if scheduler:
        scheduler.step()
Example 8
    def train_batch(self, X, Y):
        X, Y = X.to(self.device), Y.to(self.device)
        outputs = self.model(X)
        loss = self.criterion(outputs, Y)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return float(loss)
Example 9
def train(train_loader, model, criterion, optimizer, epoch):

    model.train()
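    # NOTE: args, writer, utils and save_checkpoint are assumed to be provided by the enclosing module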
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    end = time.time()
    for i, (img, target) in enumerate(train_loader):
        # measure data loading time
        if img is None:
            continue
        data_time.update(time.time() - end)
        encoded_target = Variable(utils.soft_encode_ab(target).float(), requires_grad=False).cuda()
        var = Variable(img.float(), requires_grad=True).cuda()
        # compute output
        output = model(var)
        # record loss
        loss = criterion(output, encoded_target)
        if torch.isnan(loss):
            print('NaN value encountered in loss.')
            continue
        # measure accuracy and record loss
        #prec1, = accuracy(output.data, target)
        losses.update(loss.data, var.size(0))

        # compute gradient and do SGD step
        backwardTime = time.time()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)

        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                   epoch, i, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses))
        if (i+1) % 5000 == 0:
            print("Saving checkpoint...")
            save_checkpoint({
                'epoch': epoch,
                'state_dict': model.state_dict(),
            }, args.reduced)
        if (i+1) % 1000 == 0:
            start = time.time()
            batch_num = np.maximum(args.batch_size//4,2)
            idx = i + epoch*len(train_loader)
            imgs = utils.getImages(img, target, output.detach().cpu(), batch_num)
            writer.add_image('data/imgs_gen', imgs, idx)
            print("Img conversion time: ", time.time() - start)
        writer.add_scalar('data/loss_train', losses.avg, i + epoch*len(train_loader))
Example 10
def train(trainloader, t_model, s_model, criterion, optimizer, epoch, use_cuda,
          args):
    # switch to train mode
    global kd_loss_fun, cmclloss_v1, indeploss, mclloss
    t_model.eval()
    s_model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    losses_kl = AverageMeter()
    losses_ce = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    for batch_idx, (batch_data) in enumerate(trainloader):
        # measure data loading time
        if len(batch_data) == 2:
            inputs, targets = batch_data
        else:
            inputs, targets, indexes = batch_data

        data_time.update(time.time() - end)
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)  # async is a reserved word in Python 3.7+
        inputs, targets = torch.autograd.Variable(
            inputs), torch.autograd.Variable(targets)
        # compute output
        t_outputs = t_model(inputs)
        s_outputs = s_model(inputs)
        t_prec1, t_prec5 = accuracy(t_outputs.data, targets.data, topk=(1, 5))
        # measure accuracy and record loss
        prec1, prec5 = accuracy(s_outputs.data, targets.data, topk=(1, 5))
        loss_kl = kd_loss_fun(s_outputs, t_outputs.detach(), targets)
        loss_ce = criterion(s_outputs, targets)
        loss = loss_kl
        losses.update(loss.item(), inputs.size(0))
        losses_kl.update(loss_kl.item(), inputs.size(0))
        losses_ce.update(loss_ce.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        progress_bar(
            batch_idx, len(trainloader),
            'Loss: %.2f | KLloss: %.2f | ce_loss: %.2f | Top1: %.2f | Top5: %.2f | t_top1: %.2f | t_top5: %.2f'
            % (losses.avg, loss_kl, loss_ce, top1.avg, top5.avg, t_prec1,
               t_prec5))
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

    return (losses.avg, losses_kl.avg, losses_ce.avg, top1.avg, top5.avg)
Example 11
    def train(self):
        self.model.train()
        val_loss, val_psnr = self.evaluate(self.step - 1, self.start_epoch)
        print(
            "[*] Preliminary check: Epoch: {} Step: {} Validation Loss: {:.5f} PSNR: {:.3f}"
            .format(self.start_epoch, (self.step - 1), val_loss, val_psnr))
        print('-' * 40)

        # Resume training from stopped epoch
        for epoch in range(self.start_epoch, self.cfg.num_epochs):

            step_loss = 0
            start_time = time.time()

            for idx, (noisy, clean) in enumerate(self.train_loader, start=1):
                # Input/Target
                noisy = noisy.to(self.device, dtype=torch.float)
                clean = clean.to(self.device, dtype=torch.float)

                # BackProp
                self.optimizer.zero_grad()
                output = self.model(noisy)
                loss = self.criterion(output, clean)
                loss.backward()
                self.optimizer.step()

                # STATS
                step_loss += loss.item()
                if idx % self.cfg.verbose_step == 0:
                    val_loss, val_psnr = self.evaluate(self.step, epoch)
                    self.writer.add_scalar("Loss/Train",
                                           step_loss / self.cfg.verbose_step,
                                           self.step)
                    self.writer.add_scalar("Loss/Validation", val_loss,
                                           self.step)
                    self.writer.add_scalar(
                        "Stats/LR", self.optimizer.param_groups[0]['lr'],
                        self.step)
                    self.writer.add_scalar("Stats/PSNR", val_psnr, self.step)

                    print(
                        "[{}/{}/{}] Loss [T/V]: [{:.5f}/{:.5f}] PSNR: {:.3f} LR: {} Time: {:.1f} Output: [{}-{}]"
                        .format(epoch, self.step, idx,
                                (step_loss / self.cfg.verbose_step), val_loss,
                                val_psnr, self.optimizer.param_groups[0]['lr'],
                                (time.time() - start_time),
                                torch.min(output).item(),
                                torch.max(output).item()))

                    self.step += 1
                    if self.cfg.scheduler == "step":
                        self.lr_sch.step()
                    elif self.cfg.scheduler == "plateau":
                        self.lr_sch.step(metrics=val_loss)

                    step_loss, start_time = 0, time.time()
                    self.model.train()
Example 12
def train_model(model, criterion, optimizer_ft, scheduler, epoch):

    scheduler.step()
    lambda1 = sigmoid_rampup(epoch, args.LabelWt)
    train_loss = AverageMeter()
    data_time = AverageMeter()
    batch_time = AverageMeter()

    model.train()
    correct = 0
    total = 0
    end = time.time()
    for batch_idx, (inputs, targets, weights) in enumerate(dataloaders_train):
        if use_gpu:
            inputs = Variable(inputs.cuda())
            targets = Variable(targets.cuda())
            weights = Variable(weights.cuda())
        data_time.update(time.time() - end)

        optimizer_ft.zero_grad()

        outputs = model(inputs)

        if args.stage2:
            loss = criterion(outputs, targets, weights)
        else:
            loss = criterion(outputs, targets, lambda1)

        loss.backward()
        optimizer_ft.step()

        train_loss.update(loss.item(), inputs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        _, predicted = outputs.max(1)
        correct += predicted.eq(targets).sum().item()
        total += inputs.size(0)

        if batch_idx % 10 == 0:
            print('Epoch: [{}][{}/{}] '
                  'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                  'Data: {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Loss: {train_loss.val:.4f} ({train_loss.avg:.4f}) '
                  'Accu: {:.2f}'.format(epoch,
                                        batch_idx,
                                        len(dataloaders_train),
                                        100. * correct / total,
                                        batch_time=batch_time,
                                        data_time=data_time,
                                        train_loss=train_loss))

    writer.add_scalar('training acc (train)', 100. * correct / total, epoch)
    writer.add_scalar('loss', train_loss.avg, epoch)
Example 13
def train_model(model, criterion, optimizer, dataload, valdataloader, num_epochs=2):
    setDir(args.logdir)
    writer = SummaryWriter(args.logdir)
    max_val_acc = 0
    lr = args.lr
    for epoch in range(num_epochs):
        if epoch%5 == 0:
            lr = lr / 2
            optimizer = optim.Adam(model.parameters(), lr=lr)
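            # NOTE: re-creating the optimizer also resets Adam's running moment estimates; a torch.optim.lr_scheduler would preserve them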
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        dataset_size = len(dataload.dataset)
        epoch_loss = 0
        step = 0  # number of minibatches processed
        val_loss = 0
        val_epoch_acc = 0
        val_step = 0
        for x, y in dataload:  # loop over the dataset in 100 minibatches of batch_size=4
            optimizer.zero_grad()  # zero the gradients (dw, db, ...) before every minibatch
            inputs = x.to(device)
            labels = y.to(device)
            outputs = model(inputs)  # forward pass

            loss = cross_entropy2d(outputs, torch.squeeze(labels, dim=1).long())  # compute the loss
            loss.backward()  # backward pass: compute the gradients
            optimizer.step()  # update the parameters once; every Optimizer implements step() for this
            epoch_loss += loss.item()
            step += 1

            print("EPOCH:%d,%d/%d,train_loss:%0.3f" % (epoch, step, dataset_size // dataload.batch_size, loss.item()))
        with torch.no_grad():  # no gradients are needed for validation
            for x, y in valdataloader:
                inputs = x.to(device)
                labels = y.to(device)
                outputs = model(inputs)  # forward pass

                val_acc = accuracy(outputs, torch.squeeze(labels, dim=1).long())
                loss = cross_entropy2d(outputs, torch.squeeze(labels, dim=1).long())  # compute the loss

                val_epoch_acc += val_acc.item()
                val_loss += loss.item()
                val_step += 1
        val_epoch_acc = val_epoch_acc / val_step

        # if val_epoch_acc > max_val_acc:
        #  torch.save(model.state_dict(), os.path.join(args.weight, 'weights_%d.pth' % epoch))  # state_dict() holds all of the model's parameters
        #     max_val_acc = val_epoch_acc
        writer.add_scalars('train_epoch_loss', {'epoch_loss': epoch_loss}, epoch)
        writer.add_scalars('val_epoch_loss', {'val_loss': val_loss}, epoch)
        writer.add_scalars('val_epoch_acc', {'val_epoch_acc': val_epoch_acc}, epoch)
        print("epoch %d loss:%0.3f  val_loss:%0.3f, val_acc:%0.3f" % (epoch, epoch_loss, val_loss, val_epoch_acc))

        torch.save(model.state_dict(), os.path.join(args.weight, 'weights_%d.pth' % epoch))
        torch.cuda.empty_cache()
    writer.close()

    return model
Example 14
def train(epoch):
    fcn_model.train()  # train mode
    total_loss = 0.
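    # NOTE: fcn_model, train_loader, criterion, optimizer, use_cuda, learning_rate and epoch_num are assumed module-level globals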
    for batch_idx, (imgs, labels) in enumerate(train_loader):
        N = imgs.size(0)
        if use_cuda:
            imgs = imgs.cuda()
            labels = labels.cuda()

        imgs_tensor = Variable(imgs)  # torch.Size([2, 3, 320, 320])
        labels_tensor = Variable(labels)  # torch.Size([2, 320, 320])
        out = fcn_model(imgs_tensor)  # torch.Size([2, 21, 320, 320])

        # with open('./result.txt', 'r+') as f:
        #     f.write(str(out.detach().numpy()))
        #     f.write("\n")

        loss = criterion(out, labels_tensor)
        loss /= N
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()  # update all arguments
        total_loss += loss.item()  # accumulate as a Python float

        # if batch_idx == 2:
        #     break

        if (batch_idx) % 20 == 0:
            print('train epoch [%d/%d], iter[%d/%d], lr %.7f, aver_loss %.5f' %
                  (epoch, epoch_num, batch_idx, len(train_loader),
                   learning_rate, total_loss / (batch_idx + 1)))

        # # visiualize scalar
        # if epoch % 10 == 0:
        #     label_img = tools.labelToimg(labels[0])
        #     net_out = out[0].data.max(1)[1].squeeze_(0)
        #     out_img = tools.labelToimg(net_out)
        #     writer.add_scalar("loss", loss, epoch)
        #     writer.add_scalar("total_loss", total_loss, epoch)
        #     writer.add_scalars('loss/scalar_group', {"loss": epoch * loss,
        #                                              "total_loss": epoch * total_loss})
        #     writer.add_image('Image', imgs[0], epoch)
        #     writer.add_image('label', label_img, epoch)
        #     writer.add_image("out", out_img, epoch)

        assert not np.isnan(total_loss)
        assert not np.isinf(total_loss)

    # model save
    if (epoch) % 20 == 0:
        torch.save(fcn_model.state_dict(), './pretrained_models/model%d.pth' %
                   epoch)  # save a checkpoint every 20 epochs
    total_loss /= len(train_loader)
    print('train epoch [%d/%d] average_loss %.5f' %
          (epoch, epoch_num, total_loss))
Example 15
def train(epoch_idx, mAP):
    # return
    f.train()
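    # NOTE: f, opt, train_loader, device, CELoss, AHLoss, recoder and the mAP trackers are assumed module-level globals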
    logging.info('In epoch {}:\n'.format(epoch_idx + 1))
    for batch_idx, (posv, posa, negv, nega, pos_label,
                    neg_label) in enumerate(train_loader):
        opt.zero_grad()
        # b,p,dim = axi.shape
        posv = posv.to(device)
        posa = posa.to(device)
        negv = negv.to(device)
        nega = nega.to(device)
        b1, ds, _, _ = posv.shape
        vfeat = torch.cat((posv, negv), 0)
        afeat = torch.cat((posa, nega), 0)

        pos_label = pos_label.to(device)
        neg_label = neg_label.to(device)
        label = torch.cat((pos_label, neg_label), 0).long().squeeze(-1)
        # pdb.set_trace()
        ins_scores, bag_predicts = f(vfeat, afeat)
        # print(ins_scores)
        loss = 0
        ahs = []
        ces = []
        for i in range(ds):
            bag_predict = bag_predicts[i]
            ins_score = ins_scores[i]
            celoss = CELoss(bag_predict, label[:, i])
            ahloss = AHLoss(ins_score[:b1, :], ins_score[b1:, :])
            # ahs.append(ahloss)
            # ces.append(celoss)
            loss = loss + celoss + ahloss  # starting from 0, so no special-casing is needed
        # ahs = torch.stack(ahs)
        # ces = torch.stack(ces)
        # loss = torch.mean(ahs)+torch.mean(celoss)
        print(
            "In epoch {}, [{}/{}]: loss: {:.6f}, max avg_mAP: {:.4f}, current test mAP: {:.4f}"
            .format(epoch_idx + 1, batch_idx, len(train_loader), loss.item(),
                    maxavgMap, avg_mAP))

        print("current mAP: {};".format(mAP))
        print("max mAP: {};".format(maxMAP))

        # recoder.update('loss', loss.data, epoch_idx*len(train_loader)+batch_idx)
        loss.backward()
        opt.step()
        # loss = attloss+att_visual_i_loss+att_audio_i_loss+att_visual_j_loss+att_audio_j_loss #0.6543
        recoder.update('celoss', celoss.item(), epoch_idx)
        recoder.update('ahloss', ahloss.item(), epoch_idx)

        recoder.save()
Example 16
    def iterate(self,
                epoch: int,
                phase: str):

        self.net.train(phase == "train")
        dataloader = self.dataloaders[phase]

        # self.meter.on_epoch_begin(epoch, phase)
        for itr, (images, targets) in tqdm(enumerate(dataloader), total=len(dataloader)):

            images = images.to(self.device).float()
            N = images.shape[0]
            np_logits, hv_logits, nc_logits = self.net(images)

            np_targets = utils.get_np_targets(targets[:, 0, :, :])
            hv_targets = utils.get_hv_targets(targets[:, 0, :, :])
            nc_targets = utils.get_nc_targets(targets[:, 1, :, :])

            np_targets = np_targets.to(self.device)
            hv_targets = hv_targets.to(self.device)
            nc_targets = nc_targets.to(self.device)
            assert np_targets.shape == (N, 256, 256) and hv_targets.shape == (N, 2, 256, 256) \
                   and nc_targets.shape == (N, 256, 256)

            loss, loss_np, loss_hv, loss_nc = self.hoverloss(np_logits, np_targets,
                                                             hv_logits, hv_targets,
                                                             nc_logits, nc_targets)
            if phase == "train":
                loss.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()
                self.scheduler.step(epoch)
            # Update metrics for this batch
            with torch.no_grad():
                loss = loss.detach()
                loss_np = loss_np.detach()
                loss_nc = loss_nc.detach()
                loss_hv = loss_hv.detach()
                self.epoch_loss['loss'].append(loss.item())
                self.epoch_loss['loss_np'].append(2*loss_np.item())
                self.epoch_loss['loss_nc'].append(loss_nc.item())
                self.epoch_loss['loss_hv'].append(40*loss_hv.item())
        self.store[phase]['loss'].append(sum(self.epoch_loss['loss']) / len(self.epoch_loss['loss']))
        self.store[phase]['loss_np'].append(sum(self.epoch_loss['loss_np']) / len(self.epoch_loss['loss_np']))
        self.store[phase]['loss_nc'].append(sum(self.epoch_loss['loss_nc']) / len(self.epoch_loss['loss_nc']))
        self.store[phase]['loss_hv'].append(sum(self.epoch_loss['loss_hv']) / len(self.epoch_loss['loss_hv']))
        self.epoch_loss['loss'] = [0]
        self.epoch_loss['loss_np'] = [0]
        self.epoch_loss['loss_nc'] = [0]
        self.epoch_loss['loss_hv'] = [0]
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        return self.store[phase]['loss'][-1]
Example 17
def train(model, criterion, optimizer, input_img_gt):
    model.train()
    D = model(input_img_gt['img'])
    loss = criterion(D, input_img_gt['gt'])
    with torch.no_grad():
        dsc = dscloss(D, input_img_gt['gt'])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    return loss.detach().cpu().numpy(), dsc.detach().cpu().numpy()
Example 18
    def train(model, config, epoch):
        model.class_classifier.train()
        model.feature.train()

        for step, (features,
                   labels) in enumerate(config['source_train_loader']):
            if torch.cuda.is_available():
                features, labels = features.cuda(), labels.cuda()

            optimizer.zero_grad()
            preds = model.class_classify(features)
            loss = criterion(preds, labels)
            loss.backward()
            optimizer.step()
Example 19
def train(epoch):
    fcn_model.train()  # train mode
    total_loss = 0.
    st = time.time()
    for batch_idx, (imgs, labels, Image_Path) in enumerate(train_loader):
        # train_batch += 1
        if use_cuda:
            imgs = imgs.cuda()
            labels = labels.cuda()
        # batch_idx += 1
        imgs_tensor = Variable(imgs)  # torch.Size([2, 3, 320, 320])
        target = Variable(labels)  # torch.Size([2, 320, 320])
        out = fcn_model(imgs_tensor)  # torch.Size([2, 21, 320, 320])

        loss = criterion(out, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()  # update all arguments
        total_loss += loss.item()  # return float

        if (batch_idx) % 20 == 0:
            ed = time.time()
            print(
                'train epoch [%d/%d], iter[%d/%d], lr %.7f, aver_loss %.5f, time_use = %.1f'
                % (epoch, epochs, batch_idx, len(train_loader), learning_rate,
                   total_loss / (batch_idx + 1), ed - st))
            st = ed
            # # visiualize scalar
            # label_img = tools.labelToimg(labels[0])
            # net_out = out[0].data.max(1)[1].squeeze_(0)
            # out_img = tools.labelToimg(net_out)
            # writer.add_scalar("loss", loss, train_batch)
            # writer.add_scalar("total_loss", total_loss, train_batch)
            # writer.add_scalars('loss/scalar_group', {"loss": train_batch * loss,
            #                                             "total_loss": train_batch * total_loss})
            # writer.add_image('Image', imgs[0], epoch)
            # writer.add_image('label', label_img, epoch)
            # writer.add_image("out", out_img, epoch)

        assert not np.isnan(total_loss)
        assert not np.isinf(total_loss)

    torch.save(fcn_model.state_dict(),
               './models/temp.pth')  # overwrite the latest checkpoint each epoch
    total_loss /= len(train_loader)
    print('train epoch [%d/%d] average_loss %.5f' %
          (epoch, epochs, total_loss))

    return total_loss
Example 20
File: main.py Project: zzg-tju/QGCN
def train(training_data_loader, optimizer, model, criterion, epoch):
    model.train()
    currtime = time.time()
    for iteration, batch in enumerate(training_data_loader, 1):
        source, target = trainPrepare(batch)
        output = model(source)
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if iteration % 100 == 0 or iteration == len(training_data_loader):
            usetime = time.time() - currtime
            currtime = time.time()
            print(
                f"===> Epoch[{epoch+1}]({iteration}/{len(training_data_loader)}): Loss: {loss.item():.6f}, Time: {usetime:.4f}"
            )
Example 21
def train(epoch):
    fcn_model.train()
    total_loss = 0.
    for batch_idx, (imgs, labels) in enumerate(train_loader):
        N = imgs.size(0)
        if use_cuda:
            imgs = imgs.cuda()
            labels = labels.cuda()

        imgs = Variable(imgs)
        labels = Variable(labels)

        out = fcn_model(imgs)

        loss = criterion(out, labels)
        loss /= N

        # visualize scalars
        writer.add_scalar("loss", loss, batch_idx)
        writer.add_scalar("total_loss", total_loss, batch_idx)
        writer.add_scalars('loss/scalar_group', {
            "loss": batch_idx * loss,
            "total_loss": batch_idx * total_loss
        })
        writer.add_image('Image', imgs[0], batch_idx)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()  # accumulate as a Python float

        if (batch_idx) % 20 == 0:
            print('train epoch [%d/%d], iter[%d/%d], lr %.5f, aver_loss %.5f' %
                  (epoch, epoch_num, batch_idx, len(train_loader),
                   learning_rate, total_loss / (batch_idx + 1)))

        # model save
        if (epoch) % 5 == 0:
            torch.save(fcn_model.state_dict(), 'params.pth')

        assert not np.isnan(total_loss)
        assert not np.isinf(total_loss)

    total_loss /= len(train_loader)
    print('train epoch [%d/%d] average_loss %.5f' %
          (epoch, epoch_num, total_loss))
Example 22
def train(epoch_idx, mAP):
    f.train()
    logging.info('In epoch {}:\n'.format(epoch_idx + 1))
    for batch_idx, (posv, posa, negv, nega, pos_label,
                    neg_label) in enumerate(train_loader):
        opt.zero_grad()
        # b,p,dim = axi.shape
        posv = posv.to(device)
        posa = posa.to(device)
        negv = negv.to(device)
        nega = nega.to(device)
        b1, _, _ = posv.shape
        vfeat = torch.cat((posv, negv), 0)
        afeat = torch.cat((posa, nega), 0)

        pos_label = pos_label.to(device)
        neg_label = neg_label.to(device)
        label = torch.cat((pos_label, neg_label), 0).long().view(-1)
        # pdb.set_trace()
        ins_scores, bag_predict = f(vfeat, afeat)
        # print(ins_scores)
        celoss = CELoss(bag_predict, label)

        ahloss = AHLoss(ins_scores[:b1, :], ins_scores[b1:, :])
        # pdb.set_trace()
        loss = celoss + ahloss
        # loss = ahloss
        if args.dataset != "youtube":
            print(
                "Domain: {}; In epoch {}, [{}/{}]: loss: {:.6f}, max mAP_5: {:.4f}, current mAP_5: {:.4f}"
                .format(args.domain, epoch_idx + 1, batch_idx,
                        len(train_loader), loss.item(), max_mAP_5, mAP_5))
        else:
            print(
                "Domain: {}; In epoch {}, [{}/{}]: loss: {:.6f}, max test mAP: {:.4f}, current test mAP: {:.4f}"
                .format(args.domain, epoch_idx + 1, batch_idx,
                        len(train_loader), loss.item(), max_mAP, mAP))
        # recoder.update('loss', loss.data, epoch_idx*len(train_loader)+batch_idx)
        loss.backward()
        opt.step()
        # loss = attloss+att_visual_i_loss+att_audio_i_loss+att_visual_j_loss+att_audio_j_loss #0.6543
        recoder.update('celoss', celoss.item(), epoch_idx)
        recoder.update('ahloss', ahloss.item(), epoch_idx)

        recoder.save()
Example 23
    def training(self, epoch):
        pbar = tqdm(total=self.n_train, desc=f'Epoch {epoch + 1}/{self.num_epoch}', unit='img',
            bar_format='{l_bar}%s{bar:10}%s{r_bar}{bar:-10b}' % (Fore.RED, Fore.RESET))
        mean_loss, mean_score = 0, 0

        self.net.train()
        n_iter = len(self.loader_train)
        for k, btchs in enumerate(self.loader_train):
            imgs = btchs[0].to(device=self.dvc_main, dtype=self.dtype)
            labels = btchs[1].to(device=self.dvc_main, dtype=self.dtype)

            self.scheduler.step(epoch + k / n_iter)
            self.optim.zero_grad()

            preds = self.net(imgs)
            loss = self.criterion(preds, labels)
            loss.backward()

            self.optim.step()

            with torch.no_grad():
                img_dt = imgs.data
                label_dt = labels.data
                pred_dt = preds.data

                mean_score += F1Score(pred_dt, label_dt)
                mean_loss += loss.item()

                lrs = f"{self.scheduler.get_last_lr()[0]:.3f}"
                pbar.set_postfix(**{self.name_loss: mean_loss / (k + 1),
                                    'F1Score': mean_score / (k + 1),
                                    'LRs' : lrs})
                pbar.update(imgs.shape[0])
                if k == 0:
                    img_dict = {'Train/': img_dt,
                                'Train/true': label_dt,
                                'Train/pred': pred_dt}
                    self.writing(epoch, self.writer_main, img_dict, opt='image')
        
        scalar_dict = {self.name_loss: mean_loss / n_iter,
                       'F1Score': mean_score / n_iter}
        pbar.write(_term_move_up(), end='\r')
        self.writing(epoch, self.writer_main, scalar_dict, opt='scalar')
        pbar.close()
Example 24
    def __train_epoch(self):
        self.model.train()
        losses = []
        accuracies = []
        TN = FN = TP = FP = 0
        progress = tqdm(enumerate(self.train_loader),
                        total=len(self.train_loader),
                        desc='Training',
                        file=sys.stdout)
        for batch_idx, data in progress:
            samples, targets = data
            if self.cuda:
                samples = samples.cuda()
                targets = targets.cuda()

            self.optimizer.zero_grad()
            outputs = self.model(samples)
            loss = self.criterion(outputs, targets)
            loss.backward()
            self.optimizer.step()

            losses.append(loss.item())
            targets = targets.data.cpu()
            _, predicted = torch.max(outputs.data, 1)
            predicted = predicted.data.cpu()
            perf = self.__perf_measure(targets, predicted)
            TN += perf[2]
            FN += perf[3]
            TP += perf[0]
            FP += perf[1]

            acc = (TP + TN) / (FP + FN + TP +
                               TN) if FP + FN + TP + TN > 0 else 0
            precision = TP / (TP + FP) if TP + FP > 0 else 0
            recall = TP / (TP + FN) if TP + FN > 0 else 0
            f1 = 2 * (precision * recall) / (
                precision + recall) if precision + recall > 0 else 0
            accuracies.append(acc)
            progress.set_description(
                'Training Loss: {:.4f} | Accuracy: {:.4f} | F1: {:.4f} | Precision: {:.4f} | Recall: {:.4f} | TP: {} | TN: {} | FP: {} | FN: {}'
                .format(loss.item(), acc, f1, precision, recall, TP, TN, FP,
                        FN))

        return np.mean(losses)
Example 25
def forward_step(net, mid_net, loss_fn, loader, args):
    pair_total = 0
    catfeat = []
    for i in range(args.loss_step):
        catfeat.append([])
    label = []
    epoch_end = False
    while pair_total < args.max_pair / args.worker:
        pair_total, epoch_end = embed_step(pair_total,
                                           net,
                                           mid_net,
                                           loss_fn,
                                           catfeat,
                                           label,
                                           loader,
                                           args,
                                           volatile=args.fix_net)
    losses = []
    for k in range(len(catfeat)):
        if args.fix_net:
            for feat in catfeat[k]:
                if isinstance(feat, tuple):
                    feat[0].volatile = False
                    feat[0].requires_grad = False
                    feat[1].volatile = False
                    feat[1].requires_grad = False
                else:
                    feat.volatile = False
                    feat.requires_grad = False
        losses.append(loss_fn[k](catfeat[k], label))
    loss = sum(losses)
    loss_total = loss.data[0]
    correct = []
    for k in range(len(loss_fn)):
        correct.append(loss_fn[k].check_result(label))
    total = len(label)
    net.zero_grad()
    if mid_net != None:
        mid_net.zero_grad()
    for l in loss_fn:
        l.zero_grad()
    loss.backward()
    return loss_total, correct, total, epoch_end
Example 26
    def train_emb(self, epoch, batch_data, ids=None, *args):
        """One training step given images and captions.
        """
        self.Eiters += 1
        self.logger.update('Eit', self.Eiters)
        self.logger.update('lr', self.optimizer.param_groups[0]['lr'])

        # compute the embeddings
        img_emb, cap_emb, cap_lens, ids = self.forward_emb(epoch, batch_data)

        # measure accuracy and record loss
        self.optimizer.zero_grad()
        loss = self.forward_loss(epoch, img_emb, cap_emb, cap_lens, ids)

        # compute gradient and do SGD step
        loss.backward()
        if self.grad_clip > 0:
            clip_grad_norm(self.params, self.grad_clip)
        self.optimizer.step()
Example 27
def train(epoch, model, loss_fn, train_loader, optimizer):
    model.train()
    # Horovod: set epoch to sampler for shuffling.
    train_loader.sampler.set_epoch(epoch)
    for batch_idx, (data, target) in tqdm(enumerate(train_loader), total=len(train_loader), ascii=True):
        for key in data:
            if type(data[key][0]) != np.str_:
                data[key] = data[key].cuda()
        target = target.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            # Horovod: use train_sampler to determine the number of examples in
            # this worker's partition.
            logging.info('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.sampler),
                100. * batch_idx / len(train_loader), loss.item()))
Example 28
def train(epoch_idx, mAP):
    # return
    f.train()
    logging.info('In epoch {}:\n'.format(epoch_idx + 1))
    for batch_idx, (posv, posa, negv, nega, pos_label,
                    neg_label) in enumerate(train_loader):
        opt.zero_grad()
        # b,p,dim = axi.shape
        posv = posv.to(device)
        posa = posa.to(device)
        negv = negv.to(device)
        nega = nega.to(device)
        b1, ds, _, _ = posv.shape
        vfeats = torch.cat((posv, negv), 0)
        afeats = torch.cat((posa, nega), 0)

        pos_label = pos_label.to(device)
        neg_label = neg_label.to(device)
        labels = torch.cat((pos_label, neg_label), 0).long().squeeze(-1)
        # pdb.set_trace()
        for dm in range(ds):
            vfeat, afeat, label = vfeats[:, dm, :, :], afeats[:, dm, :, :], labels[:, dm]
            ins_score, bag_predict = networks[dm](vfeat, afeat)
            celoss = CELoss(bag_predict, label)
            ahloss = AHLoss(ins_score[:b1, :], ins_score[b1:, :])

            loss = celoss + ahloss
            print(
                "Domain: {}; In epoch {}, [{}/{}]: loss: {:.6f}, max test mAP: {:.4f}, current test mAP: {:.4f}"
                .format(domains[dm], epoch_idx + 1, batch_idx,
                        len(train_loader), loss.item(), MaxmAP[dm], mAP[dm]))
            # recoder.update('loss', loss.data, epoch_idx*len(train_loader)+batch_idx)
            loss.backward()
            opts[dm].step()
            # loss = attloss+att_visual_i_loss+att_audio_i_loss+att_visual_j_loss+att_audio_j_loss #0.6543
            recoder.update(domains[dm] + ' celoss', celoss.item(), epoch_idx)
            recoder.update(domains[dm] + ' ahloss', ahloss.item(), epoch_idx)
            recoder.save()
Example 29
def train_fn(data_loader, model, optimizer, device, scheduler):
    model.train()

    for bi, d in tqdm(enumerate(data_loader), total=len(data_loader)):
        ids = d["ids"]
        token_type_ids = d["token_type_ids"]
        mask = d["mask"]
        targets = d["targets"]

        ids = ids.to(device, dtype=torch.long)
        token_type_ids = token_type_ids.to(device, dtype=torch.long)
        mask = mask.to(device, dtype=torch.long)
        targets = targets.to(device, dtype=torch.float)

        optimizer.zero_grad()
        outputs = model(ids=ids, mask=mask, token_type_ids=token_type_ids)

        batch_loss = loss.loss_fn(outputs, targets)  # avoid shadowing the module-level loss helper
        batch_loss.backward()
        optimizer.step()
        scheduler.step()
Example 30
def train(model, data_loader, optimizer, scheduler, i):
    model.train()
    fin_loss = 0.0
    tk = tqdm(data_loader, desc="Epoch" + " [TRAIN] " + str(i + 1))

    for t, data in enumerate(tk):
        for k, v in data.items():
            data[k] = v.cuda()
        optimizer.zero_grad()
        _, loss = model(**data)
        loss.backward()
        optimizer.step()
        fin_loss += loss.item()

        tk.set_postfix({
            'loss': '%.6f' % float(fin_loss / (t + 1)),
            'LR': optimizer.param_groups[0]['lr']
        })

    scheduler.step()
    return fin_loss / len(data_loader)