Example #1
    def train(self, epoch, trainloader, print_every=100):
        '''
        method for training
        '''
        loss_batch = 0
        if epoch % 10 == 0 and epoch > 0:
            adjust_lr(self.optimizer, self.lr)
        for b_idx, (train_data, train_labels) in enumerate(trainloader):
            if self.use_gpu and str(self.device) == 'cuda:0':
                train_data = train_data.cuda(non_blocking=True)
                train_labels = train_labels.cuda()

            # Forward Pass
            train_preds = self.model(train_data)
            loss = self.model.loss(train_preds, train_labels)
            if self.l2:
                loss = self.l2_regularization(loss, self.l2)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            if b_idx % print_every == 0:
                print('Train Epoch: {0} [{1}/{2} ({3:.0f}%)]\t Loss {4:.6f}'.
                      format(epoch, b_idx * len(train_data),
                             len(trainloader.dataset),
                             100. * b_idx / len(trainloader), loss))

            loss_batch += loss.item()
        loss_batch /= len(trainloader)
        self.train_loss.append(loss_batch)
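
The helper adjust_lr(self.optimizer, self.lr) called every tenth epoch above is not shown. A minimal sketch of what such a step-decay helper could look like, assuming each call simply scales every parameter group's rate by a fixed factor (the factor 0.1 is an assumption, not taken from the example):

def adjust_lr(optimizer, lr, factor=0.1):
    # Hypothetical step-decay helper matching the call above; each invocation
    # scales every param group's learning rate by `factor`. The `lr` argument
    # is kept only to mirror the caller's signature.
    for param_group in optimizer.param_groups:
        param_group['lr'] = param_group['lr'] * factor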
Example #2
def train(epoch,
          model,
          criterion,
          opt,
          scheduler,
          tr_loader,
          device,
          logger,
          schdl_type='cyclic'):
    model.train()
    ep_loss = 0
    ep_acc = 0
    print(
        '[INFO][TRAINING][clean_training] \t Epoch {} started.'.format(epoch))
    for batch_idx, (inpt, targets) in enumerate(tqdm(tr_loader)):
        inpt, targets = inpt.to(device), targets.to(device)
        output = model(inpt)
        loss = criterion(output, targets)
        opt.zero_grad()
        with amp.scale_loss(loss, opt) as scaled_loss:
            scaled_loss.backward()
        opt.step()
        ep_loss += loss.item()
        ep_acc += (output.max(1)[1] == targets).sum().item() / len(targets)
        if schdl_type == 'cyclic':
            utils.adjust_lr(opt, scheduler, logger, epoch * batch_idx)
    if schdl_type != 'cyclic':
        utils.adjust_lr(opt, scheduler, logger, epoch)
    logger.log_train(epoch, ep_loss / len(tr_loader),
                     (ep_acc / len(tr_loader)) * 100, "clean_training")
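
Example #2 above (and Example #4 below) hands the update off to utils.adjust_lr(opt, scheduler, logger, step), which is not shown. A minimal sketch of such a wrapper, assuming it only advances the given scheduler and logs the resulting rate (the logger.log_lr method name is an assumption):

def adjust_lr(opt, scheduler, logger, step):
    # Hypothetical wrapper: advance the scheduler one step and record the
    # new learning rate of the first param group.
    scheduler.step()
    current_lr = opt.param_groups[0]['lr']
    logger.log_lr(step, current_lr)  # log_lr is an assumed logger method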
Example #3
    def adjust_lr(self, ep):
        utils.adjust_lr(
            param_groups=self.optimReID.param_groups,
            base_lrs=[cfg.pre_reid_ft_base_lr,
                      cfg.pre_reid_ft_base_lr,
                      cfg.pre_reid_fc_weight_base_lr,
                      cfg.pre_reid_fc_bias_base_lr],
            decay_epochs=cfg.pre_reid_lr_decay_epochs, epoch=ep, verbose=True)
Example #4
def train(epoch,
          model,
          criterion,
          opt,
          scheduler,
          cnfg,
          tr_loader,
          device,
          logger,
          schdl_type='cyclic'):
    model.train()
    ep_loss = 0
    ep_acc = 0
    print(
        '[INFO][TRAINING][clean_training] \t Epoch {} started.'.format(epoch))
    for batch_idx, (inpt, targets) in enumerate(tqdm(tr_loader)):
        inpt, targets = inpt.to(device), targets.to(device)
        l_limit, u_limit = pgd.get_limits(device)
        delta = pgd.train_pgd(model,
                              device,
                              criterion,
                              inpt,
                              targets,
                              epsilon=cnfg['pgd']['epsilon'],
                              alpha=cnfg['pgd']['alpha'],
                              iter=cnfg['pgd']['iter'],
                              opt=opt,
                              restart=cnfg['pgd']['restarts'],
                              d_init=cnfg['pgd']['delta-init'],
                              l_limit=l_limit,
                              u_limit=u_limit)
        output = model(inpt + delta)
        loss = criterion(output, targets)
        opt.zero_grad()
        with amp.scale_loss(loss, opt) as scaled_loss:
            scaled_loss.backward()
        opt.step()
        ep_loss += loss.item()
        ep_acc += (output.max(1)[1] == targets).sum().item() / len(targets)
        if schdl_type == 'cyclic':
            utils.adjust_lr(opt, scheduler, logger, epoch * batch_idx)
    if schdl_type != 'cyclic':
        utils.adjust_lr(opt, scheduler, logger, epoch)
    print('number of training batches:', len(tr_loader))
    logger.log_train(epoch, ep_loss / len(tr_loader),
                     (ep_acc / len(tr_loader)) * 100, "pgd_training")
Example #5
    def step(self, profiler):
        gvar = self.gvar
        opt = self.opt

        self.optimizer.zero_grad()

        pg_used = gvar.gest_used
        loss = gvar.grad(self.niters)
        if gvar.gest_used != pg_used:
            logging.info('Optimizer reset.')
            self.gest_used = gvar.gest_used
            utils.adjust_lr(self, opt)
            self.reset()
        self.optimizer.step()
        profiler.toc('optim')

        profiler.end()
        return loss
Example #6
def train(epoch, criterion_list, optimizer):
    train_loss = 0.
    train_loss_cls = 0.
    train_loss_div = 0.
    top1_num = 0
    top5_num = 0
    total = 0

    lr = adjust_lr(optimizer, epoch, args)
    start_time = time.time()
    criterion_cls = criterion_list[0]
    criterion_div = criterion_list[1]

    net.train()
    for batch_idx, (input, target) in enumerate(trainloader):
        batch_start_time = time.time()
        
        input = input.cuda()
        target = target.cuda()
        input, targets_a, targets_b, lam = mixup_data(input, target, 0.4)

        logit = net(input)
        #loss_cls = criterion_cls(logit, target)
        loss_cls = mixup_criterion(CrossEntropyLoss_label_smooth, logit, targets_a, targets_b, lam)
        loss = loss_cls

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item() / len(trainloader)
        train_loss_cls += loss_cls.item() / len(trainloader)

        top1, top5 = correct_num(logit, target, topk=(1, 5))
        top1_num += top1
        top5_num += top5
        total += target.size(0)
        
        
        print('Epoch:{},batch_idx:{}/{}'.format(epoch, batch_idx, len(trainloader)),
              'acc:', top1_num.item() / total,
              'duration:', time.time() - batch_start_time)
    
    print('Epoch:{}\t lr:{:.5f}\t duration:{:.3f}'
                '\n train_loss:{:.5f}\t train_loss_cls:{:.5f}'
                '\n top1_acc: {:.4f} \t top5_acc:{:.4f}'
                .format(epoch, lr, time.time() - start_time,
                        train_loss, train_loss_cls,
                        (top1_num/total).item(), (top5_num/total).item()))

    with open(log_txt, 'a+') as f:
        f.write('Epoch:{}\t lr:{:.5f}\t duration:{:.3f}'
                '\ntrain_loss:{:.5f}\t train_loss_cls:{:.5f}'
                '\ntop1_acc: {:.4f} \t top5_acc:{:.4f} \n'
                .format(epoch, lr, time.time() - start_time,
                        train_loss, train_loss_cls,
                        (top1_num/total).item(), (top5_num/total).item()))
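
The mixup step above relies on mixup_data and mixup_criterion helpers that are not shown. The standard mixup loss has the following form (a sketch of the usual formulation; the repository's exact helper may differ, with CrossEntropyLoss_label_smooth playing the role of criterion in the call above):

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    # Standard mixup objective: interpolate the loss between the two label
    # tensors with the same coefficient that was used to mix the inputs.
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)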
Example #7
    def step(self, profiler):
        gvar = self.gvar
        opt = self.opt
        model = self.model

        self.optimizer.zero_grad()

        # Frequent snaps
        inits = list(map(int, opt.g_osnap_iter.split(',')[0:2]))
        every = int(opt.g_osnap_iter.split(',')[-1])

        if (((self.niters - opt.gvar_start) % every == 0
             or self.niters in inits) and self.niters >= opt.gvar_start):
            print(self.niters)

            if opt.g_estim == 'nuq' and opt.nuq_method != 'none':
                stats = gvar.gest.snap_online_mean(model)
                if opt.nuq_parallel == 'ngpu':
                    for qdq in gvar.gest.qdq:
                        qdq.set_mean_variance(stats)
                else:
                    gvar.gest.qdq.set_mean_variance(stats)

            if opt.nuq_method == 'amq' or opt.nuq_method == 'alq' or opt.nuq_method == 'alq_nb' or opt.nuq_method == 'amq_nb':
                if opt.nuq_parallel == 'ngpu':
                    for qdq in gvar.gest.qdq:
                        qdq.update_levels()
                else:
                    gvar.gest.qdq.update_levels()

        pg_used = gvar.gest_used
        loss = gvar.grad(self.niters)
        if gvar.gest_used != pg_used:
            logging.info('Optimizer reset.')
            self.gest_used = gvar.gest_used
            utils.adjust_lr(self, opt)
            self.reset()
        self.optimizer.step()
        profiler.toc('optim')

        profiler.end()
        return loss
Example #8
def train_second_stage(viz, writer, dataloader, front_net_thick, front_net_thin, fusion_net, optimizer, base_lr, criterion, device, power, epoch, num_epochs=100):
    dt_size = len(dataloader.dataset)
    epoch_loss = 0
    step = 0
    for sample in dataloader:
        step += 1
        img = sample[0].to(device)
        gt = sample[1].to(device)
        with torch.no_grad(): 
            thick_pred = front_net_thick(img)
            thin_pred = front_net_thin(img)
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward
        fusion_pred = fusion_net(img[:, :1, :, :], thick_pred, thin_pred)
        viz.img(name="images", img_=img[0, :, :, :])
        viz.img(name="labels", img_=gt[0, :, :, :])
        viz.img(name="prediction", img_=fusion_pred[0, :, :, :])
        loss = criterion(fusion_pred, gt)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        
        # loss for the current batch of images
        niter = epoch * len(dataloader) + step
        writer.add_scalars("train_loss", {"train_loss": loss.item()}, niter)
        print("%d / %d, train loss: %0.4f" % (step, (dt_size - 1) // dataloader.batch_size + 1, loss.item()))
        viz.plot("train loss", loss.item())
        
        # log the current learning rate
        current_lr = get_lr(optimizer)
        viz.plot("learning rate", current_lr)
        writer.add_scalars("learning_rate", {"lr": current_lr}, niter)
    
    print("epoch %d loss: %0.4f" % (epoch, epoch_loss))
    print("current learning rate: %f" % current_lr)
    
    adjust_lr(optimizer, base_lr, epoch, num_epochs, power=power)
    
    return fusion_net
Example #9
def train_first_stage(viz, writer, dataloader, net, optimizer, base_lr, thin_criterion, thick_criterion, device, power, epoch, num_epochs=100):
    dt_size = len(dataloader.dataset)
    epoch_loss = 0
    step = 0
    for sample in dataloader:
        step += 1
        img = sample[0].to(device)
        thin_gt = sample[2].to(device)
        thick_gt = sample[3].to(device)
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward
        thick_pred, thin_pred, _ = net(img)
        viz.img(name="images", img_=img[0, :, :, :])
        viz.img(name="thin labels", img_=thin_gt[0, :, :, :])
        viz.img(name="thick labels", img_=thick_gt[0, :, :, :])
        viz.img(name="thin prediction", img_=thin_pred[0, :, :, :])
        viz.img(name="thick prediction", img_=thick_pred[0, :, :, :])
        loss = thin_criterion(thin_pred, thin_gt) + thick_criterion(thick_pred, thick_gt)  # the two terms could be weighted
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        
        # loss for the current batch of images
        niter = epoch * len(dataloader) + step
        writer.add_scalars("train_loss", {"train_loss": loss.item()}, niter)
        print("%d / %d, train loss: %0.4f" % (step, (dt_size - 1) // dataloader.batch_size + 1, loss.item()))
        viz.plot("train loss", loss.item())
        
        # log the current learning rate
        current_lr = get_lr(optimizer)
        viz.plot("learning rate", current_lr)
        writer.add_scalars("learning_rate", {"lr": current_lr}, niter)
    
    print("epoch %d loss: %0.4f" % (epoch, epoch_loss))
    print("current learning rate: %f" % current_lr)
    
    adjust_lr(optimizer, base_lr, epoch, num_epochs, power=power)
    
    return net
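
Both stages above end an epoch with adjust_lr(optimizer, base_lr, epoch, num_epochs, power=power), which is not shown. A minimal sketch of a polynomial ("poly") decay consistent with that signature (the exact formula is an assumption):

def adjust_lr(optimizer, base_lr, epoch, num_epochs, power=0.9):
    # Hypothetical poly schedule: lr = base_lr * (1 - epoch / num_epochs) ** power
    lr = base_lr * (1 - epoch / num_epochs) ** power
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr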
Example #10
def train(epoch):
    model.train()
    adjust_lr(optimizer, epoch, args.lr, decay_rate=0.2)
    for batch_idx, data in enumerate(train_loader):
        raw_data = data[-1]
        data = [Variable(_, requires_grad=False).cuda() for _ in data[:-1]]
        prev_canvas, final_canvas, inst, target_obj, act = data
        ref_obj = None
        optimizer.zero_grad()
        loss = loss_fn(
            model(inst, prev_canvas, final_canvas,
                  (target_obj, ref_obj, True)), inst[:, 1:])
        loss.backward()
        clip_gradient(optimizer, 0.1)
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f})'.format(
                epoch, batch_idx * args.batch_size, len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0]))
    torch.save(
        model.state_dict(),
        'instructor_seq2seq_target_canvas_concat/model_{}.pth'.format(epoch))
Example #11
def train(epoch):
    model.train()
    adjust_lr(optimizer, epoch, args.lr, decay_rate=0.2)
    for batch_idx, data in enumerate(train_loader):
        raw_data = data[-1]
        data = [Variable(_, requires_grad=False).cuda() for _ in data[:-1]]
        prev_canvas, inst, next_obj, final_canvas, ref_obj = data
        optimizer.zero_grad()
        loss = loss_fn(
            model(inst, prev_canvas, final_canvas, (next_obj, ref_obj, True)),
            inst[:, 1:])
        policy_loss = (-model.saved_log_probs * model.rewards).sum()
        # policy_loss = 0
        # for i in range(len(model.saved_log_probs)):
        #     policy_loss += (-model.saved_log_probs[i] * model.rewards[:, i]).sum()
        (loss + policy_loss).backward()
        clip_gradient(optimizer, 0.1)
        optimizer.step()
        # del model.saved_log_probs[:]
        model.saved_log_probs = None
        model.sampled_actions = None
        model.rewards = None
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f})'.format(
                epoch, batch_idx * args.batch_size, len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0]))
    # torch.save(model.state_dict(), 'models_topdown_3att_att64_hardatt/model_{}.pth'.format(epoch))
    # torch.save(optimizer.state_dict(), 'models_topdown_3att_att64_hardatt/optimizer_{}.pth'.format(epoch))
    torch.save(
        model.state_dict(),
        'models_topdown_3att_att64_content_planning_absmix_reinforce_running_baseline/model_{}.pth'
        .format(epoch))
    torch.save(
        optimizer.state_dict(),
        'models_topdown_3att_att64_content_planning_absmix_reinforce_running_baseline/optimizer_{}.pth'
        .format(epoch))
Example #12
        }]
    else:
        param_groups = model.parameters()
    optimizer = torch.optim.SGD(param_groups,
                                lr=lr,
                                momentum=0.9,
                                weight_decay=5e-4,
                                nesterov=True)
    print('Start Training.')
    loss_record = 0.
    acc_record = 0.
    best_acc = 0.
    bat_start = 0
    for it in range(n_iters):
        print('\rExtractor | iter %05d' % (it + 1), end='')
        adjust_lr(it, lr, optimizer, lr_step_size=1000)
        ''' Grab a batch from X and Y. '''
        batch, label, bat_start = grab_batch(X, Y, bat_start, batch_size)

        x = Variable(torch.from_numpy(batch.astype(float)).float().cuda())
        y = Variable(torch.from_numpy(label).long().cuda())
        ''' Feedforward and Backward. '''
        outputs = model(x)
        loss = criterion(outputs, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_record += loss.data.cpu().numpy()[0]
        oo = outputs.data.cpu().numpy()
        oc = np.argmax(oo, axis=1)
        acc = np.sum(oc == label) / batch_size
Example #13
    dict_best_top5 = {'Epoch': 0, 'Top5': 100.}

    if opt.resume:
        state_dict = torch.load(opt.path_model)
        model.load_state_dict(state_dict['state_dict'])
        optim.load_state_dict(state_dict['optimizer'])

        dict_best_top1.update({'Epoch': opt.epoch_top1, 'Top1': opt.top1})
        dict_best_top5.update({'Epoch': opt.epoch_top5, 'Top5': opt.top5})

    st = datetime.now()
    iter_total = 0
    top1_hist = list(100 for i in range(100))
    top5_hist = list(100 for i in range(100))  # to see 100 latest top5 error
    for epoch in range(opt.epoch_recent, opt.epochs):
        adjust_lr(optim, epoch, opt.lr, milestones=milestones, gamma=0.1)
        list_loss = list()
        model.train()
        for input, label in tqdm(data_loader):
            iter_total += 1
            input, label = input.to(device), label.to(device)

            output = model(input)

            loss = criterion(output, label)
            optim.zero_grad()
            loss.backward()
            optim.step()

            top1, top5 = cal_top1_and_top5(output, label)
            top1_hist[iter_total % 100] = float(top1)
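
The epoch loop above adjusts the rate with adjust_lr(optim, epoch, opt.lr, milestones=milestones, gamma=0.1), which is not shown. A hedged sketch consistent with that call, assuming the rate is multiplied by gamma once per milestone already passed:

def adjust_lr(optimizer, epoch, base_lr, milestones, gamma=0.1):
    # Hypothetical multi-step decay; the exact rule is an assumption based on
    # the keyword arguments used in the call above.
    lr = base_lr * gamma ** sum(epoch >= m for m in milestones)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr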
Example #14
def main_worker(gpu, args):
    """
    @param: gpu - index of the gpu on a single node, here its range is [0, args.gpus-1]
    """

    # IMPORTANT: we need to set the random seed in each process so that the models are initialized with the same weights
    # Reference: https://yangkky.github.io/2019/07/08/distributed-pytorch-tutorial.html
    # torch.cuda.manual_seed(args.seed)

    # for distributed training, rank needs to be global rank among all processes
    rank = args.node_rank * args.gpus + gpu

    dist.init_process_group(backend=args.dist_backend, \
                            init_method=args.dist_url, \
                            world_size=args.world_size, \
                            rank=rank)

    # build model
    densenet = DenseNet121(in_channels=3, growth_rate=args.growth_rate, \
                           compression_rate=args.compression_rate, \
                           num_classes=args.num_classes)

    # torch.cuda.device(gpu)

    # densenet.cuda(gpu)
    densenet.cuda()

    # densenet = nn.parallel.DistributedDataParallel(densenet, device_ids=[gpu])
    densenet = nn.parallel.DistributedDataParallel(densenet)

    # Reference: https://github.com/pytorch/examples/blob/master/imagenet/main.py
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(), normalize
    ])

    trainset = torchvision.datasets.ImageFolder(root=os.path.join(
        args.dataset_root, 'train'),
                                                transform=train_transform)

    val_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(), normalize
    ])

    valset = torchvision.datasets.ImageFolder(root=os.path.join(
        args.dataset_root, 'val'),
                                              transform=val_transform)

    train_sampler = torch.utils.data.distributed.DistributedSampler(
        trainset, num_replicas=args.world_size, rank=rank)

    args.batch_size = int(args.batch_size / args.gpus)
    args.num_workers = int(args.num_workers / args.gpus)

    train_data = torch.utils.data.DataLoader(
        trainset,
        batch_size=args.batch_size,
        shuffle=False,  # when sampler is specified, shuffle should be False
        num_workers=args.num_workers,
        pin_memory=True,
        sampler=train_sampler)

    val_data = torch.utils.data.DataLoader(valset,
                                           batch_size=args.batch_size,
                                           shuffle=False,
                                           num_workers=args.num_workers,
                                           pin_memory=True)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(densenet.parameters(), lr=args.lr, momentum=args.momentum, \
                            weight_decay=args.weight_decay)

    global best_prec1

    # Reference: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/2
    # this is useful for cudnn finding optimal set of algorithms for particular configurations
    # and accelerate training when the input sizes do not change over iteration.
    cudnn.benchmark = True

    for epoch in range(args.epochs):

        train_sampler.set_epoch(epoch)

        adjust_lr(args, optimizer, epoch)

        train(densenet, train_data, criterion, optimizer, epoch, args)

        if args.tensorboard:
            log_value('train_loss', losses.avg, epoch)
            log_value('train_acc', top1.avg, epoch)

        # validate the model every epoch
        prec1 = validate(args, val_data, densenet, criterion, epoch)

        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': densenet.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict()
            }, is_best)
Example #15
                                alpha=0.99,
                                eps=1e-8,
                                weight_decay=0.0,
                                momentum=0.0)
max_pck = config.max_pck
weights = [
    config.s * config.s * config.num_kpt,
    config.s * config.s * config.num_kpt * 2
]

evaluate(config, model)

for epoch in range(config.start_epoch, config.end_epoch):

    model = model.train()
    lr = adjust_lr(optimizer, epoch, config.decay, config.lr_gamma)
    batch_loss, batch_hloss, batch_xloss, batch_yloss, batch_acc, batch = 0., 0., 0., 0., 0., 0.
    for (idx, (img, label, offset)) in enumerate(trainloader):
        if config.cuda:
            img = img.cuda()
            label = label.cuda()
            offset = offset.cuda()
        img = img.float()

        out1_1, out1_2 = model(img)
        optimizer.zero_grad()
        heat_loss = heat_criterion(out1_1, label)
        offx_loss = offset_criterion(out1_2[:, :config.num_kpt] * label,
                                     offset[:, :config.num_kpt])
        offy_loss = offset_criterion(out1_2[:, config.num_kpt:] * label,
                                     offset[:, config.num_kpt:])
Example #16
def main_worker(local_rank, ngpus, args):
    best_prec1 = .0

    dist.init_process_group(backend=args.dist_backend,
                            init_method=args.dist_url)

    print(f'local_rank: {local_rank}\n')

    torch.cuda.set_device(local_rank)

    # IMPORTANT: we need to set the random seed in each process so that the models are initialized with the same weights
    # Reference: https://yangkky.github.io/2019/07/08/distributed-pytorch-tutorial.html
    # torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # build model
    densenet = DenseNet121(in_channels=3, growth_rate=args.growth_rate, \
                           compression_rate=args.compression_rate, \
                           num_classes=args.num_classes)

    densenet.cuda(local_rank)

    densenet = nn.parallel.DistributedDataParallel(densenet, \
                                                    device_ids=[local_rank], \
                                                    output_device=local_rank)

    # Reference: https://github.com/pytorch/examples/blob/master/imagenet/main.py
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(args.image_width),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(), normalize
    ])
    trainset = torchvision.datasets.ImageFolder(root=os.path.join(
        args.dataset_root, 'train'),
                                                transform=train_transform)

    val_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(args.image_width),
        transforms.ToTensor(), normalize
    ])
    valset = torchvision.datasets.ImageFolder(root=os.path.join(
        args.dataset_root, 'val'),
                                              transform=val_transform)

    # num_replicas: int, Number of processes participating in distributed training. By default, world_size is retrieved from the current distributed group.
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        trainset, num_replicas=ngpus)
    batch_size = args.batch_size // ngpus
    num_workers = args.num_workers // ngpus

    train_data = DataLoader(
        trainset,
        batch_size=batch_size,
        shuffle=False,  # when sampler is specified, shuffle should be False
        num_workers=num_workers,
        pin_memory=True,
        sampler=train_sampler)

    val_data = DataLoader(valset,
                          batch_size=batch_size,
                          shuffle=False,
                          num_workers=num_workers,
                          pin_memory=True)

    criterion = nn.CrossEntropyLoss().cuda(local_rank)
    optimizer = optim.SGD(densenet.parameters(), lr=args.lr, momentum=args.momentum, \
                            weight_decay=args.weight_decay)

    # Reference: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/2
    # this is useful for cudnn finding optimal set of algorithms for particular configurations
    # and accelerate training when the input sizes do not change over iteration.
    cudnn.benchmark = True

    for epoch in range(args.epochs):

        train_sampler.set_epoch(epoch)

        adjust_lr(args, optimizer, epoch)

        losses, top1, top5 = train(densenet, train_data, criterion, optimizer,
                                   epoch, local_rank, args)

        if args.tensorboard:
            log_value('train_loss', losses.avg, epoch)
            log_value('top1_acc', top1.avg, epoch)
            log_value('top5_acc', top5.avg, epoch)

        # validate the model every epoch
        prec1 = validate(args, val_data, densenet, criterion, epoch)

        is_best = prec1.avg > best_prec1
        best_prec1 = max(prec1.avg, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': densenet.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict()
            }, is_best)
Example #17
def train(dataname, num_dims, num_inc=2, cap=1000, lr=0.001, name_log='default', num_epochs=60, batch_size=64, device='gpu', lr_schedule=False, optimizer='adam', dir_data='./', num_workers=4):

    dir_root = os.path.join(dir_data, dataname)
    dir_model = os.path.join(dir_root, 'model')
    dir_logs = os.path.join(dir_root, 'logs', name_log)

    logging.info("start training.")

    logging.info("delete present data pool")
    if os.path.exists(os.path.join(dir_root, 'data_pool.pkl')):
        os.remove(os.path.join(dir_root, 'data_pool.pkl'))

    with open(os.path.join(dir_root, 'classes.txt'), 'r') as f:
        label_list = f.readlines()[0].strip().split(' ')
    logging.info("total number of classes is {}".format(len(label_list)))
    print(label_list)
    label_sep_list = [label_list[i:i+num_inc] for i in range(0, len(label_list), num_inc)]
    num_total_classes = len(label_list)
    num_now_classes = 0

    # load model
    model = BenchMark(num_dims, 2)
    save_model(model, dir_model)
    logging.info("define the representer model")

    # define DataPool
    data_pool = DataPool(dir_data=dir_data, cap=cap, dataname=dataname)

    for num_inc, label_sep in enumerate(label_sep_list):
        num_now_classes += len(label_sep)
        acc_best = 0

        # load stored model trained using old classes's data
        model = load_model(model, num_now_classes, dir_model) 
        model.train()
        model = model.to(device)
        logging.info("reload the old model.")

        # define logger
        logger = SummaryWriter(dir_logs)

        # dataloader of old and new datasets 
        train_dataset_old = data_pool.load_data_pool()
        train_dataset_new = load_data(dir_data, dataname, 'train', label_sep)
        train_dataset = concat_datasets([train_dataset_old, train_dataset_new])
        train_dataloader = load_dataloader(train_dataset, batch_size, num_workers)
        test_dataset = load_data(dir_data, dataname, 'test', data_pool.classes+label_sep)
        test_dataloader = load_dataloader(test_dataset, 16, num_workers)

        # define loss function
        criterion = nn.CrossEntropyLoss()
        if optimizer == 'adam':
            optimizer = optim.Adam(model.parameters(), lr=lr)


        # train the representer
        for epoch in range(num_epochs):

            model.train() 
            sum_loss = 0
            
            # lr schedule
            if lr_schedule:
                adjust_lr(optimizer, lr, epoch)

            for i, (train_batch, label_batch) in enumerate(train_dataloader):

                train_batch = train_batch.to(device)
                label_batch = label_batch.to(device)
                output_batch = model(train_batch)

                loss = criterion(output_batch, label_batch)
                sum_loss += loss

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            acc = acc_cal(model, test_dataloader)
            acc_best = acc

            logging.info("Classes: {}/{}, Epoch: {}/{}, Loss: {:.4f}, Acc: {:.4f}".format(num_now_classes, num_total_classes, epoch+1, num_epochs, sum_loss.data, acc))

            logger.add_scalars('data/Classes_{}'.format(num_now_classes), {'loss': sum_loss.data}, epoch)
            logger.add_scalars('data/Classes_{}'.format(num_now_classes), {'acc': acc}, epoch)

        logger.add_scalars('data/acc_incremental', {'acc_incremental': acc_best}, num_inc) 

        # save samples to data pool
        num_everyclass = int(data_pool.cap/num_now_classes)
        data_pool.add_data(model, train_dataset_new, num_everyclass, device) 

        # save model
        save_model(model, dir_model)
        logger.close()
Example #18
        optimizer_Encoder.step()
        optimizer_Decoder.step()



        if i % 10 == 0 or i == total_step:
            print('{} Epoch [{:03d}/{:03d}], Step [{:04d}/{:04d}], loss: {:0.4f}, sal1 loss: {:0.4f}, edge loss: {:0.4f}, sal2 loss: {:0.4f}, prior loss: {:0.4f}'.
                  format(datetime.now(), epoch, args.epoch, i, total_step, loss.data, sal_loss1.data, edge_loss.data, sal_loss2.data, pri_loss.data))

            visualize_prediction(torch.sigmoid(sal1), './show/', "sal1")
            visualize_prediction(torch.sigmoid(edge_map), './show/', "edge")
            visualize_prediction(torch.sigmoid(sal2), './show/', "sal2")

    save_path = 'save_models/finetune/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if epoch % 1 == 0:
        torch.save(Encoder.state_dict(), save_path + 'scribble_Encoder' + '_%d'  % epoch  + '.pth')
        torch.save(Decoder.state_dict(), save_path + 'scribble_Decoder' + '_%d'  % epoch  + '.pth')

print("start training!!!")

for epoch in range(1, args.epoch+1):
    adjust_lr(optimizer_Encoder, epoch, args.decay_rate, args.decay_epoch)
    adjust_lr(optimizer_Decoder, epoch, args.decay_rate, args.decay_epoch)

    train(train_dataloader, Encoder, Decoder, optimizer_Encoder, optimizer_Decoder, epoch)


Example #19
def trainer(model,
            optimizer,
            criterion,
            scheduler,
            train_loader,
            val_loader,
            tqdm_length,
            log_flag=False):
    best_acc = 0.5
    best_ap = 0
    best_FOR = 0
    best_ok_ap = 0
    best_ng_ap = 0

    best_ap_epoch = []
    best_acc_epoch = []
    best_FOR_epoch = []
    save_names = []

    for epoch in range(config.max_epoch):
        batch_avg_loss = 0

        bar = tqdm(enumerate(train_loader), total=tqdm_length)
        for ii, (data, label) in bar:
            image = data.cuda()
            target = label.cuda()

            optimizer.zero_grad()
            logits = model(image)
            loss = criterion(logits, target)
            loss.backward()
            optimizer.step()

            cur_loss = loss.item()
            batch_avg_loss += cur_loss
            cur_lr = optimizer.state_dict()["param_groups"][0]["lr"]
            batch_loss = batch_avg_loss / (ii + 1)
            bar.set_description(f'{epoch} loss:{cur_loss:.2e} lr:{cur_lr:.2e}')

        if scheduler is None:
            utils.adjust_lr(optimizer, epoch)
        else:
            scheduler.step()

        val_accuracy, y_true, y_score = val(model, val_loader)
        if config.num_class == 2:
            # confusion matrix
            confusion_matrix = metrics.confusion_matrix(
                y_true, np.argmax(y_score, 1))
            # AP
            # ok_val_ap, ng_val_ap, mAP = utils.get_AP_metric(y_true, y_score)
            mulit_class_ap = ClassifierEvalMulticlass.compute_ap(
                y_true, y_score)
            ng_val_ap = mulit_class_ap[0]
            ok_val_ap = mulit_class_ap[1]
            mAP = (ng_val_ap + ok_val_ap) / 2
            # FOR
            final_metric_dict = utils.get_FOR_metric(y_true, y_score)

            ok_y_score = y_score[:, 1]
            ok_p_at_r = ClassifierEvalBinary.compute_p_at_r(
                y_true, ok_y_score, 1)

            ng_y_true = np.array(y_true).astype("bool")
            ng_y_true = (1 - ng_y_true).astype(int)
            ng_y_score = y_score[:, 0]
            ng_p_at_r = ClassifierEvalBinary.compute_p_at_r(
                ng_y_true, ng_y_score, 1)

            print(
                f'Acc: {val_accuracy:.2f}\t OK_AP:{ok_val_ap:.2f}\t NG_AP: {ng_val_ap:.2f}\t mAP: {mAP:.2f}'
            )
            print(
                f'BEST Acc: {best_acc:.2f}\t OK_AP: {best_ok_ap:.2f}\t NG_AP: {best_ng_ap:.2f}\t mAP: {best_ap:.2f}'
            )
            print(confusion_matrix)
            print(mulit_class_ap)
            print(f'ok_p_at_r: {ok_p_at_r}, ng_p_at_r: {ng_p_at_r}')
            print(final_metric_dict)

            save_path = f'./checkpoints/{config.model["name"]}'
            save_name = f'{epoch}_acc_{val_accuracy:.4f}_p@r_{ng_p_at_r}_FOR_{final_metric_dict["FOR"]:.4F}.pth'
            save_names.append(save_name)
            if not os.path.exists(save_path):
                os.makedirs(save_path)
            torch.save(model, f'{save_path}/{save_name}')

            if final_metric_dict['FOR'] > best_FOR:
                best_FOR = final_metric_dict['FOR']
                best_FOR_epoch.append(epoch)

            if val_accuracy > best_acc:
                best_acc = val_accuracy
                best_acc_epoch.append(epoch)

            if mAP > best_ap:
                best_ap = mAP
                best_ap_epoch.append(epoch)

            best_ok_ap = max(ok_val_ap, best_ok_ap)
            best_ng_ap = max(ng_val_ap, best_ng_ap)
        else:
            mulit_class_ap = ClassifierEvalMulticlass.compute_ap(
                y_true, y_score)
            confusion_matrix = metrics.confusion_matrix(
                y_true, np.argmax(y_score, 1))

            save_path = f'./checkpoints/{config.model["name"]}'
            epoch_index = epoch + 1
            save_name = f'{epoch_index:03d}_acc_{val_accuracy:.4f}.pth'
            save_names.append(save_name)
            if not os.path.exists(save_path):
                os.makedirs(save_path)
            torch.save(model, f'{save_path}/{save_name}')

            print(val_accuracy)
            print(mulit_class_ap)
            print(confusion_matrix)

    if log_flag:
        cur_time = time.strftime('%m%d_%H_%M')
        log_file_name = f"{config.model['name']}_{cur_time}.txt"
        utils.write_log(log_file_name, best_FOR_epoch, best_acc_epoch,
                        best_ap_epoch, save_names)
Example #20
                                 align_corners=True)
            pred_post_init = generator.forward(images)

            sal_loss = structure_loss(pred_post_init, gts)

            sal_loss.backward()
            generator_optimizer.step()

            visualize_prediction_init(torch.sigmoid(pred_post_init))
            visualize_gt(gts)

            if rate == 1:
                loss_record.update(sal_loss.data, opt.batchsize)

        if i % 10 == 0 or i == total_step:
            print(
                '{} Epoch [{:03d}/{:03d}], Step [{:04d}/{:04d}], Gen Loss: {:.4f}'
                .format(datetime.now(), epoch, opt.epoch, i, total_step,
                        loss_record.show()))

    adjust_lr(generator_optimizer, opt.lr_gen, epoch, opt.decay_rate,
              opt.decay_epoch)

    save_path = 'models/Resnet/'

    if not os.path.exists(save_path):
        os.makedirs(save_path)
    if epoch % opt.epoch == 0:
        torch.save(generator.state_dict(),
                   save_path + 'Model' + '_%d' % epoch + '_gen.pth')
Example #21
def train():
    args = parse_args()

    ckpt_path = "models_chunk_twin_context"
    os.system("mkdir -p {}".format(ckpt_path))
    # initialize the logger before it is used to report the parsed arguments
    logger = init_logging("chunk_model", "{}/train.log".format(ckpt_path))

    args_msg = [
        '  %s: %s' % (name, value) for (name, value) in vars(args).items()
    ]
    logger.info('args:\n' + '\n'.join(args_msg))

    csv_file = open(args.csv_file, 'w', newline='')
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(header)

    batch_size = args.batch_size
    device = torch.device("cuda:0")

    reg_weight = args.reg_weight

    ctc_crf_base.init_env(args.den_lm_fst_path, gpus)

    model = CAT_Chunk_Model(args.feature_size, args.hdim, args.output_unit,
                            args.dropout, args.lamb, reg_weight)

    lr = args.origin_lr
    optimizer = optim.Adam(model.parameters(), lr=lr)
    epoch = 0
    prev_cv_loss = np.inf
    if args.checkpoint:
        checkpoint = torch.load(args.checkpoint)
        epoch = checkpoint['epoch']
        lr = checkpoint['lr']
        prev_cv_loss = checkpoint['cv_loss']
        model.load_state_dict(checkpoint['model'])

    model.cuda()
    model = nn.DataParallel(model)
    model.to(device)

    reg_model = CAT_RegModel(args.feature_size, args.hdim, args.output_unit,
                             args.dropout, args.lamb)

    loaded_reg_model = torch.load(args.regmodel_checkpoint)
    reg_model.load_state_dict(loaded_reg_model)

    reg_model.cuda()
    reg_model = nn.DataParallel(reg_model)
    reg_model.to(device)

    prev_epoch_time = timeit.default_timer()

    model.train()
    reg_model.eval()
    while True:
        # training stage
        epoch += 1
        gc.collect()

        if epoch > 2:
            cate_list = list(range(1, args.cate, 1))
            random.shuffle(cate_list)
        else:
            cate_list = range(1, args.cate, 1)

        for cate in cate_list:
            pkl_path = args.tr_data_path + "/" + str(cate) + ".pkl"
            if not os.path.exists(pkl_path):
                continue
            tr_dataset = SpeechDatasetMemPickel(pkl_path)

            jitter = random.randint(-args.jitter_range, args.jitter_range)
            chunk_size = args.default_chunk_size + jitter

            tr_dataloader = DataLoader(tr_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=0,
                                       collate_fn=PadCollateChunk(chunk_size))

            train_chunk_model(model, reg_model, tr_dataloader, optimizer,
                              epoch, chunk_size, TARGET_GPUS, args, logger)

        # cv stage
        model.eval()
        cv_losses_sum = []
        cv_cls_losses_sum = []
        count = 0
        cate_list = range(1, args.cate, 1)
        for cate in cate_list:
            pkl_path = args.dev_data_path + "/" + str(cate) + ".pkl"
            if not os.path.exists(pkl_path):
                continue
            cv_dataset = SpeechDatasetMemPickel(pkl_path)
            cv_dataloader = DataLoader(cv_dataset,
                                       batch_size=batch_size,
                                       shuffle=False,
                                       num_workers=0,
                                       collate_fn=PadCollateChunk(
                                           args.default_chunk_size))
            validate_count = validate_chunk_model(model, reg_model,
                                                  cv_dataloader, epoch,
                                                  cv_losses_sum,
                                                  cv_cls_losses_sum, args,
                                                  logger)
            count += validate_count
        cv_loss = np.sum(np.asarray(cv_losses_sum)) / count
        cv_cls_loss = np.sum(np.asarray(cv_cls_losses_sum)) / count
        # save model
        save_ckpt(
            {
                'cv_loss': cv_loss,
                'model': model.module.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr': lr,
                'epoch': epoch
            }, epoch < args.min_epoch or cv_loss <= prev_cv_loss, ckpt_path,
            "model.epoch.{}".format(epoch))

        csv_row = [
            epoch, (timeit.default_timer() - prev_epoch_time) / 60, lr, cv_loss
        ]
        prev_epoch_time = timeit.default_timer()
        csv_writer.writerow(csv_row)
        csv_file.flush()
        plot_train_figure(args.csv_file, args.figure_file)

        if epoch < args.min_epoch or cv_loss <= prev_cv_loss:
            prev_cv_loss = cv_loss

        lr = adjust_lr(optimizer, args.origin_lr, lr, cv_loss, prev_cv_loss,
                       epoch, args.min_epoch)
        if (lr < args.stop_lr):
            print("rank {} lr is too slow, finish training".format(args.rank),
                  datetime.datetime.now(),
                  flush=True)
            break
        model.train()

    ctc_crf_base.release_env(gpus)
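
Example #21 drives the learning rate from the validation loss via adjust_lr(optimizer, args.origin_lr, lr, cv_loss, prev_cv_loss, epoch, args.min_epoch), which is not shown. A hedged sketch of that kind of rule, assuming the original rate is kept during the warm-up epochs and the rate is halved whenever the CV loss stops improving (the halving factor is an assumption):

def adjust_lr(optimizer, origin_lr, lr, cv_loss, prev_cv_loss, epoch, min_epoch):
    # Hypothetical annealing rule matching the call above.
    if epoch < min_epoch:
        new_lr = origin_lr
    elif cv_loss <= prev_cv_loss:
        new_lr = lr
    else:
        new_lr = lr / 2
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr
    return new_lr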
Example #22
def main():
    opt = get_opt()
    tb_logger.configure(opt.logger_name, flush_secs=5, opt=opt)
    logfname = os.path.join(opt.logger_name, 'log.txt')
    logging.basicConfig(filename=logfname,
                        format='%(asctime)s %(message)s',
                        level=logging.INFO)
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))

    logging.info(str(opt.d))

    torch.manual_seed(opt.seed)
    if opt.cuda:
        # TODO: remove deterministic
        torch.backends.cudnn.deterministic = True
        torch.cuda.manual_seed(opt.seed)
        np.random.seed(opt.seed)
    # helps with wide-resnet by reducing memory and time 2x
    cudnn.benchmark = True

    train_loader, test_loader, train_test_loader = get_loaders(opt)

    if opt.epoch_iters == 0:
        opt.epoch_iters = int(
            np.ceil(1. * len(train_loader.dataset) / opt.batch_size))
    opt.maxiter = opt.epoch_iters * opt.epochs
    if opt.g_epoch:
        opt.gvar_start *= opt.epoch_iters
        opt.g_optim_start = (opt.g_optim_start * opt.epoch_iters) + 1

    model = models.init_model(opt)

    optimizer = OptimizerFactory(model, train_loader, tb_logger, opt)
    epoch = 0
    save_checkpoint = utils.SaveCheckpoint()

    # optionally resume from a checkpoint
    if not opt.noresume:
        model_path = os.path.join(opt.logger_name, opt.ckpt_name)
        if os.path.isfile(model_path):
            print("=> loading checkpoint '{}'".format(model_path))
            checkpoint = torch.load(model_path)
            best_prec1 = checkpoint['best_prec1']
            optimizer.gvar.load_state_dict(checkpoint['gvar'])
            optimizer.niters = checkpoint['niters']
            epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['model'])
            save_checkpoint.best_prec1 = best_prec1
            print("=> loaded checkpoint '{}' (epoch {}, best_prec {})".format(
                model_path, epoch, best_prec1))
        else:
            print("=> no checkpoint found at '{}'".format(model_path))

    if opt.niters > 0:
        max_iters = opt.niters
    else:
        max_iters = opt.epochs * opt.epoch_iters

    if opt.untrain_steps > 0:
        untrain(model, optimizer.gvar, opt)

    while optimizer.niters < max_iters:
        optimizer.epoch = epoch
        utils.adjust_lr(optimizer, opt)
        ecode = train(tb_logger, epoch, train_loader, model, optimizer, opt,
                      test_loader, save_checkpoint, train_test_loader)
        if ecode == -1:
            break
        epoch += 1
    tb_logger.save_log()
Example #23
def cross_train():

    #Basic parameters
    gpus = FLAG.gpus
    batch_size = FLAG.batch_size
    epoches = FLAG.epoch
    init_lr = FLAG.lr
    LOG_INTERVAL = 10
    TEST_INTERVAL = 2
    source_name = FLAG.source
    target_name = FLAG.target
    model_name = FLAG.arch
    adapt_mode = FLAG.adapt_mode
    l2_decay = 5e-4

    #Loading dataset
    if FLAG.isLT:
        source_train,target_train,target_test,classes = cross_dataset_LT(FLAG)
    else:
        source_train,target_train,target_test,classes = my_cross_dataset(FLAG)
    source_train_loader = torch.utils.data.DataLoader(dataset=source_train,batch_size=batch_size,
                    shuffle=True,num_workers=8,drop_last=True)
    target_train_loader = torch.utils.data.DataLoader(dataset=target_train,batch_size=batch_size,
                    shuffle=True,num_workers=8,drop_last=True)
    target_test_loader = torch.utils.data.DataLoader(dataset=target_test,batch_size=batch_size,
                    shuffle=False,num_workers=8)
    #Define model
    if adapt_mode == 'ddc':
        cross_model = models.DDCNet(FLAG)
        
        #adapt_loss_function = mmd_linear
        adapt_loss_function = mmd_rbf_noaccelerate
        #print(model)

    elif adapt_mode == 'coral':
        cross_model = models.DeepCoral(FLAG)
        adapt_loss_function = CORAL

    elif adapt_mode == 'mmd':
        cross_model = models.DDCNet(FLAG)
        adapt_loss_function = mmd_linear

    else:
        print('The adaptive model name is wrong !')
    
    if len(gpus)>1:
        gpus = gpus.split(',')
        gpus = [int(v) for v in gpus]
        cross_model = nn.DataParallel(cross_model,device_ids=gpus)

    cross_model.to(DEVICE)
    #Define Optimizer
    if len(gpus)>1:
        optimizer = optim.SGD([{'params':cross_model.module.sharedNet.parameters()},
                            {'params':cross_model.module.cls_fc.parameters(),'lr':init_lr}],
                            lr=init_lr/10,momentum=0.9,weight_decay=l2_decay)

    else:
        optimizer = optim.SGD([{'params':cross_model.sharedNet.parameters()},
                            {'params':cross_model.cls_fc.parameters(),'lr':init_lr}],
                            lr=init_lr/10,momentum=0.9,weight_decay=l2_decay)
    #print(optimizer.param_groups)
    #loss function
    criterion = torch.nn.CrossEntropyLoss()
    #Training
    
    best_result = 0.0
    #Model store
    model_dir = os.path.join('./cross_models/',adapt_mode+'-'+source_name+'2'+target_name+'-'+model_name)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    #Tensorboard configuration
    log_dir = os.path.join('./cross_logs/',adapt_mode+'-'+source_name+'2'+target_name+'-'+model_name)
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    writer = SummaryWriter(logdir=log_dir)

    for epoch in range(1,epoches+1):
        cross_model.train()
        len_source_loader= len(source_train_loader)
        len_target_loader = len(target_train_loader)
        iter_source = iter(source_train_loader)
        iter_target = iter(target_train_loader)

        if len_target_loader <= len_source_loader:
            iter_num = len_target_loader
            which_dataset = True
        else:
            iter_num = len_source_loader
            which_dataset = False
        #Adaptive learning rate
        optimizer = adjust_lr(optimizer,epoch,FLAG)
        writer.add_scalar('data/SharedNet lr',optimizer.param_groups[0]['lr'],epoch)
        running_loss = 0.0
        for i in range(1,iter_num+1):

            if which_dataset:
                target_data,_ = next(iter_target)
                if i % len_target_loader == 0:
                    iter_source = iter(source_train_loader)
                source_data,source_label = next(iter_source)
            else:
                source_data,source_label = next(iter_source)
                if i % len_source_loader == 0:
                    iter_target = iter(target_train_loader)
                target_data,_ = next(iter_target)

            input_source_data,input_source_label = source_data.to(DEVICE),source_label.to(DEVICE).squeeze()
            input_target_data = target_data.to(DEVICE)

            optimizer.zero_grad()


            label_source_pred,source_output,target_output = cross_model(input_source_data, input_target_data)
            loss_adapt = adapt_loss_function(source_output,target_output)
            loss_cls = criterion(label_source_pred,input_source_label)
            lambda_1 = 2 / (1 + math.exp(-10 * (epoch) / epoches)) - 1
            loss = loss_cls + lambda_1 * loss_adapt
            
            
            if i % 5 == 0:
                n_iter = (epoch-1)*len_target_loader+i
                writer.add_scalar('data/adapt loss',loss_adapt,n_iter)
                writer.add_scalar('data/cls loss',loss_cls,n_iter)
                writer.add_scalar('data/total loss',loss,n_iter)
                #print(optimizer.param_groups[0]['lr'])

            loss.backward()
            optimizer.step()

            #Print statistics
            running_loss += loss.item()
            if i % LOG_INTERVAL == 0:  # print every LOG_INTERVAL mini-batches
                print('Epoch:[{}/{}],Batch:[{}/{}] loss: {}'.format(epoch,epoches,i,len_target_loader,running_loss/LOG_INTERVAL))
                running_loss = 0

        if epoch % TEST_INTERVAL == 0:  # every 2 epochs
            
            acc_test,class_corr,class_total=cross_test(cross_model,target_test_loader,epoch)
            #log test acc
            writer.add_scalar('data/test accuracy',acc_test,epoch)
            #Store the best model
            if acc_test>best_result:
                model_path = os.path.join(model_dir,
                            '{}-{}-{}-epoch_{}-accval_{}.pth'.format(source_name,target_name,model_name,epoch,round(acc_test,3)))
                torch.save(cross_model,model_path)
                #log results for classes
                log_path = os.path.join(model_dir,
                            '{}-{}-{}-epoch_{}-accval_{}.csv'.format(source_name,target_name,model_name,epoch,round(acc_test,3)))
                log_to_csv(log_path,classes,class_corr,class_total)
                best_result = acc_test
            else:
                print('This epoch did not improve on the best result.')

    writer.close()
Example #24
def main():
    # data
    div1, div2 = 800, 900
    batch_size = 20
    num_workers = 0
    data_path = 'data/class1_data.pkl'
    # train
    num_epoch = 100
    lr = 1e-3
    lr_step = 50  #
    momentum = 0.9
    weight_decay = 1e-3
    # model
    hidden_dim = 32  # make it a number smaller than feature_dim
    model_check = 'model/checkpoint.pth.tar'
    model_best = 'model/bestmodel.pth.tar'
    # result
    print_freq = 20
    loss_best = 1e5

    # --------------------------------------------------------------------------
    # prepare dataset
    data, (len_seq, num_frame, num_joint, num_coor) = make_dataset(data_path)
    # data[:div] for trainning, data[800:900] for validation
    # and the rest for testing
    train_set = data[:div1]
    val_set = data[div1:div2]
    # train_loader shuffle
    train_loader = DataLoader(dataset=train_set,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=num_workers)
    val_loader = DataLoader(dataset=val_set,
                            batch_size=1,
                            shuffle=False,
                            num_workers=num_workers)

    # --------------------------------------------------------------------------
    # model settings
    feature_dim = num_joint * num_coor  # 16 * 3 = 48
    model = LSTMpred(feature_dim, hidden_dim)
    print(model)

    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(),
                          lr=lr,
                          momentum=momentum,
                          weight_decay=weight_decay)

    # --------------------------------------------------------------------------
    # run

    for epoch in range(num_epoch):
        adjust_lr(lr, optimizer, epoch, lr_step)

        print('Epoch: {0}/{1} [training stage]'.format(epoch, num_epoch))
        train(train_loader, model, criterion, optimizer, print_freq)

        print('Epoch: {0}/{1} [validation stage]'.format(epoch, num_epoch))
        loss = val(val_loader, model, criterion, print_freq)

        is_best = loss < loss_best
        loss_best = min(loss_best, loss)
        save_checkpoint(
            {
                'epoch': epoch,
                'arch': 'LSTMpred',
                'state_dict': model.state_dict(),
                'loss_best': loss_best,
                'optimizer': optimizer.state_dict(),
            }, is_best, model_check, model_best)
Example #25
        loss1 = CE(atts, gts)
        loss2 = CE(dets, gts)
        loss = loss1 + loss2
        loss.backward()

        clip_gradient(optimizer, opt.clip)
        optimizer.step()

        if i % 400 == 0 or i == total_step:
            print(
                '{} Epoch [{:03d}/{:03d}], Step [{:04d}/{:04d}], Loss1: {:.4f} Loss2: {:0.4f}'
                .format(datetime.now(), epoch, opt.epoch, i, total_step,
                        loss1.data, loss2.data))

    if opt.is_ResNet:
        save_path = 'models/CPD_Resnet/'
    else:
        save_path = 'models/CPD_VGG/'

    if not os.path.exists(save_path):
        os.makedirs(save_path)
    if (epoch + 1) % 5 == 0:
        torch.save(model.state_dict(),
                   save_path + opt.trainset + '_w.pth' + '.%d' % epoch)


print("Let's go!")
for epoch in range(1, opt.epoch):
    adjust_lr(optimizer, opt.lr, epoch, opt.decay_rate, opt.decay_epoch)
    train(train_loader, model, optimizer, epoch)
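
Examples #20 and #25 share the signature adjust_lr(optimizer, lr, epoch, decay_rate, decay_epoch), which is not shown here. A minimal sketch consistent with that usage, assuming the base rate decays by decay_rate once per decay_epoch interval (the exact rule is an assumption):

def adjust_lr(optimizer, init_lr, epoch, decay_rate=0.1, decay_epoch=30):
    # Hypothetical interval decay: shrink the base lr by decay_rate once for
    # every full decay_epoch interval that has elapsed.
    decay = decay_rate ** (epoch // decay_epoch)
    for param_group in optimizer.param_groups:
        param_group['lr'] = init_lr * decay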
Example #26
if opt.cuda:
    train_loader = get_loader(image_root, gt_root, batchsize=opt.batchsize, num_workers=3, pin_memory=True)
    model.cuda()
else:
    train_loader = get_loader(image_root, gt_root, batchsize=opt.batchsize, num_workers=3, pin_memory=False)

total_step = len(train_loader)

params = model.parameters()
optimizer = torch.optim.Adam(params, lr=opt.lr)
crit = torch.nn.BCEWithLogitsLoss()

print("Let's go!")
for epoch in range(1, opt.epoch + 1):
    model.train()
    adjust_lr(optimizer, opt.lr, epoch)
    for i, pack in enumerate(train_loader, start=1):
        optimizer.zero_grad()
        # Load data
        images, gts = pack
        images = Variable(images)
        gts = Variable(gts)
        if opt.cuda:
            images = images.cuda()
            gts = gts.cuda()
        # Forward
        res = model(images)
        # Merge losses
        loss = crit(res, gts)
        # Backward and update
        loss.backward()