def forward(self, predictions, targets):
        """Compute the SSD multibox localization and confidence losses.

        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4)

            targets (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).

        Returns:
            tuple: ``(loss_l, loss_c)``. ``loss_l`` is the summed Smooth-L1
            loss over positive priors and ``loss_c`` the summed confidence
            loss over positive and mined negative priors; both are divided
            by the number of positive priors in the batch, clamped to at
            least 1 so a batch without any positive match yields 0 instead
            of NaN.
        """
        loc_data, conf_data, priors = predictions
        num = loc_data.size(0)
        priors = priors[:loc_data.size(1), :]
        num_priors = priors.size(0)
        num_classes = self.num_classes

        # Match priors (default boxes) and ground truth boxes.
        # Both buffers are allocated uninitialised; nothing in this method
        # writes them except match(), which receives them with the batch
        # index -- presumably it fills row `idx` in place (confirm in match).
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            target = targets[idx]
            truths = target[:, :-1].data
            labels = target[:, -1].data
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)

        if self.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()

        # Priors whose matched label is non-zero are treated as positives.
        pos = conf_t > 0

        # Localization Loss (Smooth L1), positives only.
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Per-prior confidence loss, used only to rank candidates for hard
        # negative mining (it is recomputed on the selected priors below).
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

        # Hard Negative Mining: zero the positives so only negatives compete.
        loss_c = loss_c.view(num, -1)
        loss_c[pos] = 0

        # Sorting the indices of a descending sort yields, for every prior,
        # its rank within its image (0 = largest loss).
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1, self.num_classes)
        targets_weighted = conf_t[(pos+neg).gt(0)]

        if USE_FL:
            # Per-class weights as a (num_classes, 1) column: 0.25 for
            # class 0 and 0.75 for every other class.  Identical to the
            # previous hard-coded 21-row table when num_classes == 21.
            alpha = torch.Tensor([[0.25]] + [[0.75]] * (num_classes - 1))
            compute_c_loss = focal_loss.FocalLoss(alpha=alpha, gamma=2, class_num=num_classes, size_average=False)
            loss_c = compute_c_loss(conf_p, targets_weighted)
        else:
            loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        # N is clamped to >= 1: with no positive prior both sums are 0 and
        # dividing by 0 would return NaN losses.
        N = torch.clamp(num_pos.sum(), min=1)

        loss_l /= N
        loss_c /= N

        return loss_l, loss_c
# Example no. 2
# 0
    def forward(self, predictions, targets):
        """Compute the SSD multibox loss with a focal confidence term.

        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
                conf shape: torch.size(batch_size, num_priors, num_classes)
                loc shape: torch.size(batch_size, num_priors, 4)
                priors shape: torch.size(num_priors, 4)

            targets (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).

        Returns:
            tuple: ``(loss_l, loss_c)`` converted to double. ``loss_l`` is
            the summed Smooth-L1 loss over positive priors, ``loss_c`` the
            focal loss over positive and mined negative priors; both are
            divided by the number of positive priors in the batch, clamped
            to at least 1 so a batch without positives yields 0, not NaN.
        """
        # Predicted box offsets, predicted class scores, and the prior boxes.
        loc_data, conf_data, priors = predictions
        num = loc_data.size(0)  # batch size
        priors = priors[:loc_data.size(1), :]
        num_priors = priors.size(0)
        num_classes = self.num_classes

        # Match priors (default boxes) and ground truth boxes.
        # Both buffers are allocated uninitialised; nothing in this method
        # writes them except match(), which receives them with the batch
        # index -- presumably it stores the encoded offsets in loc_t[idx]
        # and the per-prior class label in conf_t[idx] (confirm in match).
        loc_t = torch.Tensor(num, num_priors, 4)    # [batch, num_priors, 4]
        conf_t = torch.LongTensor(num, num_priors)  # [batch, num_priors]
        for idx in range(num):
            # Every target row is 4 box values followed by the label.
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            defaults = priors.data  # the same priors are used for every image
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)

        if self.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()

        # Label 0 is treated as background; everything else is a positive.
        pos = conf_t > 0  # [batch, num_priors]

        # Localization Loss (Smooth L1), positives only.
        # Broadcast the positive mask over the 4 box coordinates:
        # [batch, num_priors] -> [batch, num_priors, 4].
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)  # predicted offsets
        loc_t = loc_t[pos_idx].view(-1, 4)     # encoded ground-truth offsets
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Hard negative mining.
        # Per-prior confidence loss, used only to rank the negatives;
        # log_sum_exp is defined elsewhere (presumably a log-sum-exp over
        # the class dimension, which makes this the per-prior cross-entropy).
        batch_conf = conf_data.view(-1, self.num_classes)  # [batch*num_priors, num_classes]
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

        loss_c = loss_c.view(num, -1)  # [batch, num_priors]
        loss_c[pos] = 0  # positives must not compete for negative slots

        # Sorting the indices of a descending sort yields, for every prior,
        # its rank within its image (0 = largest loss).
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)

        # Keep negpos_ratio negatives per positive in each image, capped at
        # num_priors - 1.  num_pos and num_neg are both [batch, 1].
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
        neg = idx_rank < num_neg.expand_as(idx_rank)  # [batch, num_priors]

        # Confidence loss over the selected positives and negatives.
        # Broadcast both masks to [batch, num_priors, num_classes].
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1, self.num_classes)
        targets_weighted = conf_t[(pos+neg).gt(0)]  # labels of the kept priors

        # Focal loss replaces the summed cross-entropy
        # F.cross_entropy(conf_p, targets_weighted, reduction='sum').
        compute_c_loss = focal_loss.FocalLoss(alpha=None, gamma=2, class_num=num_classes, size_average=False)
        loss_c = compute_c_loss(conf_p, targets_weighted)

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
        # N is the number of positive priors in the whole batch, clamped to
        # >= 1: with no positive prior dividing by 0 would return NaN.
        N = torch.clamp(num_pos.data.sum(), min=1)
        N = N.double()
        loss_l = loss_l.double()
        loss_c = loss_c.double()
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
def main(args):
    """Build the data pipeline, model and optimizer, train, and save weights.

    Args:
        args: Parsed options. Reads ``batch_size``, ``train_path``,
            ``val_path``, ``scale``, ``num_workers``, ``model_dir``,
            ``class_num``, ``epoch`` and ``save_dir``. ``save_dir`` is
            overwritten with a timestamp-suffixed value before ``args`` is
            handed to ``train_model``.

    Side effects:
        Creates ``./output/<save_dir><timestamp>/`` if it does not exist and
        writes ``best-train-model.pth`` into it.
    """
    train_path = args.train_path
    val_path = args.val_path
    data_dir = ''
    crop_size = int(args.scale * 0.875)
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])

    # NOTE: Scale / RandomSizedCrop are the older torchvision names of
    # Resize / RandomResizedCrop; kept so the installed version keeps working.
    data_transforms = {
        'train':
        transforms.Compose([
            transforms.Scale(args.scale),
            transforms.RandomSizedCrop(crop_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]),
        'val':
        transforms.Compose([
            transforms.Scale(args.scale),
            transforms.CenterCrop(crop_size),
            transforms.ToTensor(),
            normalize,
        ]),
    }

    dets = {
        'train': caffe_dataset.ImgListLoader(data_dir, train_path, " ",
                                             data_transforms['train']),
        'val': caffe_dataset.ImgListLoader(data_dir, val_path, " ",
                                           data_transforms['val']),
    }

    dset_loaders = {
        'train': torch.utils.data.DataLoader(
            dets['train'],
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.num_workers,
            pin_memory=True),
        'val': torch.utils.data.DataLoader(
            dets['val'],
            batch_size=8,
            shuffle=True,
            num_workers=args.num_workers,
            pin_memory=True),
    }

    # Dataset sizes are the number of lines in each image-list file.  The
    # files are closed as soon as they have been counted.
    dset_sizes = {}
    for phase, list_path in (('train', train_path), ('val', val_path)):
        with open(list_path) as img_list:
            dset_sizes[phase] = sum(1 for _ in img_list)

    use_gpu = torch.cuda.is_available()

    model_ft = resnext_50_multi_conv_baseline_new.resnext50_fg_car(
        pretrained=True,
        cropsize=crop_size,
        model_dir=args.model_dir,
        class_num=args.class_num)

    # The backbone uses the default learning rate (1e-5); the classifier
    # and embedding heads use 1e-3.
    optimizer_ft = optim.Adam(
        [{
            'params': model_ft.resnext_car_multitask.parameters()
        }, {
            'params': model_ft.classifier.parameters(),
            'lr': 1e-3
        }, {
            'params': model_ft.embedding.parameters(),
            'lr': 1e-3
        }],
        lr=1e-5)

    if use_gpu:
        model_ft = model_ft.cuda()
        model_to_train = nn.DataParallel(model_ft, device_ids=[0, 1, 2, 3])
    else:
        model_to_train = model_ft

    criterion = nn.CrossEntropyLoss(ignore_index=-1)
    criterion2 = nn.BCEWithLogitsLoss()

    ######################################################################
    # Train and evaluate
    # ^^^^^^^^^^^^^^^^^^

    subdir = datetime.now().strftime('%Y%m%d-%H%M%S')
    args.save_dir = args.save_dir + subdir
    output_dir = "./output/" + args.save_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    model_best = train_model(model_to_train, criterion, criterion2,
                             optimizer_ft, exp_lr_scheduler, dset_loaders,
                             args.epoch, dset_sizes, args)

    # A DataParallel wrapper keeps the real network in `.module`; on the
    # CPU path the model is returned unwrapped, so fall back to it directly.
    unwrapped = getattr(model_best, 'module', model_best)
    torch.save(unwrapped.state_dict(), output_dir + "/best-train-model.pth")
# Example no. 4
# 0
def _parse_pos_weight(spec):
    """Return the positive-class weight described by ``spec``.

    ``spec`` is either a number (or numeric string), or the path of a text
    file with comma-separated lines in which a line whose first field is
    ``all`` carries the weight in its second field.  If several such lines
    exist, the last one wins.

    Raises:
        ValueError: if ``spec`` is neither numeric nor a readable file that
            contains a valid ``all,<weight>`` line.
    """
    try:
        return float(spec)
    except (TypeError, ValueError):
        pass  # not a number: fall through and treat it as a file path

    weight = None
    try:
        with open(spec, 'r') as fp:
            for line in fp:
                fields = line.strip().split(',')
                if fields[0] == 'all':
                    weight = float(fields[1])
    except (OSError, TypeError, ValueError, IndexError) as err:
        raise ValueError(
            'cannot interpret pos_w value: {}'.format(spec)) from err
    if weight is None:
        raise ValueError('no "all" entry in pos_w file: {}'.format(spec))
    return weight


def main(opts):
    """Training script: build loaders, network, optimizer and loss, then train.

    Args:
        opts: Parsed options. Reads ``use_cuda``, ``seed``, ``lr_decay``,
            ``data_dir``, ``batch_size``, ``arch``, ``rnn_size``, ``max_w``,
            ``load``, ``model_weights``, ``init_lr``, ``left_pad``,
            ``focal_gamma``, ``pos_w`` and ``max_epochs``.

    Raises:
        ValueError: if ``opts.arch`` is neither ``'baseline'`` nor ``'cnn'``.
        SystemExit: with status 1 if ``opts.pos_w`` cannot be interpreted.
    """
    cuda_dev = int(opts.use_cuda) if opts.use_cuda is not None else None
    torch.manual_seed(opts.seed)
    lr_decay = float(opts.lr_decay)
    start_epoch = None

    # initialize data loaders
    phases = ("train", "val")
    datasets = {
        p: amt_dataset.AMT_Dataset(os.getcwd() + "/" + opts.data_dir,
                                   "labels.csv", p)
        for p in phases
    }
    # The collate function transposes a batch of samples into one list per
    # sample field instead of stacking tensors.
    dataloaders = {
        p: DataLoader(datasets[p],
                      batch_size=opts.batch_size,
                      shuffle=True,
                      num_workers=2,
                      collate_fn=lambda b: list(list(l) for l in zip(*b)))
        for p in phases
    }

    # NOTE: len() of a DataLoader is its number of batches, not samples.
    print('\ndataset sizes:\t{}\t{}\n'.format(
        *[(p, len(d)) for (p, d) in dataloaders.items()]))

    if opts.arch == 'baseline':
        import baseline
        net = baseline.LanguageModeler(rnn_size=opts.rnn_size, rnn_layers=1)
    elif opts.arch == 'cnn':
        import amt_cnn as cnn
        net = cnn.AMT_CNN(use_cuda=opts.use_cuda, max_w=opts.max_w)
    else:
        # Previously `net` stayed unbound and the failure surfaced later as
        # an unrelated NameError.
        raise ValueError('unsupported architecture: {!r}'.format(opts.arch))

    if opts.load:
        # Load on CPU first; the network is moved to the GPU below.
        saved_state = torch.load(opts.load, map_location='cpu')
        net.load_state_dict(saved_state)
        # The epoch to resume from is read from the text after the last
        # 'epoch' in the checkpoint path.
        epoch_string = opts.load.split('epoch')[-1]
        start_epoch = extract_first_number(epoch_string)

    if cuda_dev is not None:
        net = net.cuda(cuda_dev)

    os.makedirs(opts.model_weights, exist_ok=True)
    sys.stdout.flush()

    optimizer = torch.optim.SGD(net.parameters(),
                                float(opts.init_lr),
                                momentum=0.9)

    left_pad = opts.left_pad
    fl_gamma = opts.focal_gamma

    if opts.pos_w is None:
        print('Focal loss, gamma={}'.format(fl_gamma), file=sys.stderr)
        loss_function = focal_loss.FocalLoss(gamma=fl_gamma)
    else:
        try:
            npr = _parse_pos_weight(opts.pos_w)
        except ValueError:
            print('error: cannot interpret --pos_w value: {}'.format(
                opts.pos_w),
                  file=sys.stderr)
            sys.exit(1)

        # A weight of 0 means "no positive weighting"; otherwise the same
        # weight is applied to all 88 outputs.
        pw = None if npr == 0. else torch.ones(88) * npr

        print('BCE loss, positive weight:', file=sys.stderr)
        print(pw, file=sys.stderr)

        if cuda_dev is not None and pw is not None:
            pw = pw.cuda(cuda_dev)

        loss_function = nn.BCEWithLogitsLoss(pos_weight=pw)

    sys.stderr.flush()

    train(net,
          dataloaders,
          optimizer,
          loss_function,
          start_epoch=start_epoch,
          num_epochs=opts.max_epochs,
          model_dir=opts.model_weights,
          cuda_dev=cuda_dev,
          max_w=opts.max_w,
          left_pad=left_pad,
          lr_decay=lr_decay)
# Example no. 5
# 0
                                            num_classes=4)
    modelEb3 = EfficientNet.from_pretrained('efficientnet-b3',
                                            in_channels=3,
                                            num_classes=4)
    modelRes18 = models.resnet18(pretrained=True)
    num_ftrs = modelRes18.fc.in_features
    modelRes18.fc = nn.Linear(num_ftrs, 4)  # the last fc layer

    modelEb4.to(device)
    modelEb3.to(device)
    modelRes18.to(device)

    model = ensemble.Ensemble(modelEb4, modelEb3, modelRes18).to(device)
    bind_model(model, device)

    criterion = focal_loss.FocalLoss(device).to(device)

    optimizerEb4 = torch.optim.Adam(modelEb4.parameters(), lr=learning_rate)
    optimizerEb3 = torch.optim.Adam(modelEb3.parameters(), lr=learning_rate)
    optimizerRes18 = torch.optim.Adam(modelRes18.parameters(),
                                      lr=learning_rate)
    scheduler_cosineEb4 = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizerEb4, cosine_epo)
    scheduler_cosineEb3 = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizerEb3, cosine_epo)
    scheduler_cosineRes18 = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizerRes18, cosine_epo)

    if ifpause:  ## for test mode
        print('Inferring Start ...')
        nsml.paused(scope=locals())