Example #1
0
def save_fc(fp, fc_model):
    """Append a layer's parameters to ``fp``: bias first, then weight.

    Args:
        fp: Open binary file object or path accepted by ``ndarray.tofile``.
        fc_model: Object exposing ``bias`` and ``weight`` parameters.
    """
    # The device of the bias decides how both tensors are handled.
    on_gpu = fc_model.bias.is_cuda
    for param in (fc_model.bias, fc_model.weight):
        values = param.data
        if on_gpu:
            # convert2cpu is defined elsewhere; presumably it returns a
            # host-side copy that can be turned into a numpy array.
            values = convert2cpu(values)
        values.numpy().tofile(fp)
Example #2
0
def save_conv(fp, conv_model):
    """Write ``conv_model``'s bias and then its weight to ``fp`` as raw bytes.

    Args:
        fp: Open binary file object or path accepted by ``ndarray.tofile``.
        conv_model: Object exposing ``bias`` and ``weight`` parameters.
    """
    needs_host_copy = conv_model.bias.is_cuda

    def dump(tensor):
        # Route through convert2cpu (defined elsewhere) only for CUDA layers.
        source = convert2cpu(tensor) if needs_host_copy else tensor
        source.numpy().tofile(fp)

    dump(conv_model.bias.data)
    dump(conv_model.weight.data)
def save_deform_conv(fp, conv_model):
    """Write ``conv_model.weight`` followed by ``conv_model.layer_1.weight``.

    No bias is written. The device check is made on ``conv_model.weight``.

    Args:
        fp: Open binary file object or path accepted by ``ndarray.tofile``.
        conv_model: Object exposing ``weight`` and ``layer_1.weight``.
    """
    outer_weight = conv_model.weight
    if not outer_weight.is_cuda:
        outer_weight.data.numpy().tofile(fp)
        conv_model.layer_1.weight.data.numpy().tofile(fp)
        return

    # CUDA path: go through convert2cpu (defined elsewhere) before numpy().
    convert2cpu(outer_weight.data).numpy().tofile(fp)
    convert2cpu(conv_model.layer_1.weight.data).numpy().tofile(fp)
Example #4
0
def save_conv(fp, conv_model):
    """ Save convolutional model: bias is written first, then weight.

    Args:
        fp: Open binary file object or path accepted by ``ndarray.tofile``.
        conv_model: Object exposing ``bias`` and ``weight`` parameters.
    """
    # Pick the conversion once, based on where the bias lives; CPU tensors
    # are passed through untouched.
    to_host = convert2cpu if conv_model.bias.is_cuda else (lambda tensor: tensor)
    for attr_name in ('bias', 'weight'):
        to_host(getattr(conv_model, attr_name).data).numpy().tofile(fp)
Example #5
0
def save_conv_bn(fp, conv_model, bn_model):
    """Write a conv + batch-norm pair to ``fp``.

    Order on disk: bn bias, bn weight, bn running mean, bn running variance,
    conv weight. The device check is made on ``bn_model.bias``.

    Args:
        fp: Open binary file object or path accepted by ``ndarray.tofile``.
        conv_model: Object exposing a ``weight`` parameter.
        bn_model: Object exposing ``bias``, ``weight``, ``running_mean`` and
            ``running_var``.
    """
    # (owner, attribute, take_data): parameters are read through ``.data``,
    # the running statistics are used as they are.
    layout = (
        (bn_model, 'bias', True),
        (bn_model, 'weight', True),
        (bn_model, 'running_mean', False),
        (bn_model, 'running_var', False),
        (conv_model, 'weight', True),
    )
    on_gpu = bn_model.bias.is_cuda
    for owner, attr_name, take_data in layout:
        tensor = getattr(owner, attr_name)
        if take_data:
            tensor = tensor.data
        if on_gpu:
            tensor = convert2cpu(tensor)
        tensor.numpy().tofile(fp)
Example #6
0
def save_conv_bn(fp, conv_model, bn_model):
    """ Save batch normalized convolutional model.

    Writes, in order: bn bias, bn weight, bn running mean, bn running
    variance, conv weight.

    Args:
        fp: Open binary file object or path accepted by ``ndarray.tofile``.
        conv_model: Object exposing a ``weight`` parameter.
        bn_model: Object exposing ``bias``, ``weight``, ``running_mean`` and
            ``running_var``.
    """
    def ordered_tensors():
        # Generator, so each attribute is read right before it is written.
        yield bn_model.bias.data
        yield bn_model.weight.data
        yield bn_model.running_mean
        yield bn_model.running_var
        yield conv_model.weight.data

    if bn_model.bias.is_cuda:
        for tensor in ordered_tensors():
            convert2cpu(tensor).numpy().tofile(fp)
    else:
        for tensor in ordered_tensors():
            tensor.numpy().tofile(fp)
Example #7
0
def save_conv_target_class(fp, conv_model, targetclass, numclass):
    """Write a conv layer plus an extra leading slice of its bias and weight.

    On disk the order is: full bias, ``bias[:differ]``, full weight,
    ``weight[:differ]``, where ``differ = abs(numclass - targetclass) * 3``.

    Args:
        fp: Open binary file object or path accepted by ``ndarray.tofile``.
        conv_model: Object exposing ``bias`` and ``weight`` parameters.
        targetclass: New number of classes.
        numclass: Number of classes the layer currently has.
    """
    # The original format string had no placeholder, so the class count was
    # silently dropped from the message.
    print(
        'save weight with the new target number classes: {}'.format(targetclass))

    # the way yolov3 calculate is (numclass + 5)*3, so every class added or
    # removed accounts for 3 output channels
    differ = abs(numclass - targetclass) * 3
    print('differ: ', differ)

    on_gpu = conv_model.bias.is_cuda

    def dump(tensor):
        # convert2cpu is defined elsewhere; only CUDA tensors go through it.
        if on_gpu:
            tensor = convert2cpu(tensor)
        tensor.numpy().tofile(fp)

    dump(conv_model.bias.data)
    dump(conv_model.bias.data[:differ])

    dump(conv_model.weight.data)
    dump(conv_model.weight.data[:differ])
Example #8
0
    def forward(self, x, y = None):
        """Run the network block by block, collecting losses or predictions.

        Args:
            x: Input tensor fed to the first layer.
            y: Targets passed to the detection layers in training mode; not
                used in evaluation mode.

        Returns:
            In training mode ``(self.loss, self.interParam)``, where
            ``self.loss`` is the sum of the detection-layer losses (``None``
            if there was none) and ``self.interParam`` holds one
            ``[layer_id, block_type, param]`` entry per detection layer.
            In evaluation mode, a list of ``[layer_id, block_type, pred]``
            entries.
        """
        # ``ind`` is -1 for the first block (presumably the 'net' header), so
        # the first real layer gets index 0 in ``self.models``.
        ind = -2
        self.loss = None
        self.interParam = []

        # BN scale parameters of late convolutional layers. They are gathered
        # here but not used anywhere else in this method.
        self.bn_weight_params = []

        outputs = dict()
        out_predicts = []
        for block in self.blocks:
            ind = ind + 1
            block_type = block['type']

            if block_type == 'net':
                continue
            elif block_type in ['convolutional', 'deconvolutional', 'maxpool', 'reorg', 'upsample', 'avgpool', 'softmax', 'connected']:
                x = self.models[ind](x)

                if block_type == 'convolutional' and ind >= 53:
                    for module_name, module in self.models[ind].named_children():
                        if not module_name.startswith('bn'):
                            continue
                        for param_name, param in module.named_parameters():
                            if param_name == 'weight':
                                self.bn_weight_params.append(convert2cpu(param))

                outputs[ind] = x
            elif block_type == 'route':
                # Positive entries are absolute layer indices, the rest are
                # offsets relative to the current layer.
                layers = block['layers'].split(',')
                layers = [int(i) if int(i) > 0 else int(i)+ind for i in layers]
                x = outputs[layers[0]]
                for layer in layers[1:]:
                    x = torch.cat((x, outputs[layer]), 1)
                outputs[ind] = x
            elif block_type == 'shortcut':
                from_layer = int(block['from'])
                activation = block['activation']
                from_layer = from_layer if from_layer > 0 else from_layer + ind
                x1 = outputs[from_layer]
                x2 = outputs[ind-1]
                x  = x1 + x2
                if activation == 'leaky':
                    x = F.leaky_relu(x, 0.1, inplace=True)
                elif activation == 'relu':
                    x = F.relu(x, inplace=True)
                outputs[ind] = x
            elif block_type == 'region':
                # Region blocks are skipped; the loss code that used to
                # follow this ``continue`` could never run and was removed.
                continue
            elif block_type in ['yolo', 'pose', 'pose-2d', 'pose-ind', 'pose-part', 'pose-seg', 'pose-3dr', 'pose-3drseg', 'pose-pnp']:
                layerId = ("L%03d" % int(ind))
                if self.training:
                    loss, param = self.models[ind](x, y, [self.seen])
                    # Test against None rather than truthiness: a running
                    # loss whose value is 0 must still be kept in the sum,
                    # and bool() of a multi-element tensor raises.
                    if self.loss is None:
                        self.loss = loss
                    else:
                        self.loss = self.loss + loss
                    self.interParam.append([layerId, block_type, param])
                else:
                    pred = self.models[ind](x, None)
                    out_predicts.append([layerId, block_type, pred])
            elif block_type == 'cost':
                continue
            else:
                print('unknown type %s' % (block_type))
        if self.training:
            return self.loss, self.interParam
        else:
            return out_predicts
Example #9
0
    def forward(self, output, target):
        """Compute the detection loss for one output layer.

        ``output`` is viewed as ``(nB, nA, 5 + nC, nH, nW)``: channels 0-3 are
        box coordinates, channel 4 is confidence, the remaining ``nC``
        channels are class scores. The result is the sum of a coordinate
        loss (summed squared error), a confidence loss (binary cross entropy)
        and a class loss (binary cross entropy on logits), each divided by
        the batch size.

        Args:
            output: Raw layer output; dim 1 must hold ``nA * (5 + nC)`` values.
            target: Ground truth handed to ``self.build_targets`` (defined
                elsewhere; its format is not visible here).

        Returns:
            The total loss tensor. The process exits if the loss is NaN.
        """
        anchor_info = self.get_mask_boxes(output)
        t0 = time.time()
        device = self.device
        nB = output.data.size(0)  # batch size
        nH = output.data.size(2)
        nW = output.data.size(3)
        nA = anchor_info['n'].item()  # num_anchors
        nC = self.num_classes
        flat_anchors = anchor_info['a']
        anchors = flat_anchors.view(nA, flat_anchors.size(0) // nA).to(device)
        n_cells = nB * nA * nH * nW

        output = output.view(nB, nA, (5 + nC), nH, nW)
        class_channels = torch.linspace(5, 5 + nC - 1, nC).long().to(device)
        channel_ix = torch.LongTensor(range(0, 5)).to(device)
        pred_boxes = torch.FloatTensor(4, n_cells).to(device)

        # coord becomes (4, n_cells) with rows x, y, w, h.
        coord = output.index_select(2, channel_ix[0:4])
        coord = coord.view(nB * nA, -1, nH * nW).transpose(0, 1).contiguous()
        coord = coord.view(-1, n_cells)
        coord[0:2] = coord[0:2].sigmoid()
        conf = output.index_select(2, channel_ix[4]).view(n_cells).sigmoid()

        cls = output.index_select(2, class_channels)
        cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous()
        cls = cls.view(n_cells, nC).to(device)

        t1 = time.time()
        column_ids = torch.linspace(0, nW - 1, nW)
        row_ids = torch.linspace(0, nH - 1, nH)
        grid_x = column_ids.repeat(nB * nA, nH, 1).view(n_cells).to(device)
        grid_y = row_ids.repeat(nW, 1).t().repeat(nB * nA, 1, 1)
        grid_y = grid_y.view(n_cells).to(device)
        # Tile anchors in the same (batch, anchor, cell) order as coord.
        anchor_w = anchors.index_select(1, channel_ix[0])
        anchor_w = anchor_w.repeat(nB, nH * nW).view(n_cells)
        anchor_h = anchors.index_select(1, channel_ix[1])
        anchor_h = anchor_h.repeat(nB, nH * nW).view(n_cells)

        pred_boxes[0] = coord[0] + grid_x
        pred_boxes[1] = coord[1] + grid_y
        pred_boxes[2] = coord[2].exp() * anchor_w
        pred_boxes[3] = coord[3].exp() * anchor_h
        # for build_targets. it works faster on CPU than on GPU
        boxes_by_cell = pred_boxes.transpose(0, 1).contiguous().view(-1, 4)
        pred_boxes = convert2cpu(boxes_by_cell).detach()

        t2 = time.time()
        (nGT, nRecall, nRecall75, obj_mask, noobj_mask, coord_mask,
         tcoord, tconf, tcls) = self.build_targets(
             pred_boxes, target.detach(), anchors.detach(), nA, nH, nW)

        tcls = tcls.view(n_cells, nC).to(device)

        nProposals = int((conf > 0.25).sum())

        tcoord = tcoord.view(4, n_cells).to(device)
        tconf = tconf.view(n_cells).to(device)

        # Confidence is scored on object and no-object cells alike.
        conf_mask = (obj_mask + noobj_mask).view(n_cells).to(device)
        obj_mask = obj_mask.view(n_cells).to(device)
        coord_mask = coord_mask.view(n_cells).to(device)

        t3 = time.time()
        coord_criterion = nn.MSELoss(reduction='sum')
        conf_criterion = nn.BCELoss(reduction='sum')
        cls_criterion = nn.BCEWithLogitsLoss(reduction='sum')
        loss_coord = coord_criterion(coord * coord_mask,
                                     tcoord * coord_mask) / nB
        loss_conf = conf_criterion(conf * conf_mask, tconf * conf_mask) / nB
        loss_cls = cls_criterion(cls, tcls) / nB
        loss = loss_coord + loss_conf + loss_cls

        t4 = time.time()
        if False:
            # Timing breakdown, disabled.
            print('-' * 30)
            print('        activation : %f' % (t1 - t0))
            print(' create pred_boxes : %f' % (t2 - t1))
            print('     build targets : %f' % (t3 - t2))
            print('       create loss : %f' % (t4 - t3))
            print('             total : %f' % (t4 - t0))
        summary = (self.seen, self.nth_layer, nGT, nRecall, nRecall75,
                   nProposals, loss_coord, loss_conf, loss_cls, loss)
        print(
            '%d: Layer(%03d) nGT %3d, nRC %3d, nRC75 %3d, nPP %3d, loss: box %6.3f, conf %6.3f, class %6.3f, total %7.3f'
            % summary)
        if math.isnan(loss.item()):
            print(conf, tconf)
            sys.exit(0)
        return loss
Example #10
0
def extract(grad):
    """Store ``convert2cpu(grad.data)`` in the module-level ``saved_grad``.

    Presumably registered as a gradient hook; the registration is not
    visible here.
    """
    global saved_grad
    raw_gradient = grad.data
    saved_grad = convert2cpu(raw_gradient)
Example #11
0
    def forward(self, output, target):
        """Compute the region-layer loss for one batch.

        ``output`` is viewed as ``(nB, nA, 5 + nC, nH, nW)``: channels 0-3 are
        box coordinates, channel 4 is confidence, the remaining ``nC``
        channels are class scores. The loss is the sum of a scaled
        coordinate term, a confidence term (both summed squared error / 2)
        and a scaled cross-entropy class term.

        Args:
            output: Raw layer output; dim 1 must hold ``nA * (5 + nC)`` values.
            target: Ground truth handed to ``self.build_targets`` (defined
                elsewhere; its format is not visible here).

        Returns:
            The total loss tensor. The process exits if the loss is NaN.
        """
        #output : BxAs*(4+1+num_classes)*H*W
        t0 = time.time()
        nB = output.data.size(0)    # batch size
        nA = self.num_anchors
        nC = self.num_classes
        nH = output.data.size(2)
        nW = output.data.size(3)
        cls_anchor_dim = nB*nA*nH*nW

        # Anchors may arrive as a plain sequence; convert once and cache.
        if not isinstance(self.anchors, torch.Tensor):
            self.anchors = torch.FloatTensor(self.anchors).view(self.num_anchors, self.anchor_step).to(self.device)

        output = output.view(nB, nA, (5+nC), nH, nW)
        cls_grid = torch.linspace(5,5+nC-1,nC).long().to(self.device)
        ix = torch.LongTensor(range(0,5)).to(self.device)
        pred_boxes = torch.FloatTensor(4, cls_anchor_dim).to(self.device)

        coord = output.index_select(2, ix[0:4]).view(nB*nA, -1, nH*nW).transpose(0,1).contiguous().view(-1,cls_anchor_dim)  # x, y, w, h
        coord[0:2] = coord[0:2].sigmoid()                                   # x, y
        conf = output.index_select(2, ix[4]).view(nB, nA, nH, nW).sigmoid()
        cls  = output.index_select(2, cls_grid)
        cls  = cls.view(nB*nA, nC, nH*nW).transpose(1,2).contiguous().view(cls_anchor_dim, nC)

        t1 = time.time()
        grid_x = torch.linspace(0, nW-1, nW).repeat(nB*nA, nH, 1).view(cls_anchor_dim).to(self.device)
        grid_y = torch.linspace(0, nH-1, nH).repeat(nW,1).t().repeat(nB*nA, 1, 1).view(cls_anchor_dim).to(self.device)
        # coord is flattened in (batch, anchor, cell) order, so the anchors
        # must be tiled as (nB*nA, nH*nW). Tiling as (nA, nB*nH*nW) gives
        # anchor-major order and pairs boxes with the wrong anchor whenever
        # nB > 1.
        anchor_w = self.anchors.index_select(1, ix[0]).repeat(nB, nH*nW).view(cls_anchor_dim)
        anchor_h = self.anchors.index_select(1, ix[1]).repeat(nB, nH*nW).view(cls_anchor_dim)

        pred_boxes[0] = coord[0] + grid_x
        pred_boxes[1] = coord[1] + grid_y
        pred_boxes[2] = coord[2].exp() * anchor_w
        pred_boxes[3] = coord[3].exp() * anchor_h
        # for build_targets. it works faster on CPU than on GPU
        pred_boxes = convert2cpu(pred_boxes.transpose(0,1).contiguous().view(-1,4)).detach()

        t2 = time.time()
        nGT, nRecall, coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls = \
            self.build_targets(pred_boxes, target.detach(), nH, nW)

        # Keep only the cells that have a class target.
        cls_mask = (cls_mask == 1)
        tcls = tcls[cls_mask].long().view(-1)
        cls_mask = cls_mask.view(-1, 1).repeat(1,nC).to(self.device)
        cls = cls[cls_mask].view(-1, nC)

        nProposals = int((conf > 0.25).sum())

        tcoord = tcoord.view(4, cls_anchor_dim).to(self.device)
        tconf, tcls = tconf.to(self.device), tcls.to(self.device)
        coord_mask, conf_mask = coord_mask.view(cls_anchor_dim).to(self.device), conf_mask.sqrt().to(self.device)

        t3 = time.time()
        # reduction='sum' replaces the deprecated size_average=False.
        sum_mse = nn.MSELoss(reduction='sum')
        loss_coord = self.coord_scale * sum_mse(coord*coord_mask, tcoord*coord_mask)/2
        # sqrt(object_scale)/2 is almost equal to 1.
        loss_conf = sum_mse(conf*conf_mask, tconf*conf_mask)/2
        # With no class targets in the batch the class term is a plain 0.
        loss_cls = self.class_scale * nn.CrossEntropyLoss(reduction='sum')(cls, tcls) if cls.size(0) > 0 else 0
        loss = loss_coord + loss_conf + loss_cls
        t4 = time.time()
        if False:
            # Timing breakdown, disabled.
            print('-'*30)
            print('        activation : %f' % (t1 - t0))
            print(' create pred_boxes : %f' % (t2 - t1))
            print('     build targets : %f' % (t3 - t2))
            print('       create loss : %f' % (t4 - t3))
            print('             total : %f' % (t4 - t0))
        print('%d: nGT %3d, nRC %3d, nPP %3d, loss: box %6.3f, conf %6.3f, class %6.3f, total %7.3f'
            % (self.seen, nGT, nRecall, nProposals, loss_coord, loss_conf, loss_cls, loss))
        if math.isnan(loss.item()):
            print(conf, tconf)
            sys.exit(0)
        return loss
Example #12
0
def save_fc(fp, fc_model):
    """Write only the weight of ``fc_model`` to ``fp``; no bias is written.

    Args:
        fp: Open binary file object or path accepted by ``ndarray.tofile``.
        fc_model: Object exposing a ``weight`` parameter.
    """
    # The weight always goes through convert2cpu (defined elsewhere),
    # whatever device it is on.
    weight_values = fc_model.weight.data
    host_weight = convert2cpu(weight_values)
    host_weight.numpy().tofile(fp)
Example #13
0
    def forward(self, output, target):
        """Compute the detection loss with an extra per-box property term.

        ``output`` is viewed as ``(nB, nA, 5 + nC + nF, nH, nW)``: channels
        0-3 are box coordinates, channel 4 is confidence, the next ``nC``
        channels are class scores and the last ``nF`` channels are
        properties. The four partial losses are combined with weights that
        depend on the current coordinate and confidence losses, and are also
        appended to ``self.csv_write`` as one comma-separated line.

        Args:
            output: Raw layer output; dim 1 must hold ``nA * (5 + nC + nF)``
                values.
            target: Ground truth handed to ``self.build_targets`` (defined
                elsewhere; its format is not visible here).

        Returns:
            The weighted total loss tensor. The process exits if the loss is
            NaN.
        """
        #output : BxAs*(4+1+num_classes+num_props)*H*W
        mask_tuple = self.get_mask_boxes(output)
        t0 = time.time()
        nB = output.data.size(0)  # batch size
        nA = mask_tuple['n'].item()  # num_anchors
        nC = self.num_classes
        nF = self.num_props
        nH = output.data.size(2)
        nW = output.data.size(3)
        anchor_step = mask_tuple['a'].size(0) // nA
        anchors = mask_tuple['a'].view(nA, anchor_step).to(self.device)
        cls_anchor_dim = nB * nA * nH * nW

        output = output.view(nB, nA, (5 + nC + nF), nH, nW)
        cls_grid = torch.linspace(5, 5 + nC - 1, nC).long().to(self.device)
        prop_grid = torch.linspace(5 + nC, 5 + nC + nF - 1,
                                   nF).long().to(self.device)
        ix = torch.LongTensor(range(0, 5)).to(self.device)
        pred_boxes = torch.FloatTensor(4, cls_anchor_dim).to(self.device)

        coord = output.index_select(2, ix[0:4]).view(
            nB * nA, -1, nH * nW).transpose(0, 1).contiguous().view(
                -1, cls_anchor_dim)  # x, y, w, h
        coord[0:2] = coord[0:2].sigmoid()  # x, y
        conf = output.index_select(2, ix[4]).view(nB, nA, nH, nW).sigmoid()
        cls = output.index_select(2, cls_grid)
        prop = output.index_select(2, prop_grid)
        cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(
            cls_anchor_dim, nC)
        prop = prop.view(nB * nA, nF,
                         nH * nW).transpose(1, 2).contiguous().view(
                             cls_anchor_dim, nF)

        t1 = time.time()
        grid_x = torch.linspace(0, nW - 1, nW).repeat(
            nB * nA, nH, 1).view(cls_anchor_dim).to(self.device)
        grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(
            nB * nA, 1, 1).view(cls_anchor_dim).to(self.device)
        # coord is flattened in (batch, anchor, cell) order, so the anchors
        # must be tiled as (nB*nA, nH*nW). Tiling as (nA, nB*nH*nW) gives
        # anchor-major order and pairs boxes with the wrong anchor whenever
        # nB > 1.
        anchor_w = anchors.index_select(1, ix[0]).repeat(
            nB, nH * nW).view(cls_anchor_dim)
        anchor_h = anchors.index_select(1, ix[1]).repeat(
            nB, nH * nW).view(cls_anchor_dim)

        pred_boxes[0] = coord[0] + grid_x
        pred_boxes[1] = coord[1] + grid_y
        pred_boxes[2] = coord[2].exp() * anchor_w
        pred_boxes[3] = coord[3].exp() * anchor_h
        # for build_targets. it works faster on CPU than on GPU
        pred_boxes = convert2cpu(
            pred_boxes.transpose(0, 1).contiguous().view(-1, 4)).detach()

        t2 = time.time()
        nGT, nRecall, nRecall75, coord_mask, conf_mask, cls_mask, prop_mask, tcoord, tconf, tcls, tprop = \
            self.build_targets(pred_boxes, target.detach(), anchors.detach(), nA, nH, nW)

        # Keep only the cells that have a class target.
        cls_mask = (cls_mask == 1)
        tcls = tcls[cls_mask].long().view(-1)
        cls_mask = cls_mask.view(-1, 1).repeat(1, nC).to(self.device)
        cls = cls[cls_mask].view(-1, nC)

        # reduction='sum' replaces the deprecated size_average=False.
        sum_mse = nn.MSELoss(reduction='sum')
        sum_ce = nn.CrossEntropyLoss(reduction='sum')

        # Property loss: squared error on the sigmoid of each property
        # channel, over the cells selected by prop_mask.
        prop_masks = (prop_mask == 1)
        pro_mask = prop_masks.view(-1, 1).repeat(1, nF).to(self.device)
        propties = prop[pro_mask].view(-1, nF)
        loss_prop = 0.0
        if propties.size(0) > 0:
            for i in range(nF):
                tpropty = tprop[i]
                tpropty = tpropty[prop_masks].view(-1).to(self.device)
                propty = propties[:, i].sigmoid()
                loss_prop += sum_mse(propty, tpropty)

        nProposals = int((conf > 0.25).sum())
        tcoord = tcoord.view(4, cls_anchor_dim).to(self.device)
        tconf, tcls = tconf.to(self.device), tcls.to(self.device)
        coord_mask, conf_mask = coord_mask.view(cls_anchor_dim).to(
            self.device), conf_mask.to(self.device)

        t3 = time.time()
        n = nGT / nB
        n = n / nA
        # NOTE(review): n is forced to 0 here, so the normalised branch
        # below never runs and 'num' in the log line is always 0. Confirm
        # whether that is intended before removing either side.
        n = 0
        if n > 0:
            loss_coord = sum_mse(coord * coord_mask, tcoord * coord_mask) / n
            loss_coord = loss_coord**2
            loss_conf = sum_mse(conf * conf_mask, tconf * conf_mask) / n

            loss_cls = sum_ce(cls, tcls) if cls.size(0) > 0 else 0
            loss_cls = 100 * loss_cls / n
        else:
            loss_coord = sum_mse(coord * coord_mask, tcoord * coord_mask) / 2
            loss_conf = sum_mse(conf * conf_mask, tconf * conf_mask)
            loss_cls = sum_ce(cls, tcls) if cls.size(0) > 0 else 0

        # Weights are [coord, conf, cls, prop]. The property term only
        # contributes once both coord and conf losses are at most 5.
        if loss_coord > 5:
            coe = [2.0, 1.0, 2.0, 0.0]
        elif loss_conf > 5:
            coe = [1.0, 1.0, 10.0, 0.0]
        else:
            coe = [1.0, 2.0, 1.0, 1.0]
        loss = coe[0] * loss_coord + coe[1] * loss_conf + coe[
            2] * loss_cls + coe[3] * loss_prop
        losses = [loss_coord, loss_conf, loss_cls, loss_prop]
        losses = [str(round(float(s), 5)) for s in losses]
        self.csv_write.write(','.join(losses) + '\n')
        t4 = time.time()
        if False:
            # Timing breakdown, disabled.
            print('-' * 30)
            print('        activation : %f' % (t1 - t0))
            print(' create pred_boxes : %f' % (t2 - t1))
            print('     build targets : %f' % (t3 - t2))
            print('       create loss : %f' % (t4 - t3))
            print('             total : %f' % (t4 - t0))
        print(
            '[%s] %d: Layer(%03d) nGT %3d, nRC %3d, nRC75 %3d, nPP %3d, |loss: box =%6.3f, conf =%6.3f, class =%6.3f, prop =%6.3f, |total %7.3f|, num =%d'
            % (time.strftime('%H:%M:%S %d,%b.', time.localtime()), self.seen,
               self.nth_layer, nGT, nRecall, nRecall75, nProposals, loss_coord,
               loss_conf, loss_cls, loss_prop, loss, n))
        if math.isnan(loss.item()):
            print(conf, tconf)
            sys.exit(0)
        return loss
Example #14
0
def region_loss(output, target, config):
    """Compute the region loss for one batch on the GPU.

    ``output`` is viewed as ``(n_b, n_a, 5 + n_c, n_h, n_w)``: channels 0-3
    are x, y, w, h, channel 4 is confidence and the remaining ``n_c``
    channels are class scores. The loss is the sum of scaled squared-error
    terms for x, y, w, h, a squared-error confidence term and a scaled
    cross-entropy class term.

    Args:
        output: Raw network output on a CUDA device.
        target: Ground truth; its ``.data`` is handed to ``build_targets``
            (defined elsewhere; its format is not visible here).
        config: Mapping with keys ``'anchors'`` (flat list of width/height
            pairs), ``'num_classes'``, ``'coord_scale'`` and
            ``'class_scale'``; it is also forwarded to ``build_targets``.

    Returns:
        The total loss tensor.
    """
    anchors = config['anchors']
    n_b = output.data.size(0)
    n_a = len(anchors) // 2
    n_c = config['num_classes']
    n_h = output.data.size(2)
    n_w = output.data.size(3)
    n_cells = n_b * n_a * n_h * n_w

    output = output.view(n_b, n_a, (5 + n_c), n_h, n_w)

    def channel(idx):
        # One channel of the per-anchor axis, shaped (n_b, n_a, n_h, n_w).
        return output.index_select(2, torch.cuda.LongTensor([idx])).view(
            n_b, n_a, n_h, n_w)

    x = torch.sigmoid(channel(0))
    y = torch.sigmoid(channel(1))
    w = channel(2)
    h = channel(3)
    conf = torch.sigmoid(channel(4))
    cls = output.index_select(
        2, torch.linspace(5, 5 + n_c - 1, n_c).long().cuda())
    cls = cls.view(n_b * n_a, n_c,
                   n_h * n_w).transpose(1, 2).contiguous().view(n_cells, n_c)

    pred_boxes = torch.cuda.FloatTensor(4, n_cells)
    grid_x = torch.linspace(0, n_w - 1, n_w).repeat(n_h, 1).repeat(
        n_b * n_a, 1, 1).view(n_cells).cuda()
    grid_y = torch.linspace(0, n_h - 1, n_h).repeat(n_w, 1).t().repeat(
        n_b * n_a, 1, 1).view(n_cells).cuda()
    anchor_table = torch.Tensor(anchors).view(n_a, 2)
    anchor_w = anchor_table.index_select(1, torch.LongTensor([0])).cuda()
    anchor_h = anchor_table.index_select(1, torch.LongTensor([1])).cuda()
    anchor_w = anchor_w.repeat(n_b, 1).repeat(1, 1, n_h * n_w).view(n_cells)
    anchor_h = anchor_h.repeat(n_b, 1).repeat(1, 1, n_h * n_w).view(n_cells)

    # Flatten the 4-D activations before combining them with the flat grids.
    # Adding a 4-D tensor to a flat one of the same size is not a valid
    # broadcast and raises on current PyTorch.
    pred_boxes[0] = x.data.view_as(grid_x) + grid_x
    pred_boxes[1] = y.data.view_as(grid_y) + grid_y
    pred_boxes[2] = torch.exp(w.data.view_as(anchor_w)) * anchor_w
    pred_boxes[3] = torch.exp(h.data.view_as(anchor_h)) * anchor_h
    pred_boxes = convert2cpu(
        pred_boxes.transpose(0, 1).contiguous().view(-1, 4))

    n_gt, n_correct, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf,\
    tcls = build_targets(pred_boxes, target.data, anchors, n_a, n_h, n_w,
                         config)
    cls_mask = (cls_mask == 1)
    n_proposals = int((conf > .25).sum())

    tx = tx.cuda()
    ty = ty.cuda()
    tw = tw.cuda()
    th = th.cuda()
    tconf = tconf.cuda()
    # Flatten the mask as well so the selection works whatever shape
    # build_targets returned it in.
    tcls = tcls.view(-1)[cls_mask.view(-1)].long().cuda()

    coord_mask = coord_mask.cuda()
    conf_mask = conf_mask.cuda().sqrt()
    cls_mask = cls_mask.view(-1, 1).repeat(1, n_c).cuda()
    cls = cls[cls_mask].view(-1, n_c)

    coord_scale = config['coord_scale']
    class_scale = config['class_scale']
    # reduction='sum' replaces the deprecated size_average=False.
    sum_mse = nn.MSELoss(reduction='sum')
    loss_x = coord_scale * sum_mse(x * coord_mask, tx * coord_mask) / 2
    loss_y = coord_scale * sum_mse(y * coord_mask, ty * coord_mask) / 2
    loss_w = coord_scale * sum_mse(w * coord_mask, tw * coord_mask) / 2
    loss_h = coord_scale * sum_mse(h * coord_mask, th * coord_mask) / 2
    loss_conf = sum_mse(conf * conf_mask, tconf * conf_mask) / 2
    loss_cls = class_scale * nn.CrossEntropyLoss(reduction='sum')(cls, tcls)
    loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
    # .item() replaces .data[0], which raises on 0-dim tensors.
    print('nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f,'
          ' conf %f, cls %f, total %f' %
          (n_gt, n_correct, n_proposals, loss_x.item(), loss_y.item(),
           loss_w.item(), loss_h.item(), loss_conf.item(), loss_cls.item(),
           loss.item()))

    return loss
Example #15
0
    def forward(self, output, target):
        """Compute the loss for a 9-point (18-coordinate) corner head.

        ``output`` is viewed as ``(nB, nA, 19 + nC, nH, nW)``: channels 0-17
        are the x/y pairs of nine points (only the first pair goes through a
        sigmoid), channel 18 is confidence and the remaining ``nC`` channels
        are class scores. The loss is the sum of scaled squared-error terms
        for all 18 coordinates, a squared-error confidence term and a scaled
        cross-entropy class term.

        Args:
            output: Raw network output on a CUDA device.
            target: Ground truth; its ``.data`` is handed to
                ``build_targets`` (defined elsewhere; its format is not
                visible here).

        Returns:
            The total loss tensor.
        """
        # Parameters
        t0 = time.time()
        nB = output.data.size(0)
        nA = self.num_anchors
        nC = self.num_classes
        nH = output.data.size(2)
        nW = output.data.size(3)
        num_points = 9
        num_cells = nB*nA*nH*nW

        # Activation
        output = output.view(nB, nA, (19+nC), nH, nW)

        def channel(idx):
            # One channel of the per-anchor axis, shaped (nB, nA, nH, nW).
            return output.index_select(2, torch.cuda.LongTensor([idx])).view(nB, nA, nH, nW)

        # Point k lives in channels 2k (x) and 2k+1 (y).
        xs = [channel(2*k) for k in range(num_points)]
        ys = [channel(2*k + 1) for k in range(num_points)]
        # Only the first point is passed through a sigmoid.
        xs[0] = torch.sigmoid(xs[0])
        ys[0] = torch.sigmoid(ys[0])
        conf = torch.sigmoid(channel(18))
        # torch.cuda has no linspace; build the index on the CPU and move it.
        cls_index = torch.linspace(19, 19+nC-1, nC).long().cuda()
        mycls = output.index_select(2, cls_index)
        mycls = mycls.view(nB*nA, nC, nH*nW).transpose(1,2).contiguous().view(num_cells, nC)
        t1 = time.time()

        # Create pred boxes
        pred_corners = torch.cuda.FloatTensor(2*num_points, num_cells)
        grid_x = torch.linspace(0, nW-1, nW).repeat(nH,1).repeat(nB*nA, 1, 1).view(num_cells).cuda()
        grid_y = torch.linspace(0, nH-1, nH).repeat(nW,1).t().repeat(nB*nA, 1, 1).view(num_cells).cuda()
        for k in range(num_points):
            # Cell offset plus grid position, scaled by the grid size.
            pred_corners[2*k] = (xs[k].data.view_as(grid_x) + grid_x) / nW
            pred_corners[2*k + 1] = (ys[k].data.view_as(grid_y) + grid_y) / nH
        gpu_matrix = pred_corners.transpose(0,1).contiguous().view(-1,18)
        pred_corners = utils.convert2cpu(gpu_matrix)
        t2 = time.time()

        # Build targets
        # Layout of the result: five bookkeeping values, tx0..tx8,
        # ty0..ty8, then tconf and tcls.
        built = build_targets(pred_corners, target.data, self.anchors, nA, nC, nH, nW, self.noobject_scale, self.object_scale, self.thresh, self.seen)
        nGT, nCorrect, coord_mask, conf_mask, cls_mask = built[:5]
        txs = [t.cuda() for t in built[5:5 + num_points]]
        tys = [t.cuda() for t in built[5 + num_points:5 + 2*num_points]]
        tconf, tcls = built[5 + 2*num_points:]

        cls_mask   = (cls_mask == 1)
        nProposals = int((conf > 0.25).sum())
        tconf      = tconf.cuda()
        tcls       = tcls[cls_mask].long().cuda()
        coord_mask = coord_mask.cuda()
        conf_mask  = conf_mask.cuda().sqrt()
        cls_mask   = cls_mask.view(-1, 1).repeat(1,nC).cuda()
        mycls      = mycls[cls_mask].view(-1, nC)
        t3 = time.time()

        # Create loss
        # reduction='sum' replaces the deprecated size_average=False.
        sum_mse = nn.MSELoss(reduction='sum')

        def coord_loss(pred, tgt):
            return self.coord_scale * sum_mse(pred*coord_mask, tgt*coord_mask)/2.0

        x_losses = [coord_loss(p, t) for p, t in zip(xs, txs)]
        y_losses = [coord_loss(p, t) for p, t in zip(ys, tys)]
        loss_x0 = x_losses[0]
        loss_y0 = y_losses[0]
        loss_x = loss_x0
        for extra in x_losses[1:]:
            loss_x = loss_x + extra
        loss_y = loss_y0
        for extra in y_losses[1:]:
            loss_y = loss_y + extra
        loss_conf  = sum_mse(conf*conf_mask, tconf*conf_mask)/2.0

        # The class logits are held in ``mycls``; the old code passed an
        # undefined name ``cls`` here.
        loss_cls   = self.class_scale * nn.CrossEntropyLoss(reduction='sum')(mycls, tcls)
        loss   = loss_x + loss_y + loss_conf + loss_cls
        # .item() replaces .data[0], which raises on 0-dim tensors.
        print('%d: nGT %d, recall %d, proposals %d, loss: x0: %f x %f, y0: %f y %f, conf %f, cls %f, total %f' % (self.seen, nGT, nCorrect, nProposals, loss_x0.item(), loss_x.item(), loss_y0.item(), loss_y.item(), loss_conf.item(), loss_cls.item(), loss.item()))

        t4 = time.time()

        if False:
            # Timing breakdown, disabled.
            print('-----------------------------------')
            print('          activation : %f' % (t1 - t0))
            print(' create pred_corners : %f' % (t2 - t1))
            print('       build targets : %f' % (t3 - t2))
            print('         create loss : %f' % (t4 - t3))
            print('               total : %f' % (t4 - t0))

        return loss