def save_fc(fp, fc_model):
    if fc_model.bias.is_cuda:
        convert2cpu(fc_model.bias.data).numpy().tofile(fp)
        convert2cpu(fc_model.weight.data).numpy().tofile(fp)
    else:
        fc_model.bias.data.numpy().tofile(fp)
        fc_model.weight.data.numpy().tofile(fp)
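# The save/load helpers in this file call a convert2cpu utility that is not
# shown here. A minimal sketch consistent with how it is used (copy a CUDA
# tensor into a freshly allocated CPU FloatTensor) might look like:
import torch

def convert2cpu(gpu_matrix):
    # Allocate a CPU tensor with the same shape, then copy the device data into it.
    return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix)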
def save_deform_conv(fp, conv_model):
    if conv_model.weight.is_cuda:
        convert2cpu(conv_model.weight.data).numpy().tofile(fp)
        convert2cpu(conv_model.layer_1.weight.data).numpy().tofile(fp)
    else:
        conv_model.weight.data.numpy().tofile(fp)
        conv_model.layer_1.weight.data.numpy().tofile(fp)
def save_conv(fp, conv_model): """ Save convolutional model """ if conv_model.bias.is_cuda: convert2cpu(conv_model.bias.data).numpy().tofile(fp) convert2cpu(conv_model.weight.data).numpy().tofile(fp) else: conv_model.bias.data.numpy().tofile(fp) conv_model.weight.data.numpy().tofile(fp)
def save_conv_bn(fp, conv_model, bn_model): """ Save batch normalized convolutional model """ if bn_model.bias.is_cuda: convert2cpu(bn_model.bias.data).numpy().tofile(fp) convert2cpu(bn_model.weight.data).numpy().tofile(fp) convert2cpu(bn_model.running_mean).numpy().tofile(fp) convert2cpu(bn_model.running_var).numpy().tofile(fp) convert2cpu(conv_model.weight.data).numpy().tofile(fp) else: bn_model.bias.data.numpy().tofile(fp) bn_model.weight.data.numpy().tofile(fp) bn_model.running_mean.numpy().tofile(fp) bn_model.running_var.numpy().tofile(fp) conv_model.weight.data.numpy().tofile(fp)
def save_conv_target_class(fp, conv_model, targetclass, numclass):
    # The original format string was missing its placeholder.
    print('save weight with the new target number of classes: {}'.format(targetclass))
    # YOLOv3 allocates (num_classes + 5) * 3 output channels per grid cell.
    if targetclass < numclass:
        differ = (numclass - targetclass) * 3
    else:
        differ = (targetclass - numclass) * 3
    print('differ: ', differ)
    if conv_model.bias.is_cuda:
        convert2cpu(conv_model.bias.data).numpy().tofile(fp)
        convert2cpu(conv_model.bias.data[:differ]).numpy().tofile(fp)
        convert2cpu(conv_model.weight.data).numpy().tofile(fp)
        convert2cpu(conv_model.weight.data[:differ]).numpy().tofile(fp)
    else:
        conv_model.bias.data.numpy().tofile(fp)
        conv_model.bias.data[:differ].numpy().tofile(fp)
        conv_model.weight.data.numpy().tofile(fp)
        conv_model.weight.data[:differ].numpy().tofile(fp)
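# Worked example of the "(num_classes + 5) * 3" layout referenced above:
# each of the 3 anchors at a grid cell predicts 4 box coordinates plus 1
# objectness score plus num_classes class scores, so `differ` counts the
# channels gained or lost when retargeting the class count.
num_anchors = 3
old_channels = (80 + 5) * num_anchors  # 255, COCO-style YOLOv3 head
new_channels = (20 + 5) * num_anchors  # 75, hypothetical 20-class target
print(old_channels - new_channels)     # 180 == (80 - 20) * 3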
def forward(self, x, y=None):
    ind = -2
    self.loss = None
    self.interParam = []
    self.bn_weight_params = []
    outputs = dict()
    out_predicts = []
    for block in self.blocks:
        ind = ind + 1
        # if ind > 0:
        #     return x
        if block['type'] == 'net':
            continue
        elif block['type'] in ['convolutional', 'deconvolutional', 'maxpool', 'reorg',
                               'upsample', 'avgpool', 'softmax', 'connected']:
            x = self.models[ind](x)
            # Collect batch-norm scale parameters (used by the commented-out
            # L1 sparsity regularization further down).
            if block['type'] == 'convolutional' and ind >= 53:
                for module_name, module in self.models[ind].named_children():
                    if module_name.startswith('bn'):
                        for param_name, param in module.named_parameters():
                            if param_name == 'weight':
                                self.bn_weight_params.append(convert2cpu(param))
            outputs[ind] = x
        elif block['type'] == 'route':
            layers = block['layers'].split(',')
            layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers]
            layerlen = len(layers)
            assert (layerlen >= 1)
            x = outputs[layers[0]]
            if layerlen > 1:
                for i in range(1, layerlen):
                    x = torch.cat((x, outputs[layers[i]]), 1)
            outputs[ind] = x
        elif block['type'] == 'shortcut':
            from_layer = int(block['from'])
            activation = block['activation']
            from_layer = from_layer if from_layer > 0 else from_layer + ind
            x1 = outputs[from_layer]
            x2 = outputs[ind - 1]
            x = x1 + x2
            if activation == 'leaky':
                x = F.leaky_relu(x, 0.1, inplace=True)
            elif activation == 'relu':
                x = F.relu(x, inplace=True)
            outputs[ind] = x
        elif block['type'] == 'region':
            continue
            # NOTE: unreachable because of the continue above; kept from the original.
            if self.loss:
                self.loss = self.loss + self.models[ind](x)
            else:
                self.loss = self.models[ind](x)
            outputs[ind] = None
        elif block['type'] in ['yolo', 'pose', 'pose-2d', 'pose-ind', 'pose-part',
                               'pose-seg', 'pose-3dr', 'pose-3drseg', 'pose-pnp']:
            layerId = ("L%03d" % int(ind))
            if self.training:
                loss, param = self.models[ind](x, y, [self.seen])
                # self.no_reg_loss = loss.item()
                # Compute regularization
                # all_bn_weights = torch.cat([x.view(-1) for x in self.bn_weight_params])
                # l1_regularization = torch.norm(all_bn_weights, 1)
                # self.l1_reg_only = l1_regularization.item()
                # loss = torch.add(loss, l1_regularization, alpha=self.bn_regularization_lambda)
                # print('no_reg_loss: ' + str(self.no_reg_loss) + '\tl1_reg_only: ' + str(self.l1_reg_only),
                #       '\tregularized_loss: ' + str(self.regularized_loss))
                if self.loss:
                    self.loss = self.loss + loss
                else:
                    self.loss = loss
                self.interParam.append([layerId, block['type'], param])
            else:
                pred = self.models[ind](x, None)
                out_predicts.append([layerId, block['type'], pred])
        elif block['type'] == 'cost':
            continue
        else:
            print('unknown type %s' % (block['type']))
    if self.training:
        return self.loss, self.interParam
    else:
        return out_predicts
def forward(self, output, target):
    # output : B x As*(4+1+num_classes) x H x W
    mask_tuple = self.get_mask_boxes(output)
    t0 = time.time()
    nB = output.data.size(0)     # batch size
    nA = mask_tuple['n'].item()  # num_anchors
    nC = self.num_classes
    nH = output.data.size(2)
    nW = output.data.size(3)
    anchor_step = mask_tuple['a'].size(0) // nA
    anchors = mask_tuple['a'].view(nA, anchor_step).to(self.device)
    cls_anchor_dim = nB * nA * nH * nW

    output = output.view(nB, nA, (5 + nC), nH, nW)
    cls_grid = torch.linspace(5, 5 + nC - 1, nC).long().to(self.device)
    ix = torch.LongTensor(range(0, 5)).to(self.device)
    pred_boxes = torch.FloatTensor(4, cls_anchor_dim).to(self.device)

    coord = output.index_select(2, ix[0:4]).view(nB * nA, -1, nH * nW).transpose(0, 1).contiguous().view(-1, cls_anchor_dim)  # x, y, w, h
    coord[0:2] = coord[0:2].sigmoid()
    conf = output.index_select(2, ix[4]).view(cls_anchor_dim).sigmoid()
    cls = output.index_select(2, cls_grid)
    cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(cls_anchor_dim, nC).to(self.device)
    t1 = time.time()

    grid_x = torch.linspace(0, nW - 1, nW).repeat(nB * nA, nH, 1).view(cls_anchor_dim).to(self.device)
    grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(cls_anchor_dim).to(self.device)
    anchor_w = anchors.index_select(1, ix[0]).repeat(nB, nH * nW).view(cls_anchor_dim)
    anchor_h = anchors.index_select(1, ix[1]).repeat(nB, nH * nW).view(cls_anchor_dim)

    pred_boxes[0] = coord[0] + grid_x
    pred_boxes[1] = coord[1] + grid_y
    pred_boxes[2] = coord[2].exp() * anchor_w
    pred_boxes[3] = coord[3].exp() * anchor_h
    # for build_targets: it works faster on CPU than on GPU
    pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4)).detach()
    t2 = time.time()

    nGT, nRecall, nRecall75, obj_mask, noobj_mask, coord_mask, tcoord, tconf, tcls = \
        self.build_targets(pred_boxes, target.detach(), anchors.detach(), nA, nH, nW)

    tcls = tcls.view(cls_anchor_dim, nC).to(self.device)
    nProposals = int((conf > 0.25).sum())
    tcoord = tcoord.view(4, cls_anchor_dim).to(self.device)
    tconf = tconf.view(cls_anchor_dim).to(self.device)
    conf_mask = (obj_mask + noobj_mask).view(cls_anchor_dim).to(self.device)
    obj_mask = obj_mask.view(cls_anchor_dim).to(self.device)
    coord_mask = coord_mask.view(cls_anchor_dim).to(self.device)
    t3 = time.time()

    loss_coord = nn.MSELoss(reduction='sum')(coord * coord_mask, tcoord * coord_mask) / nB
    loss_conf = nn.BCELoss(reduction='sum')(conf * conf_mask, tconf * conf_mask) / nB
    loss_cls = nn.BCEWithLogitsLoss(reduction='sum')(cls, tcls) / nB
    loss = loss_coord + loss_conf + loss_cls
    t4 = time.time()

    if False:
        print('-' * 30)
        print(' activation : %f' % (t1 - t0))
        print(' create pred_boxes : %f' % (t2 - t1))
        print(' build targets : %f' % (t3 - t2))
        print(' create loss : %f' % (t4 - t3))
        print(' total : %f' % (t4 - t0))
    print('%d: Layer(%03d) nGT %3d, nRC %3d, nRC75 %3d, nPP %3d, loss: box %6.3f, conf %6.3f, class %6.3f, total %7.3f'
          % (self.seen, self.nth_layer, nGT, nRecall, nRecall75, nProposals,
             loss_coord, loss_conf, loss_cls, loss))
    if math.isnan(loss.item()):
        print(conf, tconf)
        sys.exit(0)
    return loss
def extract(grad):
    global saved_grad
    saved_grad = convert2cpu(grad.data)
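# Usage sketch: extract() has the signature of a tensor backward hook, so it
# would be registered on a tensor whose gradient should be captured on the
# CPU (the tensors below are illustrative):
import torch

saved_grad = None
x = torch.randn(2, 3, requires_grad=True)
y = (x * x).sum()
handle = x.register_hook(extract)  # extract() stashes grad into saved_grad
y.backward()
handle.remove()
print(saved_grad)                  # CPU copy of dy/dx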
def forward(self, output, target):
    # output : B x As*(4+1+num_classes) x H x W
    t0 = time.time()
    nB = output.data.size(0)  # batch size
    nA = self.num_anchors
    nC = self.num_classes
    nH = output.data.size(2)
    nW = output.data.size(3)
    cls_anchor_dim = nB * nA * nH * nW

    if not isinstance(self.anchors, torch.Tensor):
        self.anchors = torch.FloatTensor(self.anchors).view(self.num_anchors, self.anchor_step).to(self.device)

    output = output.view(nB, nA, (5 + nC), nH, nW)
    cls_grid = torch.linspace(5, 5 + nC - 1, nC).long().to(self.device)
    ix = torch.LongTensor(range(0, 5)).to(self.device)
    pred_boxes = torch.FloatTensor(4, cls_anchor_dim).to(self.device)

    coord = output.index_select(2, ix[0:4]).view(nB * nA, -1, nH * nW).transpose(0, 1).contiguous().view(-1, cls_anchor_dim)  # x, y, w, h
    coord[0:2] = coord[0:2].sigmoid()  # x, y
    conf = output.index_select(2, ix[4]).view(nB, nA, nH, nW).sigmoid()
    cls = output.index_select(2, cls_grid)
    cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(cls_anchor_dim, nC)
    t1 = time.time()

    grid_x = torch.linspace(0, nW - 1, nW).repeat(nB * nA, nH, 1).view(cls_anchor_dim).to(self.device)
    grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(cls_anchor_dim).to(self.device)
    # Tile anchors batch-major so they line up with the (batch, anchor, cell)
    # flattening of coord; the original repeat(1, nB*nH*nW) ordered them
    # anchor-major, which misaligns for batch sizes > 1.
    anchor_w = self.anchors.index_select(1, ix[0]).repeat(nB, nH * nW).view(cls_anchor_dim)
    anchor_h = self.anchors.index_select(1, ix[1]).repeat(nB, nH * nW).view(cls_anchor_dim)

    pred_boxes[0] = coord[0] + grid_x
    pred_boxes[1] = coord[1] + grid_y
    pred_boxes[2] = coord[2].exp() * anchor_w
    pred_boxes[3] = coord[3].exp() * anchor_h
    # for build_targets: it works faster on CPU than on GPU
    pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4)).detach()
    t2 = time.time()

    nGT, nRecall, coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls = \
        self.build_targets(pred_boxes, target.detach(), nH, nW)

    cls_mask = (cls_mask == 1)
    tcls = tcls[cls_mask].long().view(-1)
    cls_mask = cls_mask.view(-1, 1).repeat(1, nC).to(self.device)
    cls = cls[cls_mask].view(-1, nC)

    nProposals = int((conf > 0.25).sum())
    tcoord = tcoord.view(4, cls_anchor_dim).to(self.device)
    tconf, tcls = tconf.to(self.device), tcls.to(self.device)
    coord_mask, conf_mask = coord_mask.view(cls_anchor_dim).to(self.device), conf_mask.sqrt().to(self.device)
    t3 = time.time()

    loss_coord = self.coord_scale * nn.MSELoss(size_average=False)(coord * coord_mask, tcoord * coord_mask) / 2
    # sqrt(object_scale)/2 is almost equal to 1.
    loss_conf = nn.MSELoss(size_average=False)(conf * conf_mask, tconf * conf_mask) / 2
    loss_cls = self.class_scale * nn.CrossEntropyLoss(size_average=False)(cls, tcls) if cls.size(0) > 0 else 0
    loss = loss_coord + loss_conf + loss_cls
    t4 = time.time()

    if False:
        print('-' * 30)
        print(' activation : %f' % (t1 - t0))
        print(' create pred_boxes : %f' % (t2 - t1))
        print(' build targets : %f' % (t3 - t2))
        print(' create loss : %f' % (t4 - t3))
        print(' total : %f' % (t4 - t0))
    print('%d: nGT %3d, nRC %3d, nPP %3d, loss: box %6.3f, conf %6.3f, class %6.3f, total %7.3f'
          % (self.seen, nGT, nRecall, nProposals, loss_coord, loss_conf, loss_cls, loss))
    if math.isnan(loss.item()):
        print(conf, tconf)
        sys.exit(0)
    return loss
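# Tiny numeric check of the box decode used in the forward passes above:
# bx = sigmoid(tx) + grid_x, by = sigmoid(ty) + grid_y, bw = exp(tw) * aw,
# bh = exp(th) * ah, all in grid units (the raw values and the anchor
# (aw, ah) below are illustrative):
import torch

tx, ty, tw, th = torch.tensor([0.2, -0.3, 0.1, 0.4])
grid_x, grid_y, aw, ah = 5.0, 7.0, 3.625, 2.8125
bx = torch.sigmoid(tx) + grid_x
by = torch.sigmoid(ty) + grid_y
bw = tw.exp() * aw
bh = th.exp() * ah
print(bx.item(), by.item(), bw.item(), bh.item())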
def save_fc(fp, fc_model):
    # print('fc mode:')
    # print(fc_model)
    # fc_model.bias.data.numpy().tofile(fp)
    convert2cpu(fc_model.weight.data).numpy().tofile(fp)
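# A hedged sketch of a reader matching this weight-only save_fc variant
# (load_fc_weight_only is an assumed name; the original loader is not shown):
import numpy as np
import torch

def load_fc_weight_only(buf, start, fc_model):
    num_w = fc_model.weight.numel()
    fc_model.weight.data.copy_(
        torch.from_numpy(buf[start:start + num_w]).view_as(fc_model.weight))
    return start + num_w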
def forward(self, output, target):
    # output : B x As*(4+1+num_classes+num_props) x H x W
    mask_tuple = self.get_mask_boxes(output)
    t0 = time.time()
    nB = output.data.size(0)     # batch size
    nA = mask_tuple['n'].item()  # num_anchors
    nC = self.num_classes
    nF = self.num_props
    nH = output.data.size(2)
    nW = output.data.size(3)
    anchor_step = mask_tuple['a'].size(0) // nA
    anchors = mask_tuple['a'].view(nA, anchor_step).to(self.device)
    cls_anchor_dim = nB * nA * nH * nW
    # print('shape of output: ', output.shape, nC, nF)
    output = output.view(nB, nA, (5 + nC + nF), nH, nW)
    cls_grid = torch.linspace(5, 5 + nC - 1, nC).long().to(self.device)
    prop_grid = torch.linspace(5 + nC, 5 + nC + nF - 1, nF).long().to(self.device)
    ix = torch.LongTensor(range(0, 5)).to(self.device)
    pred_boxes = torch.FloatTensor(4, cls_anchor_dim).to(self.device)

    coord = output.index_select(2, ix[0:4]).view(nB * nA, -1, nH * nW).transpose(0, 1).contiguous().view(-1, cls_anchor_dim)  # x, y, w, h
    coord[0:2] = coord[0:2].sigmoid()  # x, y
    conf = output.index_select(2, ix[4]).view(nB, nA, nH, nW).sigmoid()
    cls = output.index_select(2, cls_grid)
    prop = output.index_select(2, prop_grid)
    cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(cls_anchor_dim, nC)
    prop = prop.view(nB * nA, nF, nH * nW).transpose(1, 2).contiguous().view(cls_anchor_dim, nF)
    t1 = time.time()

    grid_x = torch.linspace(0, nW - 1, nW).repeat(nB * nA, nH, 1).view(cls_anchor_dim).to(self.device)
    grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(cls_anchor_dim).to(self.device)
    # Tile anchors batch-major to match coord's (batch, anchor, cell) flattening;
    # the original repeat(1, nB*nH*nW) was anchor-major and misaligns for nB > 1.
    anchor_w = anchors.index_select(1, ix[0]).repeat(nB, nH * nW).view(cls_anchor_dim)
    anchor_h = anchors.index_select(1, ix[1]).repeat(nB, nH * nW).view(cls_anchor_dim)

    pred_boxes[0] = coord[0] + grid_x
    pred_boxes[1] = coord[1] + grid_y
    pred_boxes[2] = coord[2].exp() * anchor_w
    pred_boxes[3] = coord[3].exp() * anchor_h
    # for build_targets: it works faster on CPU than on GPU
    pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4)).detach()
    t2 = time.time()

    nGT, nRecall, nRecall75, coord_mask, conf_mask, cls_mask, prop_mask, tcoord, tconf, tcls, tprop = \
        self.build_targets(pred_boxes, target.detach(), anchors.detach(), nA, nH, nW)

    cls_mask = (cls_mask == 1)
    tcls = tcls[cls_mask].long().view(-1)
    cls_mask = cls_mask.view(-1, 1).repeat(1, nC).to(self.device)
    cls = cls[cls_mask].view(-1, nC)

    prop_masks = (prop_mask == 1)
    pro_mask = prop_masks.view(-1, 1).repeat(1, nF).to(self.device)
    propties = prop[pro_mask].view(-1, nF)
    loss_prop = 0.0
    if propties.size(0) > 0:
        for i in range(nF):
            tpropty = tprop[i]
            tpropty = tpropty[prop_masks].view(-1).to(self.device)
            propty = propties[:, i].sigmoid()
            # loss_prop += nn.BCELoss(size_average=False)(propty, tpropty)
            loss_prop += nn.MSELoss(size_average=False)(propty, tpropty)

    nProposals = int((conf > 0.25).sum())
    tcoord = tcoord.view(4, cls_anchor_dim).to(self.device)
    tconf, tcls = tconf.to(self.device), tcls.to(self.device)
    coord_mask, conf_mask = coord_mask.view(cls_anchor_dim).to(self.device), conf_mask.to(self.device)
    t3 = time.time()

    # n = torch.sum(coord_mask, 0)
    n = nGT / nB
    n = n / nA
    n = 0  # NOTE: n is forced to 0 here, so only the else branch below ever runs.
    if n > 0:
        loss_coord = nn.MSELoss(size_average=False)(coord * coord_mask, tcoord * coord_mask) / n
        loss_coord = loss_coord ** 2
        loss_conf = nn.MSELoss(size_average=False)(conf * conf_mask, tconf * conf_mask) / n
        loss_cls = nn.CrossEntropyLoss(size_average=False)(cls, tcls) if cls.size(0) > 0 else 0
        loss_cls = 100 * loss_cls / n
        # loss_conf = torch.sqrt(loss_conf)
        # loss_conf = loss_conf / n
    else:
        loss_coord = nn.MSELoss(size_average=False)(coord * coord_mask, tcoord * coord_mask) / 2
        loss_conf = nn.MSELoss(size_average=False)(conf * conf_mask, tconf * conf_mask)
        loss_cls = nn.CrossEntropyLoss(size_average=False)(cls, tcls) if cls.size(0) > 0 else 0

    coe_coord = 10.0  # unused
    coe_conf = 100.0  # unused
    # coe = [1.0, 5.5, 0.5, 0.3]
    coe = [1.0, 0.0, 0.0, 0.0]
    if loss_coord > 5:
        coe = [2.0, 1.0, 2.0, 0.0]
    elif loss_conf > 5:
        coe = [1.0, 1.0, 10.0, 0.0]
    else:
        # coe = [1.0, 1.0, 1.0, 1.0]
        coe = [1.0, 2.0, 1.0, 1.0]
    # if loss_cls > 1:
    #     coe = [0.5, 0.0, 0.5, 0.0]
    # else:
    #     coe = [0.4, 0.1, 0.1, 0.4]
    loss = coe[0] * loss_coord + coe[1] * loss_conf + coe[2] * loss_cls + coe[3] * loss_prop
    # loss = loss_coord + loss_conf + loss_cls  # + loss_prop

    losses = [loss_coord, loss_conf, loss_cls, loss_prop]
    losses = [str(round(float(s), 5)) for s in losses]
    self.csv_write.write(','.join(losses) + '\n')
    t4 = time.time()

    if False:
        print('-' * 30)
        print(' activation : %f' % (t1 - t0))
        print(' create pred_boxes : %f' % (t2 - t1))
        print(' build targets : %f' % (t3 - t2))
        print(' create loss : %f' % (t4 - t3))
        print(' total : %f' % (t4 - t0))
    print('[%s] %d: Layer(%03d) nGT %3d, nRC %3d, nRC75 %3d, nPP %3d, |loss: box =%6.3f, conf =%6.3f, class =%6.3f, prop =%6.3f, |total %7.3f|, num =%d'
          % (time.strftime('%H:%M:%S %d,%b.', time.localtime()), self.seen, self.nth_layer,
             nGT, nRecall, nRecall75, nProposals, loss_coord, loss_conf, loss_cls, loss_prop, loss, n))
    if math.isnan(loss.item()):
        print(conf, tconf)
        sys.exit(0)
    return loss
def region_loss(output, target, config):
    anchors = config['anchors']
    n_b = output.data.size(0)
    n_a = len(anchors) // 2
    n_c = config['num_classes']
    n_h = output.data.size(2)
    n_w = output.data.size(3)

    output = output.view(n_b, n_a, (5 + n_c), n_h, n_w)
    x = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(n_b, n_a, n_h, n_w))
    y = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(n_b, n_a, n_h, n_w))
    w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(n_b, n_a, n_h, n_w)
    h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(n_b, n_a, n_h, n_w)
    conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(n_b, n_a, n_h, n_w))
    cls = output.index_select(2, Variable(torch.linspace(5, 5 + n_c - 1, n_c).long().cuda()))
    cls = cls.view(n_b * n_a, n_c, n_h * n_w).transpose(1, 2).contiguous().view(n_b * n_a * n_h * n_w, n_c)

    pred_boxes = torch.cuda.FloatTensor(4, n_b * n_a * n_h * n_w)
    grid_x = torch.linspace(0, n_w - 1, n_w).repeat(n_h, 1).repeat(n_b * n_a, 1, 1).view(n_b * n_a * n_h * n_w).cuda()
    grid_y = torch.linspace(0, n_h - 1, n_h).repeat(n_w, 1).t().repeat(n_b * n_a, 1, 1).view(n_b * n_a * n_h * n_w).cuda()
    anchor_w = torch.Tensor(anchors).view(n_a, 2).index_select(1, torch.LongTensor([0])).cuda()
    anchor_h = torch.Tensor(anchors).view(n_a, 2).index_select(1, torch.LongTensor([1])).cuda()
    anchor_w = anchor_w.repeat(n_b, 1).repeat(1, 1, n_h * n_w).view(n_b * n_a * n_h * n_w)
    anchor_h = anchor_h.repeat(n_b, 1).repeat(1, 1, n_h * n_w).view(n_b * n_a * n_h * n_w)
    pred_boxes[0] = x.data + grid_x
    pred_boxes[1] = y.data + grid_y
    pred_boxes[2] = torch.exp(w.data) * anchor_w
    pred_boxes[3] = torch.exp(h.data) * anchor_h
    pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4))

    n_gt, n_correct, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls = \
        build_targets(pred_boxes, target.data, anchors, n_a, n_h, n_w, config)
    cls_mask = (cls_mask == 1)
    n_proposals = int((conf > .25).sum().data[0])

    tx = Variable(tx.cuda())
    ty = Variable(ty.cuda())
    tw = Variable(tw.cuda())
    th = Variable(th.cuda())
    tconf = Variable(tconf.cuda())
    tcls = Variable(tcls.view(-1)[cls_mask].long().cuda())
    coord_mask = Variable(coord_mask.cuda())
    conf_mask = Variable(conf_mask.cuda().sqrt())
    cls_mask = Variable(cls_mask.view(-1, 1).repeat(1, n_c).cuda())
    cls = cls[cls_mask].view(-1, n_c)

    coord_scale = config['coord_scale']
    class_scale = config['class_scale']
    loss_x = coord_scale * nn.MSELoss(size_average=False)(x * coord_mask, tx * coord_mask) / 2
    loss_y = coord_scale * nn.MSELoss(size_average=False)(y * coord_mask, ty * coord_mask) / 2
    loss_w = coord_scale * nn.MSELoss(size_average=False)(w * coord_mask, tw * coord_mask) / 2
    loss_h = coord_scale * nn.MSELoss(size_average=False)(h * coord_mask, th * coord_mask) / 2
    loss_conf = nn.MSELoss(size_average=False)(conf * conf_mask, tconf * conf_mask) / 2
    loss_cls = class_scale * nn.CrossEntropyLoss(size_average=False)(cls, tcls)
    loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    print('nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f'
          % (n_gt, n_correct, n_proposals, loss_x.data[0], loss_y.data[0], loss_w.data[0],
             loss_h.data[0], loss_conf.data[0], loss_cls.data[0], loss.data[0]))
    return loss
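# Hypothetical config for region_loss above; the keys are inferred from the
# function body (build_targets may require more), and the anchor values are
# the classic YOLOv2-VOC pairs, used here purely as an illustration:
config = {
    'anchors': [1.08, 1.19, 3.42, 4.41, 6.63, 11.38, 9.42, 5.11, 16.62, 10.52],
    'num_classes': 20,
    'coord_scale': 1.0,
    'class_scale': 1.0,
}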
def forward(self, output, target):
    # Parameters
    t0 = time.time()
    nB = output.data.size(0)
    nA = self.num_anchors
    nC = self.num_classes
    nH = output.data.size(2)
    nW = output.data.size(3)

    # Activation: 9 control-point coordinates (x0..x8, y0..y8), conf, classes
    output = output.view(nB, nA, (19 + nC), nH, nW)
    x0 = torch.sigmoid(output.index_select(2, torch.cuda.LongTensor([0])).view(nB, nA, nH, nW))
    y0 = torch.sigmoid(output.index_select(2, torch.cuda.LongTensor([1])).view(nB, nA, nH, nW))
    x1 = output.index_select(2, torch.cuda.LongTensor([2])).view(nB, nA, nH, nW)
    y1 = output.index_select(2, torch.cuda.LongTensor([3])).view(nB, nA, nH, nW)
    x2 = output.index_select(2, torch.cuda.LongTensor([4])).view(nB, nA, nH, nW)
    y2 = output.index_select(2, torch.cuda.LongTensor([5])).view(nB, nA, nH, nW)
    x3 = output.index_select(2, torch.cuda.LongTensor([6])).view(nB, nA, nH, nW)
    y3 = output.index_select(2, torch.cuda.LongTensor([7])).view(nB, nA, nH, nW)
    x4 = output.index_select(2, torch.cuda.LongTensor([8])).view(nB, nA, nH, nW)
    y4 = output.index_select(2, torch.cuda.LongTensor([9])).view(nB, nA, nH, nW)
    x5 = output.index_select(2, torch.cuda.LongTensor([10])).view(nB, nA, nH, nW)
    y5 = output.index_select(2, torch.cuda.LongTensor([11])).view(nB, nA, nH, nW)
    x6 = output.index_select(2, torch.cuda.LongTensor([12])).view(nB, nA, nH, nW)
    y6 = output.index_select(2, torch.cuda.LongTensor([13])).view(nB, nA, nH, nW)
    x7 = output.index_select(2, torch.cuda.LongTensor([14])).view(nB, nA, nH, nW)
    y7 = output.index_select(2, torch.cuda.LongTensor([15])).view(nB, nA, nH, nW)
    x8 = output.index_select(2, torch.cuda.LongTensor([16])).view(nB, nA, nH, nW)
    y8 = output.index_select(2, torch.cuda.LongTensor([17])).view(nB, nA, nH, nW)
    conf = torch.sigmoid(output.index_select(2, torch.cuda.LongTensor([18])).view(nB, nA, nH, nW))
    # torch.cuda.linspace does not exist; build the class index on CPU and move it.
    mycls = output.index_select(2, torch.linspace(19, 19 + nC - 1, nC).long().cuda())
    mycls = mycls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(nB * nA * nH * nW, nC)
    t1 = time.time()

    # Create pred boxes
    pred_corners = torch.cuda.FloatTensor(18, nB * nA * nH * nW)
    grid_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
    grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
    pred_corners[0] = (x0.data.view_as(grid_x) + grid_x) / nW
    pred_corners[1] = (y0.data.view_as(grid_y) + grid_y) / nH
    pred_corners[2] = (x1.data.view_as(grid_x) + grid_x) / nW
    pred_corners[3] = (y1.data.view_as(grid_y) + grid_y) / nH
    pred_corners[4] = (x2.data.view_as(grid_x) + grid_x) / nW
    pred_corners[5] = (y2.data.view_as(grid_y) + grid_y) / nH
    pred_corners[6] = (x3.data.view_as(grid_x) + grid_x) / nW
    pred_corners[7] = (y3.data.view_as(grid_y) + grid_y) / nH
    pred_corners[8] = (x4.data.view_as(grid_x) + grid_x) / nW
    pred_corners[9] = (y4.data.view_as(grid_y) + grid_y) / nH
    pred_corners[10] = (x5.data.view_as(grid_x) + grid_x) / nW
    pred_corners[11] = (y5.data.view_as(grid_y) + grid_y) / nH
    pred_corners[12] = (x6.data.view_as(grid_x) + grid_x) / nW
    pred_corners[13] = (y6.data.view_as(grid_y) + grid_y) / nH
    pred_corners[14] = (x7.data.view_as(grid_x) + grid_x) / nW
    pred_corners[15] = (y7.data.view_as(grid_y) + grid_y) / nH
    pred_corners[16] = (x8.data.view_as(grid_x) + grid_x) / nW
    pred_corners[17] = (y8.data.view_as(grid_y) + grid_y) / nH
    gpu_matrix = pred_corners.transpose(0, 1).contiguous().view(-1, 18)
    pred_corners = utils.convert2cpu(gpu_matrix)
    t2 = time.time()

    # Build targets
    nGT, nCorrect, coord_mask, conf_mask, cls_mask, \
        tx0, tx1, tx2, tx3, tx4, tx5, tx6, tx7, tx8, \
        ty0, ty1, ty2, ty3, ty4, ty5, ty6, ty7, ty8, tconf, tcls = \
        build_targets(pred_corners, target.data, self.anchors, nA, nC, nH, nW,
                      self.noobject_scale, self.object_scale, self.thresh, self.seen)
    cls_mask = (cls_mask == 1)
    nProposals = int((conf > 0.25).sum().data[0])
    tx0, ty0 = tx0.cuda(), ty0.cuda()
    tx1, ty1 = tx1.cuda(), ty1.cuda()
    tx2, ty2 = tx2.cuda(), ty2.cuda()
    tx3, ty3 = tx3.cuda(), ty3.cuda()
    tx4, ty4 = tx4.cuda(), ty4.cuda()
    tx5, ty5 = tx5.cuda(), ty5.cuda()
    tx6, ty6 = tx6.cuda(), ty6.cuda()
    tx7, ty7 = tx7.cuda(), ty7.cuda()
    tx8, ty8 = tx8.cuda(), ty8.cuda()
    tconf = tconf.cuda()
    tcls = tcls[cls_mask].long().cuda()
    coord_mask = coord_mask.cuda()
    conf_mask = conf_mask.cuda().sqrt()
    cls_mask = cls_mask.view(-1, 1).repeat(1, nC).cuda()
    mycls = mycls[cls_mask].view(-1, nC)
    t3 = time.time()

    # Create loss
    loss_x0 = self.coord_scale * nn.MSELoss(size_average=False)(x0 * coord_mask, tx0 * coord_mask) / 2.0
    loss_y0 = self.coord_scale * nn.MSELoss(size_average=False)(y0 * coord_mask, ty0 * coord_mask) / 2.0
    loss_x1 = self.coord_scale * nn.MSELoss(size_average=False)(x1 * coord_mask, tx1 * coord_mask) / 2.0
    loss_y1 = self.coord_scale * nn.MSELoss(size_average=False)(y1 * coord_mask, ty1 * coord_mask) / 2.0
    loss_x2 = self.coord_scale * nn.MSELoss(size_average=False)(x2 * coord_mask, tx2 * coord_mask) / 2.0
    loss_y2 = self.coord_scale * nn.MSELoss(size_average=False)(y2 * coord_mask, ty2 * coord_mask) / 2.0
    loss_x3 = self.coord_scale * nn.MSELoss(size_average=False)(x3 * coord_mask, tx3 * coord_mask) / 2.0
    loss_y3 = self.coord_scale * nn.MSELoss(size_average=False)(y3 * coord_mask, ty3 * coord_mask) / 2.0
    loss_x4 = self.coord_scale * nn.MSELoss(size_average=False)(x4 * coord_mask, tx4 * coord_mask) / 2.0
    loss_y4 = self.coord_scale * nn.MSELoss(size_average=False)(y4 * coord_mask, ty4 * coord_mask) / 2.0
    loss_x5 = self.coord_scale * nn.MSELoss(size_average=False)(x5 * coord_mask, tx5 * coord_mask) / 2.0
    loss_y5 = self.coord_scale * nn.MSELoss(size_average=False)(y5 * coord_mask, ty5 * coord_mask) / 2.0
    loss_x6 = self.coord_scale * nn.MSELoss(size_average=False)(x6 * coord_mask, tx6 * coord_mask) / 2.0
    loss_y6 = self.coord_scale * nn.MSELoss(size_average=False)(y6 * coord_mask, ty6 * coord_mask) / 2.0
    loss_x7 = self.coord_scale * nn.MSELoss(size_average=False)(x7 * coord_mask, tx7 * coord_mask) / 2.0
    loss_y7 = self.coord_scale * nn.MSELoss(size_average=False)(y7 * coord_mask, ty7 * coord_mask) / 2.0
    loss_x8 = self.coord_scale * nn.MSELoss(size_average=False)(x8 * coord_mask, tx8 * coord_mask) / 2.0
    loss_y8 = self.coord_scale * nn.MSELoss(size_average=False)(y8 * coord_mask, ty8 * coord_mask) / 2.0
    loss_conf = nn.MSELoss(size_average=False)(conf * conf_mask, tconf * conf_mask) / 2.0
    loss_x = loss_x0 + loss_x1 + loss_x2 + loss_x3 + loss_x4 + loss_x5 + loss_x6 + loss_x7 + loss_x8
    loss_y = loss_y0 + loss_y1 + loss_y2 + loss_y3 + loss_y4 + loss_y5 + loss_y6 + loss_y7 + loss_y8
    # The original referenced an undefined `cls` here; the class scores live in mycls.
    loss_cls = self.class_scale * nn.CrossEntropyLoss(size_average=False)(mycls, tcls)
    loss = loss_x + loss_y + loss_conf + loss_cls
    print('%d: nGT %d, recall %d, proposals %d, loss: x0: %f x %f, y0: %f y %f, conf %f, cls %f, total %f'
          % (self.seen, nGT, nCorrect, nProposals, loss_x0.data[0], loss_x.data[0],
             loss_y0.data[0], loss_y.data[0], loss_conf.data[0], loss_cls.data[0], loss.data[0]))
    # else:
    #     loss = loss_x + loss_y + loss_conf
    #     print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, conf %f, total %f'
    #           % (self.seen, nGT, nCorrect, nProposals, loss_x.data[0], loss_y.data[0],
    #              loss_conf.data[0], loss.data[0]))
    t4 = time.time()

    if False:
        print('-----------------------------------')
        print('          activation : %f' % (t1 - t0))
        print(' create pred_corners : %f' % (t2 - t1))
        print('       build targets : %f' % (t3 - t2))
        print('         create loss : %f' % (t4 - t3))
        print('               total : %f' % (t4 - t0))
    return loss
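# The 18 per-corner channels above could be selected in a loop instead of 18
# near-identical statements; a behavior-equivalent sketch (only channels 0
# and 1, the x0/y0 cell offsets, pass through a sigmoid):
import torch

def select_corners(output, nB, nA, nH, nW):
    coords = []
    for c in range(18):
        t = output.index_select(2, torch.cuda.LongTensor([c])).view(nB, nA, nH, nW)
        if c < 2:
            t = torch.sigmoid(t)  # x0, y0 are offsets within a grid cell
        coords.append(t)
    return coords  # [x0, y0, x1, y1, ..., x8, y8]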