def forward(self, output, dim_info, target, scaled_anchors, stride):
    """Return the summed detection loss for one prediction tensor.

    The last axis of ``output`` is read as ``[box (4), confidence, class
    scores...]`` and the last axis of ``dim_info`` as ``[x, y, w, h]``.
    The box part of ``output`` is divided by ``stride`` before it is handed
    to ``utils.build_targets`` together with ``target`` and
    ``scaled_anchors``.

    Returns:
        The sum of four MSE coordinate terms, the weighted object /
        no-object confidence BCE terms and the class BCE term.
    """
    confidence = output[..., 4]
    class_scores = output[..., 5:]

    assigned = utils.build_targets(
        pred_boxes=output[..., :4] / stride,
        pred_cls=class_scores,
        target=target,
        anchors=scaled_anchors,
        ignore_thres=self.ignore_thres,
    )
    # Ten values come back; the first two are not needed for the loss.
    _, _, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = assigned

    # Coordinate terms are only evaluated where obj_mask selects a cell.
    coord_terms = [
        self.mse_loss(dim_info[..., axis][obj_mask], wanted[obj_mask])
        for axis, wanted in enumerate((tx, ty, tw, th))
    ]
    box_loss = coord_terms[0] + coord_terms[1] + coord_terms[2] + coord_terms[3]

    # Confidence is the one term that also looks at the no-object cells.
    obj_term = self.bce_loss(confidence[obj_mask], tconf[obj_mask])
    noobj_term = self.bce_loss(confidence[noobj_mask], tconf[noobj_mask])
    conf_loss = self.object_scale * obj_term + self.noobject_scale * noobj_term

    cls_loss = self.bce_loss(class_scores[obj_mask], tcls[obj_mask])
    return box_loss + conf_loss + cls_loss
def train(neural_network, tf, idf):
    """Backpropagate every (input, target) pair through ``neural_network``.

    Inputs come from ``build_inputs(len(tf))`` and targets from
    ``build_targets(tf, idf)``.  One pass is made when ``tf`` has more than
    ten entries, otherwise ten passes.

    Returns:
        The same ``neural_network`` object that was passed in.
    """
    sample_count = len(tf)
    inputs = build_inputs(sample_count)
    targets = build_targets(tf, idf)

    if sample_count > 10:
        epochs = 1
    else:
        epochs = 10

    for _ in range(epochs):
        for pair in zip(inputs, targets):
            backpropagate(neural_network, *pair)
    return neural_network
def train_from_tensors(dataset_directory, epochs=4, batch_size=1, dropout_rate=0.3):
    """Fit a ``HeadClass`` model on the files found in ``dataset_directory``.

    Tensors and targets are both derived from the same file listing
    (``build_tensors`` / ``build_targets``).  The model is compiled, fitted
    with 20% of the data held out for validation, and its summary is
    printed afterwards.  Nothing is returned.
    """
    files = list_files(dataset_directory)
    all_tensors = build_tensors(files)
    targets = build_targets(files)

    compile_options = {
        'optimizer': 'Adam',
        'loss': 'categorical_crossentropy',
        'metrics': ['accuracy'],
    }
    fit_options = {
        'validation_split': 0.2,
        'epochs': epochs,
        'batch_size': batch_size,
    }

    model = HeadClass(dropout_rate=dropout_rate)
    model.compile(**compile_options)
    model.fit(all_tensors, targets, **fit_options)
    model.summary()
def forward(self, fmaps: Dict[str, torch.Tensor], rois: List[torch.Tensor],
            img_dims: List[Tuple[int, int]],
            targets: List[Dict[str, torch.Tensor]] = None):
    """Pool the given RoIs and predict class logits and box deltas.

    Args:
        fmaps: feature maps forwarded unchanged to ``self.roi_pool``.
        rois: one box tensor per image.  When ``targets`` is given this
            list is modified IN PLACE so that each entry keeps only the
            sampled boxes.
        img_dims: per-image dimensions forwarded to ``self.roi_pool``.
        targets: optional ground truth forwarded to ``build_targets``.

    Returns:
        ``((cls_logits, reg_deltas), (target_objectness, target_labels,
        target_offsets))``.  ``reg_deltas`` is reshaped to
        ``(-1, self.num_classes, 4)``.  The three target entries are
        ``None`` when ``targets`` is ``None``.
    """
    # Defaults for the inference path.  These names used to be bound only
    # inside the training branch, so a call without targets raised
    # UnboundLocalError at the return statement.
    target_objectness = target_labels = target_offsets = None

    if targets is not None:
        # Match RoIs to ground truth and build classification/regression targets.
        matches, target_objectness, target_labels, target_offsets = build_targets(
            rois, targets,
            self._params['fg_iou_threshold'],
            self._params['bg_iou_threshold'],
            add_best_matches=False)

        # Sample foreground and background with the configured ratio.
        positives, negatives = sample_fg_bg(
            matches,
            self._params['num_of_samples'],
            self._params['positive_ratio'])

        # Re-encode matches: 1 = sampled positive, -1 = sampled negative,
        # 0 = not sampled.
        matches[:] = 0
        matches[positives] = 1
        matches[negatives] = -1

        sample_mask = torch.logical_or(matches == 1, matches == -1)
        positive_mask = matches == 1

        target_objectness = target_objectness[sample_mask]
        target_labels = target_labels[sample_mask]
        # Offsets are only kept for the positive samples.
        target_offsets = target_offsets[positive_mask]

        # sample_mask is laid out as the per-image RoIs back to back, so
        # walk it with a running offset to filter each image's boxes.
        current = 0
        for roi_index, boxes in enumerate(rois):
            N = boxes.size(0)
            batch_sample_mask = sample_mask[current:current + N]
            current += N
            rois[roi_index] = boxes[batch_sample_mask]

    # Pool every RoI and flatten everything after the first dimension.
    outputs = self.roi_pool(fmaps, rois, img_dims).flatten(start_dim=1)

    # Shared hidden unit followed by the classification and regression units.
    outputs = self.hidden_unit(outputs)
    cls_logits = self.cls_unit(outputs)
    reg_deltas = self.reg_unit(outputs)
    reg_deltas = reg_deltas.reshape(-1, self.num_classes, 4)

    return (cls_logits, reg_deltas), (target_objectness, target_labels, target_offsets)
def forward(self, fmaps:torch.Tensor, img_dims:Tuple[int,int], targets:List[Dict[str,torch.Tensor]]=None):
    """Run the prediction and detection layers, optionally with losses.

    Returns:
        ``batched_dets`` alone when ``targets`` is ``None``; otherwise
        ``(batched_dets, {'cls_loss': ..., 'reg_loss': ...})``.
    """
    nms_limits, sampling_cfg = self.get_params()
    keep_pre_nms, keep_post_nms = nms_limits
    (batch_size_per_image, batch_positive_ratio,
     fg_iou_threshold, bg_iou_threshold, nms_threshold) = sampling_cfg

    n_images = fmaps.size(0)

    # cls_logits and reg_deltas come straight from the prediction layer;
    # after the merge below they are indexed as (N,) and (N, 4).
    cls_logits, reg_deltas = self.prediction_layer(fmaps)

    batched_dets = self.detection_layer(
        cls_logits, reg_deltas, fmaps.shape[-2:], img_dims,
        nms_threshold=nms_threshold,
        keep_pre_nms=keep_pre_nms,
        keep_post_nms=keep_post_nms,
        dtype=fmaps.dtype,
        device=fmaps.device)

    if targets is None:
        return batched_dets

    # Merge the batch dimension so sampled indices address all images at once.
    flat_logits = cls_logits.reshape(-1)
    flat_deltas = reg_deltas.reshape(-1, 4)

    # Anchors are repeated once per image before matching against targets.
    repeated_anchors = self.detection_layer.anchors.repeat(n_images, 1, 1)
    matches, target_objectness, target_labels, target_offsets = build_targets(
        repeated_anchors, targets,
        fg_iou_threshold, bg_iou_threshold,
        add_best_matches=True)

    # Sample foreground and background with the configured ratio.
    positives, negatives = sample_fg_bg(matches, batch_size_per_image, batch_positive_ratio)
    samples = torch.cat([positives, negatives])

    # Classification uses every sample, regression only the positives.
    cls_loss, reg_loss = self.compute_loss(
        flat_logits[samples], target_objectness[samples],
        flat_deltas[positives], target_offsets[positives])

    return batched_dets, {'cls_loss': cls_loss, 'reg_loss': reg_loss}
def forward(self, x, targets=None):
    """Decode a detection feature map and, when training, compute the loss.

    ``x`` is viewed as ``(batch, num_anchors, bbox_attrs, grid, grid)`` and
    permuted so the attribute axis is last.  Attributes 0-1 are the
    sigmoid-activated centre, 2-3 the raw width/height, 4 the sigmoid
    confidence and 5+ the sigmoid class scores.

    Args:
        x: feature map whose dimension 2 gives the grid size.
        targets: optional ground truth forwarded to ``build_targets``.

    Returns:
        With targets: ``(loss, loss_x, loss_y, loss_w, loss_h, loss_conf,
        loss_cls, recall, precision)``, where everything after ``loss`` is
        a Python number.  Without targets: one tensor concatenating boxes
        (scaled by the stride), confidence and class scores on the last
        axis.
    """
    nA = self.num_anchors
    nB = x.size(0)
    nG = x.size(2)
    stride = self.image_dim / nG

    # Legacy tensor types, chosen to follow the input's device.
    FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
    LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
    ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor

    prediction = x.view(nB, nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()

    # Get outputs
    x = torch.sigmoid(prediction[..., 0])  # Center x
    y = torch.sigmoid(prediction[..., 1])  # Center y
    w = prediction[..., 2]  # Width
    h = prediction[..., 3]  # Height
    pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
    pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

    # Per-cell offsets: grid_x varies along the last axis, grid_y along the
    # one before it.
    grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).type(FloatTensor)
    grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).type(FloatTensor)
    # Anchors are expressed in grid units.
    scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors])
    anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))
    anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))

    # Add offset and scale with anchors.  ``.data`` keeps the decoded boxes
    # out of the autograd graph.
    pred_boxes = FloatTensor(prediction[..., :4].shape)
    pred_boxes[..., 0] = x.data + grid_x
    pred_boxes[..., 1] = y.data + grid_y
    pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
    pred_boxes[..., 3] = torch.exp(h.data) * anchor_h

    if targets is None:
        # Not training: return the decoded predictions.
        output = torch.cat(
            (
                pred_boxes.view(nB, -1, 4) * stride,
                pred_conf.view(nB, -1, 1),
                pred_cls.view(nB, -1, self.num_classes),
            ),
            -1,
        )
        return output

    # Training
    if x.is_cuda:
        self.mse_loss = self.mse_loss.cuda()
        self.bce_loss = self.bce_loss.cuda()
        self.ce_loss = self.ce_loss.cuda()

    nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(
        pred_boxes=pred_boxes.cpu().data,
        pred_conf=pred_conf.cpu().data,
        pred_cls=pred_cls.cpu().data,
        target=targets.cpu().data,
        anchors=scaled_anchors.cpu().data,
        num_anchors=nA,
        num_classes=self.num_classes,
        grid_size=nG,
        ignore_thres=self.ignore_thres,
        img_dim=self.image_dim,
    )

    nProposals = int((pred_conf > 0.5).sum().item())
    recall = float(nCorrect / nGT) if nGT else 1
    # Guard the division: with no confidence above 0.5 there are no
    # proposals, and the unguarded form raised ZeroDivisionError.
    precision = float(nCorrect / nProposals) if nProposals else 0

    # Handle masks.  They stay ByteTensors because the subtraction below is
    # not defined for bool tensors.
    mask = Variable(mask.type(ByteTensor))
    conf_mask = Variable(conf_mask.type(ByteTensor))

    # Handle target variables
    tx = Variable(tx.type(FloatTensor), requires_grad=False)
    ty = Variable(ty.type(FloatTensor), requires_grad=False)
    tw = Variable(tw.type(FloatTensor), requires_grad=False)
    th = Variable(th.type(FloatTensor), requires_grad=False)
    tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
    tcls = Variable(tcls.type(LongTensor), requires_grad=False)

    # Split the confidence mask into cells with and without ground truth.
    conf_mask_true = mask
    conf_mask_false = conf_mask - mask

    # Mask outputs to ignore non-existing objects
    loss_x = self.mse_loss(x[mask], tx[mask])
    loss_y = self.mse_loss(y[mask], ty[mask])
    loss_w = self.mse_loss(w[mask], tw[mask])
    loss_h = self.mse_loss(h[mask], th[mask])
    loss_conf = self.bce_loss(pred_conf[conf_mask_false], tconf[conf_mask_false]) + self.bce_loss(
        pred_conf[conf_mask_true], tconf[conf_mask_true])
    loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1))
    loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    return (
        loss,
        loss_x.item(),
        loss_y.item(),
        loss_w.item(),
        loss_h.item(),
        loss_conf.item(),
        loss_cls.item(),
        recall,
        precision,
    )
def forward(self, x, CUDA, targets=None):
    # Walk the layer descriptions in self.blocks[1:], dispatching on each
    # block's "type", and collect the outputs seen at 'yolo' blocks in
    # `detections`.  When `targets` is given, a loss tuple
    # (loss, loss_x, loss_y, loss_w, loss_h, loss_conf, loss_cls, recall,
    # precision) is returned.
    #
    # NOTE(review): the original indentation of this function was lost.  The
    # training block is laid out here as following the layer loop -- confirm
    # against the real file.  With that layout nothing is returned explicitly
    # when `targets` is None.
    detections = []
    modules = self.blocks[1:]
    outputs = {}   # Cache of every layer's output, read back by route/shortcut layers
    write = 0      # Becomes 1 once `detections` holds the first yolo output

    for i in range(len(modules)):
        module_type = (modules[i]["type"])

        if module_type == "convolutional" or module_type == "upsample" or module_type == "maxpool":
            x = self.module_list[i](x)
            outputs[i] = x

        elif module_type == "route":
            layers = modules[i]["layers"]
            layers = [int(a) for a in layers]

            # Positive entries are rewritten as offsets relative to layer i,
            # so both forms are looked up below as outputs[i + offset].
            if (layers[0]) > 0:
                layers[0] = layers[0] - i

            if len(layers) == 1:
                x = outputs[i + (layers[0])]
            else:
                if (layers[1]) > 0:
                    layers[1] = layers[1] - i

                map1 = outputs[i + layers[0]]
                map2 = outputs[i + layers[1]]
                # Two routed maps are concatenated along dimension 1.
                x = torch.cat((map1, map2), 1)
            outputs[i] = x

        elif module_type == "shortcut":
            from_ = int(modules[i]["from"])
            x = outputs[i - 1] + outputs[i + from_]
            outputs[i] = x

        elif module_type == 'yolo':
            anchors = self.module_list[i][0].anchors
            # Get the input dimensions
            inp_dim = int(self.net_info["height"])

            # Get the number of classes
            num_classes = int(modules[i]["classes"])

            # Output the result
            x = x.data
            if not write:
                detections = x
                write = 1
            else:
                detections = torch.cat((detections, x), 1)

            outputs[i] = outputs[i - 1]

    # Training
    # NOTE(review): anchors, inp_dim and num_classes are only bound inside
    # the 'yolo' branch above, so this block needs at least one yolo layer.
    if targets is not None:
        if x.is_cuda:
            self.mse_loss = self.mse_loss.cuda()
            self.bce_loss = self.bce_loss.cuda()
            self.ce_loss = self.ce_loss.cuda()

        # NOTE(review): the stride is derived from detections.size(2), while
        # the slices below treat dimension 2 as the attribute axis -- confirm
        # the intended layout of `detections`.
        stride_ = inp_dim // detections.size(2)
        grid_size = inp_dim // stride_
        num_anchors = len(anchors)

        FloatTensor = torch.cuda.FloatTensor if CUDA else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if CUDA else torch.LongTensor
        ByteTensor = torch.cuda.ByteTensor if CUDA else torch.ByteTensor

        nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(
            pred_boxes=detections[:, :, :4].cpu().data,
            pred_conf=detections[:, :, 4:5].cpu().data,
            pred_cls=detections[:, :, 5:].cpu().data,
            target=targets.cpu().data,
            anchors=anchors.cpu().data,
            num_anchors=num_anchors,
            num_classes=num_classes,
            grid_size=grid_size,
            ignore_thres=0.3,
            img_dim=inp_dim,
        )

        nProposals = int((detections[:, :, 4:5] > 0.5).sum().item())
        recall = float(nCorrect / nGT) if nGT else 1
        # Precision stays 0 when there are no proposals, avoiding a division by zero.
        precision = 0
        if nProposals > 0:
            precision = float(nCorrect / nProposals)

        # Handle masks
        mask = Variable(mask.type(ByteTensor))
        conf_mask = Variable(conf_mask.type(ByteTensor))

        # Handle target variables
        tx = Variable(tx.type(FloatTensor), requires_grad=False)
        ty = Variable(ty.type(FloatTensor), requires_grad=False)
        tw = Variable(tw.type(FloatTensor), requires_grad=False)
        th = Variable(th.type(FloatTensor), requires_grad=False)
        tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
        tcls = Variable(tcls.type(LongTensor), requires_grad=False)

        x = torch.sigmoid(detections[..., 0])  # Center x
        y = torch.sigmoid(detections[..., 1])  # Center y
        w = detections[..., 2]  # Width
        h = detections[..., 3]  # Height
        pred_conf = torch.sigmoid(detections[..., 4])  # Conf
        pred_cls = torch.sigmoid(detections[..., 5:])  # Cls pred.

        # Get conf mask where gt and where there is no gt
        conf_mask_true = mask
        conf_mask_false = conf_mask - mask

        # Mask outputs to ignore non-existing objects.
        # Fresh loss modules are created here; the self.*_loss modules moved
        # to CUDA above are not the ones used below.
        mse_loss = nn.MSELoss()
        bce_loss = nn.BCELoss()
        ce_loss = nn.CrossEntropyLoss()

        loss_x = mse_loss(x[mask], tx[mask])
        loss_y = mse_loss(y[mask], ty[mask])
        loss_w = mse_loss(w[mask], tw[mask])
        loss_h = mse_loss(h[mask], th[mask])
        loss_conf = bce_loss(
            pred_conf[conf_mask_false], tconf[conf_mask_false]) + bce_loss(
                pred_conf[conf_mask_true], tconf[conf_mask_true])
        # The class term is scaled by 1 / detections.size(0).
        loss_cls = (1 / detections.size(0)) * ce_loss(
            pred_cls[mask], torch.argmax(tcls[mask], 1))
        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        return (
            loss,
            loss_x.item(),
            loss_y.item(),
            loss_w.item(),
            loss_h.item(),
            loss_conf.item(),
            loss_cls.item(),
            recall,
            precision,
        )
def forward(self, x, targets=None):
    """Decode one detection feature map; add the loss when targets are given.

    The input is viewed as ``(batch, numAnchors, numOfClass + 5, grid, -1)``
    and permuted so the attribute axis ``(x, y, w, h, conf, classes...)`` is
    last.

    Returns:
        ``(output, 0)`` without targets, ``(output, total_loss)`` with
        them.  ``output`` concatenates stride-scaled boxes, confidence and
        class scores on the last axis.
    """
    on_gpu = x.is_cuda
    tensor_cls = torch.cuda.FloatTensor if on_gpu else torch.FloatTensor
    n_batch = x.shape[0]
    gridSiz = x.shape[2]

    # Cached grid offsets are recomputed whenever the grid size changes.
    if gridSiz != self.gridSiz:
        self.compute_grid_offsets(gridSiz, cuda=on_gpu)

    raw = x.view(n_batch, self.numAnchors, self.numOfClass + 5, gridSiz, -1)
    raw = raw.permute(0, 1, 3, 4, 2).contiguous()

    centre_x = torch.sigmoid(raw[..., 0])
    centre_y = torch.sigmoid(raw[..., 1])
    log_w = raw[..., 2]
    log_h = raw[..., 3]
    objectness = torch.sigmoid(raw[..., 4])
    class_probs = torch.sigmoid(raw[..., 5:])

    # Boxes in grid units; ``.data`` keeps them out of the autograd graph.
    boxes = tensor_cls(raw[..., :4].shape)
    decoded = (
        centre_x.data + self.gridX,
        centre_y.data + self.gridY,
        torch.exp(log_w.data) * self.anchor_w,
        torch.exp(log_h.data) * self.anchor_h,
    )
    for slot, values in enumerate(decoded):
        boxes[..., slot] = values

    output = torch.cat(
        (
            boxes.view(n_batch, -1, 4) * self.stride,
            objectness.view(n_batch, -1, 1),
            class_probs.view(n_batch, -1, self.numOfClass),
        ),
        -1,
    )

    if targets is None:
        return output, 0

    # The first two returned values are not used for the loss.
    (_, _, obj_mask, noobj_mask,
     tx, ty, tw, th, tcls, tconf) = build_targets(
        pred_boxes=boxes,
        pred_cls=class_probs,
        target=targets,
        anchors=self.scaledAnchors,
        ignore_thres=self.ignore_thres,
    )

    # Coordinate and class terms look only at object cells; confidence
    # also looks at the no-object cells.
    loss_x = self.mse_loss(centre_x[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(centre_y[obj_mask], ty[obj_mask])
    loss_w = self.mse_loss(log_w[obj_mask], tw[obj_mask])
    loss_h = self.mse_loss(log_h[obj_mask], th[obj_mask])
    conf_obj_term = self.bce_loss(objectness[obj_mask], tconf[obj_mask])
    conf_noobj_term = self.bce_loss(objectness[noobj_mask], tconf[noobj_mask])
    loss_conf = self.obj_scale * conf_obj_term + self.noobj_scale * conf_noobj_term
    loss_cls = self.bce_loss(class_probs[obj_mask], tcls[obj_mask])

    centre_part = (loss_y + loss_x) * self.coord_scale
    size_part = (loss_w + loss_h) * self.coord_scale
    total_loss = centre_part + size_part + loss_conf + loss_cls
    return output, total_loss
def forward(self, x, target=None):
    """Run the backbone and decode every feature map it returns.

    Each feature map is viewed as ``(batch, num_anchors, num_classes + 5,
    grid, grid)`` and decoded with the anchors selected by
    ``self.anchors_mask`` for that scale.

    Args:
        x: network input passed to ``self.backbone``.
        target: optional ground truth forwarded to ``utils.build_targets``.

    Returns:
        ``(detections, loss)``.  ``detections`` is the per-scale outputs
        concatenated along dimension 1.  ``loss`` is the sum of the
        per-scale losses, or ``0`` when ``target`` is ``None``.
    """
    features = self.backbone(x)
    total_loss = []
    output = []

    for idx, fmap in enumerate(features):
        FloatTensor = torch.cuda.FloatTensor if fmap.is_cuda else torch.FloatTensor
        batch_size = fmap.size(0)
        grid_size = fmap.size(2)
        current_anchors = [self.anchors[i] for i in self.anchors_mask[idx]]
        stride = self.input_size // grid_size

        prediction = fmap.view(batch_size, self.num_anchors, self.num_classes + 5,
                               grid_size, grid_size)
        prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()

        # Get outputs
        cx = torch.sigmoid(prediction[..., 0])  # Center x
        cy = torch.sigmoid(prediction[..., 1])  # Center y
        w = prediction[..., 2]  # Width
        h = prediction[..., 3]  # Height
        pred_conf = torch.sigmoid(prediction[..., 4])  # Conf
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

        # Per-cell offsets: grid_x varies along the last axis, grid_y along
        # the one before it.
        grid_x = torch.arange(grid_size).repeat(grid_size, 1).view(
            [1, 1, grid_size, grid_size]).type(FloatTensor)
        grid_y = torch.arange(grid_size).repeat(grid_size, 1).t().view(
            [1, 1, grid_size, grid_size]).type(FloatTensor)
        # Anchors are expressed in grid units for this scale.
        scaled_anchors = FloatTensor([(a_w / stride, a_h / stride)
                                      for a_w, a_h in current_anchors])
        anchor_w = scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1))
        anchor_h = scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1))

        # Add offset and scale with anchors.  ``.data`` keeps the decoded
        # boxes out of the autograd graph.
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = cx.data + grid_x
        pred_boxes[..., 1] = cy.data + grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * anchor_h

        output.append(
            torch.cat((pred_boxes.view(batch_size, -1, 4) * stride,
                       pred_conf.view(batch_size, -1, 1),
                       pred_cls.view(batch_size, -1, self.num_classes)), -1))

        if target is not None:
            # The first two returned values are not used for the loss.
            (_iou_scores, _class_mask, obj_mask, noobj_mask,
             tx, ty, tw, th, tcls, tconf) = utils.build_targets(
                pred_boxes=pred_boxes,
                pred_cls=pred_cls,
                target=target,
                anchors=scaled_anchors,
                ignore_thres=self.ignore_thres,
            )

            # Loss: mask outputs to ignore non-existing objects (except
            # with the confidence loss).
            loss_x = self.mse_loss(cx[obj_mask], tx[obj_mask])
            loss_y = self.mse_loss(cy[obj_mask], ty[obj_mask])
            loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
            loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
            loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
            loss_conf_noobj = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
            loss_conf = (self.object_scale * loss_conf_obj
                         + self.noobject_scale * loss_conf_noobj)
            loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
            total_loss.append(loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls)

    # The builtin sum adds the tensors with ``+`` and so keeps the autograd
    # graph.  np.sum converted them through NumPy, which fails for tensors
    # that require grad and otherwise drops the graph.
    return torch.cat(output, 1), sum(total_loss)
def forward(self, x, targets=None, img_dim=None):
    """Decode one detection feature map and optionally compute loss and metrics.

    ``img_dim`` is stored on ``self.img_dim``.  The input is viewed as
    ``(batch, num_anchors, num_classes + 5, grid, grid)`` with the attribute
    axis moved last.

    Returns:
        ``(output, 0)`` without targets, ``(output, total_loss)`` with
        them.  In the second case ``self.metrics`` is replaced by a dict of
        Python scalars.
    """
    on_gpu = x.is_cuda
    tensor_cls = torch.cuda.FloatTensor if on_gpu else torch.FloatTensor

    self.img_dim = img_dim
    num_samples = x.size(0)
    grid_size = x.size(2)

    raw = x.view(num_samples, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
    raw = raw.permute(0, 1, 3, 4, 2).contiguous()

    centre_x = torch.sigmoid(raw[..., 0])
    centre_y = torch.sigmoid(raw[..., 1])
    log_w = raw[..., 2]
    log_h = raw[..., 3]
    pred_conf = torch.sigmoid(raw[..., 4])
    pred_cls = torch.sigmoid(raw[..., 5:])

    # Cached offsets are only valid for the grid size they were built for.
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=on_gpu)

    # Boxes in grid units; ``.data`` keeps them out of the autograd graph.
    pred_boxes = tensor_cls(raw[..., :4].shape)
    decoded = (
        centre_x.data + self.grid_x,
        centre_y.data + self.grid_y,
        torch.exp(log_w.data) * self.anchor_w,
        torch.exp(log_h.data) * self.anchor_h,
    )
    for slot, values in enumerate(decoded):
        pred_boxes[..., slot] = values

    output = torch.cat(
        (
            pred_boxes.view(num_samples, -1, 4) * self.stride,
            pred_conf.view(num_samples, -1, 1),
            pred_cls.view(num_samples, -1, self.num_classes),
        ),
        -1,
    )

    if targets is None:
        return output, 0

    iou_scores, class_mask, obj_mask, noobj_mask, tx, ty, tw, th, tcls, tconf = build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=self.scaled_anchors,
        ignore_thres=self.ignore_thres,
    )
    # Masks are converted to bool before being used as indices.
    obj_mask = obj_mask.bool()
    noobj_mask = noobj_mask.bool()

    # Coordinate and class terms look only at object cells; confidence
    # also looks at the no-object cells.
    loss_x = self.mse_loss(centre_x[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(centre_y[obj_mask], ty[obj_mask])
    loss_w = self.mse_loss(log_w[obj_mask], tw[obj_mask])
    loss_h = self.mse_loss(log_h[obj_mask], th[obj_mask])
    conf_obj_term = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    conf_noobj_term = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
    loss_conf = self.obj_scale * conf_obj_term + self.noobj_scale * conf_noobj_term
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
    total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    # Metrics
    cls_acc = 100 * class_mask[obj_mask].mean()
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    detected_mask = conf50 * class_mask * tconf
    n_objects = obj_mask.sum() + 1e-16
    precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
    recall50 = torch.sum(iou50 * detected_mask) / n_objects
    recall75 = torch.sum(iou75 * detected_mask) / n_objects

    scalar_metrics = (
        ("loss", total_loss),
        ("x", loss_x),
        ("y", loss_y),
        ("w", loss_w),
        ("h", loss_h),
        ("conf", loss_conf),
        ("cls", loss_cls),
        ("cls_acc", cls_acc),
        ("recall50", recall50),
        ("recall75", recall75),
        ("precision", precision),
        ("conf_obj", conf_obj),
        ("conf_noobj", conf_noobj),
    )
    metrics = {name: to_cpu(value).item() for name, value in scalar_metrics}
    metrics["grid_size"] = grid_size
    self.metrics = metrics

    return output, total_loss
def forward(self, x, targets=None, img_dim=416):
    """Decode one detection feature map; add loss and metrics when training.

    The input is viewed as ``(batch, num_anchors, num_classes + 5, grid,
    grid)`` with the attribute axis moved last.  Centre offsets,
    confidence and class scores go through a sigmoid; width and height are
    exponentiated and scaled by the anchors.

    Returns:
        ``(output, 0)`` without targets, ``(output, total_loss)`` with
        them.  In the second case ``self.metrics`` is replaced by a dict of
        Python scalars.
    """
    tensor_cls = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

    batch_size = x.shape[0]
    grid_size = x.shape[2]

    raw = x.view(batch_size, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
    raw = raw.permute(0, 1, 3, 4, 2).contiguous()

    # Sigmoid keeps the centre offsets between 0 and 1.
    tx_hat = torch.sigmoid(raw[..., 0])
    ty_hat = torch.sigmoid(raw[..., 1])
    tw_hat = raw[..., 2]
    th_hat = raw[..., 3]
    pred_conf = torch.sigmoid(raw[..., 4])
    pred_class = torch.sigmoid(raw[..., 5:])

    # Cached offsets are only valid for the grid size they were built for.
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, img_dim)

    # Add the cell offsets to the centres and scale the exponentiated
    # width/height by the anchors.
    pred_boxes = tensor_cls(raw[..., :4].shape)
    decoded = (
        tx_hat + self.grid_x,
        ty_hat + self.grid_y,
        torch.exp(tw_hat) * self.anchor_w,
        torch.exp(th_hat) * self.anchor_h,
    )
    for slot, values in enumerate(decoded):
        pred_boxes[..., slot] = values

    output = torch.cat(
        (
            pred_boxes.view(batch_size, -1, 4) * self.stride,
            pred_conf.view(batch_size, -1, 1),
            pred_class.view(batch_size, -1, self.num_classes),
        ),
        -1,
    )

    if targets is None:
        return output, 0

    obj_mask, noobj_mask, tx, ty, tw, th, tclass, tconf = build_targets(
        pred_boxes=pred_boxes,
        pred_class=pred_class,
        targets=targets,
        anchors=self.scaled_anchors,
        ignore_thresh=self.ignore_thresh)

    # Coordinate and class terms look only at object cells; confidence
    # also looks at the no-object cells.
    loss_x = self.mse_loss(tx_hat[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(ty_hat[obj_mask], ty[obj_mask])
    loss_w = self.mse_loss(tw_hat[obj_mask], tw[obj_mask])
    loss_h = self.mse_loss(th_hat[obj_mask], th[obj_mask])
    conf_obj_term = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    conf_noobj_term = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask])
    loss_conf = self.obj_scale * conf_obj_term + self.noobj_scale * conf_noobj_term
    loss_class = self.bce_loss(pred_class[obj_mask], tclass[obj_mask])
    total_loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_class

    # Metrics
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()

    scalar_metrics = (
        ("loss", total_loss),
        ("x", loss_x),
        ("y", loss_y),
        ("w", loss_w),
        ("h", loss_h),
        ("conf", loss_conf),
        ("cls", loss_class),
        ("conf_obj", conf_obj),
        ("conf_noobj", conf_noobj),
    )
    metrics = {name: value.cpu().item() for name, value in scalar_metrics}
    metrics["grid_size"] = grid_size
    self.metrics = metrics

    return output, total_loss
def forward(self, x, targets=None, img_dim=None):
    """Decode a box + confidence feature map; add the loss when training.

    ``img_dim`` is stored on ``self.img_dim``.  The input is viewed as
    ``(batch, num_anchors, num_classes + 4, grid, grid)`` with the attribute
    axis moved last.  Only attributes 0-4 (x, y, w, h, confidence) are
    read; no class scores are produced.

    Args:
        x: feature map whose dimension 2 gives the grid size.
        targets: optional ground truth forwarded to ``build_targets``.
        img_dim: value recorded on ``self.img_dim``.

    Returns:
        ``(output, 0)`` without targets, ``(output, total_loss)`` with
        them.  ``output`` concatenates stride-scaled boxes and confidence
        on the last axis.
    """
    # Tensor type follows the input's device.
    FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor

    self.img_dim = img_dim
    num_samples = x.size(0)
    grid_size = x.size(2)

    # NOTE(review): the per-anchor width is num_classes + 4 while index 4 is
    # read as confidence, so this presumably assumes num_classes == 1 --
    # confirm.
    prediction = (
        x.view(num_samples, self.num_anchors, self.num_classes + 4, grid_size, grid_size)
        .permute(0, 1, 3, 4, 2)
        .contiguous()
    )

    # Get outputs
    x = torch.sigmoid(prediction[..., 0])  # Center x
    y = torch.sigmoid(prediction[..., 1])  # Center y
    w = prediction[..., 2]  # Width
    h = prediction[..., 3]  # Height
    pred_conf = torch.sigmoid(prediction[..., 4])  # Conf

    # If grid size does not match current we compute new offsets
    if grid_size != self.grid_size:
        self.compute_grid_offsets(grid_size, cuda=x.is_cuda)

    # Add offset and scale with anchors.  ``.data`` keeps the decoded boxes
    # out of the autograd graph.
    pred_boxes = FloatTensor(prediction[..., :4].shape)
    pred_boxes[..., 0] = x.data + self.grid_x
    pred_boxes[..., 1] = y.data + self.grid_y
    pred_boxes[..., 2] = torch.exp(w.data) * self.anchor_w
    pred_boxes[..., 3] = torch.exp(h.data) * self.anchor_h

    # The trailing axis lets the masked confidence line up with the
    # (-1, 1)-shaped targets in the loss below.
    pred_conf = pred_conf.unsqueeze(-1)

    # A torch.cat of the unscaled boxes used to sit here; its result was
    # overwritten on the next statement, so it has been removed.
    output = torch.cat(
        (
            pred_boxes.view(num_samples, -1, 4) * self.stride,
            pred_conf.view(num_samples, -1, 1),
        ),
        -1,
    )

    if targets is None:
        return output, 0

    # The first returned value (IoU scores) is not used here.
    _iou_scores, obj_mask, noobj_mask, tx, ty, tw, th, tconf = build_targets(
        pred_boxes, targets, self.scaled_anchors, self.ignore_thres)

    # Coordinate terms look only at object cells; confidence also looks at
    # the no-object cells.
    loss_x = self.mse_loss(x[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(y[obj_mask], ty[obj_mask])
    loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
    loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
    obj_loss = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask].reshape(-1, 1))
    noobj_loss = self.bce_loss(pred_conf[noobj_mask], tconf[noobj_mask].reshape(-1, 1))
    conf_loss = self.obj_scale * obj_loss + self.noobj_scale * noobj_loss
    total_loss = loss_x + loss_y + loss_w + loss_h + conf_loss

    # Metrics
    conf_obj = pred_conf[obj_mask].mean()
    conf_noobj = pred_conf[noobj_mask].mean()

    self.metrics = {
        "loss": to_cpu(total_loss).item(),
        "x": to_cpu(loss_x).item(),
        "y": to_cpu(loss_y).item(),
        "w": to_cpu(loss_w).item(),
        "h": to_cpu(loss_h).item(),
        "conf": to_cpu(conf_loss).item(),
        "conf_obj": to_cpu(conf_obj).item(),
        "conf_noobj": to_cpu(conf_noobj).item(),
        "grid_size": grid_size,
    }

    return output, total_loss
def forward(self, x, targets=None):
    """Decode one detection feature map; add loss and metrics when training.

    Args:
        x: feature map, viewed as ``(batch, num_anchors, num_classes + 5,
            grid, grid)`` with the attribute axis moved last.
        targets: ground truth forwarded to ``utils.build_targets``, or
            ``None`` for inference.  The original comment describes rows of
            six values: batch index, class, x, y, w, h.

    Returns:
        ``(output, 0)`` without targets, ``(output, loss_layer)`` with
        them.  ``output`` concatenates stride-scaled boxes, confidence and
        class scores on the last axis.  With targets, ``self.metrics`` is
        replaced by a dict of Python scalars.
    """
    num_batches = x.size(0)
    grid_size = x.size(2)
    # Helper tensors are created on the input's device; they used to be
    # built on the CPU unconditionally, which fails for CUDA inputs.
    device = x.device

    # Reshape the raw output and make the memory layout contiguous.
    prediction = (
        x.view(num_batches, self.num_anchors, self.num_classes + 5, grid_size, grid_size)
        .permute(0, 1, 3, 4, 2)
        .contiguous()
    )

    # Get outputs, each shaped [batch, anchors, grid, grid].
    cx = torch.sigmoid(prediction[..., 0])  # predicted box centre x
    cy = torch.sigmoid(prediction[..., 1])  # predicted box centre y
    w = prediction[..., 2]  # predicted box width
    h = prediction[..., 3]  # predicted box height
    pred_conf = torch.sigmoid(prediction[..., 4])  # confidence
    pred_cls = torch.sigmoid(prediction[..., 5:])  # class probabilities

    # Compute the offsets.
    stride = self.img_size / grid_size

    # grid_x holds the column index of every cell (each row is
    # 0, 1, ..., grid_size - 1); grid_y holds the row index (row r is all r).
    grid_x = torch.arange(grid_size, dtype=torch.float, device=device).repeat(
        grid_size, 1).view([1, 1, grid_size, grid_size])
    grid_y = torch.arange(grid_size, dtype=torch.float, device=device).repeat(
        grid_size, 1).t().view([1, 1, grid_size, grid_size])

    # Anchors are expressed in grid units.
    scaled_anchor = torch.as_tensor(
        [(a_w / stride, a_h / stride) for a_w, a_h in self.anchors],
        dtype=torch.float, device=device)
    anchor_w = scaled_anchor[:, 0].view((1, self.num_anchors, 1, 1))
    anchor_h = scaled_anchor[:, 1].view((1, self.num_anchors, 1, 1))

    # Decode the predicted boxes (x, y, w, h) in grid units.
    pred_boxes = torch.zeros_like(prediction[..., :4])
    pred_boxes[..., 0] = cx + grid_x
    pred_boxes[..., 1] = cy + grid_y
    pred_boxes[..., 2] = torch.exp(w) * anchor_w
    pred_boxes[..., 3] = torch.exp(h) * anchor_h

    pred = (
        pred_boxes.view(num_batches, -1, 4) * stride,
        pred_conf.view(num_batches, -1, 1),
        pred_cls.view(num_batches, -1, self.num_classes),
    )
    output = torch.cat(pred, -1)

    if targets is None:
        return output, 0

    iou_scores, class_mask, obj_mask, no_obj_mask, tx, ty, tw, th, tcls, tconf = utils.build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=scaled_anchor,
        ignore_thres=self.ignore_th)

    # Compute the loss.
    loss_x = self.mse_loss(cx[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(cy[obj_mask], ty[obj_mask])
    loss_w = self.mse_loss(w[obj_mask], tw[obj_mask])
    loss_h = self.mse_loss(h[obj_mask], th[obj_mask])
    loss_bbox = loss_x + loss_y + loss_w + loss_h

    loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    loss_conf_no_obj = self.bce_loss(pred_conf[no_obj_mask], tconf[no_obj_mask])
    # The two confidence terms are weighted separately so they can be
    # penalised differently.
    loss_conf = self.obj_scale * loss_conf_obj + self.no_obj_scale * loss_conf_no_obj
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
    loss_layer = loss_bbox + loss_conf + loss_cls

    # Metrics
    conf50 = (pred_conf > 0.5).float()
    iou50 = (iou_scores > 0.5).float()
    iou75 = (iou_scores > 0.75).float()
    detected_mask = conf50 * class_mask * tconf
    cls_acc = 100 * class_mask[obj_mask].mean()
    conf_obj = pred_conf[obj_mask].mean()
    conf_no_obj = pred_conf[no_obj_mask].mean()
    # 1e-16 keeps the ratios defined when a denominator is zero.
    precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
    recall50 = torch.sum(iou50 * detected_mask) / (obj_mask.sum() + 1e-16)
    recall75 = torch.sum(iou75 * detected_mask) / (obj_mask.sum() + 1e-16)

    # Write loss and metrics
    self.metrics = {
        "loss_x": loss_x.detach().cpu().item(),
        "loss_y": loss_y.detach().cpu().item(),
        "loss_w": loss_w.detach().cpu().item(),
        "loss_h": loss_h.detach().cpu().item(),
        "loss_bbox": loss_bbox.detach().cpu().item(),
        "loss_conf": loss_conf.detach().cpu().item(),
        "loss_cls": loss_cls.detach().cpu().item(),
        "loss_layer": loss_layer.detach().cpu().item(),
        "cls_acc": cls_acc.detach().cpu().item(),
        "conf_obj": conf_obj.detach().cpu().item(),
        "conf_no_obj": conf_no_obj.detach().cpu().item(),
        "precision": precision.detach().cpu().item(),
        "recall50": recall50.detach().cpu().item(),
        "recall75": recall75.detach().cpu().item(),
    }

    return output, loss_layer
def forward(self, x, targets=None):
    """Decode a detection feature map; return losses when training, boxes otherwise.

    Args:
        x: Feature map that is reshaped to
            ``[batch, num_anchors, bbox_attrs, grid, grid]``; it must
            therefore carry ``num_anchors * bbox_attrs`` channels on a square
            grid.
        targets: Ground truth forwarded (on the CPU) to ``build_targets``.
            ``None`` selects inference mode.

    Returns:
        With ``targets``: the tuple ``(loss, loss_x, loss_y, loss_w, loss_h,
        loss_conf, loss_cls, recall)``, where ``loss`` is a tensor and the
        individual losses are Python floats.
        Without ``targets``: a detached tensor of shape
        ``[batch, num_anchors * grid * grid, 4 + 1 + num_classes]`` with box
        coordinates multiplied by the stride.
    """
    bs = x.size(0)
    g_dim = x.size(2)
    stride = self.img_dim / g_dim
    # Allocate helper tensors on the input's own device, not on the
    # process-wide current CUDA device.
    device = x.device

    # [batch, anchors * attrs, grid, grid] -> [batch, anchors, grid, grid, attrs]
    prediction = (
        x.view(bs, self.num_anchors, self.bbox_attrs, g_dim, g_dim)
        .permute(0, 1, 3, 4, 2)
        .contiguous()
    )

    # Per-cell outputs, each of shape [batch, anchors, grid, grid].
    center_x = torch.sigmoid(prediction[..., 0])
    center_y = torch.sigmoid(prediction[..., 1])
    w = prediction[..., 2]  # raw width
    h = prediction[..., 3]  # raw height
    conf = torch.sigmoid(prediction[..., 4])
    pred_cls = torch.sigmoid(prediction[..., 5:])

    # Cell offsets; broadcasting stands in for explicit tiling over batch
    # and anchors: grid_x varies along the last axis, grid_y along the one
    # before it.
    cells = torch.arange(g_dim, dtype=torch.float, device=device)
    grid_x = cells.view(1, 1, 1, g_dim)
    grid_y = cells.view(1, 1, g_dim, 1)

    # Anchors in grid-cell units. The plain list is kept because it is what
    # build_targets receives.
    scaled_anchors = [(a_w / stride, a_h / stride) for a_w, a_h in self.anchors]
    anchor_tensor = torch.tensor(scaled_anchors, dtype=torch.float, device=device)
    anchor_w = anchor_tensor[:, 0].view(1, self.num_anchors, 1, 1)
    anchor_h = anchor_tensor[:, 1].view(1, self.num_anchors, 1, 1)

    # Decoded boxes in grid-cell units. They are built from detached values,
    # so no gradient flows through pred_boxes.
    pred_boxes = torch.empty(prediction[..., :4].shape, dtype=torch.float,
                             device=device)
    pred_boxes[..., 0] = center_x.detach() + grid_x
    pred_boxes[..., 1] = center_y.detach() + grid_y
    pred_boxes[..., 2] = torch.exp(w.detach()) * anchor_w
    pred_boxes[..., 3] = torch.exp(h.detach()) * anchor_h

    if targets is None:
        # Inference: concatenate boxes (scaled by the stride), confidence
        # and class scores.
        output = torch.cat(
            (
                pred_boxes.view(bs, -1, 4) * stride,
                conf.view(bs, -1, 1),
                pred_cls.view(bs, -1, self.num_classes),
            ),
            -1,
        )
        return output.detach()

    # Training
    if x.is_cuda:
        self.mse_loss = self.mse_loss.to(device)
        self.bce_loss = self.bce_loss.to(device)

    nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(
        pred_boxes.cpu(),
        targets.detach().cpu(),
        scaled_anchors,
        self.num_anchors,
        self.num_classes,
        g_dim,
        self.ignore_thres,
        self.img_dim,
    )
    recall = float(nCorrect / nGT) if nGT else 1

    def _as_float(tensor):
        # Move a build_targets result next to the predictions as float32.
        return tensor.detach().to(device=device, dtype=torch.float)

    # Masks
    mask = _as_float(mask)
    cls_mask = mask.unsqueeze(-1).repeat(1, 1, 1, 1, self.num_classes)
    conf_mask = _as_float(conf_mask)

    # Target values
    tx = _as_float(tx)
    ty = _as_float(ty)
    tw = _as_float(tw)
    th = _as_float(th)
    tconf = _as_float(tconf)
    tcls = _as_float(tcls)

    # Both prediction and target are multiplied by the mask, so masked-out
    # positions contribute zero difference.
    loss_x = self.lambda_coord * self.bce_loss(center_x * mask, tx * mask)
    loss_y = self.lambda_coord * self.bce_loss(center_y * mask, ty * mask)
    loss_w = self.lambda_coord * self.mse_loss(w * mask, tw * mask) / 2
    loss_h = self.lambda_coord * self.mse_loss(h * mask, th * mask) / 2
    loss_conf = self.bce_loss(conf * conf_mask, tconf * conf_mask)
    loss_cls = self.bce_loss(pred_cls * cls_mask, tcls * cls_mask)
    loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    return (
        loss,
        loss_x.item(),
        loss_y.item(),
        loss_w.item(),
        loss_h.item(),
        loss_conf.item(),
        loss_cls.item(),
        recall,
    )
def forward(self, x, targets=None):
    """Decode a raw detection feature map and optionally compute the layer loss.

    Args:
        x: Feature map that is reshaped to
            ``[batch, num_anchors, num_classes + 5, grid, grid]``; it must
            therefore carry ``num_anchors * (num_classes + 5)`` channels on a
            square grid.
        targets: Ground truth forwarded unchanged to ``utils.build_targets``.
            ``None`` selects inference mode.

    Returns:
        ``(output, loss)``. ``output`` has shape
        ``[batch, num_anchors * grid * grid, num_classes + 5]`` with box
        coordinates multiplied by the stride. ``loss`` is ``0`` when
        ``targets`` is ``None``, otherwise the summed box, confidence and
        class loss.
    """
    batch_size = x.size(0)
    grid_size = x.size(2)
    # Helper tensors are created on the input's device so the layer works
    # for CUDA inputs as well as CPU ones.
    device = x.device

    # [batch, anchors * attrs, grid, grid] -> [batch, anchors, grid, grid, attrs].
    # contiguous() lays the permuted tensor out densely in memory so the
    # later view() calls are valid.
    prediction = (
        x.view(batch_size, self.num_anchors, self.num_classes + 5,
               grid_size, grid_size)
        .permute(0, 1, 3, 4, 2)
        .contiguous()
    )

    # "..." keeps every leading axis; the final index selects one attribute.
    bx = torch.sigmoid(prediction[..., 0])  # centre x, relative to the cell
    by = torch.sigmoid(prediction[..., 1])  # centre y, relative to the cell
    bw = prediction[..., 2]  # raw width
    bh = prediction[..., 3]  # raw height
    pred_conf = torch.sigmoid(prediction[..., 4])  # object confidence
    pred_cls = torch.sigmoid(prediction[..., 5:])  # per-class scores

    # Size of one grid cell in input-image units.
    stride = self.image_size / grid_size

    # arange yields 0..grid_size-1; repeat(grid_size, 1) stacks that row
    # grid_size times, so cx[..., i, j] == j and (after t()) cy[..., i, j] == i.
    cell_index = torch.arange(grid_size, dtype=torch.float, device=device)
    cx = cell_index.repeat(grid_size, 1).view([1, 1, grid_size, grid_size])
    cy = cell_index.repeat(grid_size, 1).t().view(
        [1, 1, grid_size, grid_size])

    # Anchors expressed in grid-cell units.
    scaled_anchors = torch.as_tensor(
        [(a_w / stride, a_h / stride) for a_w, a_h in self.anchors],
        dtype=torch.float,
        device=device,
    )
    # The 0:1 / 1:2 slices keep a trailing axis, but the view() reshapes to
    # (1, num_anchors, 1, 1) either way, so plain column indexing would give
    # the same result.
    anchor_w = scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1))
    anchor_h = scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1))

    # Apply cell offsets and anchor scaling: boxes in grid-cell units.
    pred_boxes = torch.zeros_like(prediction[..., :4])
    pred_boxes[..., 0] = bx + cx
    pred_boxes[..., 1] = by + cy
    pred_boxes[..., 2] = torch.exp(bw) * anchor_w
    pred_boxes[..., 3] = torch.exp(bh) * anchor_h

    # Keep the batch axis and flatten anchors and cells into one axis;
    # multiplying by the stride converts boxes back to input-image units.
    pred = (
        pred_boxes.view(batch_size, -1, 4) * stride,
        pred_conf.view(batch_size, -1, 1),
        pred_cls.view(batch_size, -1, self.num_classes),
    )
    output = torch.cat(pred, -1)

    if targets is None:
        return output, 0

    (iou_scores, class_mask, obj_mask, no_obj_mask,
     tx, ty, tw, th, tcls, tconf) = utils.build_targets(
        pred_boxes=pred_boxes,
        pred_cls=pred_cls,
        target=targets,
        anchors=scaled_anchors,
        ignore_thres=self.ignore_thres,
    )

    # Box regression loss, restricted to the entries selected by obj_mask
    # (the confidence loss below also uses no_obj_mask).
    loss_x = self.mse_loss(bx[obj_mask], tx[obj_mask])
    loss_y = self.mse_loss(by[obj_mask], ty[obj_mask])
    loss_w = self.mse_loss(bw[obj_mask], tw[obj_mask])
    loss_h = self.mse_loss(bh[obj_mask], th[obj_mask])
    loss_bbox = loss_x + loss_y + loss_w + loss_h

    # Confidence loss: the sigmoid confidence is compared with tconf
    # separately under each mask, and obj_scale / no_obj_scale weight the
    # two terms.
    loss_conf_obj = self.bce_loss(pred_conf[obj_mask], tconf[obj_mask])
    loss_conf_no_obj = self.bce_loss(pred_conf[no_obj_mask],
                                     tconf[no_obj_mask])
    loss_conf = (self.obj_scale * loss_conf_obj
                 + self.no_obj_scale * loss_conf_no_obj)

    # Class prediction loss.
    loss_cls = self.bce_loss(pred_cls[obj_mask], tcls[obj_mask])
    loss_layer = loss_bbox + loss_conf + loss_cls

    return output, loss_layer