def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key,
                   _feat_stride, anchor_scales, gt_boxes=None):
    """Run the numpy proposal layer and wrap its outputs as CUDA Variables.

    Returned scores are an (N, 2) tensor: column 1 holds the fg score,
    column 0 is left at zero.
    """
    cls_prob_np = rpn_cls_prob_reshape.data.cpu().numpy()
    bbox_pred_np = rpn_bbox_pred.data.cpu().numpy()
    rois, scores, anchor_inds, labels = proposal_layer_py(
        cls_prob_np, bbox_pred_np, im_info, cfg_key,
        _feat_stride=_feat_stride, anchor_scales=anchor_scales,
        gt_boxes=gt_boxes)
    # Pad fg scores into column 1 of an (N, 2) array; column 0 stays 0.
    padded_scores = np.zeros((rois.shape[0], 2))
    padded_scores[:, 1] = scores[:, 0]
    rois = network.np_to_variable(rois, is_cuda=True)
    scores = network.np_to_variable(padded_scores, is_cuda=True)
    anchor_inds = network.np_to_variable(anchor_inds, is_cuda=True,
                                         dtype=torch.LongTensor)
    labels = network.np_to_variable(labels, is_cuda=True,
                                    dtype=torch.LongTensor)
    return rois, scores, anchor_inds, labels
def forward(self, im_data, rois, im_info, gt_vec=None, gt_boxes=None,
            gt_ishard=None, dontcare_areas=None):
    """WSDDN-style forward: per-RoI class probabilities from two streams.

    The classification stream is softmaxed over classes (dim 1) and the
    detection stream over RoIs (dim 0); their product is the output.
    """
    # NHWC numpy batch -> NCHW CUDA Variable.
    image = network.np_to_variable(im_data, is_cuda=True).permute(0, 3, 1, 2)
    feat = self.features(image)
    pooled = self.roi_pool(feat, network.np_to_variable(rois, is_cuda=True))
    flat = pooled.view(pooled.size(0), 256 * 6 * 6)
    fc = self.classifier(flat)
    cls_branch = F.softmax(self.score_cls(fc), dim=1)
    det_branch = F.softmax(self.score_det(fc), dim=0)
    cls_prob = cls_branch * det_branch
    if self.training:
        label_vec = network.np_to_variable(gt_vec, is_cuda=True)
        self.cross_entropy = self.build_loss(cls_prob, label_vec)
    return cls_prob
def build_roi_loss(self, rpn_cls_score_reshape, rpn_cls_prob_reshape, scores, anchor_inds, labels):
    """Cross-entropy over the RPN scores of the sampled anchors.

    Channel layout assumption: in the permuted (N, H, W, 18) score tensor,
    channels 0-8 are the bg scores and 9-17 the fg scores of the 9 anchors
    — TODO confirm against the RPN head that produced it.
    """
    batch_size = rpn_cls_score_reshape.size()[0]  # NOTE(review): unused below
    rpn_cls_score = rpn_cls_score_reshape.permute(
        0, 2, 3, 1)  #.contiguous().view(-1, 2)
    # Split the last axis into the bg half (0-8) and fg half (9-17).
    bg_scores = torch.index_select(
        rpn_cls_score, 3,
        network.np_to_variable(np.arange(0, 9), is_cuda=True,
                               dtype=torch.LongTensor))
    fg_scores = torch.index_select(
        rpn_cls_score, 3,
        network.np_to_variable(np.arange(9, 18), is_cuda=True,
                               dtype=torch.LongTensor))
    bg_scores = bg_scores.contiguous().view(-1, 1)
    fg_scores = fg_scores.contiguous().view(-1, 1)
    # (num_anchors, 2) logits [bg, fg]; keep only the sampled anchors.
    rpn_cls_score = torch.cat([bg_scores, fg_scores], 1)
    rpn_cls_score = torch.index_select(rpn_cls_score, 0, anchor_inds.view(-1))
    labels = labels.view(-1)
    # size_average=False: summed (not mean) loss; deprecated in newer torch.
    roi_cross_entropy = F.cross_entropy(rpn_cls_score, labels,
                                        size_average=False)
    return roi_cross_entropy
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key,
                   _feat_stride, anchor_scales, gt_boxes=None):
    """Numpy proposal layer wrapped to return CUDA Variables.

    Unlike the zero-padding variant elsewhere in this file, the scores
    array is converted to a Variable as-is.
    """
    probs_np = rpn_cls_prob_reshape.data.cpu().numpy()
    deltas_np = rpn_bbox_pred.data.cpu().numpy()
    rois, scores, anchor_inds, labels = proposal_layer_py(
        probs_np, deltas_np, im_info, cfg_key,
        _feat_stride=_feat_stride, anchor_scales=anchor_scales,
        gt_boxes=gt_boxes)
    rois = network.np_to_variable(rois, is_cuda=True)
    anchor_inds = network.np_to_variable(anchor_inds, is_cuda=True,
                                         dtype=torch.LongTensor)
    labels = network.np_to_variable(labels, is_cuda=True,
                                    dtype=torch.LongTensor)
    scores = network.np_to_variable(scores, is_cuda=True)
    return rois, scores, anchor_inds, labels
def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info, _feat_stride, anchor_scales):
    """Assign fg/bg labels and box-regression targets to every anchor.

    Thin wrapper: runs the numpy implementation, then converts results to
    CUDA Variables.

    rpn_cls_score: for pytorch (1, Ax2, H, W) bg/fg scores of previous conv layer
    gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
    gt_ishard: (G, 1), 1 or 0 indicates difficult or not
    dontcare_areas: (D, 4), some areas may contains small objs but no labelling. D may be 0
    im_info: a list of [image_height, image_width, scale_ratios]
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    ----------
    Returns
    ----------
    rpn_labels : (1, 1, HxA, W), for each anchor, 0 denotes bg, 1 fg, -1 dontcare
    rpn_bbox_targets: (1, 4xA, H, W), distances of the anchors to the gt_boxes
    (may contains some transform) that are the regression objectives
    rpn_bbox_inside_weights: (1, 4xA, H, W) weights of each boxes, mainly accepts hyper param in cfg
    rpn_bbox_outside_weights: (1, 4xA, H, W) used to balance the fg/bg,
    because the numbers of bgs and fgs may be significantly different
    """
    rpn_cls_score = rpn_cls_score.data.cpu().numpy()
    rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = \
        anchor_target_layer_py(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas,
                               im_info, _feat_stride, anchor_scales)
    # Labels must be Long for cross-entropy; the rest stay float.
    rpn_labels = network.np_to_variable(rpn_labels, is_cuda=True,
                                        dtype=torch.LongTensor)
    rpn_bbox_targets = network.np_to_variable(rpn_bbox_targets, is_cuda=True)
    rpn_bbox_inside_weights = network.np_to_variable(
        rpn_bbox_inside_weights, is_cuda=True)
    rpn_bbox_outside_weights = network.np_to_variable(
        rpn_bbox_outside_weights, is_cuda=True)
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def proposal_target_layer(rpn_rois, gt_boxes, gt_ishard, dontcare_areas, num_classes):
    """Sample RoIs and build classification/regression targets for them.

    Thin wrapper around the numpy implementation; converts results to
    CUDA Variables.

    ----------
    rpn_rois: (1 x H x W x A, 5) [0, x1, y1, x2, y2]
    gt_boxes: (G, 5) [x1 ,y1 ,x2, y2, class] int
    # gt_ishard: (G, 1) {0 | 1} 1 indicates hard
    dontcare_areas: (D, 4) [ x1, y1, x2, y2]
    num_classes
    ----------
    Returns
    ----------
    rois: (1 x H x W x A, 5) [0, x1, y1, x2, y2]
    labels: (1 x H x W x A, 1) {0,1,...,_num_classes-1}
    bbox_targets: (1 x H x W x A, K x4) [dx1, dy1, dx2, dy2]
    bbox_inside_weights: (1 x H x W x A, Kx4) 0, 1 masks for the computing loss
    bbox_outside_weights: (1 x H x W x A, Kx4) 0, 1 masks for the computing loss
    """
    rpn_rois = rpn_rois.data.cpu().numpy()
    rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights = \
        proposal_target_layer_py(rpn_rois, gt_boxes, gt_ishard, dontcare_areas,
                                 num_classes)
    # print labels.shape, bbox_targets.shape, bbox_inside_weights.shape
    rois = network.np_to_variable(rois, is_cuda=True)
    # Labels must be Long for cross-entropy.
    labels = network.np_to_variable(labels, is_cuda=True,
                                    dtype=torch.LongTensor)
    bbox_targets = network.np_to_variable(bbox_targets, is_cuda=True)
    bbox_inside_weights = network.np_to_variable(bbox_inside_weights,
                                                 is_cuda=True)
    bbox_outside_weights = network.np_to_variable(bbox_outside_weights,
                                                  is_cuda=True)
    return rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
def forward(self, im_data, rois, im_info, gt_vec=None, gt_boxes=None,
            gt_ishard=None, dontcare_areas=None):
    """WSDDN forward using the model's own softmax modules.

    Produces per-RoI class probabilities as the elementwise product of
    the classification and detection streams.
    """
    image = network.np_to_variable(im_data, is_cuda=True).permute(0, 3, 1, 2)
    rois_var = network.np_to_variable(rois, is_cuda=True)
    feat = self.features(image)
    pooled = self.roi_pool(feat, rois_var)
    flat = pooled.view(pooled.shape[0], -1)
    fc = self.classifier(flat)
    cls_stream = self.cls_softmax(self.score_cls(fc))
    det_stream = self.det_softmax(self.score_det(fc))
    cls_prob = cls_stream * det_stream
    if self.training:
        label_vec = network.np_to_variable(gt_vec, is_cuda=True)
        label_vec = label_vec.view(self.n_classes, -1)
        self.cross_entropy = self.build_loss(cls_prob, label_vec)
    return cls_prob
def forward(self, im_data, rois, im_info, gt_vec=None, gt_boxes=None,
            gt_ishard=None, dontcare_areas=None):
    """WSDDN forward: product of class-wise and RoI-wise softmax streams."""
    # NHWC numpy batch -> NCHW CUDA Variable.
    image = network.np_to_variable(im_data, is_cuda=True).permute(0, 3, 1, 2)
    rois_var = network.np_to_variable(rois, is_cuda=True)
    pooled = self.roi_pool(self.features(image), rois_var)
    fc = self.classifier(pooled.view(pooled.size(0), -1))
    # Softmax over classes (dim 1) and over RoIs (dim 0).
    cls_stream = F.softmax(self.score_cls(fc), dim=1)
    det_stream = F.softmax(self.score_det(fc), dim=0)
    cls_prob = cls_stream * det_stream
    if self.training:
        label_vec = network.np_to_variable(gt_vec, is_cuda=True)
        self.cross_entropy = self.build_loss(cls_prob, label_vec)
    return cls_prob
def proposal_target_layer(region_rois, gt_regions, n_classes_obj, voc_sign, is_training=False, graph_generation=False):
    """Sample region RoIs and build caption-sequence / box targets for them.

    Thin wrapper around the numpy implementation; in training mode the
    targets are converted to CUDA Variables, otherwise they are returned
    as the raw numpy arrays produced by proposal_target_layer_py.

    ----------
    object_rois: (1 x H x W x A, 5) [0, x1, y1, x2, y2]
    region_rois: (1 x H x W x A, 5) [0, x1, y1, x2, y2]
    gt_objects: (G_obj, 5) [x1 ,y1 ,x2, y2, obj_class] int
    gt_relationships: (G_obj, G_obj) [pred_class] int (-1 for no relationship)
    gt_regions: (G_region, 4+40) [x1, y1, x2, y2, word_index] (-1 for padding)
    n_classes_obj
    voc_sign: special vocabulary indices (start/end/pad) — TODO confirm
    is_training to indicate whether in training scheme
    ----------
    Returns
    ----------
    rois: (1 x H x W x A, 5) [0, x1, y1, x2, y2]
    labels: (1 x H x W x A, 1) {0,1,...,_num_classes-1}
    bbox_targets: (1 x H x W x A, K x4) [dx1, dy1, dx2, dy2]
    bbox_inside_weights: (1 x H x W x A, Kx4) 0, 1 masks for the computing loss
    bbox_outside_weights: (1 x H x W x A, Kx4) 0, 1 masks for the computing loss
    """
    #object_rois = object_rois.data.cpu().numpy()
    region_rois = region_rois.data.cpu().numpy()
    region_seq, region_rois, \
        bbox_targets_region, bbox_inside_weights_region, bbox_outside_weights_region = \
        proposal_target_layer_py(region_rois, gt_regions, n_classes_obj, voc_sign,
                                 is_training, graph_generation=graph_generation)
    # print labels.shape, bbox_targets.shape, bbox_inside_weights.shape
    if is_training:
        # Only the training targets need to be Variables (they feed losses).
        # object_labels = network.np_to_variable(object_labels, is_cuda=True, dtype=torch.LongTensor)
        # bbox_targets = network.np_to_variable(bbox_targets, is_cuda=True)
        # bbox_inside_weights = network.np_to_variable(bbox_inside_weights, is_cuda=True)
        # bbox_outside_weights = network.np_to_variable(bbox_outside_weights, is_cuda=True)
        # phrase_label = network.np_to_variable(phrase_label, is_cuda=True, dtype=torch.LongTensor)
        region_seq = network.np_to_variable(region_seq, is_cuda=True,
                                            dtype=torch.LongTensor)
        bbox_targets_region = network.np_to_variable(bbox_targets_region,
                                                     is_cuda=True)
        bbox_inside_weights_region = network.np_to_variable(
            bbox_inside_weights_region, is_cuda=True)
        bbox_outside_weights_region = network.np_to_variable(
            bbox_outside_weights_region, is_cuda=True)
    #object_rois = network.np_to_variable(object_rois, is_cuda=True)
    #phrase_rois = network.np_to_variable(phrase_rois, is_cuda=True)
    # RoIs are needed by the pooling layer in both train and test modes.
    region_rois = network.np_to_variable(region_rois, is_cuda=True)
    return (region_rois, region_seq, bbox_targets_region,
            bbox_inside_weights_region, bbox_outside_weights_region)
def forward(self, im_data, gt_data=None):
    """Estimate a crowd density map; accumulate MSE loss when training."""
    image = network.np_to_variable(im_data, is_cuda=True,
                                   is_training=self.training)
    density_map = self.DME(image)
    if self.training:
        target = network.np_to_variable(gt_data, is_cuda=True,
                                        is_training=self.training)
        self.loss_mse = self.build_loss(density_map, target)
    return density_map
def forward(self, im_data, gt_data):
    """Run DA_Net on the image; compute MSE loss in training mode.

    The original returned `final` from both branches of an if/else; a
    single return is behaviorally identical.
    """
    image = network.np_to_variable(im_data, is_cuda=True,
                                   is_training=self.training)
    final = self.DA_Net(image)
    if self.training:
        target = network.np_to_variable(gt_data, is_cuda=True,
                                        is_training=self.training)
        self.loss_mse = self.build_loss(final, target)
    return final
def forward(self, im_data, gt_data=None, gt_cls_label=None, ce_weights=None):
    """Predict a density map and a density-level class distribution.

    In training mode also computes the MSE (density) and cross-entropy
    (classification) losses and stores them on self.
    """
    im_data = network.np_to_variable(im_data, is_cuda=True, is_training=self.training)
    density_map, density_cls_score = self.CCN(im_data)
    # NOTE(review): F.softmax without an explicit `dim` is deprecated and
    # its implicit axis depends on the rank of density_cls_score — confirm
    # the tensor's shape and pass dim= explicitly.
    density_cls_prob = F.softmax(density_cls_score)
    if self.training:
        gt_data = network.np_to_variable(gt_data, is_cuda=True, is_training=self.training)
        gt_cls_label = network.np_to_variable(gt_cls_label, is_cuda=True,
                                              is_training=self.training, dtype=torch.FloatTensor)
        self.loss_mse, self.cross_entropy = self.build_loss(density_map, density_cls_prob,
                                                            gt_data, gt_cls_label, ce_weights)
    return density_map
def select_to_match_dimensions(a, b):
    """Crop `a`'s spatial dims (H at axis 2, W at axis 3) down to `b`'s.

    Each axis of `a` that is larger than the corresponding axis of `b`
    is truncated by index-selecting the leading b.size()[axis] entries.
    """
    for axis in (2, 3):
        if a.size()[axis] > b.size()[axis]:
            keep = network.np_to_variable(
                np.arange(0, b.size()[axis]).astype(np.int32),
                is_cuda=True, dtype=torch.LongTensor)
            a = torch.index_select(a, axis, keep)
    return a
def forward(self, im_data, gt_data=None):
    """MCNN forward: backbone feature maps fused into a density map."""
    image = network.np_to_variable(im_data, is_cuda=True,
                                   is_training=self.training)
    density_map = self.fuse(self.mcnn_backbone(image))
    if self.training:
        target = network.np_to_variable(gt_data, is_cuda=True,
                                        is_training=self.training)
        self.loss_mse = nn.MSELoss()(density_map, target)
    return density_map
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key,
                   _feat_stride, anchor_scales, anchor_ratios, is_region):
    """Numpy proposal layer (with anchor ratios / region flag) wrapped to
    return CUDA Variables shaped (N, 5) boxes and (N,) scores."""
    probs = rpn_cls_prob_reshape.data.cpu().numpy()
    deltas = rpn_bbox_pred.data.cpu().numpy()
    boxes, scores = proposal_layer_py(probs, deltas, im_info, cfg_key,
                                      _feat_stride, anchor_scales,
                                      anchor_ratios, is_region=is_region)
    boxes = network.np_to_variable(boxes, is_cuda=True)
    scores = network.np_to_variable(scores, is_cuda=True)
    return boxes.view(-1, 5), scores.view(-1, )
def __index__(self, i):
    '''bid: image index; pid: patch index, to find slice

    Builds three transform pipelines for one patch: a normalized tensor
    for the model, the density map as a CPU Variable, and the raw
    cropped/flipped image as a numpy array.

    NOTE(review): __index__ is Python's integer-conversion hook; a
    dataset accessor is conventionally __getitem__ — confirm how callers
    invoke this before renaming.
    '''
    bid, pid = self.patch_list[i]
    transform_img = []
    transform_den = []
    transform_raw = []
    # Raw pipeline: crop -> flip -> plain numpy array (no normalization).
    transform_raw.append(transforms.Lambda(lambda img: i_crop(img, self.patches[bid][pid], self.crop_size)))
    transform_raw.append(transforms.Lambda(lambda img: i_flip(img, self.filps[i])))
    transform_raw.append(transforms.Lambda(lambda img: np.array(img)))
    transform_raw = transforms.Compose(transform_raw)
    # Image pipeline: crop -> flip -> tensor normalized with ImageNet stats.
    transform_img.append(transforms.Lambda(lambda img: i_crop(img, self.patches[bid][pid], self.crop_size)))
    transform_img.append(transforms.Lambda(lambda img: i_flip(img, self.filps[i])))
    transform_img += [
        transforms.ToTensor(),
        transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
    ]
    transform_img = transforms.Compose(transform_img)
    # Density pipeline: crop -> flip -> CPU Variable.
    transform_den.append(transforms.Lambda(lambda img: d_crop(img, self.patches[bid][pid], self.crop_size)))
    transform_den.append(transforms.Lambda(lambda img: d_flip(img, self.filps[i])))
    transform_den += [transforms.Lambda(lambda den: network.np_to_variable(den, is_cuda=False, is_training=self.training))]
    transform_den = transforms.Compose(transform_den)
    img, den, gt_count = self.dataloader[bid]
    return transform_img(img.copy()), transform_den(den), transform_raw(img.copy()), gt_count, i
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
    """RPN forward pass: backbone features and region proposals.

    In training mode also computes anchor targets and stores the RPN
    classification / box-regression losses on self.

    Fix: F.softmax is called with an explicit dim=1 (the bg/fg channel of
    the 4-D reshaped score tensor). This matches the old implicit default
    for 4-D input and removes the deprecation warning.
    """
    # NHWC numpy batch -> NCHW CUDA Variable.
    im_data = network.np_to_variable(im_data, is_cuda=True)
    im_data = im_data.permute(0, 3, 1, 2)
    features = self.features(im_data)
    rpn_conv1 = self.conv1(features)
    # rpn score: bg/fg logits per anchor.
    rpn_cls_score = self.score_conv(rpn_conv1)
    rpn_cls_score_reshape = self.reshape_layer(rpn_cls_score, 2)
    rpn_cls_prob = F.softmax(rpn_cls_score_reshape, dim=1)
    rpn_cls_prob_reshape = self.reshape_layer(rpn_cls_prob,
                                              len(self.anchor_scales) * 3 * 2)
    # rpn boxes: per-anchor regression deltas.
    rpn_bbox_pred = self.bbox_conv(rpn_conv1)
    # proposal layer
    cfg_key = 'TRAIN' if self.training else 'TEST'
    rois = self.proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info,
                               cfg_key, self._feat_stride, self.anchor_scales)
    # Generate training labels and build the RPN loss.
    if self.training:
        assert gt_boxes is not None
        rpn_data = self.anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard,
                                            dontcare_areas, im_info,
                                            self._feat_stride, self.anchor_scales)
        self.cross_entropy, self.loss_box = self.build_loss(
            rpn_cls_score_reshape, rpn_bbox_pred, rpn_data)
    return features, rois
def get_features(self, im_data):
    """Return backbone feature maps for an NHWC numpy image batch."""
    image = network.np_to_variable(im_data, is_cuda=True)
    # self.input_conv(im_data) was previously applied here.
    return self.features(image.permute(0, 3, 1, 2))
def __index__(self, i):
    """Return (normalized image, density Variable, raw image, gt_count, i)
    for one patch, sampling the source image via self.sample_dict.

    NOTE(review): __index__ is Python's integer-conversion hook; a
    dataset accessor is conventionally __getitem__ — confirm how callers
    invoke this before renaming.
    """
    bid, pid = self.patch_list[i]
    # Class-balanced sampling: pick a (class, choice) entry for this image.
    ncl = self.ncls[bid + 1]
    choice = self.choices[bid + 1]
    idx, image, label = self.sample_dict[bid + 1][ncl][choice]
    img, den, gt_count = self.loaded[idx]
    transform_img = []
    transform_den = []
    transform_raw = []
    # Raw pipeline: crop -> flip -> plain numpy array (no normalization).
    transform_raw.append(transforms.Lambda(lambda img: i_crop(img, self.patches[bid][pid], self.crop_size)))
    transform_raw.append(transforms.Lambda(lambda img: i_flip(img, self.filps[i])))
    transform_raw.append(transforms.Lambda(lambda img: np.array(img)))
    transform_raw = transforms.Compose(transform_raw)
    # Image pipeline: crop -> flip -> tensor normalized with ImageNet stats.
    transform_img.append(transforms.Lambda(lambda img: i_crop(img, self.patches[bid][pid], self.crop_size)))
    transform_img.append(transforms.Lambda(lambda img: i_flip(img, self.filps[i])))
    transform_img += [
        transforms.ToTensor(),
        transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
    ]
    transform_img = transforms.Compose(transform_img)
    # Density pipeline: crop -> flip -> CPU Variable.
    transform_den.append(transforms.Lambda(lambda img: d_crop(img, self.patches[bid][pid], self.crop_size)))
    transform_den.append(transforms.Lambda(lambda img: d_flip(img, self.filps[i])))
    transform_den += [transforms.Lambda(lambda den: network.np_to_variable(den, is_cuda=False, is_training=self.training))]
    transform_den = transforms.Compose(transform_den)
    return transform_img(img.copy()), transform_den(den), transform_raw(img.copy()), gt_count, i
def build_roi_loss(self, rpn_cls_score_reshape, rpn_cls_prob_reshape, scores, anchor_inds, labels):
    """Batched variant of the RoI cross-entropy loss.

    For each batch element, gathers the [bg, fg] logits of its sampled
    anchors and accumulates them (with their labels) into one flat
    tensor, then computes a summed cross-entropy over all of them.

    Channel layout assumption: in the permuted (N, H, W, 18) score tensor,
    channels 0-8 are bg and 9-17 fg — TODO confirm against the RPN head.
    """
    batch_size = rpn_cls_score_reshape.size()[0]
    rpn_cls_score = rpn_cls_score_reshape.permute(
        0, 2, 3, 1)  #.contiguous().view(-1, 2)
    # Split the last axis into the bg half (0-8) and fg half (9-17).
    bg_scores = torch.index_select(
        rpn_cls_score, 3,
        network.np_to_variable(np.arange(0, 9), is_cuda=True,
                               dtype=torch.LongTensor))
    fg_scores = torch.index_select(
        rpn_cls_score, 3,
        network.np_to_variable(np.arange(9, 18), is_cuda=True,
                               dtype=torch.LongTensor))
    bg_scores = bg_scores.contiguous().view(batch_size, -1, 1)
    fg_scores = fg_scores.contiguous().view(batch_size, -1, 1)
    all_rpn_cls_scores = None
    all_labels = None
    for batch_ind in range(batch_size):
        # Select this batch element's scores via index_select (keeps the
        # graph identical to the commented-out direct slicing below).
        b_ind_var = network.np_to_variable(np.zeros(1) + batch_ind,
                                           is_cuda=True,
                                           dtype=torch.LongTensor)
        bg = torch.index_select(bg_scores, 0, b_ind_var)
        fg = torch.index_select(fg_scores, 0, b_ind_var)
        #rpn_cls_score = torch.cat([bg_scores[batch_ind,:,:],fg_scores[batch_ind,:,:]],1)
        rpn_cls_score = torch.cat([bg.squeeze(0), fg.squeeze(0)], 1)
        # Keep only the anchors sampled for this batch element.
        rpn_cls_score = torch.index_select(rpn_cls_score, 0,
                                           anchor_inds[batch_ind])
        if all_rpn_cls_scores is None:
            all_rpn_cls_scores = rpn_cls_score
            all_labels = labels[batch_ind]
        else:
            all_rpn_cls_scores = torch.cat(
                [all_rpn_cls_scores, rpn_cls_score], 0)
            all_labels = torch.cat([all_labels, labels[batch_ind]], 0)
    #roi_cross_entropy = F.cross_entropy(rpn_cls_score, labels, size_average=False)
    # size_average=False: summed (not mean) loss; deprecated in newer torch.
    roi_cross_entropy = F.cross_entropy(all_rpn_cls_scores, all_labels,
                                        size_average=False)
    return roi_cross_entropy
def forward(self, im_data, rois, im_info, gt_vec=None, gt_boxes=None,
            gt_ishard=None, dontcare_areas=None):
    """WSDDN forward: per-RoI class probabilities from two softmax streams."""
    image = network.np_to_variable(im_data, is_cuda=True).permute(0, 3, 1, 2)
    rois_var = network.np_to_variable(rois, is_cuda=True)
    conv_out = self.features(image)
    pooled = self.roi_pool(conv_out, rois_var)
    flat = pooled.view(pooled.shape[0], -1)
    fc = self.classifier(flat)
    # Classification stream over classes (dim 1); detection over RoIs (dim 0).
    prob_cls = F.softmax(self.score_cls(fc), dim=1)
    prob_det = F.softmax(self.score_det(fc), dim=0)
    cls_prob = prob_cls * prob_det
    if self.training:
        label_vec = network.np_to_variable(gt_vec, is_cuda=True)
        label_vec = label_vec.view(self.n_classes, -1)
        self.cross_entropy = self.build_loss(cls_prob, label_vec)
    return cls_prob
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key,
                   _feat_stride, anchor_scales):
    """Numpy proposal layer wrapped to return a CPU Variable of (N, 5) RoIs."""
    probs = rpn_cls_prob_reshape.data.cpu().numpy()
    deltas = rpn_bbox_pred.data.cpu().numpy()
    proposals = proposal_layer_py(probs, deltas, im_info, cfg_key,
                                  _feat_stride, anchor_scales)
    proposals = network.np_to_variable(proposals, is_cuda=False)
    return proposals.view(-1, 5)
def forward(self, im_data, gt_data=None):
    """Density estimation: MCNN backbone features concatenated with a
    downsampled side branch, amended and fused into a density map."""
    image = network.np_to_variable(im_data, is_cuda=True,
                                   is_training=self.training)
    backbone_out = self.mcnn_backbone(image)
    side = self.downsample(self.conv3x3(image))
    merged = self.amend(torch.cat((backbone_out, side), 1))
    density_map = self.fuse(merged)
    if self.training:
        target = network.np_to_variable(gt_data, is_cuda=True,
                                        is_training=self.training)
        self.loss_mse = nn.MSELoss()(density_map, target)
    return density_map
def forward(self, im_data, rois, im_info, gt_vec=None, gt_boxes=None,
            gt_ishard=None, dontcare_areas=None):
    """WSDDN forward pass.

    BUG FIX: the original body never computed `cls_prob` (and never used
    `rois`), so the final `return cls_prob` raised NameError.  Implemented
    here following the sibling WSDDN forward() variants in this file:
    backbone features -> RoI pooling -> classifier -> product of the
    class-wise (dim 1) and RoI-wise (dim 0) softmax streams.
    NOTE(review): assumes this class defines features/roi_pool/classifier/
    score_cls/score_det like its siblings — confirm.
    """
    im_data = network.np_to_variable(im_data, is_cuda=True)
    im_data = im_data.permute(0, 3, 1, 2)
    rois = network.np_to_variable(rois, is_cuda=True)
    x = self.features(im_data)
    x = self.roi_pool(x, rois)
    x = x.view(x.size(0), -1)
    x = self.classifier(x)
    score_cls = F.softmax(self.score_cls(x), dim=1)
    score_det = F.softmax(self.score_det(x), dim=0)
    cls_prob = score_cls * score_det
    if self.training:
        label_vec = network.np_to_variable(gt_vec, is_cuda=True)
        label_vec = label_vec.view(self.n_classes, -1)
        self.cross_entropy = self.build_loss(cls_prob, label_vec)
    return cls_prob
def forward(self, im_data, rois, im_info, gt_vec=None, gt_boxes=None,
            gt_ishard=None, dontcare_areas=None):
    """WSDDN forward combining region-classification and detection streams."""
    image = network.np_to_variable(im_data, is_cuda=True).permute(0, 3, 1, 2)
    feature_maps = self.features(image)
    # SPP / RoI pooling followed by fc6-fc7.
    rois_var = network.np_to_variable(rois, is_cuda=True)
    pooled = self.roi_pool(feature_maps, rois_var)
    fc_out = self.classifier(pooled.view(pooled.size()[0], -1))
    # Classification stream over classes (dim 1), detection over RoIs (dim 0).
    cls_stream = F.softmax(self.score_cls(fc_out), dim=1)
    det_stream = F.softmax(self.score_det(fc_out), dim=0)
    cls_prob = torch.mul(cls_stream, det_stream)
    #if self.training:
    # Loss is keyed on gt_vec presence rather than self.training.
    if gt_vec is not None:
        label_vec = network.np_to_variable(gt_vec, is_cuda=True)
        label_vec = label_vec.view(self.n_classes, -1)
        self.cross_entropy = self.build_loss(cls_prob, label_vec)
    return cls_prob
def remove_bad_roi(gt_objects):
    """Drop ground-truth boxes whose width or height is not greater than 1.

    Returns the surviving boxes as a CUDA Variable in RoI format
    [0, x1, y1, x2, y2] (leading zero = batch index).
    """
    coords = gt_objects[:, :4]
    widths = coords[:, 2] - coords[:, 0]
    heights = coords[:, 3] - coords[:, 1]
    valid = gt_objects[np.where((widths > 1) * (heights > 1))]
    batch_col = np.zeros((valid.shape[0], 1), dtype=gt_objects.dtype)
    rois_np = np.hstack((batch_col, valid[:, :4]))
    return network.np_to_variable(rois_np, is_cuda=True)
def forward(self, im_data, rois, bbox_3d_targets=None, bbox_loss_3d_weights=None, labels=None):
    """Fast R-CNN-style head: classify RoIs and regress 3-D box targets.

    In training mode the RoIs are re-sampled by the proposal-target layer
    and the classification / box losses are stored on self.

    Fix: F.softmax is called with an explicit dim=1 (the class axis of
    the (N_roi, C) score matrix from the linear head). This matches the
    old implicit default for 2-D input and removes the deprecation
    warning.
    """
    im_data = network.np_to_variable(im_data, is_cuda=True)
    #features, rois = self.rpn(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)
    features = self.features(im_data)
    if self.training:
        # Sample RoIs and build targets; roi_data feeds build_loss below.
        roi_data = self.proposal_target_layer(rois, labels, bbox_3d_targets,
                                              bbox_loss_3d_weights)
        rois = roi_data[0]
    else:
        rois = network.np_to_variable(rois, is_cuda=True)
    # RoI pooling followed by fc6-fc7 with dropout.
    pooled_features = self.roi_pool(features, rois)
    x = pooled_features.view(pooled_features.size()[0], -1)
    x = self.fc6(x)
    x = F.dropout(x, training=self.training)
    x = self.fc7(x)
    x = F.dropout(x, training=self.training)
    cls_score = self.score_fc(x)
    cls_prob = F.softmax(cls_score, dim=1)
    bbox_pred = self.bbox_fc(x)
    if self.training:
        self.cross_entropy, self.loss_box = self.build_loss(
            cls_score, bbox_pred, roi_data)
    return cls_prob, bbox_pred, rois
def forward(self, im_data, rois, im_info, gt_vec=None, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
    """Weakly-supervised detection forward with a hard cap on RoI count.

    Returns (cls_prob, pred_score): cls_prob is the per-class image score
    (sum over RoIs of the two-stream product), pred_score the per-RoI
    score matrix.
    """
    # if rois.shape[0] > 256 and (self.training != True):
    # Keep at most 64 proposals (bounds memory; applied in train and test).
    if rois.shape[0] > 64:
        rois = rois[:64,:]
    im_data = network.np_to_variable(im_data, is_cuda=True)
    im_data = im_data.permute(0, 3, 1, 2)  # NHWC -> NCHW
    feature = self.features(im_data)
    rois = network.np_to_variable(rois, is_cuda=True)
    pooled_features = self.roi_pool(feature, rois)
    x = pooled_features.view(pooled_features.size()[0], -1)
    x = self.classifier_share(x)
    # Two streams: classification (c) and detection (d); their product is
    # the per-RoI score, summed over RoIs for the image-level score.
    output_c = self.classifier_c(x)
    output_d = self.classifier_d(x)
    pred_score = output_c*output_d
    cls_prob = pred_score.sum(0)
    # Sanity check: image-level scores should not exceed 1 (within eps).
    if cls_prob.max().data.cpu().numpy() > 1 + 1e-3:
        print("cls_error")
    if self.training:
        label_vec = network.np_to_variable(gt_vec, is_cuda=True)
        label_vec = label_vec.squeeze()
        # NOTE(review): loss is negated here — presumably build_loss
        # returns a likelihood-style quantity to maximize; confirm.
        self.cross_entropy = -self.build_loss(cls_prob, label_vec)
    return cls_prob, pred_score
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
    """RPN forward pass (CPU tensors): backbone features + region proposals.

    In training mode also builds anchor targets and stores the RPN losses
    on self.

    Fixes: F.softmax gets an explicit dim=1 (the bg/fg channel of the 4-D
    reshaped score tensor; matches the old implicit default for 4-D input
    and silences the deprecation warning), and the original Chinese
    comments are translated to English.
    """
    im_data = network.np_to_variable(im_data, is_cuda=False)
    im_data = im_data.permute(0, 3, 1, 2)
    features = self.features(im_data)
    rpn_conv1 = self.conv1(features)
    # rpn score
    rpn_cls_score = self.score_conv(rpn_conv1)
    rpn_cls_score_reshape = self.reshape_layer(rpn_cls_score, 2)
    # Scores become probabilities here — note the score/prob distinction.
    rpn_cls_prob = F.softmax(rpn_cls_score_reshape, dim=1)
    rpn_cls_prob_reshape = self.reshape_layer(
        rpn_cls_prob, len(self.anchor_scales) * 3 * 2)
    # rpn boxes
    rpn_bbox_pred = self.bbox_conv(rpn_conv1)
    # proposal layer
    cfg_key = 'TRAIN' if self.training else 'TEST'
    # These rois are the anchors shifted by rpn_bbox_pred, returned as the
    # top-k ranked by fg probability (roughly 300+ region proposals).
    rois = self.proposal_layer(
        rpn_cls_prob_reshape, rpn_bbox_pred, im_info,
        cfg_key, self._feat_stride, self.anchor_scales)
    # Generate training labels and build the RPN loss.
    if self.training:
        assert gt_boxes is not None
        # gt_boxes are still in absolute image coordinates here.
        rpn_data = self.anchor_target_layer(rpn_cls_score, gt_boxes,
                                            gt_ishard, dontcare_areas,
                                            im_info, self._feat_stride,
                                            self.anchor_scales)
        self.cross_entropy, self.loss_box = self.build_loss(
            rpn_cls_score_reshape, rpn_bbox_pred, rpn_data)
    return features, rois
def proposal_target_layer(object_rois, gt_objects, gt_relationships, n_classes_obj, is_training=False):
    """Sample object RoIs and derive phrase RoIs plus their targets.

    Thin wrapper around the numpy implementation; in training mode the
    label/target arrays are converted to CUDA Variables, otherwise they
    are returned as the raw numpy arrays.

    ----------
    object_rois: (1 x H x W x A, 5) [0, x1, y1, x2, y2], Variable, cuda
    phrase_rois: (1 x H x W x A, 5) [0, x1, y1, x2, y2], Variable, cuda
    gt_objects: (G_obj, 5) [x1 ,y1 ,x2, y2, obj_class] int, tensor
    gt_relationships: (G_obj, G_obj) [pred_class] int (-1 for no relationship), tensor
    n_classes_obj
    is_training to indicate whether in training scheme
    ----------
    Returns
    ----------
    (object_rois, object_labels, bbox_targets_object,
     bbox_inside_weights_object, bbox_outside_weights_object),
    (phrase_rois, phrase_labels), mat_object, mat_phrase, keep_inds
    """
    object_rois = object_rois.data.cpu().numpy()
    object_labels, object_rois, bbox_targets_object, bbox_inside_weights_object, bbox_outside_weights_object, \
        phrase_labels, phrase_rois, \
        mat_object, mat_phrase, keep_inds = \
        proposal_target_layer_py(object_rois, gt_objects, gt_relationships,
                                 n_classes_obj, is_training)
    # print labels.shape, bbox_targets.shape, bbox_inside_weights.shape
    if is_training:
        # Only the training targets need to be Variables (they feed losses).
        object_labels = network.np_to_variable(object_labels, is_cuda=True,
                                               dtype=torch.LongTensor)
        bbox_targets_object = network.np_to_variable(bbox_targets_object,
                                                     is_cuda=True)
        bbox_inside_weights_object = network.np_to_variable(
            bbox_inside_weights_object, is_cuda=True)
        bbox_outside_weights_object = network.np_to_variable(
            bbox_outside_weights_object, is_cuda=True)
        phrase_labels = network.np_to_variable(phrase_labels, is_cuda=True,
                                               dtype=torch.LongTensor)
    # RoIs are needed by the pooling layers in both train and test modes.
    object_rois = network.np_to_variable(object_rois, is_cuda=True)
    phrase_rois = network.np_to_variable(phrase_rois, is_cuda=True)
    return (object_rois, object_labels, bbox_targets_object,
            bbox_inside_weights_object, bbox_outside_weights_object), \
        (phrase_rois, phrase_labels), \
        mat_object, mat_phrase, keep_inds