def __init__(self, nhidden, use_kmeans_anchors=False, n_classes=None, model='vgg'):
    """Build the Faster R-CNN head: RPN, RoIAlign pooling, FC feature
    layers, object-relation modules, and classification / bbox outputs.

    Args:
        nhidden: width of the fully-connected feature layers.
        use_kmeans_anchors: forwarded to the RPN (k-means vs. default anchors).
        n_classes: number of object classes scored by the head.
        model: backbone name — 'vgg', 'resnet50' or 'resnet101'.

    Raises:
        ValueError: if `model` is not a supported backbone.
    """
    super(FasterRCNN, self).__init__()
    print('use {}'.format(model))
    self.n_classes = n_classes
    self.rpn = RPN(use_kmeans_anchors, model=model)
    self.roi_pool = RoIAlign(7, 7, 1.0 / 16)
    # VGG feature maps are 512-channel at 7x7; the ResNet variants feed
    # 1024-channel features into the head.
    if model == 'vgg':
        self.fc6 = FC(512 * 7 * 7, nhidden)
    elif model in ('resnet50', 'resnet101'):
        self.fc6 = FC(1024 * 7 * 7, nhidden)
    else:
        # Previously this only printed a warning and left self.fc6 undefined,
        # producing a confusing AttributeError later; fail fast instead.
        raise ValueError('unsupported model: {}'.format(model))
    self.spacial_conv = SpacialConv_new(pooling_size=32, d_g=64)
    self.object_relation1 = ObjectRelationModule(nhidden, 64, 64, 64)
    self.fc7 = FC(nhidden, nhidden)
    self.object_relation2 = ObjectRelationModule(nhidden, 64, 64, 64)
    self.score_fc = FC(nhidden, self.n_classes, relu=False)
    self.bbox_fc = FC(nhidden, self.n_classes * 4, relu=False)
    # Loss tensors are populated during the training forward pass.
    self.cross_entropy = None
    self.loss_box = None
def __init__(self, n_box_per_frame, step_per_layer, num_classes):
    """Set up the NAS-GCN model: RoIAlign pooling, a base feature
    projection, the searchable graph-operation layer, and the classifier.

    Args:
        n_box_per_frame: number of RoIs kept per frame.
        step_per_layer: number of intermediate nodes in the searched cell.
        num_classes: output size of the final classification layer.
    """
    super(NASGCN, self).__init__()
    self.n_box_per_frame = n_box_per_frame
    # RoIAlign over 1/32-scale feature maps, 7x7 output.
    self.roi_pool = RoIAlign(7, 7, 1.0 / 32.0)
    # Project 2048-d backbone features down to 256-d node features.
    self.base_feat_transform = nn.Linear(2048, 256)
    self.activate = nn.LeakyReLU(inplace=True)
    # Each node i (0-based) receives 1 + i incoming edges, so the cell has
    # 1 + 2 + ... + step_per_layer edges in total.
    num_edges = step_per_layer * (step_per_layer + 1) // 2
    self.num_ops = len(PRIMITIVES)
    # One architecture weight per (edge, candidate op) pair.
    self.arch_weights = nn.Linear(256, self.num_ops * num_edges)
    self.graph_layer = NAS_layer(256, 16 * n_box_per_frame, step_per_layer)
    # Final classification layer over the aggregated graph features.
    self.cls_fc = nn.Linear(1024, num_classes)
def __init__(self, nhidden, n_object_cats, n_predicate_cats, MPS_iter,
             object_loss_weight, predicate_loss_weight, dropout=False,
             use_kmeans_anchors=True, base_model='vgg'):
    """Build the hierarchical descriptive model: a Faster R-CNN branch for
    objects plus a phrase branch (RoIAlign + FC layers + spatial conv),
    optionally connected by a hierarchical message-passing structure.

    Args:
        nhidden: width of the fully-connected feature layers.
        n_object_cats: number of object categories.
        n_predicate_cats: number of predicate categories.
        MPS_iter: message-passing iterations; 0 disables the MPS module.
        object_loss_weight / predicate_loss_weight: loss balancing weights
            (forwarded to the base class).
        dropout: forwarded to the base class and stored on the instance.
        use_kmeans_anchors: forwarded to the internal FasterRCNN.
        base_model: backbone name — 'vgg', 'resnet50' or 'resnet101'.

    Raises:
        ValueError: if `base_model` is not a supported backbone.
    """
    super(Hierarchical_Descriptive_Model, self).__init__(
        nhidden, n_object_cats, n_predicate_cats, MPS_iter,
        object_loss_weight, predicate_loss_weight, dropout)
    self.dropout = dropout
    self.rcnn = FasterRCNN(nhidden, use_kmeans_anchors, n_object_cats, model=base_model)
    self.roi_pool_phrase = RoIAlign(7, 7, 1.0 / 16)
    # VGG feature maps are 512-channel; the ResNet variants are 1024-channel.
    if base_model == 'vgg':
        self.fc6_phrase = FC(512 * 7 * 7, nhidden, relu=True)
    elif base_model in ('resnet50', 'resnet101'):
        self.fc6_phrase = FC(1024 * 7 * 7, nhidden, relu=True)
    else:
        # Previously this only printed a warning and left self.fc6_phrase
        # undefined, causing a deferred AttributeError; fail fast instead.
        raise ValueError('unsupported base_model: {}'.format(base_model))
    self.fc7_phrase = FC(nhidden, nhidden, relu=True)
    self.spacial_conv = SpacialConv(pooling_size=32)
    if MPS_iter == 0:
        self.mps = None
    else:
        # The hierarchical message passing structure.
        self.mps = Hierarchical_Message_Passing_Structure(nhidden, n_object_cats, n_predicate_cats)
        # NOTE(review): the collapsed source is ambiguous about whether this
        # init call sat inside the else-branch; it is placed here because
        # running it with self.mps = None would fail — confirm against the
        # original file.
        network.weights_normal_init(self.mps, 0.01)
    self.score_fc_pred = FC(nhidden + 64, self.n_classes_pred, relu=False)
    network.weights_normal_init(self.score_fc_pred, 0.01)
def __init__(self, img_size):
    """Configure the cropper: a RoIAlign layer that emits square
    img_size x img_size patches at full resolution (spatial scale 1.0).

    Args:
        img_size: side length of the aligned output patches.
    """
    super(imgCropper, self).__init__()
    # Starts on CPU; gpuEnable() flips this and moves the align layer.
    self.isCuda = False
    self.img_size = img_size
    self.roi_align_model = RoIAlign(img_size, img_size, 1.)
class imgCropper(nn.Module):
    """Crop image regions via RoIAlign at full resolution (scale 1.0)."""

    def __init__(self, img_size):
        super(imgCropper, self).__init__()
        # Starts on CPU; gpuEnable() flips this and moves the align layer.
        self.isCuda = False
        self.img_size = img_size
        self.roi_align_model = RoIAlign(img_size, img_size, 1.)

    def gpuEnable(self):
        """Move the RoIAlign layer to the GPU and remember the placement."""
        self.roi_align_model = self.roi_align_model.cuda()
        self.isCuda = True

    def forward(self, image, roi):
        """Delegate to RoIAlign: (image batch, rois) -> aligned crops."""
        return self.roi_align_model(image, roi)

    def crop_image(self, image, box, result_size):
        """Cut several boxes out of ONE shared image.

        image: HxWxC numpy array; box rows are (x, y, w, h);
        result_size: (width, height) of the aligned output.
        Returns (cropped patches, the image batch variable).
        """
        ishape = image.shape
        batch = np.reshape(image, (1, ishape[0], ishape[1], ishape[2]))
        batch = batch.transpose(0, 3, 1, 2).astype('float32')
        batch_var = Variable(torch.from_numpy(batch).float())

        rois = np.copy(box)
        rois[:, 2:4] += rois[:, 0:2]  # (x, y, w, h) -> (x1, y1, x2, y2)
        # Prepend the batch-index column (all zeros: single source image).
        rois = np.concatenate((np.zeros((rois.shape[0], 1)), rois), axis=1)
        rois_var = Variable(torch.from_numpy(rois).float())

        if self.isCuda:
            batch_var = batch_var.cuda()
            rois_var = rois_var.cuda()

        # Retarget the align layer to the requested output size.
        self.roi_align_model.aligned_width = result_size[0]
        self.roi_align_model.aligned_height = result_size[1]
        cropped = self.forward(batch_var, rois_var)
        return cropped, batch_var

    def crop_several_image(self, img_list, target_list):
        """Crop one target box from each image (one-to-one pairing)."""
        assert (len(target_list) == len(img_list))

        # Stack the per-image tensors into a single batch (timed for debug).
        torch.cuda.synchronize()
        start_time = time.time()
        batch = torch.squeeze(torch.stack(img_list, 0))
        torch.cuda.synchronize()
        print('10 image stacking time:{}'.format(time.time() - start_time))
        ishape = batch.size()

        # Convert (x, y, w, h) targets to (batch_idx, x1, y1, x2, y2) rois.
        rois = np.array(target_list)
        rois[:, 2:4] += rois[:, 0:2]
        batch_idx = np.reshape(np.arange(0, len(rois)), (len(rois), 1))
        rois = np.concatenate((batch_idx, rois), axis=1)
        rois_var = Variable(torch.from_numpy(rois.astype('float32')))

        if self.isCuda:
            rois_var = rois_var.cuda()
            batch = batch.cuda()

        return self.forward(batch, rois_var)
def to_varabile(arr, requires_grad, is_cuda):
    """Wrap a numpy array as a torch Variable, optionally on the GPU.

    NOTE(review): this chunk begins mid-function — only the tail
    (`tensor = tensor.cuda() ... return var`) was visible. The header and
    the numpy->tensor conversion are reconstructed from the two call sites
    below (both pass numpy arrays and is_cuda=True); confirm against the
    full file.
    """
    tensor = torch.from_numpy(arr)
    if is_cuda:
        tensor = tensor.cuda()
    var = Variable(tensor, requires_grad=requires_grad)
    return var


def myshow(img):
    """Display an image array without axis decorations."""
    plt.imshow(img)
    plt.axis('off')
    plt.show()


# test roialign
from roi_align.modules.roi_align import RoIAlign

img = cv2.imread('/home/dalong/testimg.png')
# HWC uint8 -> NCHW float32 for the align layer.
features = np.float32(img.transpose(2, 0, 1)[np.newaxis, :, :, :])
features = to_varabile(features, True, True)

bbox = [0, 10, 10, 400, 400]  # [batch_ind, x1, y1, x2, y2]
rois = np.array([bbox], dtype=np.float32)
rois = to_varabile(rois, False, True)

aligned_height = 500
aligned_width = 500
spatial_scale = 1.0
sampling_ratio = 0.0
alignlayer = RoIAlign(aligned_height, aligned_width, spatial_scale, sampling_ratio)
res = alignlayer(features, rois)

vis = res.cpu().detach().numpy()[0].transpose(1, 2, 0)
# Fixed: the original used Python-2 `print x` statements, which are syntax
# errors under Python 3 and inconsistent with the py3-style prints used
# elsewhere in this file.
print(vis.shape)
print(np.max(vis))
print(np.min(vis))
# BGR -> RGB for display; compare the aligned crop against the raw slice.
myshow(np.uint8(vis[:, :, ::-1]))
myshow(np.uint8(img[bbox[2]:bbox[4], bbox[1]:bbox[3], ::-1]))