def fit(self, traindir, image_ids=None):
    self.data_loader = get_dataloader(traindir, transform=self.transform,
                                      image_ids=image_ids, resize=self.resize)
    self.traindir = traindir
    self.trainids = image_ids
    train_loader = self.data_loader
    descriptors0 = np.zeros((1, 512))
    descriptors1 = np.zeros((1, 512))
    print("fitting...")
    for index, image in tqdm(enumerate(train_loader)):
        if self.use_cuda:
            image = image.cuda()
        # run the image through the backbone layer by layer and collect the
        # feature maps of the two selected layers
        for i, layer in enumerate(self.pretrained_feature_model):
            image = layer(image)
            if i in self.selected_layers:
                output = image[0, :].clone()
                # flatten to (H * W, 512): one 512-d descriptor per spatial location
                output = output.view(512, output.shape[1] * output.shape[2])
                output = output.transpose(0, 1)
                if i == self.selected_layers[0]:
                    descriptors0 = np.vstack((descriptors0, output.cpu().detach().numpy().copy()))
                else:
                    descriptors1 = np.vstack((descriptors1, output.cpu().detach().numpy().copy()))
                del output
    # drop the all-zero placeholder rows
    descriptors0 = descriptors0[1:]
    descriptors1 = descriptors1[1:]
    # per-channel descriptor means, used at test time to center the features to zero
    descriptors0_mean = sum(descriptors0) / len(descriptors0)
    descriptors0_mean_tensor = torch.FloatTensor(descriptors0_mean)
    descriptors1_mean = sum(descriptors1) / len(descriptors1)
    descriptors1_mean_tensor = torch.FloatTensor(descriptors1_mean)
    # the first principal component of each descriptor set is the transform vector
    pca0 = PCA(n_components=1)
    pca0.fit(descriptors0)
    trans_vec0 = pca0.components_[0]
    pca1 = PCA(n_components=1)
    pca1.fit(descriptors1)
    trans_vec1 = pca1.components_[0]
    return (trans_vec0, trans_vec1), [descriptors0_mean_tensor, descriptors1_mean_tensor]
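# Minimal, self-contained sketch (synthetic data, nothing from this repo) of the projection
# that this fit() prepares for co_locate(): the first PCA component of the pooled conv
# descriptors is the transform vector, and a spatial location is kept when the dot product
# with its mean-centered descriptor is positive.
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
train_descriptors = rng.randn(1000, 512)            # stands in for the stacked 512-d descriptors
pca = PCA(n_components=1).fit(train_descriptors)
trans_vec = pca.components_[0]                      # plays the role of trans_vec0 / trans_vec1
mean_vec = train_descriptors.mean(axis=0)           # plays the role of descriptors0_mean
test_descriptor = rng.randn(512)
score = np.dot(trans_vec, test_descriptor - mean_vec)
print("location kept in the mask:", bool(score > 0))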
def fit(self, traindir, image_ids=None):
    self.data_loader = get_dataloader(traindir, transform=self.transform,
                                      image_ids=image_ids, resize=self.resize)
    self.traindir = traindir
    self.trainids = image_ids
    train_loader = self.data_loader
    nsamples = 0
    descriptors = np.zeros((1, self.feature_dim))
    print("fitting...")
    for index, image in tqdm(enumerate(train_loader)):
        if self.use_cuda:
            image = image.cuda()
        output = self.pretrained_feature_model(image)
        # pool the feature map over precomputed region proposals, if any
        rois = load_rois(train_loader.dataset.img_paths[index])
        if rois is not None:
            output = self.roi_pool(output, [rois])
        # group by channel first, then flatten to (n_locations, feature_dim)
        output = output.permute(1, 0, 2, 3).contiguous().view(self.feature_dim, -1)
        output = output.transpose(0, 1)
        nsamples += output.shape[0]
        descriptors = np.vstack((descriptors, output.detach().cpu().numpy().copy()))
        del output
    print("nsamples: ", nsamples)
    # drop the all-zero placeholder row
    descriptors = descriptors[1:]
    # per-channel descriptor mean, used at test time to center the features to zero
    descriptors_mean = sum(descriptors) / len(descriptors)
    descriptors_mean_tensor = torch.FloatTensor(descriptors_mean)
    pca = PCA()  # n_components=1
    pca.fit(descriptors)
    trans_vec = pca.components_[0]
    return trans_vec, descriptors_mean_tensor
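# Hedged sketch of how self.roi_pool and the rois returned by load_rois are assumed to fit
# together in the loop above: torchvision's RoIPool turns a (1, C, H, W) feature map plus K
# proposal boxes into a (K, C, ph, pw) tensor, which is then flattened into per-proposal
# descriptors. The output_size and spatial_scale values below are illustrative assumptions,
# not values taken from this repository.
import torch
from torchvision.ops import RoIPool

roi_pool = RoIPool(output_size=(1, 1), spatial_scale=1.0 / 16)   # assumed backbone stride
feature_map = torch.randn(1, 512, 14, 14)                        # (N, C, H, W)
boxes = torch.tensor([[0.0, 0.0, 224.0, 224.0],                  # (x1, y1, x2, y2) in input pixels
                      [32.0, 32.0, 128.0, 160.0]])
pooled = roi_pool(feature_map, [boxes])                          # -> (2, 512, 1, 1)
print(pooled.shape)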
def fit(self, traindir, image_ids=None):
    self.data_loader = get_dataloader(traindir, transform=self.transform,
                                      image_ids=image_ids, resize=self.resize)
    self.traindir = traindir
    self.trainids = image_ids
    train_loader = self.data_loader
    nsamples = 0
    descriptors = np.zeros((1, self.feature_dim))
    print("fitting...")
    for index, image in tqdm(enumerate(train_loader)):
        if self.use_cuda:
            image = image.cuda()
        # print(train_loader.dataset.img_paths[index])
        output = self.pretrained_feature_model(image)
        if self.upsample is not None:
            output = self.upsample(output)[0, :]
        else:
            output = output[0, :]
        # flatten to (H * W, feature_dim): one descriptor per spatial location
        output = output.view(self.feature_dim, output.shape[1] * output.shape[2])
        nsamples += output.shape[1]
        output = output.transpose(0, 1)
        descriptors = np.vstack((descriptors, output.detach().cpu().numpy().copy()))
        del output
    print("nsamples: ", nsamples)
    # drop the all-zero placeholder row
    descriptors = descriptors[1:]
    # per-channel descriptor mean, kept so that test features can be centered to zero
    descriptors_mean = sum(descriptors) / len(descriptors)
    descriptors_mean_tensor = torch.FloatTensor(descriptors_mean)
    # cluster the descriptors instead of running PCA
    self.SC = SphereCluster()
    trans_vec = self.SC.fit(descriptors)
    return trans_vec, descriptors_mean_tensor
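# Illustrative stand-in for the SphereCluster interface used above (fit / predict / main_id).
# The real class is defined elsewhere in this repo; this sketch is only an assumption about
# its behaviour: a 2-way k-means on L2-normalized descriptors, with the larger cluster
# treated as the common-object ("main") cluster.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import normalize

class SphereClusterSketch:
    def fit(self, descriptors):
        self.kmeans = KMeans(n_clusters=2, n_init=10, random_state=0)
        labels = self.kmeans.fit_predict(normalize(descriptors))
        # treat the bigger cluster as the foreground / "main" cluster
        self.main_id = int(np.bincount(labels).argmax())
        # return the main cluster centre, playing the role of trans_vec
        return self.kmeans.cluster_centers_[self.main_id]

    def predict(self, descriptors):
        return self.kmeans.predict(normalize(descriptors))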
def co_locate(self, testdir, savedir, trans_vector, descriptor_mean_tensor, image_ids=None):
    is_imageids_same = (set(self.trainids) == set(image_ids)) if (
        image_ids is not None and self.trainids is not None) else True
    # reuse the training loader when testing on the same directory and image ids
    if (testdir == self.traindir) and is_imageids_same:
        test_loader = self.data_loader
    else:
        test_loader = get_dataloader(testdir, transform=self.transform,
                                     image_ids=image_ids, resize=self.resize)
    if self.use_cuda:
        descriptor_mean_tensor = descriptor_mean_tensor.cuda()
    print("colocate...")
    result_file = open(osp.join(savedir, "result.txt"), "w")
    for index, image in tqdm(enumerate(test_loader)):
        img_id = osp.basename(test_loader.dataset.img_paths[index]).split(".")[0]
        origin_image = cv2.imread(test_loader.dataset.img_paths[index])
        origin_height, origin_width = origin_image.shape[:2]
        if self.use_cuda:
            image = image.cuda()
        output = self.pretrained_feature_model(image)
        rois = load_rois(test_loader.dataset.img_paths[index])
        if rois is not None:
            output = self.roi_pool(output, [rois])
        # group by channel first, then flatten to (n_rois, feature_dim)
        output = output.permute(1, 0, 2, 3).contiguous().view(self.feature_dim, -1)
        output = output.transpose(0, 1)
        # center the descriptors with the training mean
        featmap = output - descriptor_mean_tensor.repeat(output.shape[0], 1)
        features = featmap.detach().cpu().numpy()
        del featmap
        # project the centered descriptors onto the transform vector
        P = np.dot(trans_vector, features.transpose())
        max_score_idx = np.argmax(P)
        bboxes = []
        # keep only the highest-scoring proposal, and only if its score is positive
        for i in range(P.shape[0]):
            if P[i] > 0 and i == max_score_idx:
                bbox = rois[i, :]
                bboxes.append(bbox)
        mask = np.zeros((1, origin_height, origin_width), dtype=np.uint16)
        # mask = self.max_conn_mask(P, origin_height, origin_width)
        # bboxes = self.get_bboxes(mask)
        mask_3 = np.concatenate((np.zeros((2, origin_height, origin_width), dtype=np.uint16),
                                 mask * 255), axis=0)
        # overlay the mask on the original image for visualization
        mask_3 = np.transpose(mask_3, (1, 2, 0))
        mask_3 = origin_image + mask_3
        mask_3[mask_3[:, :, 2] > 254, 2] = 255
        mask_3 = np.array(mask_3, dtype=np.uint8)
        # draw bboxes and write them to result.txt
        if len(bboxes) == 0:
            result_file.write(img_id + "\n")
        for (x, y, w, h) in bboxes:
            cv2.rectangle(mask_3, (x, y), (x + w, y + h), (0, 255, 0), 2)
            result_file.write(img_id + " {} {} {} {}\n".format(x, y, x + w, y + h))
        cv2.imwrite(osp.join(savedir, img_id + ".jpg"), mask_3)
    result_file.close()
def co_locate(self, testdir, savedir, trans_vectors, descriptor_mean_tensors, image_ids=None):
    is_imageids_same = (set(self.trainids) == set(image_ids)) if (
        image_ids is not None and self.trainids is not None) else True
    # reuse the training loader when testing on the same directory and image ids
    if (testdir == self.traindir) and is_imageids_same:
        test_loader = self.data_loader
    else:
        test_loader = get_dataloader(testdir, transform=self.transform,
                                     image_ids=image_ids, resize=self.resize)
    result_file = open(osp.join(savedir, "result.txt"), "w")
    print("colocate...")
    for index, image in tqdm(enumerate(test_loader)):
        img_id = osp.basename(test_loader.dataset.img_paths[index]).split(".")[0]
        origin_image = cv2.imread(test_loader.dataset.img_paths[index])
        origin_height, origin_width = origin_image.shape[:2]
        if self.use_cuda:
            image = image.cuda()
            descriptor_mean_tensors[0] = descriptor_mean_tensors[0].cuda()
            descriptor_mean_tensors[1] = descriptor_mean_tensors[1].cuda()
        # run the image through the backbone layer by layer and project the two
        # selected feature maps with their own mean and transform vector
        for i, layer in enumerate(self.pretrained_feature_model):
            image = layer(image)
            if i in self.selected_layers:
                featmap = image[0, :].clone()
                if i == self.selected_layers[0]:
                    h0, w0 = featmap.shape[1], featmap.shape[2]
                    featmap = featmap.view(512, -1).transpose(0, 1)
                    featmap -= descriptor_mean_tensors[0].repeat(featmap.shape[0], 1)
                    features0 = featmap.cpu().detach().numpy()
                else:
                    h1, w1 = featmap.shape[1], featmap.shape[2]
                    featmap = featmap.view(512, -1).transpose(0, 1)
                    featmap -= descriptor_mean_tensors[1].repeat(featmap.shape[0], 1)
                    features1 = featmap.cpu().detach().numpy()
                del featmap
        P0 = np.dot(trans_vectors[0], features0.transpose()).reshape(h0, w0)
        P1 = np.dot(trans_vectors[1], features1.transpose()).reshape(h1, w1)
        mask0 = self.max_conn_mask(P0, origin_height, origin_width)
        mask1 = self.max_conn_mask(P1, origin_height, origin_width)
        # keep only the pixels selected by both layers (intersection of the two masks)
        mask = mask0 + mask1
        mask[mask == 1] = 0
        mask[mask == 2] = 1
        # get bounding boxes
        bboxes = self.get_bboxes(mask)
        # mask = mask1
        mask_3 = np.concatenate((np.zeros((2, origin_height, origin_width), dtype=np.uint16),
                                 mask * 255), axis=0)
        # overlay the mask on the original image for visualization
        mask_3 = np.transpose(mask_3, (1, 2, 0))
        mask_3 = origin_image + mask_3
        mask_3[mask_3[:, :, 2] > 254, 2] = 255
        mask_3 = np.array(mask_3, dtype=np.uint8)
        # draw bboxes and write them to result.txt
        if len(bboxes) == 0:
            result_file.write(img_id + "\n")
        for (x, y, w, h) in bboxes:
            cv2.rectangle(mask_3, (x, y), (x + w, y + h), (0, 255, 0), 2)
            result_file.write(img_id + " {} {} {} {}\n".format(x, y, x + w, y + h))
        cv2.imwrite(osp.join(savedir, img_id + ".jpg"), mask_3)
    result_file.close()
def co_locate(self, testdir, savedir, trans_vector, descriptor_mean_tensor, image_ids=None):
    is_imageids_same = (set(self.trainids) == set(image_ids)) if (
        image_ids is not None and self.trainids is not None) else True
    # reuse the training loader when testing on the same directory and image ids
    if (testdir == self.traindir) and is_imageids_same:
        test_loader = self.data_loader
    else:
        test_loader = get_dataloader(testdir, transform=self.transform,
                                     image_ids=image_ids, resize=self.resize)
    if self.use_cuda:
        descriptor_mean_tensor = descriptor_mean_tensor.cuda()
    print("colocate...")
    result_file = open(osp.join(savedir, "result.txt"), "w")
    for index, image in tqdm(enumerate(test_loader)):
        img_id = osp.basename(test_loader.dataset.img_paths[index]).split(".")[0]
        origin_image = cv2.imread(test_loader.dataset.img_paths[index])
        origin_height, origin_width = origin_image.shape[:2]
        # print(test_loader.dataset.img_paths[index])
        if self.use_cuda:
            image = image.cuda()
        featmap = self.pretrained_feature_model(image)
        if self.upsample is not None:
            featmap = self.upsample(featmap)
        # flatten to (H * W, feature_dim) and assign each location to a cluster
        featmap = featmap[0, :]
        c, h, w = featmap.shape
        featmap = featmap.view(self.feature_dim, h * w)
        featmap = featmap.transpose(0, 1)
        featmap = featmap.detach().cpu().numpy()
        labeled = self.SC.predict(featmap)
        # locations assigned to the main cluster form the foreground mask
        mask = np.zeros((1, h * w))
        mask[0, np.where(labeled == self.SC.main_id)] = 1
        mask = mask.reshape(h, w)
        mask = self.max_conn_mask(mask, origin_height, origin_width)
        bboxes = self.get_bboxes(mask)
        mask_3 = np.concatenate((np.zeros((2, origin_height, origin_width), dtype=np.uint16),
                                 mask * 255), axis=0)
        # overlay the mask on the original image for visualization
        mask_3 = np.transpose(mask_3, (1, 2, 0))
        mask_3 = origin_image + mask_3
        mask_3[mask_3[:, :, 2] > 254, 2] = 255
        mask_3 = np.array(mask_3, dtype=np.uint8)
        # draw bboxes and write them to result.txt
        if len(bboxes) == 0:
            result_file.write(img_id + "\n")
        for (x, y, w, h) in bboxes:
            cv2.rectangle(mask_3, (x, y), (x + w, y + h), (0, 255, 0), 2)
            result_file.write(img_id + " {} {} {} {}\n".format(x, y, x + w, y + h))
        cv2.imwrite(osp.join(savedir, img_id + ".jpg"), mask_3)
    result_file.close()
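# Hedged sketches of the two helpers used by every co_locate() variant above. Their real
# definitions live elsewhere in this repo; these minimal versions only illustrate the assumed
# behaviour: max_conn_mask keeps the largest positive connected component of the indicator
# map, resized to the original image size, and get_bboxes turns the (1, H, W) binary mask
# into (x, y, w, h) boxes. OpenCV 4 return signatures are assumed.
import cv2
import numpy as np

def max_conn_mask_sketch(P, origin_height, origin_width):
    # binarize the indicator map, then keep only its largest connected component
    binary = np.uint8(P > 0)
    num, labels, stats, _ = cv2.connectedComponentsWithStats(binary, connectivity=8)
    if num > 1:
        largest = 1 + np.argmax(stats[1:, cv2.CC_STAT_AREA])
        binary = np.uint8(labels == largest)
    mask = cv2.resize(binary, (origin_width, origin_height), interpolation=cv2.INTER_NEAREST)
    return mask.reshape(1, origin_height, origin_width).astype(np.uint16)

def get_bboxes_sketch(mask):
    # one bounding box per external contour of the binary mask
    contours, _ = cv2.findContours(np.uint8(mask[0]), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    return [cv2.boundingRect(c) for c in contours]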