def __getitem__(self, index):
    """Return a Frame for the dataset sample at ``index``.

    Wraps the raw image as a PTImage and converts each COCO annotation
    into an Object with its box, category name, and polygon segmentation.
    """
    image, labels = self.dataset[index]
    np_arr = np.asarray(image)
    ptimage = PTImage.from_numpy_array(np_arr)
    objects = []
    for t in labels:
        box = Box.from_xywh(t['bbox'])
        obj_type = self.coco.loadCats([t['category_id']])[0]['name']
        # convert segmentation to polygon using the pycocotools
        # note the segmentation could be in one of several formats, for example the custom coco RLE,
        # to convert the RLE back to polygon is a bit of a pain so I will just ignore those right now
        # according to the COCO site, most of the data is in polygon form (not sure why there's a discrepancy?)
        # and I'd rather not store 2D binary masks with every object.
        polygon = t.get('segmentation')
        polys = []
        if polygon and isinstance(polygon, list):
            for seg in polygon:
                # each seg is a flat [x0, y0, x1, y1, ...] list; reshape to (n, 2).
                # plain floor division replaces the py2-compat int(old_div(...)) shim.
                polys.append(Polygon(np.array(seg).reshape((len(seg) // 2, 2))))
        objects.append(Object(box, obj_type=obj_type, polygons=polys))
    frame = Frame.from_image_and_objects(ptimage, objects)
    return frame
def from_dict(cls, objdict):
    """Build an instance from its serialized dict form.

    Expects ``objdict`` with keys 'box' ({'min': [x, y], 'max': [x, y]}),
    'unique_identifier', and 'attributes' (with a 'type' entry).
    """
    # Box is constructed as (xmin, ymin, xmax, ymax) everywhere else in this
    # codebase; the previous ordering passed (xmin, xmax, ymin, ymax), which
    # swapped ymin and xmax.
    box = Box(objdict['box']['min'][0],
              objdict['box']['min'][1],
              objdict['box']['max'][0],
              objdict['box']['max'][1])
    uid = objdict['unique_identifier']
    obj_type = objdict['attributes']['type']
    return cls(box, uid, obj_type)
def find_negative_crop(self, frame, objects):
    """Pick a random crop box that overlaps none of ``objects``.

    Tries up to ``max_attempts`` random centers inside the frame; returns
    the first non-overlapping Box, or None if every attempt collides.
    TODO: this rejection sampling is inefficient, fix the algorithm later.
    """
    frame_size = frame.image.get_wh()
    # floor division: random.randrange raises ValueError on non-integer
    # arguments under Python 3, where `/` always yields a float
    half_w = self.crop_size[0] // 2
    half_h = self.crop_size[1] // 2
    max_attempts = 10
    for _ in range(max_attempts):
        randcx = random.randrange(half_w, frame_size[0] - half_w)
        randcy = random.randrange(half_h, frame_size[1] - half_h)
        new_box = Box(randcx - half_w, randcy - half_h,
                      randcx + half_w, randcy + half_h)
        if all(Box.intersection(x.box, new_box) is None for x in objects):
            return new_box
    return None
def test_tensor_to_boxes(self):
    """Round-trip two boxes through tensor_to_boxes and compare element-wise."""
    expected = [[0, 0, 1, 1], [2, 2, 3, 3]]
    tensor = torch.Tensor(expected)
    boxes = Box.tensor_to_boxes(tensor)
    for exp, box in zip(expected, boxes):
        arr = box.to_single_array()
        self.assertTrue(all(exp[i] == arr[i] for i in range(len(arr))))
def __getitem__(self, index):
    """Return a Frame wrapping the 2D dataset image with one object spanning it."""
    pil_img, label = self.dataset[index]
    # assumed grayscale/2D here: append an explicit channel axis before wrapping
    channelled = np.expand_dims(np.asarray(pil_img), axis=2)
    ptimage = PTImage.from_numpy_array(channelled)
    width, height = pil_img.size
    full_frame_obj = Object(Box(0, 0, width, height))
    return Frame.from_image_and_objects(ptimage, [full_frame_obj])
def perturb_frame(frame, params):
    """Apply a random affine perturbation to ``frame``.

    Transforms the image and every object's box/polygons; objects whose
    perturbed boxes fall entirely outside the output image are dropped.
    """
    dims = frame.image.get_hw()
    affine = RandomPerturber.generate_random_affine(dims / 2, dims, params)
    out_frame = Frame(frame.image_path)
    out_frame.image = affine.apply_to_image(frame.image, dims)
    for obj in frame.objects:
        new_box = affine.apply_to_box(obj.box)
        new_polys = affine.apply_to_polygons(obj.polygons)
        # keep only objects still intersecting the visible frame
        if Box.intersection(new_box, out_frame.image.get_bounding_box()) is not None:
            new_obj = copy.deepcopy(obj)
            new_obj.box = new_box
            new_obj.polygons = new_polys
            out_frame.objects.append(new_obj)
    return out_frame
def apply_affine_to_frame(frame, affine, output_size):
    """Apply ``affine`` to a frame, producing a new Frame of ``output_size``.

    The image is resampled through the affine, and each object's box and
    polygons are transformed; objects pushed completely out of bounds are
    filtered out.
    """
    perturbed_frame = Frame(frame.image_path)
    perturbed_frame.image = affine.apply_to_image(frame.image, output_size)
    # iterate objects directly: the previous enumerate index was never used
    for obj in frame.objects:
        perturbed_obj_box = affine.apply_to_box(obj.box)
        perturbed_polygons = affine.apply_to_polygons(obj.polygons)
        # keep only objects whose transformed box still intersects the image
        if Box.intersection(
                perturbed_obj_box,
                perturbed_frame.image.get_bounding_box()) is not None:
            obj_copy = copy.deepcopy(obj)
            obj_copy.box = perturbed_obj_box
            obj_copy.polygons = perturbed_polygons
            perturbed_frame.objects.append(obj_copy)
    return perturbed_frame
def __load_frames(self, cars_dir, labels_mat):
    """Load Stanford Cars annotations and populate ``self.frames``.

    ``labels_mat`` is a .mat file whose 'annotations' entries are tuples of
    either (xmin, ymin, xmax, ymax, path) or (xmin, ymin, xmax, ymax,
    class, path); each becomes a Frame with a single 'car' object.

    Raises:
        ValueError: if an annotation tuple has an unexpected arity.
    """
    print('Loading Stanford Cars Frames')
    labels = scipy.io.loadmat(labels_mat)['annotations'][0]
    # load frames with labels
    for label in labels:
        if len(label) == 5:
            xmin, ymin, xmax, ymax, path = label
        elif len(label) == 6:
            xmin, ymin, xmax, ymax, _, path = label
        else:
            # raise instead of assert: asserts are stripped under `python -O`
            raise ValueError('unable to parse label!')
        # each field is a nested 1x1 mat array; unwrap to a plain float
        box = Box(float(xmin[0][0]), float(ymin[0][0]),
                  float(xmax[0][0]), float(ymax[0][0]))
        obj = Object(box, obj_type='car')
        image_path = os.path.join(cars_dir, path[0])
        self.frames.append(Frame(image_path, [obj]))
def unapply_to_box(self, box):
    """Map ``box`` back through the inverse of this affine transform."""
    inverse_pts = np.dot(self.inverse, box.augmented_matrix())
    return Box.from_augmented_matrix(inverse_pts)
def to_object(self):
    """Convert this record into an Object carrying its box, track id, and type."""
    coords = [self.bbox[i] for i in range(4)]
    return Object(Box.from_single_array(coords), self.track_idx, self.type)
def load_train(self):
    """Sample a (positive, negative, anchor) triplet for training.

    Picks two random frames, takes a random positive object crop from the
    first and an anchor object crop from the second, then searches the
    first frame for a crop that overlaps no object (the negative). Loops
    until a negative crop is found.

    Returns:
        TripletDetectionSample with data [pos, neg, anchor] tensors and
        target [pos_response_map, neg_response_map, anchor].
    """
    frame1, frame2, neg_box, pos_box, anchor_box = None, None, None, None, None
    frame1_objs = []
    # TODO, this should probably break if never find anything for a while
    while neg_box is None:
        indices = random.sample(self.frame_ids, 2)
        frame1, frame2 = [self.source[x] for x in indices]
        # materialize as lists: on Python 3, filter() returns an iterator,
        # which random.choice cannot index and which would be exhausted
        # before find_negative_crop reuses frame1_objs
        frame1_objs = [x for x in frame1.get_objects()
                       if x.obj_type in self.obj_types]
        frame2_objs = [x for x in frame2.get_objects()
                       if x.obj_type in self.obj_types]
        # get random pos boxes
        pos_box = random.choice(frame1_objs).box
        anchor_box = random.choice(frame2_objs).box
        # find random neg crop
        neg_box = self.find_negative_crop(frame1, frame1_objs)
    perturbed_pos_box = RandomPerturber.perturb_crop_box(
        pos_box, self.perturbations)
    affine_crop0 = crop_image_resize(frame1.image, perturbed_pos_box,
                                     self.crop_size)
    pos_crop = affine_crop0.apply_to_image(frame1.image, self.crop_size)
    affine_crop1 = crop_image_resize(frame2.image, anchor_box,
                                     self.anchor_size)
    anchor_crop = affine_crop1.apply_to_image(frame2.image, self.anchor_size)
    affine_crop2 = crop_image_resize(frame1.image, neg_box, self.crop_size)
    neg_crop = affine_crop2.apply_to_image(frame1.image, self.crop_size)
    # all valid objects whose boxes intersect the perturbed positive box,
    # mapped into the crop's coordinate frame
    intersected_boxes = [
        affine_crop0.apply_to_box(obj.box)
        for obj in frame1_objs
        if Box.intersection(obj.box, perturbed_pos_box) is not None
    ]
    pos = torch.Tensor(
        pos_crop.to_order_and_class(
            Ordering.CHW, ValueClass.FLOAT01).get_data().astype(float))
    neg = torch.Tensor(
        neg_crop.to_order_and_class(
            Ordering.CHW, ValueClass.FLOAT01).get_data().astype(float))
    anchor = torch.Tensor(
        anchor_crop.to_order_and_class(
            Ordering.CHW, ValueClass.FLOAT01).get_data().astype(float))
    pos_map = torch.Tensor(
        generate_response_map_from_boxes(pos_crop.get_hw(),
                                         intersected_boxes))
    neg_map = torch.Tensor(
        generate_response_map_from_boxes(pos_crop.get_hw()))
    data = [pos, neg, anchor]
    target = [pos_map, neg_map, anchor]
    return TripletDetectionSample(data, target)
def __convert_to_objects(self, boxes, classes):
    """Pair detection boxes with class labels and wrap each pair as an Object."""
    boxlist = Box.tensor_to_boxes(boxes.cpu())
    class_ids = classes.cpu().numpy()
    return [Object(box, 0, self.class_lookup[cid])
            for box, cid in zip(boxlist, class_ids)]