def next(self):
    # just grab a random frame
    frame = self.source[random.choice(self.frame_ids)]
    # frame.show_image_with_labels()

    # get a random crop object; list() is needed so random.choice works on Python 3
    crop_objs = list(filter(lambda x: not self.obj_types or x.obj_type in self.obj_types,
                            frame.get_objects()))
    # print('Num crop objs in sample: {0}'.format(len(crop_objs)))
    crop = random.choice(crop_objs)
    # print('crop_box: ' + str(crop.box))

    # 1) randomly perturb the crop box (scale and translation)
    transformed_box = RandomPerturber.perturb_crop_box(crop.box, {})

    # 2) take the crop
    # TODO: change to a center crop to preserve aspect ratio
    # TODO: if the affine is the identity within some tolerance, don't bother applying it
    affine = Affine()
    scalex = float(self.crop_size[0]) / transformed_box.edges()[0]
    scaley = float(self.crop_size[1]) / transformed_box.edges()[1]
    affine.append(Affine.translation(-transformed_box.xy_min()))
    affine.append(Affine.scaling((scalex, scaley)))
    transformed_image = affine.apply_to_image(frame.image, self.crop_size)
    # transformed_image.visualize(title='transformed image')

    # 3) convert to CHW float [0,1]; input and target are the same image
    # since this is an autoencoder sample
    chw_image = transformed_image.to_order_and_class(Ordering.CHW, ValueClass.FLOAT01)
    # chw_image.visualize(title='chw_image')
    sample = AutoEncoderSample(
        [torch.Tensor(chw_image.get_data().astype(float))],
        [torch.Tensor(chw_image.get_data().astype(float))])
    return sample
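# A minimal standalone sketch of the crop affine composed above, assuming 3x3
# homogeneous matrices; box_min, box_edges, and crop_size are hypothetical
# stand-ins for this repo's Affine/Box API, not part of it.
#
# import numpy as np
#
# def crop_affine_sketch(box_min, box_edges, crop_size):
#     # translate the box's min corner to the origin
#     t = np.array([[1, 0, -box_min[0]],
#                   [0, 1, -box_min[1]],
#                   [0, 0, 1]], dtype=float)
#     # scale the box to the target crop size
#     s = np.array([[crop_size[0] / box_edges[0], 0, 0],
#                   [0, crop_size[1] / box_edges[1], 0],
#                   [0, 0, 1]], dtype=float)
#     return s @ t  # translation is applied first, then scaling
#
# # corners of the box map to corners of the crop
# m = crop_affine_sketch(box_min=(10, 20), box_edges=(50, 80), crop_size=(100, 100))
# assert np.allclose(m @ np.array([10, 20, 1]), [0, 0, 1])
# assert np.allclose(m @ np.array([60, 100, 1]), [100, 100, 1])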
def next(self):
    # 1) pick a random frame
    frame = self.source[random.choice(self.frame_ids)]

    # 2) generate a random perturbation and apply it to the frame; this also
    # perturbs the objects, including segmentation polygons
    perturbed_frame = RandomPerturber.perturb_frame(frame, {})

    # 3) scale the perturbed frame to the desired input resolution
    crop_affine = resize_image_center_crop(perturbed_frame.image, self.crop_size)
    perturbed_frame = apply_affine_to_frame(perturbed_frame, crop_affine, self.crop_size)
    # visualize the perturbed frame along with its perturbed objects and masks here
    # perturbed_frame.visualize(display=True)

    # 4) for each object type, produce a merged binary mask over the frame.
    # This results in an h x w x k target map, where k is the number of classes
    # in consideration. For now we use pycocotools' merge and polygon mapping
    # functions since they are implemented in C, although I would prefer not to
    # have this dependency.
    masks = np.zeros(perturbed_frame.image.get_hw().tolist() + [len(self.obj_types_to_ids)])
    for k, v in self.obj_types_to_ids.items():
        # a) for all objects in the frame that belong to this type, create a merged mask
        polygons = []
        for obj in perturbed_frame.get_objects():
            if obj.obj_type == k:
                polygons.extend(obj.polygons)
        masks[:, :, v] = Polygon.create_mask(polygons,
                                             perturbed_frame.image.get_wh()[0],
                                             perturbed_frame.image.get_wh()[1])

    # 5) create the segmentation sample
    chw_image = perturbed_frame.image.to_order_and_class(Ordering.CHW, ValueClass.FLOAT01)
    # transpose the mask from HWK to KHW
    chw_mask = np.transpose(masks, axes=(2, 0, 1))
    # chw_image.visualize(title='chw_image')
    sample = SegmentationSample(
        [torch.Tensor(chw_image.get_data().astype(float))],
        [torch.Tensor(chw_mask)],
        self.ids_to_obj_types)
    return sample
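# A hedged sketch of what Polygon.create_mask might look like on top of
# pycocotools, as the comment above suggests; the flat [x0, y0, x1, y1, ...]
# polygon layout is an assumption about this repo's Polygon representation.
#
# import numpy as np
# from pycocotools import mask as mask_utils
#
# def create_merged_mask(polygons, width, height):
#     if not polygons:
#         return np.zeros((height, width), dtype=np.uint8)
#     # rasterize each polygon to RLE, union them, then decode to a binary HxW mask
#     rles = mask_utils.frPyObjects(polygons, height, width)
#     merged = mask_utils.merge(rles)
#     return mask_utils.decode(merged)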
def __next__(self):
    # 1) pick a random frame
    frame = self.source[random.choice(self.frame_ids)]

    # 2) generate a random perturbation and apply it to the frame
    perturb_params = {
        'translation_range': [-0.1, 0.1],
        'scaling_range': [0.9, 1.1]
    }
    perturbed_frame = RandomPerturber.perturb_frame(frame, perturb_params)
    crop_affine = resize_image_center_crop(perturbed_frame.image, self.crop_size)
    output_size = [self.crop_size[1], self.crop_size[0]]
    perturbed_frame = apply_affine_to_frame(perturbed_frame, crop_affine, output_size)
    # perturbed_frame.visualize(title='chw_image', display=True)

    # 3) encode the objects into a fixed-size target vector of length max_objects;
    # if there are more objects than max_objects, the extras are dropped.
    # Each component vector has the form [class(1), bbox(4)]; a padding vector of
    # the form [-1, -1, -1, -1, -1] represents a non-existent object.
    class_encoding, class_decoding = dict(), dict()
    padvec = [np.array([-1] * 5) for _ in range(self.max_objects)]
    for i, obj in enumerate(perturbed_frame.objects[
            0:min(self.max_objects, len(perturbed_frame.objects))]):
        # note: objects whose type is filtered out leave their slot as padding
        if obj.obj_type not in self.obj_types:
            continue
        if obj.obj_type not in class_encoding:
            code = len(class_encoding)
            class_encoding[obj.obj_type] = code
            class_decoding[code] = obj.obj_type
        box_coords = obj.box.to_single_array()
        padvec[i] = np.concatenate(
            (np.array([class_encoding[obj.obj_type]]), box_coords), axis=0)

    chw_image = perturbed_frame.image.to_order_and_class(Ordering.CHW, ValueClass.FLOAT01)
    sample = MultiObjectDetectionSample(
        [torch.Tensor(chw_image.get_data().astype(float))],
        [torch.Tensor(padvec)],
        class_decoding)
    return sample
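# Decoding counterpart for the padded targets built above (a sketch, assuming
# the [class(1), bbox(4)] layout with -1 rows as padding; decode_targets is a
# hypothetical helper, not part of this repo).
#
# import numpy as np
#
# def decode_targets(padvec, class_decoding):
#     objects = []
#     for row in padvec:
#         if row[0] < 0:  # padding row, no object encoded here
#             continue
#         objects.append((class_decoding[int(row[0])], row[1:5]))
#     return objects
#
# padvec = np.array([[0, 10, 20, 60, 100], [-1, -1, -1, -1, -1]], dtype=float)
# print(decode_targets(padvec, {0: 'car'}))  # [('car', array([ 10., 20., 60., 100.]))]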
def __next__(self):
    # randomly pick num_frames consecutive frames
    num_frames_in_src = len(self.source)
    # print("Number of frames in src: {}".format(num_frames_in_src))

    # 1) choose the first frame from 0 .. N - num_frames so the whole window fits
    frames = []
    first_frame = random.randint(0, num_frames_in_src - self.num_frames)
    for i in range(0, self.num_frames):
        frames.append(self.source[first_frame + i])

    # 2) generate a random perturbation and perturb all the frames;
    # note that the same perturbation must be applied to every frame
    # (here the ranges are degenerate, so the perturbation is effectively the identity)
    perturb_params = {
        'translation_range': [0.0, 0.0],
        'scaling_range': [1.0, 1.0]
    }
    perturbed_frames = []
    for f in frames:
        perturbed_frame = RandomPerturber.perturb_frame(f, perturb_params)
        crop_affine = resize_image_center_crop(perturbed_frame.image, self.crop_size)
        output_size = [self.crop_size[1], self.crop_size[0]]
        perturbed_frame = apply_affine_to_frame(perturbed_frame, crop_affine, output_size)
        perturbed_frames.append(perturbed_frame)
        # perturbed_frame.visualize(title='chw_image', display=True)

    # 3) prepare the tensors:
    # - stack the frames into a single tensor
    # - add the calibration matrices to the target
    input_tensors = []
    calib_mats = []
    for f in perturbed_frames:
        img = f.image.to_order_and_class(Ordering.CHW, ValueClass.FLOAT01)
        input_tensors.append(torch.Tensor(img.get_data().astype(float)))
        calib_mats.append(torch.Tensor(f.calib_mat))

    # the input is now num_frames x C x H x W
    sample = SequenceVideoSample(
        [torch.stack(input_tensors, dim=0)],
        [torch.stack(calib_mats)])
    return sample
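# Shape check for the stacking above (a sketch): torch.stack inserts a new
# leading dimension, so num_frames CxHxW images become num_frames x C x H x W.
#
# import torch
#
# frames = [torch.rand(3, 224, 224) for _ in range(3)]
# stacked = torch.stack(frames, dim=0)
# assert stacked.shape == (3, 3, 224, 224)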
def load_train(self):
    frame1, frame2, neg_box, pos_box, anchor_box = None, None, None, None, None
    # TODO: this should probably give up after some number of failed attempts
    while neg_box is None:
        indices = random.sample(self.frame_ids, 2)
        frame1, frame2 = [self.source[x] for x in indices]
        # list() is needed so random.choice works on Python 3
        frame1_objs = list(filter(lambda x: x.obj_type in self.obj_types,
                                  frame1.get_objects()))
        frame2_objs = list(filter(lambda x: x.obj_type in self.obj_types,
                                  frame2.get_objects()))
        # pick random positive and anchor boxes
        pos_box = random.choice(frame1_objs).box
        anchor_box = random.choice(frame2_objs).box
        # find a random negative crop
        neg_box = self.find_negative_crop(frame1, frame1_objs)

    perturbed_pos_box = RandomPerturber.perturb_crop_box(pos_box, self.perturbations)
    affine_crop0 = crop_image_resize(frame1.image, perturbed_pos_box, self.crop_size)
    pos_crop = affine_crop0.apply_to_image(frame1.image, self.crop_size)
    affine_crop1 = crop_image_resize(frame2.image, anchor_box, self.anchor_size)
    anchor_crop = affine_crop1.apply_to_image(frame2.image, self.anchor_size)
    affine_crop2 = crop_image_resize(frame1.image, neg_box, self.crop_size)
    neg_crop = affine_crop2.apply_to_image(frame1.image, self.crop_size)
    # neg_crop.visualize(display=True, title='neg')

    # now find all the boxes that intersect the perturbed_pos_box
    # and map them into crop space
    intersected_boxes = []
    for obj in filter(lambda x: x.obj_type in self.obj_types, frame1.get_objects()):
        if Box.intersection(obj.box, perturbed_pos_box) is not None:
            intersected_boxes.append(obj.box)
    intersected_boxes = list(
        map(lambda x: affine_crop0.apply_to_box(x), intersected_boxes))

    # test display
    # disp_frame = Frame.from_image_and_objects(pos_crop, [Object(box_crop)])
    # disp_frame.visualize(display=True, title='pos frame')
    # pos_crop.visualize(display=True, title='pos crop')

    pos = torch.Tensor(
        pos_crop.to_order_and_class(
            Ordering.CHW, ValueClass.FLOAT01).get_data().astype(float))
    neg = torch.Tensor(
        neg_crop.to_order_and_class(
            Ordering.CHW, ValueClass.FLOAT01).get_data().astype(float))
    anchor = torch.Tensor(
        anchor_crop.to_order_and_class(
            Ordering.CHW, ValueClass.FLOAT01).get_data().astype(float))

    # pos_map = generate_response_map_from_boxes(pos_crop.get_hw(), intersected_boxes)
    # PTImage.from_2d_numpy(pos_map).visualize(display=True, title='pos frame')
    pos_map = torch.Tensor(
        generate_response_map_from_boxes(pos_crop.get_hw(), intersected_boxes))
    neg_map = torch.Tensor(
        generate_response_map_from_boxes(pos_crop.get_hw()))

    data = [pos, neg, anchor]
    target = [pos_map, neg_map, anchor]
    return TripletDetectionSample(data, target)
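# A hedged sketch of what generate_response_map_from_boxes might do: paint 1s
# inside each box on an otherwise-zero HxW map (so the no-box call above yields
# an all-zero negative map). The [xmin, ymin, xmax, ymax] box layout is an
# assumption about this repo's Box class, and response_map_sketch is a
# hypothetical helper, not the repo's implementation.
#
# import numpy as np
#
# def response_map_sketch(hw, boxes=()):
#     resp = np.zeros((int(hw[0]), int(hw[1])), dtype=float)
#     for xmin, ymin, xmax, ymax in boxes:
#         # clamp to the map bounds before painting
#         x0, y0 = max(int(xmin), 0), max(int(ymin), 0)
#         x1, y1 = min(int(xmax), resp.shape[1]), min(int(ymax), resp.shape[0])
#         resp[y0:y1, x0:x1] = 1.0
#     return resp
#
# print(response_map_sketch((4, 4), [(1, 1, 3, 3)]).sum())  # 4.0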