a_width = annotation.size[1]
a_height = annotation.size[0]
margin_y = annotation.loc[0]
margin_x = annotation.loc[1]

# Number of patches for this annotation, proportional to its share of the
# total object area (cast to int since it is used as a count and slice bound)
num_patches_ann = int(np.ceil(float(config_opt.num_patches) *
                              a_width * a_height / total_object_area))
if patch_count + num_patches_ann > config_opt.num_patches:
    num_patches_ann = config_opt.num_patches - patch_count

# Random patch top-left corners inside the annotation box
y_ind = np.random.randint(
    0, a_height - config_opt.feature.cell.patch_size,
    num_patches_ann) + margin_y
x_ind = np.random.randint(
    0, a_width - config_opt.feature.cell.patch_size,
    num_patches_ann) + margin_x

# Broadcast the corners against the patch pixel offset grids to get full
# (num_patches, patch_size, patch_size) index arrays
x_ind = x_ind[:, np.newaxis, np.newaxis] + patch_pixel_map_x[np.newaxis, :, :]
y_ind = y_ind[:, np.newaxis, np.newaxis] + patch_pixel_map_y[np.newaxis, :, :]

frame = vivid.cvmat2array(gv.get_frame(image_ind))
patches[patch_count:patch_count + num_patches_ann] = (
    frame[y_ind, x_ind].reshape((num_patches_ann, -1)))

print "%s: %d" % (annotation.image_name, num_patches_ann)
patch_count += num_patches_ann

# Zero-mean each patch, then save the collection
patch_means = np.mean(patches, axis=1)
patches = patches - patch_means[:, np.newaxis]
np.save(target_file, patches)
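# The indexing above leans on numpy broadcasting: each random top-left
# corner is expanded by a patch-sized grid of pixel offsets, so a single
# fancy-indexing expression gathers every patch at once. A minimal,
# self-contained sketch of the same trick; the frame, patch size, and
# counts below are made up for illustration, and np.mgrid stands in for
# however patch_pixel_map_y/patch_pixel_map_x are built upstream.
import numpy as np

patch_size = 8
num_patches = 4
frame = np.random.rand(480, 640).astype('float32')  # stand-in image

# Per-patch pixel offset grids, analogous to patch_pixel_map_y/x above
patch_pixel_map_y, patch_pixel_map_x = np.mgrid[0:patch_size, 0:patch_size]

# Random top-left corners that keep every patch inside the frame
y0 = np.random.randint(0, frame.shape[0] - patch_size, num_patches)
x0 = np.random.randint(0, frame.shape[1] - patch_size, num_patches)

# Broadcast corners (N, 1, 1) against offsets (1, P, P) -> (N, P, P)
y_ind = y0[:, np.newaxis, np.newaxis] + patch_pixel_map_y[np.newaxis, :, :]
x_ind = x0[:, np.newaxis, np.newaxis] + patch_pixel_map_x[np.newaxis, :, :]

patches = frame[y_ind, x_ind].reshape((num_patches, -1))
print patches.shape  # (4, 64)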
def testRead(self):
    frame_uint8 = vivid.cvmat2array(self.fv.get_frame(0))
    # OpenCV stores channels as BGR; reverse to RGB before comparing
    assert np.allclose(frame_uint8[:, :, ::-1], self.reference_image_uint8)
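# For context, a minimal fixture that testRead above could run under as a
# method. The class name and the use of matplotlib's imread to build the
# reference image are assumptions for illustration, not taken from the
# actual test suite.
import unittest

import numpy as np
from matplotlib.pyplot import imread

import pathfinder
import vivid


class ImageSourceTest(unittest.TestCase):  # hypothetical name
    def setUp(self):
        # Decode the same image independently of vivid; matplotlib
        # returns JPEG files as uint8 RGB arrays
        self.fv = vivid.ImageSource(imlist=['./media/kewell1.jpg'])
        self.reference_image_uint8 = imread('./media/kewell1.jpg')


if __name__ == '__main__':
    unittest.main()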
    config_opts[0].image_sets_fp[options.set_type][annotation_image_ind])
annotation = config_opts[0].annotations[annotation_ind]

max_size = np.max(annotation.size)
max_dim = np.argmax(annotation.size)
o_height = config_opts[0].object_size[0]
o_width = config_opts[0].object_size[1]

# Scale so the annotation's largest dimension matches the target object size
scaling = float(config_opts[0].object_size[max_dim]) / max_size
annotation_location = [annotation.loc[0], annotation.loc[1], scaling]

ss.scale = scaling
gr_frame = vivid.cvmat2array(gv.get_frame(annotation_image_ind))
scaled_frame = vivid.cvmat2array(ss.get_frame(annotation_image_ind))

scaled_loc = np.array(annotation.loc) * scaling
scaled_size = np.array(annotation.size) * scaling

# Center the object inside the fixed-size detection window
scaled_fixed_loc = (scaled_loc -
                    (np.array(config_opts[0].object_size) -
                     np.array(scaled_size)) / 2.0)

# Snap the window origin to the feature window stride grid
scaled_fixed_loc = (
    (scaled_fixed_loc / config_opts[0].feature.window_stride).astype('int') *
    config_opts[0].feature.window_stride)

margin_up = -scaled_fixed_loc[0]
margin_left = -scaled_fixed_loc[1]
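# The snapping step above quantizes the window origin to the feature
# stride grid. The same computation on made-up numbers (a stride of 8 is
# an assumption for illustration):
import numpy as np

window_stride = 8
scaled_fixed_loc = np.array([13.7, 42.2])
snapped = (scaled_fixed_loc / window_stride).astype('int') * window_stride
print snapped  # [ 8 40]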
def detect_and_write(input_images, output_images):
    DETECTION_THRESHOLD = -.5

    config_opt = ConfigOpt(options.config_source)

    if options.bootstrap:
        svm_model_file = os.path.join(config_opt.svm_model_path, "modelb.svm")
    else:
        svm_model_file = os.path.join(config_opt.svm_model_path, "model1.svm")

    # Split the trained liblinear model into weight vector and bias term
    lm = read_liblinear_model(svm_model_file)
    w = lm.w[:-1]
    b = lm.w[-1] * lm.bias

    fv = vivid.ImageSource(imlist=input_images)
    cs = vivid.ConvertedSource(fv, target_type=vivid.cv.CV_32FC3,
                               scale=1.0 / 255.0)
    gv = vivid.GreySource(cs)

    clusters = np.load(config_opt.dictionary_file).reshape((
        config_opt.feature.cell.dictionary_size,
        config_opt.feature.cell.patch_size,
        config_opt.feature.cell.patch_size))

    fs = FeatureSource(gv, config_opt.feature, clusters)

    all_scores = []
    for fi, input_image, output_image in zip(
            xrange(len(input_images)), input_images, output_images):
        print('Image: {0}'.format(input_image))
        fs.init_frame(fi)
        # BGR -> RGB for drawing and saving
        frame = vivid.cvmat2array(cs.get_frame(fi))[:, :, ::-1]

        while True:
            try:
                locs, scale = fs.init_next_scale()
                print("Processing scale: {0:.2f}\t".format(scale))

                num_y, num_x = locs[0].shape
                scale_scores = np.empty((num_y, num_x), dtype='float32')

                # Score windows in bands of 10 rows to bound memory use
                for yind in range(0, num_y, 10):
                    ymin = yind
                    ymax = min(num_y, yind + 10)
                    feas = fs.get_features_from_scale(
                        ymin=ymin, ymax=ymax, xmin=0, xmax=num_x)
                    scale_scores[ymin:ymax, :] = (
                        feas * w[np.newaxis, np.newaxis, :]).sum(axis=2) + b

                detections = scale_scores >= DETECTION_THRESHOLD
                for yi, xi, detection_score in zip(
                        locs[0][detections], locs[1][detections],
                        scale_scores[detections]):
                    # Red box for confident detections, yellow for marginal
                    if detection_score >= 0:
                        box_color = [1.0, 0, 0]
                    else:
                        box_color = [1.0, 1.0, 0]
                    print("Detection at: y: {0}, x: {1}, s: {2:.2f}".format(
                        yi, xi, scale))
                    frame = draw_bounding_box(
                        frame, np.array([yi, xi, scale]),
                        text="%.3f" % detection_score, color=box_color)
            except EndOfScales:
                break

        imsave(output_image, frame)
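# A sketch of how detect_and_write might be invoked; the file lists are
# placeholders, and the surrounding script is assumed to have already
# parsed `options` (config_source, bootstrap) before this runs.
input_images = ['./media/kewell1.jpg']
output_images = ['./output/kewell1_detections.jpg']
detect_and_write(input_images, output_images)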
def init_frame(self, frame_num, margin_up=None, margin_down=None,
               margin_left=None, margin_right=None, scales=None):
    """This is the entry point; call this function to compute features.

    Margins are in terms of window strides.
    """
    self.scales = scales
    self.feature_frame = frame_num
    self.last_scale = -1

    min_offset = self.feature_opt.cell.patch_size / 2
    if margin_up is None:
        margin_up = -min_offset
    if margin_down is None:
        margin_down = -min_offset
    if margin_left is None:
        margin_left = -min_offset
    if margin_right is None:
        margin_right = -min_offset
    self.margins = [margin_up, margin_down, margin_left, margin_right]

    frame_size = np.array([self.origin.get_frame(frame_num).height,
                           self.origin.get_frame(frame_num).width])

    self.scaled_sources = []
    if self.scales is None:
        # Build the scale pyramid until a detection window no longer fits
        scale_factor = 1.0
        self.scales = []
        while True:
            cs = vivid.CachedSource(
                vivid.ScaledSource(self.origin, scale=1.0 / scale_factor),
                cache_size=1)
            frame_size = np.array(vivid.cvmat2array(
                cs.get_frame(self.feature_frame)).shape, dtype='float32')

            # See if we can fit a window at this scale
            h_windows = ((frame_size[0] - margin_up + margin_down) /
                         self.feature_opt.window_size[0])
            w_windows = ((frame_size[1] + margin_left + margin_right) /
                         self.feature_opt.window_size[1])
            if h_windows < 1 or w_windows < 1:
                break

            self.scales.append(1.0 / scale_factor)
            scale_factor *= self.feature_opt.scale_step
            self.scaled_sources.append(cs)
    else:
        for s in scales:
            self.scaled_sources.append(
                vivid.ScaledSource(self.origin, scale=s))
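# init_frame only prepares per-frame state; the per-scale work is driven
# by init_next_scale and get_features_from_scale, consumed in the same
# pattern as detect_and_write above. A sketch, with fs and the frame
# index as placeholders:
fs.init_frame(0)  # defaults: half-patch-size margins, automatic pyramid
while True:
    try:
        locs, scale = fs.init_next_scale()
    except EndOfScales:
        break
    # locs[0]/locs[1] hold the y/x coordinates of every window at this scale
    num_y, num_x = locs[0].shape
    feas = fs.get_features_from_scale(ymin=0, ymax=num_y, xmin=0, xmax=num_x)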
import pathfinder
import vivid

from matplotlib.pyplot import imshow, show, gray

# Create the reader for a list of image files
iv = vivid.ImageSource(imlist=["./media/kewell1.jpg"])

# Source for converting to float and scaling to [0,1]
cs = vivid.ConvertedSource(iv, vivid.cv.CV_32FC3, 1.0 / 255.0)

# Source for converting to grayscale
gs = vivid.GreySource(cs)

# Source for resizing
ss = vivid.ScaledSource(gs, 0.5)

# Get the first image
frame = cs.get_frame(0)

# Get the grayscale version of the first image
frame_gray = gs.get_frame(0)

# Get the scaled version of the grayscale image
frame_gray_scaled = ss.get_frame(0)

# Convert into numpy arrays
fr = vivid.cvmat2array(frame)
fr_gray = vivid.cvmat2array(frame_gray)
fr_gray_scaled = vivid.cvmat2array(frame_gray_scaled)
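# The matplotlib imports at the top suggest the frames are meant to be
# inspected visually; one way to look at the scaled grayscale result:
gray()                  # use a grayscale colormap for single-channel data
imshow(fr_gray_scaled)
show()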
import pathfinder
import vivid
import numpy as np

# Create the reader for a list of image files
iv = vivid.ImageSource(imlist=['./media/kewell1.jpg'])
cs = vivid.ConvertedSource(iv, vivid.cv.CV_32FC3, 1.0 / 255.0)

# Source that squares each pixel value
sqs = vivid.SquaredSource(cs)
sq_frame = vivid.cvmat2array(sqs.get_frame(0))

# Compute the same result directly in numpy for verification
reference_sq_frame = (
    vivid.cvmat2array(iv.get_frame(0)).astype('float32') / 255.0)
reference_sq_frame *= reference_sq_frame

print "Check: " + str(np.allclose(reference_sq_frame, sq_frame))