def extract_features_per_image(self, ann_dict):
    """Extract features of correct and wrong bounding boxes for one image.

    Positive samples are the boxes returned by
    ``self.ground_truth_bounding_box_sampling`` for every ground-truth box;
    each one contributes its feature and the horizontally flipped feature.
    Negative samples are random boxes whose overlap with the ground truths
    (as computed by ``bounding_box_overlap_on_ground_truths``) is below 0.5.
    Features and labels are appended to ``self.features`` and
    ``self.labels`` while holding ``self.features_labels_lock``.

    Args:
        ann_dict: annotation dictionary of the image. The keys "path",
            "basename" and "bbs" are read (every item of "bbs" must have a
            "bb" entry); the keys "pos_bbs" and "neg_bbs" are written.

    Returns:
        None. If the image cannot be read a warning is issued and the
        image is skipped without touching the shared lists.
    """
    fn = os.path.join(ann_dict["path"], ann_dict["basename"])
    img = cv2.imread(fn)
    if img is None:
        # cv2.imread signals failure by returning None; without an image
        # there is nothing to sample, so skip instead of failing on
        # img.shape below.
        warnings.warn("The image %s does not exist in the filesystem." % fn)
        return
    img_h, img_w = img.shape[:2]
    feat_len = self.gradient_edge ** 2

    # calculating features for each ground truth bounding box
    bbs = ann_dict["bbs"]
    for obj in bbs:
        bb = obj["bb"]
        # NOTE(review): "pos_bbs" is overwritten at every iteration, so
        # after the loop it only holds the samples of the last object.
        ann_dict["pos_bbs"], img_sizes_records = \
            self.ground_truth_bounding_box_sampling(bb)
        for idx, sampled_bb in enumerate(ann_dict["pos_bbs"]):
            grad = get_features(img,
                                bb=sampled_bb,
                                w=self.gradient_edge,
                                h=self.gradient_edge)
            feat = np.reshape(grad, (1, feat_len))
            flipped_grad = cv2.flip(grad, 1)  # 1 to flip horizontally
            flipped_feat = np.reshape(flipped_grad, (1, feat_len))
            # shared memory among different threads, synchronization
            # needed for avoiding race conditions
            with self.features_labels_lock:
                self.features.append(feat.astype(float))
                self.features.append(flipped_feat.astype(float))
                self.labels.append(POSITIVE_BB)
                self.labels.append(POSITIVE_BB)
                size_idx = img_sizes_records[idx]
                self.pos_sizes_records.append(size_idx)

    # generating negative samples by randomly extracting bounding boxes
    # that do not overlap enough with the ground truth ones.
    ann_dict["neg_bbs"] = []
    max_iter = 100  # upper bound on the number of random attempts
    neg_counter = 0
    for _ in range(max_iter):
        if neg_counter >= self.num_negatives_bbs_for_image:
            break
        x1 = random.randint(1, img_w)
        y1 = random.randint(1, img_h)
        x2 = random.randint(1, img_w)
        y2 = random.randint(1, img_h)
        neg_bb_candidate = (min(x1, x2), min(y1, y2),
                            max(x1, x2), max(y1, y2))
        overlap = bounding_box_overlap_on_ground_truths(
            ground_truth_bbs=bbs, bb=neg_bb_candidate)
        if overlap < 0.5:
            ann_dict["neg_bbs"].append(neg_bb_candidate)
            grad = get_features(img,
                                bb=neg_bb_candidate,
                                w=self.gradient_edge,
                                h=self.gradient_edge)
            feat = np.reshape(grad, (1, feat_len))
            # shared memory among different threads, synchronization
            # needed for avoiding race conditions
            with self.features_labels_lock:
                self.features.append(feat)
                self.labels.append(NEGATIVE_BB)
            neg_counter = neg_counter + 1
def extract_features_per_image(self, ann_dict):
    """Extract features of correct and wrong bounding boxes for one image.

    Positive samples are the boxes returned by
    ``self.ground_truth_bounding_box_sampling`` for every ground-truth box;
    each one contributes its feature and the horizontally flipped feature.
    Negative samples are random boxes whose overlap with the ground truths
    (as computed by ``bounding_box_overlap_on_ground_truths``) is below 0.5.
    Features and labels are appended to ``self.features`` and
    ``self.labels`` while holding ``self.features_labels_lock``.

    Args:
        ann_dict: annotation dictionary of the image. The keys "path",
            "basename" and "bbs" are read (every item of "bbs" must have a
            "bb" entry); the keys "pos_bbs" and "neg_bbs" are written.

    Returns:
        None. If the image cannot be read a warning is issued and the
        image is skipped without touching the shared lists.
    """
    fn = os.path.join(ann_dict["path"], ann_dict["basename"])
    img = cv2.imread(fn)
    if img is None:
        # cv2.imread signals failure by returning None; without an image
        # there is nothing to sample, so skip instead of failing on
        # img.shape below.
        warnings.warn("The image %s does not exist in the filesystem." % fn)
        return
    img_h, img_w = img.shape[:2]
    feat_len = self.gradient_edge ** 2

    # calculating features for each ground truth bounding box
    bbs = ann_dict["bbs"]
    for obj in bbs:
        bb = obj["bb"]
        # NOTE(review): "pos_bbs" is overwritten at every iteration, so
        # after the loop it only holds the samples of the last object.
        ann_dict["pos_bbs"], img_sizes_records = \
            self.ground_truth_bounding_box_sampling(bb)
        for idx, sampled_bb in enumerate(ann_dict["pos_bbs"]):
            grad = get_features(img,
                                bb=sampled_bb,
                                w=self.gradient_edge,
                                h=self.gradient_edge)
            feat = np.reshape(grad, (1, feat_len))
            flipped_grad = cv2.flip(grad, 1)  # 1 to flip horizontally
            flipped_feat = np.reshape(flipped_grad, (1, feat_len))
            # shared memory among different threads, synchronization
            # needed for avoiding race conditions
            with self.features_labels_lock:
                self.features.append(feat.astype(float))
                self.features.append(flipped_feat.astype(float))
                self.labels.append(POSITIVE_BB)
                self.labels.append(POSITIVE_BB)
                size_idx = img_sizes_records[idx]
                self.pos_sizes_records.append(size_idx)

    # generating negative samples by randomly extracting bounding boxes
    # that do not overlap enough with the ground truth ones.
    ann_dict["neg_bbs"] = []
    max_iter = 100  # upper bound on the number of random attempts
    neg_counter = 0
    for _ in range(max_iter):
        if neg_counter >= self.num_negatives_bbs_for_image:
            break
        x1 = random.randint(1, img_w)
        y1 = random.randint(1, img_h)
        x2 = random.randint(1, img_w)
        y2 = random.randint(1, img_h)
        neg_bb_candidate = (min(x1, x2), min(y1, y2),
                            max(x1, x2), max(y1, y2))
        overlap = bounding_box_overlap_on_ground_truths(
            ground_truth_bbs=bbs, bb=neg_bb_candidate)
        if overlap < 0.5:
            ann_dict["neg_bbs"].append(neg_bb_candidate)
            grad = get_features(img,
                                bb=neg_bb_candidate,
                                w=self.gradient_edge,
                                h=self.gradient_edge)
            feat = np.reshape(grad, (1, feat_len))
            # shared memory among different threads, synchronization
            # needed for avoiding race conditions
            with self.features_labels_lock:
                self.features.append(feat)
                self.labels.append(NEGATIVE_BB)
            neg_counter = neg_counter + 1
def second_stage_training(self,
                          wrapper_type=CUSTOM_LIBLINEAR_WRAPPER,
                          weights_fn=None):
    """Learn the size-specific second stage coefficients.

    First stage detections are computed on every training image and
    marked as correct (1) or wrong (-1) according to their overlap with
    the ground-truth bounding boxes. For each size a 1-D linear model is
    then fitted so that
    ``2nd_stage_score = w1_s * 1st_stage_score + w0_s``.

    Args:
        wrapper_type: forwarded unchanged to ``fit_svm``.
        weights_fn: optional path where the learnt coefficients are
            saved as JSON. Missing parent directories are created.

    Returns:
        A tuple ``(sizes_dict, coeffs)``. ``sizes_dict`` maps each size
        (as a string) to a dict with the keys "weight" and "bias";
        ``coeffs`` is a ``(num_sizes, 2)`` numpy array with the weight in
        column 0 and the bias in column 1, rows ordered by increasing
        size. The array is also stored in ``self.coeffs``.
    """
    print("Starting second stage training.")
    fstp = FirstStagePrediction(self.first_stage_weights,
                                self.scale_space_sizes,
                                edge=self.gradient_edge,
                                base_log=self.base_log,
                                min_edge_log=self.min_edge_log,
                                edge_log_range=self.edge_log_range,
                                num_win_psz=NUM_WIN_PSZ)
    SCORE = 0
    LABEL = 1
    sizes_dict = dict()
    for sz in self.scale_space_sizes:
        # each key of sizes_dict corresponds to a (scores, labels) tuple
        sizes_dict["%s" % sz] = (list(), list())

    # first stage detections are performed on all the training set, then
    # are marked as correct or not, according to their overlap with the
    # ground-truth bounding boxes. For each size, a 1-D features array and
    # a labels array are prepared.
    # second stage prediction will be
    # 2nd_stage_score = w1_s * 1st_stage_score + w0_s
    # where w1_s and w0_s are size dependent.
    print("Building second stage training training set...")
    for ann_key in self.annotations.keys():
        ann_dict = self.annotations[ann_key]
        fn = os.path.join(ann_dict["path"], ann_dict["basename"])
        img = cv2.imread(fn)
        if img is None:
            # cv2.imread returns None on failure: skip the image rather
            # than failing later while using it.
            warnings.warn(
                "The image %s does not exist in the filesystem." % fn)
            continue
        ground_truth_bbs_objs = ann_dict["bbs"]
        predictions = fstp.predict(img, nss=2)
        for pred_bbs, score, size in predictions:
            overlap = bounding_box_overlap_on_ground_truths(
                ground_truth_bbs_objs, pred_bbs)
            y = 1 if overlap > 0.5 else -1
            sizes_dict["%s" % size][SCORE].append(score)
            sizes_dict["%s" % size][LABEL].append(y)

    # in this loop, the training is performed for each size: w1_s and
    # w0_s are determined.
    print("Learning size-specific coefficients.")
    to_delete = []
    for size_key in sorted(sizes_dict.keys(), key=lambda x: int(x)):
        item = sizes_dict[size_key]
        # from now on the entry holds the learnt coefficients
        sizes_dict[size_key] = dict()
        # features array is 1-D, the only feature is the first-stage
        # bounding-box detection score
        train = np.reshape(np.array(item[SCORE]), (-1, 1))
        labels = np.array(item[LABEL])
        # really weird thing, liblinear takes as positive label the first
        # label it encounters
        Xp, Xn = self.split_positive_and_negative_features(train, labels)
        train, labels = self.pack_positive_negative_features(Xp, Xn)
        num_pos = np.sum((labels == 1).astype(int))
        print("Size %s: number training samples %s, number positive samples %s." % (
            size_key, train.shape[0], num_pos))
        if train.shape[0] == 0:
            to_delete.append(size_key)
            print("Training set for size %s is missing. Skip this size." % size_key)
            continue
        weight, bias = fit_svm(train,
                               labels,
                               C=100,
                               wrapper_type=wrapper_type)
        # weight is 1 element array!
        sizes_dict[size_key]["weight"] = float(weight[0])
        sizes_dict[size_key]["bias"] = float(bias)

    # remove the items that do not have enough candidates for a training.
    for key in to_delete:
        del sizes_dict[key]

    if weights_fn is not None:
        basedir = os.path.dirname(weights_fn)
        # dirname is "" for a bare file name and os.makedirs("") raises,
        # so only create the directory when there is one to create.
        if basedir and not os.path.exists(basedir):
            os.makedirs(basedir)
        if weights_fn[-5:] != ".json":
            warnings.warn(
                "The filename for saving second stage weights does not have a json extension!"
            )
        print("Saving second stage learning coefficients to file.")
        sizes_json = json.dumps(sizes_dict,
                                sort_keys=True,
                                indent=4,
                                separators=(',', ': '))
        # the context manager closes the file even if the write fails
        with open(weights_fn, "w") as f:
            f.write(sizes_json)

    print("Storing second stage learning coefficients in numpy array.")
    # iterate the sizes in increasing numeric order so that the row order
    # of coeffs is deterministic and matches the training loop above
    ordered_keys = sorted(sizes_dict.keys(), key=lambda x: int(x))
    coeffs = np.zeros((len(ordered_keys), 2))
    for i, size_key in enumerate(ordered_keys):
        sz_dict = sizes_dict[size_key]
        coeffs[i, 0] = sz_dict["weight"]
        coeffs[i, 1] = sz_dict["bias"]
    self.coeffs = coeffs
    return sizes_dict, coeffs
def second_stage_training(self,
                          wrapper_type=CUSTOM_LIBLINEAR_WRAPPER,
                          weights_fn=None):
    """Learn the size-specific second stage coefficients.

    First stage detections are computed on every training image and
    marked as correct (1) or wrong (-1) according to their overlap with
    the ground-truth bounding boxes. For each size a 1-D linear model is
    then fitted so that
    ``2nd_stage_score = w1_s * 1st_stage_score + w0_s``.

    Args:
        wrapper_type: forwarded unchanged to ``fit_svm``.
        weights_fn: optional path where the learnt coefficients are
            saved as JSON. Missing parent directories are created.

    Returns:
        A tuple ``(sizes_dict, coeffs)``. ``sizes_dict`` maps each size
        (as a string) to a dict with the keys "weight" and "bias";
        ``coeffs`` is a ``(num_sizes, 2)`` numpy array with the weight in
        column 0 and the bias in column 1, rows ordered by increasing
        size. The array is also stored in ``self.coeffs``.
    """
    print("Starting second stage training.")
    fstp = FirstStagePrediction(self.first_stage_weights,
                                self.scale_space_sizes,
                                edge=self.gradient_edge,
                                base_log=self.base_log,
                                min_edge_log=self.min_edge_log,
                                edge_log_range=self.edge_log_range,
                                num_win_psz=NUM_WIN_PSZ)
    SCORE = 0
    LABEL = 1
    sizes_dict = dict()
    for sz in self.scale_space_sizes:
        # each key of sizes_dict corresponds to a (scores, labels) tuple
        sizes_dict["%s" % sz] = (list(), list())

    # first stage detections are performed on all the training set, then
    # are marked as correct or not, according to their overlap with the
    # ground-truth bounding boxes. For each size, a 1-D features array and
    # a labels array are prepared.
    # second stage prediction will be
    # 2nd_stage_score = w1_s * 1st_stage_score + w0_s
    # where w1_s and w0_s are size dependent.
    print("Building second stage training training set...")
    for ann_key in self.annotations.keys():
        ann_dict = self.annotations[ann_key]
        fn = os.path.join(ann_dict["path"], ann_dict["basename"])
        img = cv2.imread(fn)
        if img is None:
            # cv2.imread returns None on failure: skip the image rather
            # than failing later while using it.
            warnings.warn(
                "The image %s does not exist in the filesystem." % fn)
            continue
        ground_truth_bbs_objs = ann_dict["bbs"]
        predictions = fstp.predict(img, nss=2)
        for pred_bbs, score, size in predictions:
            overlap = bounding_box_overlap_on_ground_truths(
                ground_truth_bbs_objs, pred_bbs)
            y = 1 if overlap > 0.5 else -1
            sizes_dict["%s" % size][SCORE].append(score)
            sizes_dict["%s" % size][LABEL].append(y)

    # in this loop, the training is performed for each size: w1_s and
    # w0_s are determined.
    print("Learning size-specific coefficients.")
    to_delete = []
    for size_key in sorted(sizes_dict.keys(), key=lambda x: int(x)):
        item = sizes_dict[size_key]
        # from now on the entry holds the learnt coefficients
        sizes_dict[size_key] = dict()
        # features array is 1-D, the only feature is the first-stage
        # bounding-box detection score
        train = np.reshape(np.array(item[SCORE]), (-1, 1))
        labels = np.array(item[LABEL])
        # really weird thing, liblinear takes as positive label the first
        # label it encounters
        Xp, Xn = self.split_positive_and_negative_features(train, labels)
        train, labels = self.pack_positive_negative_features(Xp, Xn)
        num_pos = np.sum((labels == 1).astype(int))
        print("Size %s: number training samples %s, number positive samples %s." % (
            size_key, train.shape[0], num_pos))
        if train.shape[0] == 0:
            to_delete.append(size_key)
            print("Training set for size %s is missing. Skip this size." % size_key)
            continue
        weight, bias = fit_svm(train,
                               labels,
                               C=100,
                               wrapper_type=wrapper_type)
        # weight is 1 element array!
        sizes_dict[size_key]["weight"] = float(weight[0])
        sizes_dict[size_key]["bias"] = float(bias)

    # remove the items that do not have enough candidates for a training.
    for key in to_delete:
        del sizes_dict[key]

    if weights_fn is not None:
        basedir = os.path.dirname(weights_fn)
        # dirname is "" for a bare file name and os.makedirs("") raises,
        # so only create the directory when there is one to create.
        if basedir and not os.path.exists(basedir):
            os.makedirs(basedir)
        if weights_fn[-5:] != ".json":
            warnings.warn(
                "The filename for saving second stage weights does not have a json extension!"
            )
        print("Saving second stage learning coefficients to file.")
        sizes_json = json.dumps(sizes_dict,
                                sort_keys=True,
                                indent=4,
                                separators=(',', ': '))
        # the context manager closes the file even if the write fails
        with open(weights_fn, "w") as f:
            f.write(sizes_json)

    print("Storing second stage learning coefficients in numpy array.")
    # iterate the sizes in increasing numeric order so that the row order
    # of coeffs is deterministic and matches the training loop above
    ordered_keys = sorted(sizes_dict.keys(), key=lambda x: int(x))
    coeffs = np.zeros((len(ordered_keys), 2))
    for i, size_key in enumerate(ordered_keys):
        sz_dict = sizes_dict[size_key]
        coeffs[i, 0] = sz_dict["weight"]
        coeffs[i, 1] = sz_dict["bias"]
    self.coeffs = coeffs
    return sizes_dict, coeffs