def extract_features_per_image(self, ann_dict):
    """Collect positive and negative training features for one annotated image.

    The image is read from ``os.path.join(ann_dict["path"], ann_dict["basename"])``.

    Positives: for every ground-truth object in ``ann_dict["bbs"]`` a set of
    boxes is drawn with ``self.ground_truth_bounding_box_sampling``. Each
    sampled box contributes two samples labelled ``POSITIVE_BB``: its feature
    and the feature of the horizontally flipped gradient patch.

    Negatives: random boxes whose score from
    ``bounding_box_overlap_on_ground_truths`` is below 0.5 are labelled
    ``NEGATIVE_BB``. Sampling stops once
    ``self.num_negatives_bbs_for_image`` boxes were accepted or 100
    candidates were tried.

    Side effects:
        * appends to ``self.features`` / ``self.labels`` while holding
          ``self.features_labels_lock``;
        * appends to ``self.pos_sizes_records``;
        * writes ``ann_dict["pos_bbs"]`` and ``ann_dict["neg_bbs"]``.

    If the image cannot be read, a warning is issued and the function returns
    without extracting anything.
    """
    fn = os.path.join(ann_dict["path"], ann_dict["basename"])
    img = cv2.imread(fn)
    if img is None:
        # cv2.imread reports an unreadable file by returning None; going on
        # would only crash on img.shape, so skip this image instead.
        warnings.warn("The image %s does not exist in the filesystem." % fn)
        return
    # Only height and width are needed; slicing also tolerates 2-D arrays.
    img_h, img_w = img.shape[:2]

    # Every feature is stored as a single row vector of gradient_edge**2 values.
    feat_shape = (1, self.gradient_edge ** 2)

    # Positive samples: features of boxes sampled around each ground truth.
    bbs = ann_dict["bbs"]
    for obj in bbs:
        # NOTE(review): "pos_bbs" is overwritten for every object, so after
        # the loop it only holds the boxes sampled for the last one.
        ann_dict["pos_bbs"], img_sizes_records = \
            self.ground_truth_bounding_box_sampling(obj["bb"])
        for idx, sampled_bb in enumerate(ann_dict["pos_bbs"]):
            grad = get_features(img, bb=sampled_bb,
                                w=self.gradient_edge, h=self.gradient_edge)
            feat = np.reshape(grad, feat_shape)
            # Flip code 1 mirrors the patch horizontally.
            flipped_feat = np.reshape(cv2.flip(grad, 1), feat_shape)
            # features/labels are shared among threads: keep each pair of
            # appends atomic so both lists stay aligned.
            with self.features_labels_lock:
                self.features.append(feat.astype(float))
                self.features.append(flipped_feat.astype(float))
                self.labels.append(POSITIVE_BB)
                self.labels.append(POSITIVE_BB)
            # NOTE(review): one size record per sampled box although two
            # features are stored, and this append is not under the lock.
            self.pos_sizes_records.append(img_sizes_records[idx])

    # Negative samples: random boxes that do not overlap the ground truths
    # enough. The number of attempts is bounded so the loop always ends.
    max_iter = 100
    neg_counter = 0
    ann_dict["neg_bbs"] = []
    for _ in range(max_iter):
        if neg_counter >= self.num_negatives_bbs_for_image:
            break
        # random.randint includes both ends, so coordinates lie in
        # [1, img_w] x [1, img_h].
        # NOTE(review): zero-width/height boxes are possible - confirm that
        # get_features tolerates them.
        x1 = random.randint(1, img_w)
        y1 = random.randint(1, img_h)
        x2 = random.randint(1, img_w)
        y2 = random.randint(1, img_h)
        neg_bb_candidate = (min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2))
        overlap = bounding_box_overlap_on_ground_truths(ground_truth_bbs=bbs,
                                                        bb=neg_bb_candidate)
        if overlap >= 0.5:
            continue
        ann_dict["neg_bbs"].append(neg_bb_candidate)
        grad = get_features(img, bb=neg_bb_candidate,
                            w=self.gradient_edge, h=self.gradient_edge)
        feat = np.reshape(grad, feat_shape)
        # Shared lists again: append feature and label together.
        with self.features_labels_lock:
            # Cast like the positives so all stored features share one dtype.
            self.features.append(feat.astype(float))
            self.labels.append(NEGATIVE_BB)
        neg_counter += 1
Beispiel #2
0
    def extract_features_per_image(self, ann_dict):
        """Collect positive and negative training features for one annotated image.

        The image is read from
        ``os.path.join(ann_dict["path"], ann_dict["basename"])``.

        Positives: for every ground-truth object in ``ann_dict["bbs"]`` a set
        of boxes is drawn with ``self.ground_truth_bounding_box_sampling``.
        Each sampled box contributes two samples labelled ``POSITIVE_BB``: its
        feature and the feature of the horizontally flipped gradient patch.

        Negatives: random boxes whose score from
        ``bounding_box_overlap_on_ground_truths`` is below 0.5 are labelled
        ``NEGATIVE_BB``. Sampling stops once
        ``self.num_negatives_bbs_for_image`` boxes were accepted or 100
        candidates were tried.

        Side effects:
            * appends to ``self.features`` / ``self.labels`` while holding
              ``self.features_labels_lock``;
            * appends to ``self.pos_sizes_records``;
            * writes ``ann_dict["pos_bbs"]`` and ``ann_dict["neg_bbs"]``.

        If the image cannot be read, a warning is issued and the method
        returns without extracting anything.
        """
        fn = os.path.join(ann_dict["path"], ann_dict["basename"])
        img = cv2.imread(fn)
        if img is None:
            # cv2.imread reports an unreadable file by returning None; going
            # on would only crash on img.shape, so skip this image instead.
            warnings.warn("The image %s does not exist in the filesystem." %
                          fn)
            return
        # Only height and width are needed; slicing also tolerates 2-D arrays.
        img_h, img_w = img.shape[:2]

        # Every feature is stored as one row vector of gradient_edge**2 values.
        feat_shape = (1, self.gradient_edge ** 2)

        # Positive samples: features of boxes sampled around each ground truth.
        bbs = ann_dict["bbs"]
        for obj in bbs:
            # NOTE(review): "pos_bbs" is overwritten for every object, so
            # after the loop it only holds the boxes of the last one.
            ann_dict["pos_bbs"], img_sizes_records = \
                self.ground_truth_bounding_box_sampling(obj["bb"])
            for idx, sampled_bb in enumerate(ann_dict["pos_bbs"]):
                grad = get_features(img,
                                    bb=sampled_bb,
                                    w=self.gradient_edge,
                                    h=self.gradient_edge)
                feat = np.reshape(grad, feat_shape)
                # Flip code 1 mirrors the patch horizontally.
                flipped_feat = np.reshape(cv2.flip(grad, 1), feat_shape)
                # features/labels are shared among threads: keep each pair of
                # appends atomic so both lists stay aligned.
                with self.features_labels_lock:
                    self.features.append(feat.astype(float))
                    self.features.append(flipped_feat.astype(float))
                    self.labels.append(POSITIVE_BB)
                    self.labels.append(POSITIVE_BB)
                # NOTE(review): one size record per sampled box although two
                # features are stored, and this append is not under the lock.
                self.pos_sizes_records.append(img_sizes_records[idx])

        # Negative samples: random boxes that do not overlap the ground truths
        # enough. The number of attempts is bounded so the loop always ends.
        max_iter = 100
        neg_counter = 0
        ann_dict["neg_bbs"] = []
        for _ in range(max_iter):
            if neg_counter >= self.num_negatives_bbs_for_image:
                break
            # random.randint includes both ends, so coordinates lie in
            # [1, img_w] x [1, img_h].
            # NOTE(review): zero-width/height boxes are possible - confirm
            # that get_features tolerates them.
            x1 = random.randint(1, img_w)
            y1 = random.randint(1, img_h)
            x2 = random.randint(1, img_w)
            y2 = random.randint(1, img_h)
            neg_bb_candidate = (min(x1, x2), min(y1, y2),
                                max(x1, x2), max(y1, y2))
            overlap = bounding_box_overlap_on_ground_truths(
                ground_truth_bbs=bbs, bb=neg_bb_candidate)
            if overlap >= 0.5:
                continue
            ann_dict["neg_bbs"].append(neg_bb_candidate)
            grad = get_features(img,
                                bb=neg_bb_candidate,
                                w=self.gradient_edge,
                                h=self.gradient_edge)
            feat = np.reshape(grad, feat_shape)
            # Shared lists again: append feature and label together.
            with self.features_labels_lock:
                # Cast like the positives so all stored features share a dtype.
                self.features.append(feat.astype(float))
                self.labels.append(NEGATIVE_BB)
            neg_counter += 1
Beispiel #3
0
    def second_stage_training(self,
                              wrapper_type=CUSTOM_LIBLINEAR_WRAPPER,
                              weights_fn=None):
        """Learn a per-size linear recalibration of the first-stage scores.

        A ``FirstStagePrediction`` is run (with ``nss=2``) on every image in
        ``self.annotations``. Each prediction is labelled ``1`` when its score
        from ``bounding_box_overlap_on_ground_truths`` exceeds 0.5 and ``-1``
        otherwise. For each size in ``self.scale_space_sizes`` a
        one-dimensional model ``2nd_stage_score = w1_s * 1st_stage_score +
        w0_s`` is then fitted with ``fit_svm`` (``C=100``).

        Images that cannot be read are skipped with a warning. Sizes that
        received no training sample are left out of the result.

        Args:
            wrapper_type: forwarded unchanged to ``fit_svm``.
            weights_fn: optional path. When given, the learned coefficients
                are written there as JSON; missing parent directories are
                created, and a warning is issued if the name does not end in
                ``.json``.

        Returns:
            ``(sizes_dict, coeffs)``. ``sizes_dict`` maps the size (as a
            string) to ``{"weight": float, "bias": float}``. ``coeffs`` is a
            ``(num_sizes, 2)`` array with the weight in column 0 and the bias
            in column 1, rows in ascending size order. The array is also
            stored in ``self.coeffs``.
        """
        print("Starting second stage training.")
        fstp = FirstStagePrediction(self.first_stage_weights,
                                    self.scale_space_sizes,
                                    edge=self.gradient_edge,
                                    base_log=self.base_log,
                                    min_edge_log=self.min_edge_log,
                                    edge_log_range=self.edge_log_range,
                                    num_win_psz=NUM_WIN_PSZ)

        # Per-size accumulators: size string -> (scores, labels).
        samples = dict()
        for sz in self.scale_space_sizes:
            samples["%s" % sz] = (list(), list())

        # First-stage detections are computed on the whole training set and
        # marked as correct or not according to their overlap with the
        # ground-truth boxes. The only feature is the first-stage score.
        print("Building second stage training training set...")
        for ann_dict in self.annotations.values():
            fn = os.path.join(ann_dict["path"], ann_dict["basename"])
            img = cv2.imread(fn)
            if img is None:
                # cv2.imread returns None for unreadable files; there is
                # nothing to predict on, so skip instead of crashing later.
                warnings.warn(
                    "The image %s does not exist in the filesystem." % fn)
                continue
            ground_truth_bbs_objs = ann_dict["bbs"]
            for pred_bbs, score, size in fstp.predict(img, nss=2):
                overlap = bounding_box_overlap_on_ground_truths(
                    ground_truth_bbs_objs, pred_bbs)
                scores, labels = samples["%s" % size]
                scores.append(score)
                labels.append(1 if overlap > 0.5 else -1)

        # Fit w1_s (weight) and w0_s (bias) independently for every size.
        print("Learning size-specific coefficients.")
        sizes_dict = dict()
        for size_key in sorted(samples, key=int):
            scores, raw_labels = samples[size_key]
            # Features are 1-D: one column holding the first-stage score.
            train = np.reshape(np.array(scores), (-1, 1))
            labels = np.array(raw_labels)
            # Original author's note: liblinear takes the first label it
            # encounters as the positive one, hence the split/pack regrouping
            # before fitting.
            Xp, Xn = self.split_positive_and_negative_features(train, labels)
            train, labels = self.pack_positive_negative_features(Xp, Xn)
            num_pos = np.sum((labels == 1).astype(int))
            print("Size %s: number training samples %s, number positive samples %s." % (
                size_key, train.shape[0], num_pos))
            if train.shape[0] == 0:
                # Nothing to fit: the size simply gets no entry in the result.
                print("Training set for size %s is missing. Skip this size." % size_key)
                continue
            weight, bias = fit_svm(train,
                                   labels,
                                   C=100,
                                   wrapper_type=wrapper_type)
            # weight is a one-element array because there is one feature.
            sizes_dict[size_key] = {
                "weight": float(weight[0]),
                "bias": float(bias),
            }

        if weights_fn is not None:
            basedir = os.path.dirname(weights_fn)
            # dirname is "" for a bare file name and os.makedirs("") raises.
            if basedir and not os.path.exists(basedir):
                os.makedirs(basedir)
            if not weights_fn.endswith(".json"):
                warnings.warn(
                    "The filename for saving second stage weights does not have a json extension!"
                )
            print("Saving second stage learning coefficients to file.")
            sizes_json = json.dumps(sizes_dict,
                                    sort_keys=True,
                                    indent=4,
                                    separators=(',', ': '))
            # The context manager closes the file even if the write fails.
            with open(weights_fn, "w") as f:
                f.write(sizes_json)

        print("Storing second stage learning coefficients in numpy array.")
        # Use a deterministic row order (ascending size, as in the training
        # loop) instead of the arbitrary iteration order of the dict.
        # NOTE(review): confirm consumers of self.coeffs expect this order.
        ordered_keys = sorted(sizes_dict, key=int)
        coeffs = np.zeros((len(ordered_keys), 2))
        for i, size_key in enumerate(ordered_keys):
            sz_dict = sizes_dict[size_key]
            coeffs[i, 0] = sz_dict["weight"]
            coeffs[i, 1] = sz_dict["bias"]

        self.coeffs = coeffs

        return sizes_dict, coeffs
 def second_stage_training(self, wrapper_type = CUSTOM_LIBLINEAR_WRAPPER, weights_fn = None):
     """Learn a per-size linear recalibration of the first-stage scores.

     A ``FirstStagePrediction`` is run (with ``nss=2``) on every image in
     ``self.annotations``. Each prediction is labelled ``1`` when its score
     from ``bounding_box_overlap_on_ground_truths`` exceeds 0.5 and ``-1``
     otherwise. For each size in ``self.scale_space_sizes`` a one-dimensional
     model ``2nd_stage_score = w1_s * 1st_stage_score + w0_s`` is then fitted
     with ``fit_svm`` (``C=100``).

     Images that cannot be read are skipped with a warning. Sizes that
     received no training sample are left out of the result.

     Args:
         wrapper_type: forwarded unchanged to ``fit_svm``.
         weights_fn: optional path. When given, the learned coefficients are
             written there as JSON; missing parent directories are created,
             and a warning is issued if the name does not end in ``.json``.

     Returns:
         ``(sizes_dict, coeffs)``. ``sizes_dict`` maps the size (as a string)
         to ``{"weight": float, "bias": float}``. ``coeffs`` is a
         ``(num_sizes, 2)`` array with the weight in column 0 and the bias in
         column 1, rows in ascending size order. The array is also stored in
         ``self.coeffs``.
     """
     print("Starting second stage training.")
     fstp = FirstStagePrediction(self.first_stage_weights, self.scale_space_sizes,
                                 edge = self.gradient_edge, base_log = self.base_log,
                                 min_edge_log = self.min_edge_log,
                                 edge_log_range = self.edge_log_range,
                                 num_win_psz = NUM_WIN_PSZ)

     # Per-size accumulators: size string -> (scores, labels).
     samples = dict()
     for sz in self.scale_space_sizes:
         samples["%s"%sz] = (list(), list())

     # First-stage detections are computed on the whole training set and
     # marked as correct or not according to their overlap with the
     # ground-truth boxes. The only feature is the first-stage score.
     print("Building second stage training training set...")
     for ann_dict in self.annotations.values():
         fn = os.path.join(ann_dict["path"], ann_dict["basename"])
         img = cv2.imread(fn)
         if img is None:
             # cv2.imread returns None for unreadable files; there is nothing
             # to predict on, so skip instead of crashing later.
             warnings.warn("The image %s does not exist in the filesystem."%fn)
             continue
         ground_truth_bbs_objs = ann_dict["bbs"]
         for pred_bbs, score, size in fstp.predict(img, nss = 2):
             overlap = bounding_box_overlap_on_ground_truths(ground_truth_bbs_objs, pred_bbs)
             scores, labels = samples["%s"%size]
             scores.append(score)
             labels.append(1 if overlap > 0.5 else -1)

     # Fit w1_s (weight) and w0_s (bias) independently for every size.
     print("Learning size-specific coefficients.")
     sizes_dict = dict()
     for size_key in sorted(samples, key=int):
         scores, raw_labels = samples[size_key]
         # Features are 1-D: one column holding the first-stage score.
         train = np.reshape(np.array(scores), (-1, 1))
         labels = np.array(raw_labels)
         # Original author's note: liblinear takes the first label it
         # encounters as the positive one, hence the split/pack regrouping
         # before fitting.
         Xp, Xn = self.split_positive_and_negative_features(train, labels)
         train, labels = self.pack_positive_negative_features(Xp, Xn)
         num_pos = np.sum((labels==1).astype(int))
         print("Size %s: number training samples %s, number positive samples %s."%(size_key, train.shape[0], num_pos))
         if train.shape[0]==0:
             # Nothing to fit: the size simply gets no entry in the result.
             print("Training set for size %s is missing. Skip this size." % size_key)
             continue
         weight, bias = fit_svm(train, labels, C = 100, wrapper_type = wrapper_type)
         # weight is a one-element array because there is one feature.
         sizes_dict[size_key] = {"weight": float(weight[0]), "bias": float(bias)}

     if weights_fn is not None:
         basedir = os.path.dirname(weights_fn)
         # dirname is "" for a bare file name and os.makedirs("") raises.
         if basedir and not os.path.exists(basedir):
             os.makedirs(basedir)
         if not weights_fn.endswith(".json"):
             warnings.warn("The filename for saving second stage weights does not have a json extension!")
         print("Saving second stage learning coefficients to file.")
         sizes_json = json.dumps(sizes_dict, sort_keys=True, indent=4, separators=(',', ': '))
         # The context manager closes the file even if the write fails.
         with open(weights_fn, "w") as f:
             f.write(sizes_json)

     print("Storing second stage learning coefficients in numpy array.")
     # Use a deterministic row order (ascending size, as in the training loop)
     # instead of the arbitrary iteration order of the dict.
     # NOTE(review): confirm consumers of self.coeffs expect this order.
     ordered_keys = sorted(sizes_dict, key=int)
     coeffs = np.zeros((len(ordered_keys), 2))
     for i, size_key in enumerate(ordered_keys):
         sz_dict = sizes_dict[size_key]
         coeffs[i,0] = sz_dict["weight"]
         coeffs[i,1] = sz_dict["bias"]

     self.coeffs = coeffs

     return sizes_dict, coeffs