Example #1
def count_acc(is_crf=True):
    average_acc = 0
    whole_acc = 0
    # box count for the current image
    box_num_img = 0
    # box count over all test images
    box_num_imgs = 0
    # correctly predicted boxes in the current image
    right_num_img = 0
    # correctly predicted boxes over all test images
    right_num_imgs = 0
    # number of test images
    num_imgs = 0
    if is_crf:
        pred_dir = crf_pred_dir
    else:
        pred_dir = test_pred_dir
    with open(test_dir, 'r') as f_test_txt:
        for img_name in f_test_txt:
            img_name = img_name.rstrip()
            num_imgs += 1
            img = Image.open(img_dir + img_name + '.jpg')
            img_w = img.size[0]
            img_h = img.size[1]
            pred_mask = Image.open(pred_dir + img_name + '.png')
            pred_mask = np.array(pred_mask)

            annpath = (anns_dir + img_name).rstrip() + '.xml'
            DOMTree = parse(annpath)
            collection = DOMTree.documentElement
            objects = collection.getElementsByTagName('object')
            if len(objects) > 0:
                for object_ in objects:
                    bndbox = object_.getElementsByTagName('bndbox')[0]
                    xmin = bndbox.getElementsByTagName(
                        'xmin')[0].childNodes[0].nodeValue
                    ymin = bndbox.getElementsByTagName(
                        'ymin')[0].childNodes[0].nodeValue
                    xmax = bndbox.getElementsByTagName(
                        'xmax')[0].childNodes[0].nodeValue
                    ymax = bndbox.getElementsByTagName(
                        'ymax')[0].childNodes[0].nodeValue
                    xmin, ymin, xmax, ymax = get_int_coor(
                        xmin, ymin, xmax, ymax, img_w, img_h)
                    # scale the box from the original image size to the
                    # 1632 x 1216 prediction-mask size
                    xmin = int(xmin * 1632 / img_w)
                    xmax = int(xmax * 1632 / img_w)
                    ymin = int(ymin * 1216 / img_h)
                    ymax = int(ymax * 1216 / img_h)
                    box_num_img += 1
                    box_num_imgs += 1
                    if np.sum(pred_mask[ymin:ymax, xmin:xmax]) != 0:
                        right_num_img += 1
            # skip images without annotated boxes to avoid division by zero
            if box_num_img > 0:
                average_acc += (right_num_img / box_num_img)
            right_num_imgs += right_num_img
            right_num_img = 0
            box_num_img = 0

            print('{} / {}'.format(num_imgs, img_name), end='\r')

        average_acc = average_acc / num_imgs
        whole_acc = right_num_imgs / box_num_imgs

    return average_acc, whole_acc
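count_acc relies on module-level names defined elsewhere in the original script (test_dir, img_dir, anns_dir, test_pred_dir, crf_pred_dir, get_int_coor) plus the usual imports. A minimal setup sketch, with placeholder paths and an assumed get_int_coor that casts and clips the XML string coordinates, might look like this:

# Minimal setup sketch for count_acc; the paths below are placeholders and
# get_int_coor is an assumed helper, not the original implementation.
import numpy as np
from PIL import Image
from xml.dom.minidom import parse

img_dir = 'data/JPEGImages/'            # placeholder paths
anns_dir = 'data/Annotations/'
test_dir = 'data/ImageSets/test.txt'    # one image name per line
test_pred_dir = 'predictions/'
crf_pred_dir = 'predictions_crf/'

def get_int_coor(xmin, ymin, xmax, ymax, img_w, img_h):
    # cast the XML string coordinates to int and clip them to the image bounds
    xmin = max(0, int(float(xmin)))
    ymin = max(0, int(float(ymin)))
    xmax = min(img_w, int(float(xmax)))
    ymax = min(img_h, int(float(ymax)))
    return xmin, ymin, xmax, ymax

With these in place, average_acc, whole_acc = count_acc(is_crf=False) scores the raw predictions, while is_crf=True scores the CRF-refined ones.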
Example #2
def update_label(predict_model, device):
    """Load train_pairs.txt info to check for missed-diagnosis objects."""
    # ann_info: [image name, image name_num_class_id.png, bbox_ymin,
    #            bbox_xmin, bbox_ymax, bbox_xmax, class_name]
    print('start to update...')
    ANNS = {}
    with open(dataset_pairs_dir, 'r') as da_p_txt:
        for ann_info in da_p_txt:
            # split the line into its fields
            ann_info = ann_info.rstrip().split('###')
            if ann_info[0].rstrip() not in ANNS:
                ANNS[ann_info[0].rstrip()] = []
            ANNS[ann_info[0].rstrip()].append(ann_info)

    # print('loading model...')
    # predict_model = torch.load(model_path)
    # device = torch.device('cuda: 3' if torch.cuda.is_available() else 'cpu')
    # predict_model = predict_model.to(device)
    predict_model.eval()

    # define the same image transformations as used for training
    transformations = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    update_num = 0
    print('updating progress:')
    with open(dataset_txt_dir, 'r') as da_txt:
        # Don't call da_txt.readlines() here to count the lines:
        # it exhausts the file iterator, so the for loop below would get
        # nothing (this cost a two-hour debugging session).
        # lines = len(da_txt.readlines())
        for update_name in da_txt:
            update_num += 1
            # RGB image; img.size is (W, H)
            img = Image.open(os.path.join(img_dir, update_name).rstrip() + '.jpg')
            img_w = img.size[0]
            img_h = img.size[1]
            # img = img.resize((1632, 1216), Image.LANCZOS)
            # drone data uses size 1024
            img = img.resize((1024, 1024), Image.LANCZOS)
            input_ = transformations(img).float()
            # add a batch dimension: [3, H, W] --> [1, 3, H, W]
            input_ = input_.unsqueeze_(0)
            input_ = input_.to(device)
            # pred = predict_model(input_).view([1216, 1632]).data.cpu()
            # drone data uses size 1024
            pred = predict_model(input_).view([1024, 1024]).data.cpu()
            # pred.shape: [H, W]
            pred = np.array(pred)
            """CRF-smoothed prediction"""
            crf_pred = run_densecrf(img_dir, update_name, pred)

            """start to update"""
            last_label = Image.open(os.path.join(label_dir, update_name).rstrip() + '.png')
            # last_label = last_label.resize((1632, 1216), Image.NEAREST)
            # drone data uses size 1024
            last_label = last_label.resize((1024, 1024), Image.NEAREST)
            last_label = np.array(last_label)

            # predicted label with false-positive segments removed
            updated_label = crf_pred + last_label
            updated_label = np.where((updated_label == 2), 1, 0).astype('uint8')
            # add back missed-diagnosis regions; for now the whole box
            # is used as the missed-diagnosis segment
            info4check = ANNS[update_name.rstrip()]
            # masks_missed = np.zeros((1216, 1632), np.uint8)
            # drone data uses size 1024
            masks_missed = np.zeros((1024, 1024), np.uint8)
            for box4check in info4check:
                xmin = box4check[3]
                ymin = box4check[2]
                xmax = box4check[5]
                ymax = box4check[4]
                xmin, ymin, xmax, ymax = get_int_coor(xmin, ymin,
                                                      xmax, ymax, img_w, img_h)
                # xmin = int(xmin * 1632 / img_w)
                # xmax = int(xmax * 1632 / img_w)
                # ymin = int(ymin * 1216 / img_h)
                # ymax = int(ymax * 1216 / img_h)
                # drone data uses size 1024
                xmin = int(xmin * 1024 / img_w)
                xmax = int(xmax * 1024 / img_w)
                ymin = int(ymin * 1024 / img_h)
                ymax = int(ymax * 1024 / img_h)
                if np.sum(updated_label[ymin:ymax, xmin:xmax]) == 0:
                    masks_missed[ymin:ymax, xmin:xmax] = 1

            updated_label = updated_label + masks_missed
            # note: scipy.misc.toimage was removed from newer SciPy releases,
            # so this call needs an older SciPy (with Pillow installed)
            scipy.misc.toimage(updated_label, cmin=0, cmax=255, pal=colors_map,
                               mode='P').save(os.path.join(label_dir,
                                              update_name).rstrip() + '.png')
            print('{} / {}'.format(update_num, len(ANNS)), end='\r')
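The comment inside update_label about readlines() points at real Python behaviour: a file object is an iterator, so reading it once leaves nothing for the for loop that follows. A small self-contained demonstration (the file name is arbitrary):

# Why len(f.readlines()) before the loop breaks update_label's iteration.
with open('demo.txt', 'w') as f:
    f.write('img_001\nimg_002\n')

with open('demo.txt', 'r') as f:
    n_lines = len(f.readlines())        # consumes the whole file
    leftover = [name for name in f]     # iterator already exhausted -> []

with open('demo.txt', 'r') as f:
    names = [name.rstrip() for name in f]   # iterate the file directly instead

print(n_lines, leftover, names)         # 2 [] ['img_001', 'img_002']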
Example #3
def grabcut(img_name):
    masks = []
    # one image may contain several objects that need GrabCut
    for i, ann_info in enumerate(ANNS[img_name], start=1):
        img = cv.imread((img_dir + img_name).rstrip() + '.jpg')
        grab_name = ann_info[1]
        xmin = ann_info[3]
        ymin = ann_info[2]
        xmax = ann_info[5]
        ymax = ann_info[4]
        """get integer box coordinates"""
        img_w = img.shape[1]
        img_h = img.shape[0]
        xmin, ymin, xmax, ymax = get_int_coor(xmin, ymin, xmax, ymax, img_w, img_h)
        box_w = xmax - xmin
        box_h = ymax - ymin
        # cv.grabCut parameters
        mask = np.zeros(img.shape[:2], np.uint8)
        # rect tuple: (x, y, width, height)
        rect = (xmin, ymin, box_w, box_h)
        bgdModel = np.zeros((1, 65), np.float64)
        fgdModel = np.zeros((1, 65), np.float64)
        # small bbox: use the box itself as the mask
        if box_w * box_h < MINI_AREA:
            mask[ymin:ymax, xmin:xmax] = 1
            img_mask = mask
        # bbox covering the whole image (one object box equals the image area):
        # shrink the rect so GrabCut still sees some background
        elif box_w * box_h == img.shape[1] * img.shape[0]:
            rect = [RECT_SHRINK, RECT_SHRINK, box_w - RECT_SHRINK * 2, box_h - RECT_SHRINK * 2]
            cv.grabCut(img, mask, rect, bgdModel, fgdModel, ITER_NUM, cv.GC_INIT_WITH_RECT)
            # astype('uint8') keeps the pixel values in range [0, 255]
            img_mask = np.where((mask == 0) | (mask == 2), 0, 1).astype('uint8')
        # normal bbox:
        else:
            cv.grabCut(img, mask, rect, bgdModel, fgdModel, ITER_NUM, cv.GC_INIT_WITH_RECT)
            img_mask = np.where((mask == 0) | (mask == 2), 0, 1).astype('uint8')
            # if GrabCut returns only background (it happens in this dataset),
            # keep every non-background pixel instead
            if np.sum(img_mask) == 0:
                img_mask = np.where((mask == 0), 0, 1).astype('uint8')
            # compute the IoU between the GrabCut mask and the box mask;
            # if the GrabCut region is too small, reset it to the box mask
            box_mask = np.zeros((img.shape[0], img.shape[1]))
            box_mask[ymin:ymax, xmin:xmax] = 1
            sum_area = box_mask + img_mask
            intersection = np.where((sum_area == 2), 1, 0).astype('uint8')
            union = np.where((sum_area == 0), 0, 1).astype('uint8')
            IOU = np.sum(intersection) / np.sum(union)
            if IOU <= IOU_THRESHOLD:
                img_mask = box_mask
        # convert to RGB so the masks can be drawn on the image later
        img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
        masks.append([img_mask, grab_name, rect])

    # number of objects processed for this image
    num_object = len(masks)
    """resolve overlaps between objects and fix the labels"""
    masks.sort(key=lambda mask: np.sum(mask[0]), reverse=True)
    for j in range(num_object):
        for k in range(j + 1, num_object):
            masks[j][0] = masks[j][0] - masks[k][0]
        masks[j][0] = np.where((masks[j][0] == 1), 1, 0).astype('uint8')
        """get the class id from the file name"""
        grab_name = masks[j][1]
        class_id = grab_name.split('_')[-1]
        class_id = int(class_id.split('.')[0])

        # set the mask values to class_id
        masks[j][0] = np.where((masks[j][0] == 1), class_id, 0).astype('uint8')
        # save grabcut_inst (one object of the image)
        scipy.misc.toimage(masks[j][0], cmin=0, cmax=255, pal=tbvoc_info.colors_map,
                           mode='P').save((grabcut_dir).rstrip() + masks[j][1])

    """merge masks"""
    # allocate an array of the image size
    mask_ = np.zeros(img.shape[:2])
    for mask in masks:
        mask_ = mask_ + mask[0]
    # save the segmentation label (all objects of the image)
    scipy.misc.toimage(mask_, cmin=0, cmax=255, pal=tbvoc_info.colors_map,
                       mode='P').save((segmentation_label_dir + img_name).rstrip() + '.png')

    """create a figure with masks and bboxes drawn on the image"""
    fig = plt.figure()

    # convert to inches (dpi: dots per inch)
    W = img.shape[1] / float(fig.get_dpi())
    H = img.shape[0] / float(fig.get_dpi())
    # set the figure size
    fig.set_size_inches(W, H)

    for mask in masks:
        rect = mask[2]
        mask = mask[0]
        color = tbvoc_info.colors[np.amax(mask)]
        # add a channel dimension: mask[H, W, 1]
        mask = mask[:, :, np.newaxis]
        # draw the mask on the image (RGB)
        for c in range(3):
            img[:, :, c] = np.where((mask[:, :, 0] != 0),
                                    img[:, :, c] * 0.2 + 0.8 * color[c], img[:, :, c])
        # compute the bbox coordinates
        # reference: https://www.cnblogs.com/xiaopengli/p/8058408.html
        # axes coordinates are relative to the figure
        left = rect[0] / img.shape[1]
        bottom = 1 - (rect[1] + rect[3]) / img.shape[0]
        ax_w = (rect[0] + rect[2]) / img.shape[1] - left
        ax_h = 1 - rect[1] / img.shape[0] - bottom
        # draw the bbox as an empty axes frame
        ax = fig.add_axes([left, bottom, ax_w, ax_h])
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        ax.patch.set_fill(False)
        ax.patch.set_linewidth(5)
        ax.patch.set_color('b')
        # add a non-resampled image to the figure
        plt.figimage(img)
    # save img_grabcuts (bbox + mask)
    fig.savefig((img_grabcuts_dir + img_name).rstrip() + '.png')
    """
    plt.cla():   clear the active axes, leave the others untouched
    plt.clf():   clear all axes in the figure without closing the figure window
    plt.close(): close the figure window
    """
    plt.cla()
    plt.clf()
    plt.close()
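grabcut reads its boxes from a module-level ANNS dictionary with the same layout as in Example #2. A hedged driver sketch, assuming the same train_pairs.txt format and '###' field separator used there:

# Hypothetical driver for grabcut(); ANNS layout follows Example #2:
# [image name, image name_num_class_id.png, ymin, xmin, ymax, xmax, class_name]
ANNS = {}
with open(dataset_pairs_dir, 'r') as da_p_txt:
    for ann_info in da_p_txt:
        ann_info = ann_info.rstrip().split('###')
        ANNS.setdefault(ann_info[0].rstrip(), []).append(ann_info)

for num, img_name in enumerate(ANNS, start=1):
    grabcut(img_name)
    print('{} / {}'.format(num, len(ANNS)), end='\r')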