def load_bbs_from_ann(data_path, n_images=-1):
    """Load ground-truth bounding boxes for the images in a folder.

    For every ``*.jpg`` file in ``data_path`` the annotation is read via
    ``get_annotation`` from the image path without its extension, and the
    boxes returned by ``get_bbs`` are converted from ``(xy, width, height)``
    to corner form.

    Args:
        data_path: directory holding the images and their annotations.
        n_images: maximum number of images to process. Any value below 1
            (the default is -1) means "process all images".

    Returns:
        dict mapping each image file name to an ``(n_boxes, 4)`` array with
        one ``[x, y, x + width, y + height]`` row per bounding box.
    """
    # Sorted so that "the first n_images" is reproducible: os.listdir()
    # returns entries in arbitrary order.
    jpg_train = sorted(
        img_f for img_f in os.listdir(data_path) if img_f.endswith('.jpg'))

    # if n_images is specified, then only look at the first n_images
    if n_images > 0:
        jpg_train = jpg_train[:n_images]

    bbs_gt_dict = {}
    for img_file in jpg_train:
        logger.debug("processing %s", img_file)

        # splitext only drops the final extension, so dots inside the base
        # name survive ("a.b.jpg" -> "a.b").
        ann_file = os.path.splitext(img_file)[0]
        ann_path = os.path.join(data_path, ann_file)
        annotation = get_annotation(ann_path)

        # get all bbs for this image
        bbs_gt_raw = get_bbs(annotation)
        bbs_gt = np.empty((len(bbs_gt_raw), 4))
        for kk, (xy, width, height) in enumerate(bbs_gt_raw):
            x, y = xy
            bbs_gt[kk, :] = [x, y, x + width, y + height]

        bbs_gt_dict[img_file] = bbs_gt

    return bbs_gt_dict
# Collect the Y (luma) channel crop of every annotated bounding box found in
# the train and test image lists, then allocate a zero-filled array large
# enough to hold the biggest crop in each dimension.
# NOTE(review): entries of jpg_test are also read from train_path -- confirm
# that both lists really live in the same folder.
for i, j in enumerate(jpg_train + jpg_test):
    try:
        im = scipy.misc.imread(os.path.join(train_path, j))
    except IOError:
        # unreadable image: report it and move on to the next file
        print "There was a problem reading the jpg: %s." % j
        continue
    # the rollaxis command rolls the last (-1) axis back until the start
    # do a colourspace conversion
    # (only the first three channels are used, so an alpha channel is dropped)
    im_y, im_i, im_q = colorsys.rgb_to_yiq(*np.rollaxis(im[..., :3], axis=-1))

    # the annotation path is the image name cut at its first '.'
    ann_file = j.split('.')[0]
    ann_path = ann_path = os.path.join(train_path, ann_file)
    annotation = get_annotation(ann_path)

    # get all bbs for this image
    bbs = get_bbs(annotation)
    for xy, width, height in bbs:
        x, y = xy
        # remember y is indexed first in image
        moth = im_y[y:(y + height), x:(x + width)]
        moth_list.append(moth)

n_moths = len(moth_list)
# largest crop width / height over all collected crops
widths = np.array([m.shape[1] for m in moth_list])
max_w = widths.max()
heights = np.array([m.shape[0] for m in moth_list])
max_h = heights.max()
# one (max_h, max_w) zero-filled slot per crop
moth_array = np.zeros((n_moths, max_h, max_w))
# (the body of this loop lies beyond the visible excerpt)
for i, m in enumerate(moth_list):
# Load one positive and one negative example image, draw the annotated
# bounding boxes over the positive one and save it as "example_pos.png".
pos_dir = '/mnt/data/datasets/bugs_annotated_2014/Good Images/Positive_Counts_All'
neg_dir = '/mnt/data/datasets/bugs_annotated_2014/Good Images/No_Counts_All'
# alternative positive examples:
# pos_name = '1471_39381.jpg'
# pos_name = '1088_26202.jpg'
# pos_name = '1227_35266.jpg'
pos_name = '1227_39869.jpg'
neg_name = '501_30706.jpg'

pos_path = os.path.join(pos_dir, pos_name)
neg_path = os.path.join(neg_dir, neg_name)

img_pos = imread(pos_path)
img_neg = imread(neg_path)

# The annotation path is the image path without its extension.
# os.path.splitext is used instead of str.strip('.jpg'): strip() removes any
# of the characters '.', 'j', 'p', 'g' from BOTH ends of the string, so it
# only worked by luck for names that do not start or end with those letters.
bbs_pos = get_bbs(get_annotation(os.path.splitext(pos_path)[0]))

plt.ioff()
fig = plt.figure()
ax = plt.subplot(1, 1, 1)
# A list (not a lazy map object) so the boxes can be iterated more than once
# regardless of the Python version.
annotate_bbs(ax, [xywh_to_x1y1x2y2(bb) for bb in bbs_pos])
plt.imshow(img_pos)
plt.axis('off')
plt.tight_layout()
plt.savefig(os.path.join(fig_dir, "example_pos.png"))

# TODO: make the boundaries tighter
# plot_margin = 0.25
# x0, x1, y0, y1 = plt.axis()
# plt.axis((x0 - plot_margin,
# Sort the annotated data set into two folders: items whose annotation holds
# at least one bounding box, and items whose annotation holds none.
DEBUG = False
dir_root = '/mnt/data/datasets/bugs_annotated_2014/Good Images'
dir_all = '/mnt/data/datasets/bugs_annotated_2014/Good Images/All'
dir_no_cnt_all = '/mnt/data/datasets/bugs_annotated_2014/Good Images/No_Counts_All'
dir_pos_cnt_all = '/mnt/data/datasets/bugs_annotated_2014/Good Images/Positive_Counts_All'

# make sure both destination folders exist
for out_dir in (dir_no_cnt_all, dir_pos_cnt_all):
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

# Every entry that is not a ".jpg" is treated as an annotation file; it is
# copied together with its "<name>.jpg" image into the matching folder.
for filename in os.listdir(dir_all):
    if filename.endswith(".jpg"):
        continue

    bbs = get_bbs(get_annotation(os.path.join(dir_all, filename)))
    if DEBUG:
        print(bbs)

    # at least one box -> positive folder, otherwise -> no-count folder
    dest_dir = dir_pos_cnt_all if len(bbs) else dir_no_cnt_all
    for name in (filename, filename + ".jpg"):
        shutil.copyfile(os.path.join(dir_all, name),
                        os.path.join(dest_dir, name))
def get_pos(
        data_path,
        target_height,
        target_width,
        flag_rescale=False,
        flag_multiscale=False,
        flag_rgb=True,
        detect_width_list=None,
        detect_height_list=None,
        flag_trans_aug=False,
        dist_trans_list=(-2, 0, 2),
):
    """Collect positive training examples cropped from annotated images.

    Every ``*.jpg`` in ``data_path`` is loaded and passed through
    ``grey_world``. Its annotation is read from the image path without the
    extension, and one crop is produced per valid bounding box.

    Args:
        data_path: directory holding the images and their annotations.
        target_height: height handed to the cropping helper.
        target_width: width handed to the cropping helper.
        flag_rescale: use ``crop_and_rescale`` (only when
            ``flag_multiscale`` is false).
        flag_multiscale: use ``crop_and_rescale_nearest``; takes precedence
            over ``flag_rescale``. With both flags false,
            ``crop_centered_box`` is used.
        flag_rgb: keep all channels; when false only the Y channel of the
            YIQ conversion is kept.
        detect_width_list: widths forwarded to ``crop_and_rescale_nearest``;
            ``None`` means ``[8, 16, 32, 64]``.
        detect_height_list: heights forwarded to
            ``crop_and_rescale_nearest``; ``None`` means ``[8, 16, 32, 64]``.
        flag_trans_aug: augment the boxes with ``augment_bbs_by_trans``.
        dist_trans_list: distances forwarded to ``augment_bbs_by_trans``.

    Returns:
        list with one crop per accepted bounding box.
    """
    # Fresh lists per call instead of mutable default arguments, which would
    # be shared between calls (and with the helper they are passed to).
    if detect_width_list is None:
        detect_width_list = [8, 16, 32, 64]
    if detect_height_list is None:
        detect_height_list = [8, 16, 32, 64]

    jpg_train = [f for f in os.listdir(data_path) if f.endswith('.jpg')]

    moth_resized_list = []
    for img_name in jpg_train:
        try:
            # NOTE: scipy.misc.imread was removed in SciPy 1.2; this call
            # needs an older SciPy until the reader is replaced.
            im = scipy.misc.imread(os.path.join(data_path, img_name))
        except IOError:
            logger.warning("There was a problem reading the jpg: %s.",
                           img_name)
            continue

        im = grey_world(im)
        if not flag_rgb:
            # Move the channel axis to the front so the three colour planes
            # unpack as separate arguments; keep only the Y (luma) plane.
            im, _, _ = colorsys.rgb_to_yiq(
                *np.rollaxis(im[..., :3], axis=-1))

        # splitext only drops the final extension ("a.b.jpg" -> "a.b")
        ann_path = os.path.join(data_path, os.path.splitext(img_name)[0])
        annotation = get_annotation(ann_path)

        # get all bbs for this image
        bbs = get_bbs(annotation)
        if flag_trans_aug:
            bbs = augment_bbs_by_trans(bbs, dist_trans_list)

        for xy, width, height in bbs:
            x, y = xy
            # The box must have positive size and lie inside the image
            # (y is indexed first).
            # NOTE(review): the strict '<' also rejects boxes that touch the
            # bottom/right border; kept as in the original -- confirm intent.
            values_with_in_range = (
                width > 0 and height > 0
                and y >= 0 and y + height < im.shape[0]
                and x >= 0 and x + width < im.shape[1]
            )
            if not values_with_in_range:
                logger.warning("Bad boundingbox, ignored: %s %s %s",
                               xy, width, height)
                continue

            if flag_multiscale:
                moth_resized = crop_and_rescale_nearest(
                    im, xy, width, height, target_width, target_height,
                    detect_width_list=detect_width_list,
                    detect_height_list=detect_height_list)
            elif flag_rescale:
                moth_resized = crop_and_rescale(
                    im, xy, width, height, target_width, target_height)
            else:
                moth_resized = crop_centered_box(
                    im, xy, width, height, target_width, target_height)
            moth_resized_list.append(moth_resized)

    return moth_resized_list
# For every training image: convert it to YIQ, cut the Y (luma) channel crop
# of each annotated bounding box, and keep both the raw crop and a version
# rescaled to (target_height, target_width). Finally flatten the rescaled
# crops into one row per moth.
for i, j in enumerate(jpg_train):
    try:
        # NOTE(review): the second positional argument of plt.imread is
        # `format`, not a file mode -- 'r' looks unintended; confirm.
        im = plt.imread(os.path.join(train_path, j), 'r')
    except IOError:
        print("There was a problem reading the jpg: %s." % j)
        continue
    # the rollaxis command rolls the last (-1) axis back until the start
    # do a colourspace conversion
    im_y, im_i, im_q = colorsys.rgb_to_yiq(*np.rollaxis(im[..., :3], axis=-1))

    # splitext only drops the final extension ("a.b.jpg" -> "a.b")
    ann_file = os.path.splitext(j)[0]
    ann_path = os.path.join(train_path, ann_file)
    annotation = get_annotation(ann_path)

    # get all bbs for this image
    bbs = get_bbs(annotation)
    for xy, width, height in bbs:
        x, y = xy
        # remember y is indexed first in image
        moth = im_y[y:(y + height), x:(x + width)]
        moths.append(moth)
        # Use the same target size as the reshape below; a hard-coded 32x32
        # here breaks the reshape as soon as the targets differ from 32.
        moth_resized = crop_and_rescale(im_y, xy, width, height,
                                        target_width=target_width,
                                        target_height=target_height)
        moths_resized.append(moth_resized)

n_moths = len(moths_resized)
# one flattened (target_height * target_width) row per rescaled crop
m = np.asarray(moths_resized).reshape((n_moths, target_height * target_width))
# Remove every item listed in list_rm together with its ext_img / ext_seg
# companion files.
for item in list_rm:
    os.remove(os.path.join(dir_all, item))
    os.remove(os.path.join(dir_all, item + ext_img))
    os.remove(os.path.join(dir_all, item + ext_seg))

# step 3, make statistics by looking at the annotation files
# The annotation name is the image name minus the ext_img suffix. The suffix
# is tested with endswith() and removed by slicing: str.strip(ext_img) would
# strip any of the suffix's *characters* from both ends of the name, and
# `ext_img in filename` would also match the suffix in the middle of a name.
list_all = [
    filename[:len(filename) - len(ext_img)]
    for filename in os.listdir(dir_all)
    if filename.endswith(ext_img)
]

# number of bounding boxes per annotation, aligned with list_all
list_len = [
    len(get_bbs(get_annotation(os.path.join(dir_all, name))))
    for name in list_all
]

# report statistics
counts = np.array(list_len)
num_withmoth = np.sum(counts > 0)
num_nomoth = np.sum(counts == 0)
print('')
print('Total:')
print_nums(list_len)

# distribute
# step sort, and then have probability of going to train and test
# (descending by box count, ties broken by name)
sorted_tuples = sorted(zip(list_len, list_all), reverse=True)
list_ann_sorted = [img for (num, img) in sorted_tuples]