Example #1
def build_referit_batches(setname, T, input_H, input_W):
    # data directory
    im_dir = '/data/ryli/text_objseg/exp-referit/referit-dataset/images/'
    mask_dir = '/data/ryli/text_objseg/exp-referit/referit-dataset/mask/'
    query_file = './data/referit/referit_query_' + setname + '.json'
    vocab_file = './data/vocabulary_referit.txt'

    # saving directory
    data_folder = './referit/' + setname + '_batch/'
    data_prefix = 'referit_' + setname
    if not os.path.isdir(data_folder):
        os.makedirs(data_folder)
    fp = open('./referit/trainval_list.txt', 'w')

    # load annotations
    query_dict = json.load(open(query_file))
    im_list = query_dict.keys()
    vocab_dict = text_processing.load_vocab_dict_from_file(vocab_file)

    # collect training samples
    samples = []
    for n_im, name in enumerate(im_list):
        im_name = name.split('_', 1)[0] + '.jpg'
        mask_name = name + '.mat'
        for sent in query_dict[name]:
            samples.append((im_name, mask_name, sent))

    # save batches to disk
    num_batch = len(samples)
    for n_batch in range(num_batch):
        print('saving batch %d / %d' % (n_batch + 1, num_batch))
        im_name, mask_name, sent = samples[n_batch]
        fp.write('%d\t%s%s\n' % (n_batch, im_dir, im_name))
        im = skimage.io.imread(im_dir + im_name)
        mask = load_gt_mask(mask_dir + mask_name).astype(np.float32)

        if 'train' in setname:
            im = skimage.img_as_ubyte(
                im_processing.resize_and_pad(im, input_H, input_W))
            mask = im_processing.resize_and_pad(mask, input_H, input_W)
        if im.ndim == 2:
            im = np.tile(im[:, :, np.newaxis], (1, 1, 3))

        text = text_processing.preprocess_sentence(sent, vocab_dict, T)

        np.savez(file=data_folder + data_prefix + '_' + str(n_batch) + '.npz',
                 text_batch=text,
                 im_batch=im,
                 mask_batch=(mask > 0),
                 sent_batch=[sent])
    fp.close()
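
A minimal usage sketch for the function above; the split name and the T/input_H/input_W values are assumptions, since the listing itself does not fix them:

# Hypothetical driver for build_referit_batches; 'trainval', T=20 and the
# 512x512 input size are assumed values, not ones fixed by the listing.
if __name__ == '__main__':
    build_referit_batches(setname='trainval', T=20, input_H=512, input_W=512)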
Example #2
imcrop_val = np.zeros((N, input_H, input_W, 3), dtype=np.float32)

num_im = len(imlist)
for n_im in range(num_im):
    print('testing image %d / %d' % (n_im, num_im))
    imname = imlist[n_im]

    # Extract visual features from all proposals
    im = skimage.io.imread(image_dir + imname)
    processed_im = skimage.img_as_ubyte(im_processing.resize_and_pad(im, input_H, input_W))
    if processed_im.ndim == 2:
        processed_im = np.tile(processed_im[:, :, np.newaxis], (1, 1, 3))

    imcrop_val[...] = processed_im.astype(np.float32) - segmodel.vgg_net.channel_mean
    for imcrop_name, _, description in flat_query_dict[imname]:
        mask = load_gt_mask(mask_dir + imcrop_name + '.mat').astype(np.float32)
        labels = (mask > 0)
        processed_labels = im_processing.resize_and_pad(mask, input_H, input_W) > 0

        text_seq_val[:, 0] = text_processing.preprocess_sentence(description, vocab_dict, T)
        scores_val = sess.run(scores, feed_dict={
                text_seq_batch  : text_seq_val,
                imcrop_batch    : imcrop_val
            })
        scores_val = np.squeeze(scores_val)

        # Evaluate the segmentation performance
        pred_raw = (scores_val >= score_thresh).astype(np.float32)
        predicts = im_processing.resize_and_crop(pred_raw, im.shape[0], im.shape[1])
        I, U = eval_tools.compute_mask_IU(predicts, labels)
        cum_I += I
        cum_U += U
Example #3

################################################################################
# Collect training samples
################################################################################

training_samples = []
num_imcrop = len(imcrop_list)
for n_imcrop in range(num_imcrop):
    if n_imcrop % 200 == 0: print('processing %d / %d' % (n_imcrop+1, num_imcrop))
    imcrop_name = imcrop_list[n_imcrop]

    # Image and mask
    imname = imcrop_name.split('_', 1)[0] + '.jpg'
    mask_name = imcrop_name + '.mat'
    im = skimage.io.imread(image_dir + imname)
    mask = load_gt_mask(mask_dir + mask_name).astype(np.float32)

    processed_im = skimage.img_as_ubyte(im_processing.resize_and_pad(im, input_H, input_W))
    if processed_im.ndim == 2:
        processed_im = processed_im[:, :, np.newaxis]
    processed_mask = im_processing.resize_and_pad(mask, input_H, input_W)
    subsampled_mask = skimage.transform.downscale_local_mean(processed_mask, (32, 32))

    labels_fine = (processed_mask > 0)
    labels_coarse = (subsampled_mask > 0)

    for description in query_dict[imcrop_name]:
        text_seq = text_processing.preprocess_sentence(description, vocab_dict, T)
        training_samples.append((processed_im, text_seq, labels_coarse, labels_fine))

# Shuffle the training instances
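
The listing cuts off at this comment; a minimal sketch of a shuffle that would fit here (the fixed seed and the variable names are assumptions):

import numpy as np

# Shuffle once with a fixed seed so the resulting batch order is reproducible.
np.random.seed(3)
perm_idx = np.random.permutation(len(training_samples))
shuffled_training_samples = [training_samples[n] for n in perm_idx]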
Example #4
def build_referit_batches(setname, T, input_H, input_W):
    # data directory
    im_dir = './data/referit/images/'
    mask_dir = './data/referit/mask/'
    query_file = './data/referit_query_' + setname + '.json'
    vocab_file = './data/vocabulary_spacy_referit.txt'

    # saving directory
    data_folder = './referit/' + setname + '_batch/'
    data_prefix = 'referit_' + setname
    if not os.path.isdir(data_folder):
        os.makedirs(data_folder)

    # load annotations
    query_dict = json.load(open(query_file))
    im_list = query_dict.keys()
    vocab_dict = text_processing.load_vocab_dict_from_file(vocab_file)

    # collect training samples
    samples = []
    for n_im, name in enumerate(im_list):
        im_name = name.split('_', 1)[0] + '.jpg'
        mask_name = name + '.mat'
        for sent in query_dict[name]:
            samples.append((im_name, mask_name, sent))

    # save batches to disk
    # spacy load
    nlp = spacy.load("en_core_web_sm")
    SENTENCE_SPLIT_REGEX = re.compile(r'(\W+)')
    num_batch = len(samples)
    valid = 0
    for n_batch in range(num_batch):
        print('saving batch %d / %d' % (n_batch + 1, num_batch))
        im_name, mask_name, sent = samples[n_batch]
        im = skimage.io.imread(im_dir + im_name)
        mask = load_gt_mask(mask_dir + mask_name).astype(np.float32)

        if 'train' in setname:
            im = skimage.img_as_ubyte(
                im_processing.resize_and_pad(im, input_H, input_W))
            mask = im_processing.resize_and_pad(mask, input_H, input_W)
        if im.ndim == 2:
            im = np.tile(im[:, :, np.newaxis], (1, 1, 3))

        sent = sent.lower()
        words = SENTENCE_SPLIT_REGEX.split(sent.strip())
        words = [w for w in words if len(w.strip()) > 0]
        # drop a trailing period and cap the sentence at 20 tokens
        if words and words[-1] == '.':
            words = words[:-1]
        if len(words) > 20:
            words = words[:20]
        n_sent = ' '.join(words)
        # Python 2 remnant in the original (n_sent.decode("utf-8") on a str);
        # in Python 3, str is already Unicode, so mirror the original filter
        # by skipping sentences that are not pure ASCII.
        if not n_sent.isascii():
            continue
        doc = nlp(n_sent)
        if len(doc) > 30:
            continue

        text, graph, height = text_processing.preprocess_spacy_sentence(
            doc, vocab_dict, T)

        np.savez(file=data_folder + data_prefix + '_' + str(valid) + '.npz',
                 text_batch=text,
                 im_batch=im,
                 mask_batch=(mask > 0),
                 sent_batch=[n_sent],
                 graph_batch=graph,
                 height_batch=np.array([height], dtype=np.int32))
        valid += 1
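
To sanity-check the output, one can read a saved archive back with np.load; the path below assumes setname='trainval' and simply mirrors the np.savez call above:

import numpy as np

# Load the first saved batch back (path mirrors the np.savez call above).
batch = np.load('./referit/trainval_batch/referit_trainval_0.npz',
                allow_pickle=True)
print(batch['im_batch'].shape)    # padded image, (input_H, input_W, 3)
print(batch['text_batch'].shape)  # encoded sentence
print(batch['sent_batch'][0])     # the preprocessed sentence string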
Example #5

imcrop_val = np.zeros((N, input_H, input_W, 3), dtype=np.float32)

num_im = len(imlist)
for n_im in range(num_im):
    print('testing image %d / %d' % (n_im, num_im))
    imname = imlist[n_im]

    # Extract visual features from all proposals
    im = skimage.io.imread(image_dir + imname)
    processed_im = skimage.img_as_ubyte(im_processing.resize_and_pad(im, input_H, input_W))
    if processed_im.ndim == 2:
        processed_im = np.tile(processed_im[:, :, np.newaxis], (1, 1, 3))

    imcrop_val[...] = processed_im.astype(np.float32) - segmodel.vgg_net.channel_mean
    for imcrop_name, _, description in flat_query_dict[imname]:
        mask = load_gt_mask(mask_dir + imcrop_name[:-4] + '.mat').astype(np.float32)
        labels = (mask > 0)
        processed_labels = im_processing.resize_and_pad(mask, input_H, input_W) > 0

        text_seq_val[:, 0] = text_processing.preprocess_sentence(description, vocab_dict, T)
        scores_val = sess.run(scores, feed_dict={
                text_seq_batch  : text_seq_val,
                imcrop_batch    : imcrop_val
            })
        scores_val = np.squeeze(scores_val)

        # Evaluate the segmentation performance
        pred_raw = (scores_val >= score_thresh).astype(np.float32)
        predicts = im_processing.resize_and_crop(pred_raw, im.shape[0], im.shape[1])
        I, U = eval_tools.compute_mask_IU(predicts, labels)
        cum_I += I
        cum_U += U
Example #6
text_seq_batch = np.zeros((T, N), dtype=np.int32)
imcrop_batch = np.zeros((N, input_H, input_W, 3), dtype=np.uint8)
label_coarse_batch = np.zeros((N, featmap_H, featmap_W, 1), dtype=bool)
label_fine_batch = np.zeros((N, input_H, input_W, 1), dtype=bool)

if not os.path.isdir(data_folder):
    os.mkdir(data_folder)
for n_batch in range(num_batch):
    print('saving batch %d / %d' % (n_batch + 1, num_batch))
    batch_begin = n_batch * N
    batch_end = (n_batch + 1) * N
    for n_sample in range(batch_begin, batch_end):
        imname, mask_name, description = shuffled_training_samples[n_sample]
        im = skimage.io.imread(image_dir + imname)
        mask = load_gt_mask(mask_dir + mask_name).astype(np.float32)

        processed_im = skimage.img_as_ubyte(
            im_processing.resize_and_pad(im, input_H, input_W))
        if processed_im.ndim == 2:
            processed_im = processed_im[:, :, np.newaxis]
        processed_mask = im_processing.resize_and_pad(mask, input_H, input_W)
        subsampled_mask = skimage.transform.downscale_local_mean(
            processed_mask, (8, 8))

        labels_fine = (processed_mask > 0)
        labels_coarse = (subsampled_mask > 0)

        text_seq = text_processing.preprocess_sentence(description, vocab_dict,
                                                       T)
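        # The listing stops here; a sketch of the usual continuation follows.
        # The slot assignment mirrors the array shapes above; the .npz field
        # names, the file pattern, and data_prefix are assumptions.
        idx = n_sample - batch_begin
        text_seq_batch[:, idx] = text_seq
        imcrop_batch[idx, ...] = processed_im
        label_coarse_batch[idx, ..., 0] = labels_coarse
        label_fine_batch[idx, ..., 0] = labels_fine

    # One .npz archive per batch.
    np.savez(file=data_folder + data_prefix + '_' + str(n_batch) + '.npz',
             text_seq_batch=text_seq_batch,
             imcrop_batch=imcrop_batch,
             label_coarse_batch=label_coarse_batch,
             label_fine_batch=label_fine_batch)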
Example #7

def inference():
    with open('./seg_model/test.prototxt', 'w') as f:
        f.write(str(seg_model.generate_model('val', test_config.N)))

    caffe.set_device(test_config.gpu_id)
    caffe.set_mode_gpu()

    # Load pretrained model
    net = caffe.Net('./seg_model/test.prototxt',
                    test_config.pretrained_model,
                    caffe.TEST)

    ################################################################################
    # Load annotations and bounding box proposals
    ################################################################################

    query_dict = json.load(open(test_config.query_file))
    bbox_dict = json.load(open(test_config.bbox_file))
    imcrop_dict = json.load(open(test_config.imcrop_file))
    imsize_dict = json.load(open(test_config.imsize_file))
    imlist = list({name.split('_', 1)[0] + '.jpg' for name in query_dict})
    vocab_dict = text_processing.load_vocab_dict_from_file(test_config.vocab_file)

    ################################################################################
    # Flatten the annotations
    ################################################################################

    flat_query_dict = {imname: [] for imname in imlist}
    for imname in imlist:
        this_imcrop_names = imcrop_dict[imname]
        for imcrop_name in this_imcrop_names:
            gt_bbox = bbox_dict[imcrop_name]
            if imcrop_name not in query_dict:
                continue
            this_descriptions = query_dict[imcrop_name]
            for description in this_descriptions:
                flat_query_dict[imname].append((imcrop_name, gt_bbox, description))

    ################################################################################
    # Testing
    ################################################################################

    cum_I, cum_U = 0.0, 0.0
    eval_seg_iou_list = [0.5, 0.6, 0.7, 0.8, 0.9]
    seg_correct = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
    seg_total = 0.0

    # Pre-allocate arrays
    imcrop_val = np.zeros((test_config.N, test_config.input_H, test_config.input_W, 3), dtype=np.float32)
    text_seq_val = np.zeros((test_config.T, test_config.N), dtype=np.int32)

    num_im = len(imlist)
    for n_im in tqdm(range(num_im)):
        imname = imlist[n_im]

        # Extract visual features from all proposals
        im = skimage.io.imread(test_config.image_dir + imname)
        processed_im = skimage.img_as_ubyte(
            im_processing.resize_and_pad(im, test_config.input_H, test_config.input_W))
        if processed_im.ndim == 2:
            processed_im = np.tile(processed_im[:, :, np.newaxis], (1, 1, 3))

        imcrop_val[...] = processed_im.astype(np.float32) - seg_model.channel_mean
        imcrop_val_trans = imcrop_val.transpose((0, 3, 1, 2))

        # Extract spatial features
        spatial_val = processing_tools.generate_spatial_batch(test_config.N,
                                                              test_config.featmap_H,
                                                              test_config.featmap_W)
        spatial_val = spatial_val.transpose((0, 3, 1, 2))

        for imcrop_name, _, description in flat_query_dict[imname]:
            mask = load_gt_mask(test_config.mask_dir + imcrop_name + '.mat').astype(np.float32)
            labels = (mask > 0)
            processed_labels = im_processing.resize_and_pad(mask, test_config.input_H, test_config.input_W)
            processed_labels = processed_labels > 0

            text_seq_val[:, 0] = text_processing.preprocess_sentence(description, vocab_dict, test_config.T)
            cont_val = text_processing.create_cont(text_seq_val)

            net.blobs['language'].data[...] = text_seq_val
            net.blobs['cont'].data[...] = cont_val
            net.blobs['image'].data[...] = imcrop_val_trans
            net.blobs['spatial'].data[...] = spatial_val
            net.blobs['label'].data[...] = processed_labels

            net.forward()
            upscores = net.blobs['upscores'].data[...].copy()
            upscores = np.squeeze(upscores)

            # Evaluate the segmentation performance
            pred_raw = (upscores >= test_config.score_thresh).astype(np.float32)
            predicts = im_processing.resize_and_crop(pred_raw, im.shape[0], im.shape[1])
            I, U = eval_tools.compute_mask_IU(predicts, labels)
            cum_I += I
            cum_U += U
            this_IoU = I / float(U)
            for n_eval_iou in range(len(eval_seg_iou_list)):
                eval_seg_iou = eval_seg_iou_list[n_eval_iou]
                seg_correct[n_eval_iou] += (this_IoU >= eval_seg_iou)
            seg_total += 1


    # Print results
    print('Final results on the whole test set')
    result_str = ''
    for n_eval_iou in range(len(eval_seg_iou_list)):
        result_str += 'precision@%s = %f\n' % \
            (str(eval_seg_iou_list[n_eval_iou]), seg_correct[n_eval_iou]/seg_total)
    result_str += 'overall IoU = %f\n' % (cum_I/cum_U)
    print(result_str)
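
For reference, the two printed metrics are simple reductions over the per-sample intersections I and unions U; a tiny self-contained check with made-up numbers:

import numpy as np

# Hypothetical per-sample intersections and unions.
I = np.array([50.0, 30.0, 10.0])
U = np.array([60.0, 80.0, 100.0])
iou = I / U                  # per-sample IoU
print((iou >= 0.5).mean())   # precision@0.5: fraction of samples with IoU >= 0.5
print(I.sum() / U.sum())     # overall IoU: cum_I / cum_U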
Example #8
def inference(config):
    with open('./seg_model/test.prototxt', 'w') as f:
        f.write(str(seg_model.generate_model('val', config)))

    caffe.set_device(config.gpu_id)
    caffe.set_mode_gpu()

    # Load pretrained model
    net = caffe.Net('./seg_model/test.prototxt',
                    config.pretrained_model,
                    caffe.TEST)

    ################################################################################
    # Load annotations and bounding box proposals
    ################################################################################

    query_dict = json.load(open(config.query_file))
    bbox_dict = json.load(open(config.bbox_file))
    imcrop_dict = json.load(open(config.imcrop_file))
    imsize_dict = json.load(open(config.imsize_file))
    imlist = list({name.split('_', 1)[0] + '.jpg' for name in query_dict})
    vocab_dict = text_processing.load_vocab_dict_from_file(config.vocab_file)

    ################################################################################
    # Flatten the annotations
    ################################################################################

    flat_query_dict = {imname: [] for imname in imlist}
    for imname in imlist:
        this_imcrop_names = imcrop_dict[imname]
        for imcrop_name in this_imcrop_names:
            gt_bbox = bbox_dict[imcrop_name]
            if imcrop_name not in query_dict:
                continue
            this_descriptions = query_dict[imcrop_name]
            for description in this_descriptions:
                flat_query_dict[imname].append((imcrop_name, gt_bbox, description))

    ################################################################################
    # Testing
    ################################################################################

    cum_I, cum_U = 0.0, 0.0
    eval_seg_iou_list = [0.5, 0.6, 0.7, 0.8, 0.9]
    seg_correct = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
    seg_total = 0.0

    # Pre-allocate arrays
    imcrop_val = np.zeros((config.N, config.input_H, config.input_W, 3), dtype=np.float32)
    text_seq_val = np.zeros((config.T, config.N), dtype=np.int32)

    num_im = len(imlist)
    for n_im in tqdm(range(num_im)):
        imname = imlist[n_im]

        # Extract visual features from all proposals
        im = skimage.io.imread(config.image_dir + imname)
        processed_im = skimage.img_as_ubyte(
            im_processing.resize_and_pad(im, config.input_H, config.input_W))
        if processed_im.ndim == 2:
            processed_im = np.tile(processed_im[:, :, np.newaxis], (1, 1, 3))

        imcrop_val[...] = processed_im.astype(np.float32) - seg_model.channel_mean
        imcrop_val_trans = imcrop_val.transpose((0, 3, 1, 2))

        # Extract spatial features
        spatial_val = processing_tools.generate_spatial_batch(config.N,
                                                              config.featmap_H,
                                                              config.featmap_W)
        spatial_val = spatial_val.transpose((0, 3, 1, 2))

        for imcrop_name, _, description in flat_query_dict[imname]:
            mask = load_gt_mask(config.mask_dir + imcrop_name + '.mat').astype(np.float32)
            labels = (mask > 0)
            processed_labels = im_processing.resize_and_pad(mask, config.input_H, config.input_W)
            processed_labels = processed_labels > 0

            text_seq_val[:, 0] = text_processing.preprocess_sentence(description, vocab_dict, config.T)
            cont_val = text_processing.create_cont(text_seq_val)

            net.blobs['language'].data[...] = text_seq_val
            net.blobs['cont'].data[...] = cont_val
            net.blobs['image'].data[...] = imcrop_val_trans
            net.blobs['spatial'].data[...] = spatial_val
            net.blobs['label'].data[...] = processed_labels

            net.forward()
            upscores = net.blobs['upscores'].data[...].copy()
            upscores = np.squeeze(upscores)

            # Evaluate the segmentation performance
            pred_raw = (upscores >= config.score_thresh).astype(np.float32)
            predicts = im_processing.resize_and_crop(pred_raw, im.shape[0], im.shape[1])
            I, U = eval_tools.compute_mask_IU(predicts, labels)
            cum_I += I
            cum_U += U
            this_IoU = I / float(U)
            for n_eval_iou in range(len(eval_seg_iou_list)):
                eval_seg_iou = eval_seg_iou_list[n_eval_iou]
                seg_correct[n_eval_iou] += (this_IoU >= eval_seg_iou)
            seg_total += 1


    # Print results
    print('Final results on the whole test set')
    result_str = ''
    for n_eval_iou in range(len(eval_seg_iou_list)):
        result_str += 'precision@%s = %f\n' % \
            (str(eval_seg_iou_list[n_eval_iou]), seg_correct[n_eval_iou]/seg_total)
    result_str += 'overall IoU = %f\n' % (cum_I/cum_U)
    print(result_str)
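
None of the listings show eval_tools.compute_mask_IU itself; a plausible implementation, consistent with how I and U are consumed above (an assumption, not necessarily the repo's exact code):

import numpy as np

def compute_mask_IU(pred, gt):
    # Intersection and union of two same-shaped binary masks.
    assert pred.shape == gt.shape
    I = np.sum(np.logical_and(pred, gt))
    U = np.sum(np.logical_or(pred, gt))
    return I, U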