コード例 #1
0
ファイル: model.py プロジェクト: rpmcruz/avito
def read_files(dtype):
    tic()
    files = sorted(os.listdir('../out'))
    files = [f for f in files
             if f.startswith('features-') and f.endswith('-%s.csv' % dtype)]
    names, X = zip(*map(read_file, files))
    toc('read %s data' % dtype)
    return names, np.concatenate(X, 1)
コード例 #2
0
ファイル: extract.py プロジェクト: rpmcruz/avito
def extract(info_filename, pairs_filename, mode):
    info_filename = os.path.join('../data', info_filename)
    pairs_filename = os.path.join('../data', pairs_filename)

    tic()
    info_df = pd.read_csv(
        info_filename,
        dtype={'itemID': int, 'categoryID': int, 'price': float},
        usecols=(0, 1, 6, 7, 8, 9, 10), index_col=0)
    info_df['line'] = np.arange(len(info_df), dtype=int)
    toc('info file')

    info_reader = MyCSVReader(info_filename)
    toc('info reader')

    cols = (0, 1) if mode == 'train' else (1, 2)
    pairs = np.genfromtxt(pairs_filename, int, delimiter=',', skip_header=1,
                          usecols=cols)
    toc('pairs file')

    # transforma ItemID em linhas do ficheiro CSV e da matriz info
    a = info_df.ix[pairs[:, 0]]['line']
    b = info_df.ix[pairs[:, 1]]['line']
    pairs_lines = np.c_[a, b]
    toc('pairs lines')

    params = (info_filename, info_reader, info_df, pairs_lines)
    modules = [module[:-3] for module in sorted(os.listdir('features'))
               if module.startswith('extract-')]
    csvs = ['../out/features-%s-%s.csv' % (module[8:], mode)
            for module in modules]

    # create features from modules that have been created or changed
    #pool = multiprocessing.Pool(multiprocessing.cpu_count()/2)
    #res = []
    for module, csv in itertools.izip(modules, csvs):
        #res.append(pool.apply_async(sync_extract, (module, csv, params)))
        sync_extract(module, csv, params)
    #for r in res:
    #    r.get()

    # remove whatever has been created by extiguish modules
    vestiges = [os.path.join('../out', f) for f in os.listdir('../out')
                if f.startswith('features-') and f.endswith('-%s.csv' % mode)]
    for v in vestiges:
        if v not in csvs:
            print 'removing old %s...' % v
            os.remove(v)
コード例 #3
0
ファイル: extract.py プロジェクト: rpmcruz/avito
def sync_extract(module, csv, params):
    create = not os.path.exists(csv)
    if not create:
        m1 = os.path.getmtime(csv)
        m2 = os.path.getmtime('features/' + module + '.py')
        create = m2 > m1
    if create:
        tic()
        i = importlib.import_module('features.' + module)
        X, names = i.fn(*params)
        toc(module[8:])
        if len(X):
            if len(X[0].shape) == 1:
                X = [x[:, np.newaxis] for x in X]
            X = np.concatenate(X, 1)
            assert X.shape[1] == len(names)
            #names = ['"' + name + '"' for name in names]
            header = ','.join(names)
            fmt = '%d' if X.dtype == int else '%.6f'
            np.savetxt(csv, X, fmt, delimiter=',', header=header, comments='')
コード例 #4
0
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = "resnet_v1_101_fpn_dcn_rcnn"  if not args.rfcn_only else "resnet_v1_101_fpn_rcnn"
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
               'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
               'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
               'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
               'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
               'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
               'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
    # test
    # find all videos
    video_path = "../../tmp"#"../../aic2018/track1/track1_videos"
    video_files = sorted([ x for x in os.listdir(video_path) if x.endswith(".mp4")])
    save_path = "../../tmp/output"#"../../aic2018/track1/output"
    if not os.path.isdir(save_path):
        os.makedirs(save_path)
    
    print("processing {} videos...".format(len(video_files)))
    pbar = tqdm(total=len(video_files))
    for vf in video_files:
        vid = imageio.get_reader(os.path.join(video_path, vf),'ffmpeg')
        data = []
        for idx, im in enumerate(vid):
            if idx == 0:
                #assert os.path.exists(im_path + im_name), ('%s does not exist'.format(im_path + im_name))
                #im = cv2.imread(im_path + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
                target_size = config.SCALES[0][0]
                max_size = config.SCALES[0][1]
                im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
                im_tensor = transform(im, config.network.PIXEL_MEANS)
                im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
                data.append({'data': im_tensor, 'im_info': im_info})
            else:
                break
                #data.append({'data': None, 'im_info': None})
        
        # get predictor
        data_names = ['data', 'im_info']
        label_names = []
        data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
        max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
        provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
        provide_label = [None for i in xrange(len(data))]

        print("hhhhh")
        print(provide_data, provide_label)
        print("hhhhh")  

        arg_params, aux_params = load_param(cur_path + '/../model/demo_model/' + ('fpn_dcn_coco' if not args.rfcn_only else 'fpn_coco'), 0, process=True)

        #print(type(arg_params), type(aux_params))

        predictor = Predictor(sym, data_names, label_names,
                              context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                              provide_data=provide_data, provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
        nms = gpu_nms_wrapper(config.TEST.NMS, 0)

        print("successfully load model")
        
        vout = []
        # write to video
        writer = skvideo.io.FFmpegWriter(os.path.join(save_path, vf.replace(".mp4","_out.mp4")), outputdict={'-vcodec': 'libx264', '-b': '300000000'})
        for frame_idx, im in enumerate(vid):
            #im = cv2.imread(im_path + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            im_original = im.copy()
            
            target_size = config.SCALES[0][0]
            max_size = config.SCALES[0][1]
            im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)

            data_idx = [{"data": im_tensor, "im_info": im_info}]
            data_idx = [[mx.nd.array(data_idx[i][name]) for name in data_names] for i in xrange(len(data_idx))]
            data_batch = mx.io.DataBatch(data=[data_idx[0]], label=[], pad=0, index=idx,
                                         provide_data=[[(k, v.shape) for k, v in zip(data_names, data_idx[0])]],
                                         provide_label=[None])

            scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

            tic()
            scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
            boxes = boxes[0].astype('f')
            scores = scores[0].astype('f')
            dets_nms = []
            num_dets = 0
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                cls_dets = cls_dets[keep, :]
                cls_dets = cls_dets[cls_dets[:, -1] > 0.65, :]
                dets_nms.append(cls_dets)
                num_dets += cls_dets.shape[0]
            
            print 'testing {} the {} th frame at {:.4f}s, detections {}'.format(vf, frame_idx, toc(), num_dets)
            # save results
            #im = cv2.imread(im_path + im_name)
            #im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            #im_bbox = show_boxes(im, dets_nms, classes, 1)
            #cv2.imwrite(im_path + im_name.replace(".jpg", "_bbox.jpg"), im_bbox)
            save_im, outputs = show_boxes(im_original, dets_nms, classes, 1, False)
            #cv2.imwrite(os.path.join(save_path, "{}_{}.jpg".format(vf.replace(".mp4", ""), str(frame_idx).zfill(5))), save_im)
            writer.writeFrame(save_im)
            
            for out in outputs:
                vout.append([frame_idx] + out)
        
        # save the whole video detection into pickle file
        writer.close()
        with open(os.path.join(save_path, vf.replace(".mp4", "_detect.pkl")), "wb") as f:
            pickle.dump(vout, f, protocol=2)
        pbar.update(1)
        
    pbar.close()    
    print 'done'
コード例 #5
0
# -*- coding: utf-8 -*-

import sys
sys.dont_write_bytecode = True
import os
from utils.tictoc import tic, toc
import pickle
import numpy as np
import pandas as pd
from scipy import stats

print 'load items info...'
tic()
Xinfo = pd.read_csv('../../data/ItemInfo_train.csv', index_col=0,
                    usecols=[0, 1, 6, 7, 8, 9, 10])
toc()
print 'load items pairs...'
tic()
Xpair = pd.read_csv('../../data/ItemPairs_train.csv', usecols=[0, 1, 2])
toc()

# idxmap is an efficient mapping between item-id and row index
# we could also use Xinfo.ix[indices], but this approach seems
# slightly faster
tic()
print 'load items mapping...'
if os.path.exists('idxmap.pickle'):
    with open('idxmap.pickle', 'rb') as f:
        idxmap = pickle.load(f)
else:
    lastid = Xinfo.index[-1]
コード例 #6
0
def main():
    # get symbol
    pprint.pprint(config)
    #config.symbol = "resnet_v1_101_fpn_dcn_rcnn"  if not args.rfcn_only else "resnet_v1_101_fpn_rcnn"
    config.symbol = "resnet_v1_101_fpn_dcn_rcnn"
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 5
    classes = ["car", "bus", "van", "others"]

    # load demo videos
    im_path = '../../aic2018/track1/images/'
    image_names = [
        x for x in os.listdir('../../aic2018/track1/images/')
        if (x.endswith(".jpg") and (x.startswith("9_1") or x.startswith("9_1"))
            ) and not x.endswith("_bbox.jpg")
    ]
    data = []
    for idx, im_name in enumerate(image_names[:1]):
        if idx == 0:
            assert os.path.exists(im_path + im_name), (
                '%s does not exist'.format(im_path + im_name))
            im = cv2.imread(im_path + im_name,
                            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            target_size = config.SCALES[0][0]
            max_size = config.SCALES[0][1]
            im, im_scale = resize(im,
                                  target_size,
                                  max_size,
                                  stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            im_info = np.array(
                [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                dtype=np.float32)
            data.append({'data': im_tensor, 'im_info': im_info})
        else:
            data.append({'data': None, 'im_info': None})

    print(data)

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[0][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    # what does provide_data and provide_label work for?
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[0])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    ## load parameters
    arg_params, aux_params = load_param(cur_path + '/../model/' + 'fpn_detrac',
                                        1,
                                        process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    print("successfully load model")

    # find all videos
    video_path = "../../tmp"
    video_files = [x for x in os.listdir(video_path) if x.endswith(".mp4")]
    save_path = "../../tmp/output"
    if not os.path.isdir(save_path):
        os.makedirs(save_path)

    print("processing {} videos...".format(len(video_files)))
    pbar = tqdm(total=len(video_files))
    for vf in video_files:
        vid = imageio.get_reader(os.path.join(video_path, vf), 'ffmpeg')
        vout = []
        for frame_idx, im in enumerate(vid):
            #im = cv2.imread(im_path + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            target_size = config.SCALES[0][0]
            max_size = config.SCALES[0][1]
            im, im_scale = resize(im,
                                  target_size,
                                  max_size,
                                  stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            im_info = np.array(
                [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                dtype=np.float32)

            data_idx = [{"data": im_tensor, "im_info": im_info}]
            data_idx = [[
                mx.nd.array(data_idx[i][name]) for name in data_names
            ] for i in xrange(len(data_idx))]
            data_batch = mx.io.DataBatch(
                data=[data_idx[0]],
                label=[],
                pad=0,
                index=idx,
                provide_data=[[(k, v.shape)
                               for k, v in zip(data_names, data_idx[0])]],
                provide_label=[None])

            scales = [
                data_batch.data[i][1].asnumpy()[0, 2]
                for i in xrange(len(data_batch.data))
            ]

            tic()
            scores, boxes, data_dict = im_detect(predictor, data_batch,
                                                 data_names, scales, config)
            boxes = boxes[0].astype('f')
            scores = scores[0].astype('f')
            dets_nms = []
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                cls_boxes = boxes[:,
                                  4:8] if config.CLASS_AGNOSTIC else boxes[:,
                                                                           j *
                                                                           4:
                                                                           (j +
                                                                            1
                                                                            ) *
                                                                           4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                cls_dets = cls_dets[keep, :]
                cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
                dets_nms.append(cls_dets)

            print 'testing {} the {} th frame at {:.4f}s, detections {}'.format(
                vf, frame_idx, toc(), len(dets_nms))
            # save results
            #im = cv2.imread(im_path + im_name)
            #im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            #im_bbox = show_boxes(im, dets_nms, classes, 1)
            #cv2.imwrite(im_path + im_name.replace(".jpg", "_bbox.jpg"), im_bbox)
            save_im, outputs = show_boxes(im, dets_nms, classes, 1)
            #cv2.imwrite(os.path.join(save_path, "{}_{}.jpg".format(vf.replace(".mp4", ""), str(frame_idx).zfill(5))), save_im)

            for out in outputs:
                vout.append([frame_idx] + out)

        # save the whole video detection into pickle file
        with open(os.path.join(save_path, vf.replace(".mp4", ".pkl")),
                  "wb") as f:
            pickle.dump(vout, f, protocol=2)
        pbar.update(1)

    pbar.close()
    print 'done'
コード例 #7
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("indir",
                        type=lambda s: unicode(s, 'utf8'),
                        help="Directory containing list of images")
    parser.add_argument("outfile",
                        type=lambda s: unicode(s, 'utf8'),
                        help="Path to write predictions")
    parser.add_argument("-d",
                        "--device",
                        type=int,
                        default=0,
                        help="Device ID to use")
    args = parser.parse_args()
    params = vars(args)

    # ---------------------------------------------------------- Read config
    ctx_id = [int(i) for i in config.gpus.split(',')]
    pprint.pprint(config)
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]
    config['gpus'] = str(params['device'])

    # ---------------------------------------------------------- Load Images
    image_path_list = []
    data = []
    scale_factor = 1.0
    img_dir = osp.abspath(params['indir'])
    det_thresh = 0.7

    # Load abs paths of images
    for f in sorted(os.listdir(img_dir)):
        _, f_ext = osp.splitext(f)
        if f_ext in ['.jpg', '.png', '.jpeg']:
            f_path = osp.join(img_dir, f)
            image_path_list.append(f_path)

    print 'Loading {} images into memory...'.format(len(image_path_list))

    for image_path in image_path_list:
        im = cv2.imread(image_path,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        height, width = im.shape[:2]
        im = cv2.resize(
            im, (int(scale_factor * width), int(scale_factor * height)))
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    print 'Loaded {} images'.format(len(image_path_list))

    # ---------------------------------------------------------- Predict
    predictions = []

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(
        '/BS/orekondy2/work/opt/FCIS/model/fcis_coco', 0, process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(ctx_id[0])],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    # warm up
    for i in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[0])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales,
                               config)

    # test
    for idx, image_path in enumerate(image_path_list):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores, boxes, masks, data_dict = im_detect(predictor, data_batch,
                                                    data_names, scales, config)
        im_shapes = [
            data_batch.data[i][0].shape[2:4]
            for i in xrange(len(data_batch.data))
        ]

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in xrange(num_classes)]
            all_masks = [[] for _ in xrange(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                try:
                    if config.CLASS_AGNOSTIC:
                        cls_boxes = boxes[0][indexes, :]
                    else:
                        raise Exception()
                except:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
            im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
            im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
            print(im_height, im_width)
            boxes = clip_boxes(boxes[0], (im_height, im_width))
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes, scores[0], num_classes, 100, im_width, im_height,
                config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                config.BINARY_THRESH, ctx_id[0])

            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [
                result_masks[j][:, 0, :, :] for j in range(1, num_classes)
            ]
        print '{} testing {} {:.4f}s'.format(idx, image_path, toc())
        # visualize
        for i in xrange(len(dets)):
            keep = np.where(dets[i][:, -1] > det_thresh)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]
        im = cv2.imread(image_path_list[idx])
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        org_height, org_width = cv2.imread(image_path_list[idx]).shape[:2]
        # im = cv2.resize(im,(int(scale_factor*org_width), int(scale_factor*org_height)))
        """
        visualize all detections in one image
        :param im_array: [b=1 c h w] in rgb
        :param detections: [ numpy.ndarray([[x1 y1 x2 y2 score]]) for j in classes ]
        :param class_names: list of names in imdb
        :param scale: visualize the scaled image
        :return:
        """
        detections = dets
        class_names = classes
        cfg = config
        scale = 1.0

        person_idx = class_names.index('person')
        dets = detections[person_idx]
        msks = masks[person_idx]

        for mask_idx, (det, msk) in enumerate(zip(dets, msks)):
            inst_arr = np.zeros_like(im[:, :, 0])  # Create a 2D W x H array
            bbox = det[:4] * scale
            cod = bbox.astype(int)
            if im[cod[1]:cod[3], cod[0]:cod[2], 0].size > 0:
                msk = cv2.resize(
                    msk, im[cod[1]:cod[3] + 1, cod[0]:cod[2] + 1, 0].T.shape)
                bimsk = (msk >= cfg.BINARY_THRESH).astype('uint8')

                # ------- Create bit-mask for this instance
                inst_arr[cod[1]:cod[3] + 1, cod[0]:cod[2] +
                         1] = bimsk  # Add thresholded binary mask
                rs_inst_arr = scipy.misc.imresize(inst_arr,
                                                  (org_height, org_width))
                rle = mask.encode(np.asfortranarray(rs_inst_arr))

                predictions.append({
                    'image_path': image_path,
                    'label': 'person',
                    'segmentation': rle,
                    'bbox': bbox.tolist(),
                    'score': det[-1],
                })

                del msk
                del bimsk
                del rs_inst_arr

    print 'Created {} predictions'.format(len(predictions))

    # ---------------------------------------------------------- Write output
    with open(params['outfile'], 'wb') as wf:
        json.dump(predictions, wf, indent=2)
コード例 #8
0
def main(video_file):
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    arg_params, aux_params = load_param('./output/rfcn/road_obj/road_train_all/all/' + 'rfcn_road', 19, process=True)

    # set up class names; Don't count the background in, even we are treat the background as label '0'
    num_classes = 4
    classes = ['vehicle', 'pedestrian', 'cyclist', 'traffic lights']

    cap = cv2.VideoCapture(video_path)
    fps = math.floor(cap.get(5))
    # based on testing, this code process every frame takes around 0.25s. So my interval take 0.25s ~= 7frames
    fps = 8
    while (cap.isOpened()):
        frame_id = cap.get(1)
        ret, frame = cap.read()
        if frame_id % fps != 0:
            # print('Frame ID: {}'.format(str(frame_id)))
            cv2.imshow('video', frame)
            continue
        tic()
        data = []
        target_size = config.SCALES[0][1]
        max_size = config.SCALES[0][1]
        frame, im_scale = resize(frame, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(frame, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

        # get predictor
        data_names = ['data', 'im_info']
        label_names = []
        data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
        # print('Debug: [data] shape: {}, cont: {}'.format(type(data), data))
        max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
        # print('Debug: [max_data_shape] shape: {}, cont: {}'.format(type(max_data_shape), max_data_shape))
        provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
        # print('Debug: [provide_data] shape: {}, cont: {}'.format(type(provide_data), provide_data))
        provide_label = [None for i in xrange(len(data))]
        # print('Debug: [provide_label] shape: {}, cont: {}'.format(type(provide_label), provide_label))
        predictor = Predictor(sym, data_names, label_names,
                              context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                              provide_data=provide_data, provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
        nms = gpu_nms_wrapper(config.TEST.NMS, 0)

        # Process video frame
        image_names=['frame']
        for idx, _ in enumerate(image_names):
            data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                         provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                         provide_label=[None])
            scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
            # print('Debug: [scales] cont: {}'.format(scales))

            scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
            boxes = boxes[0].astype('f')
            scores = scores[0].astype('f')
            dets_nms = []
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                cls_dets = cls_dets[keep, :]
                cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
                dets_nms.append(cls_dets)

            frame_with_bbox = draw_bbox_on_frame(frame, dets_nms, classes, scale=scales[0])
        cv2.imshow('video', frame_with_bbox)
        print 'Processing frame {} in {:.4f}s'.format(frame_id, toc())
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
    print 'done'
コード例 #9
0
ファイル: demo.py プロジェクト: xueluli/Accel
def main():

    # settings
    num_classes = 19
    snip_len = 30
    version = str(args.version)
    interv = args.interval
    num_ex = args.num_ex
    avg_acc = args.avg_acc

    # validate params
    if version not in ['18', '34', '50', '101']:
        raise ValueError(
            "Invalid Accel version '%s' - must be one of Accel-{18,34,50,101}"
            % version)
    if interv < 1:
        raise ValueError("Invalid interval %d - must be >=1" % interv)
    if num_ex < 1:
        raise ValueError("Invalid num_ex %d - must be >=1" % num_ex)

    # get symbol
    pprint.pprint(config)
    config.symbol = 'accel_' + version
    model1 = '/../model/rfcn_dff_flownet_vid'
    model2 = '/../model/accel-' + version
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    key_sym = sym_instance.get_key_test_symbol(config)
    cur_sym = sym_instance.get_cur_test_symbol(config)

    path_demo_data = '/ebs/Accel/data/cityscapes/'
    path_demo_labels = '/ebs/Accel/data/cityscapes/'
    if path_demo_data == '' or path_demo_labels == '':
        raise ValueError("Must set path to demo data + labels")

    # load demo data
    image_names = sorted(
        glob.glob(path_demo_data + 'leftImg8bit_sequence/val/frankfurt/*.png'))
    image_names += sorted(
        glob.glob(path_demo_data + 'leftImg8bit_sequence/val/lindau/*.png'))
    image_names += sorted(
        glob.glob(path_demo_data + 'leftImg8bit_sequence/val/munster/*.png'))
    image_names = image_names[:snip_len * num_ex]
    label_files = sorted(
        glob.glob(path_demo_labels + 'gtFine/val/frankfurt/*trainIds.png'))
    label_files += sorted(
        glob.glob(path_demo_labels + 'gtFine/val/lindau/*trainIds.png'))
    label_files += sorted(
        glob.glob(path_demo_labels + 'gtFine/val/munster/*trainIds.png'))
    output_dir = cur_path + '/../demo/deeplab_dff/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = interv

    #
    lb_pos = 19
    image_names_trunc = []
    for i in range(num_ex):
        snip_pos = i * snip_len
        if avg_acc:
            offset = i % interv
        else:
            offset = interv - 1
        start_pos = lb_pos - offset
        image_names_trunc.extend(image_names[snip_pos + start_pos:snip_pos +
                                             start_pos + interv])
    image_names = image_names_trunc

    data = []
    key_im_tensor = None
    prev_im_tensor = None
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        if idx % key_frame_interval == 0:
            key_im_tensor = im_tensor
        if prev_im_tensor is None:
            prev_im_tensor = im_tensor
        data.append({
            'data':
            im_tensor,
            'im_info':
            im_info,
            'data_key':
            prev_im_tensor,
            'feat_key':
            np.zeros((1, config.network.DFF_FEAT_DIM, 1, 1))
        })
        prev_im_tensor = im_tensor

    # get predictor
    data_names = ['data', 'data_key', 'feat_key']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[
        ('data', (1, 3, max([v[0] for v in config.SCALES]),
                  max([v[1] for v in config.SCALES]))),
        ('data_key', (1, 3, max([v[0] for v in config.SCALES]),
                      max([v[1] for v in config.SCALES]))),
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]

    arg_params, aux_params = load_param(cur_path + model1, 0, process=True)
    arg_params_dcn, aux_params_dcn = load_param(cur_path + model2,
                                                0,
                                                process=True)
    arg_params.update(arg_params_dcn)
    aux_params.update(aux_params_dcn)
    key_predictor = Predictor(key_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    cur_predictor = Predictor(cur_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[j]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[j])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        if j % key_frame_interval == 0:
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output['croped_score_output'],
                                  axis=1).asnumpy() for output in output_all
            ]
        else:
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            output_all, feat = im_segment(cur_predictor, data_batch)
            output_key = 'croped_score_output' if version == '101' else 'correction_output'
            output_all = [
                mx.ndarray.argmax(output[output_key], axis=1).asnumpy()
                for output in output_all
            ]

    print "warmup done"
    # test
    time = 0
    count = 0
    hist = np.zeros((num_classes, num_classes))
    lb_idx = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        if idx % key_frame_interval == 0:
            print '\n\nframe {} (key)'.format(idx)
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output['croped_score_output'],
                                  axis=1).asnumpy() for output in output_all
            ]
        else:
            print '\nframe {} (intermediate)'.format(idx)
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            output_all, feat = im_segment(cur_predictor, data_batch)
            output_key = 'croped_score_output' if version == '101' else 'correction_output'
            output_all = [
                mx.ndarray.argmax(output[output_key], axis=1).asnumpy()
                for output in output_all
            ]

        elapsed = toc()
        time += elapsed
        count += 1
        print 'testing {} {:.4f}s [{:.4f}s]'.format(im_name, elapsed,
                                                    time / count)

        pred = np.uint8(np.squeeze(output_all))
        segmentation_result = Image.fromarray(pred)
        pallete = getpallete(256)
        segmentation_result.putpalette(pallete)
        _, im_filename = os.path.split(im_name)
        segmentation_result.save(output_dir + '/seg_' + im_filename)

        # compute accuracy
        label = None

        _, lb_filename = os.path.split(label_files[lb_idx])
        im_comps = im_filename.split('_')
        lb_comps = lb_filename.split('_')
        # check if annotation available for frame
        if im_comps[1] == lb_comps[1] and im_comps[2] == lb_comps[2]:
            print 'label {}'.format(lb_filename)
            label = np.asarray(Image.open(label_files[lb_idx]))
            if lb_idx < len(label_files) - 1:
                lb_idx += 1

        if label is not None:
            curr_hist = fast_hist(pred.flatten(), label.flatten(), num_classes)
            hist += curr_hist
            print 'mIoU {mIoU:.3f}'.format(
                mIoU=round(np.nanmean(per_class_iu(curr_hist)) * 100, 2))
            print '(cum) mIoU {mIoU:.3f}'.format(
                mIoU=round(np.nanmean(per_class_iu(hist)) * 100, 2))

    ious = per_class_iu(hist) * 100
    print ' '.join('{:.03f}'.format(i) for i in ious)
    print '===> final mIoU {mIoU:.3f}'.format(mIoU=round(np.nanmean(ious), 2))

    print 'done'
コード例 #10
0
ファイル: demo.py プロジェクト: xtanitfy/Deep-Feature-Flow
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    model = '/../model/rfcn_vid'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_test_symbol(config)

    # set up class names
    num_classes = 31
    classes = ['airplane', 'antelope', 'bear', 'bicycle',
               'bird', 'bus', 'car', 'cattle',
               'dog', 'domestic_cat', 'elephant', 'fox',
               'giant_panda', 'hamster', 'horse', 'lion',
               'lizard', 'monkey', 'motorcycle', 'rabbit',
               'red_panda', 'sheep', 'snake', 'squirrel',
               'tiger', 'train', 'turtle', 'watercraft',
               'whale', 'zebra']

    # load demo data
    image_names = glob.glob(cur_path + '/../demo/ILSVRC2015_val_00007010/*.JPEG')
    output_dir = cur_path + '/../demo/rfcn/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    #

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})


    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)

    # test
    time = 0
    count = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        time += toc()
        count += 1
        print 'testing {} {:.4f}s'.format(im_name, time/count)

        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)

        # visualize
        im = cv2.imread(im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        # show_boxes(im, dets_nms, classes, 1)
        out_im = draw_boxes(im, dets_nms, classes, 1)
        _, filename = os.path.split(im_name)
        cv2.imwrite(output_dir + filename,out_im)

    print 'done'
コード例 #11
0
def process_image_fun(imagesPath=None,
                      fileOp=None,
                      vis=None,
                      model_params_list=None,
                      count=0):
    # init rfcn dcn detect model (mxnet)
    # model_params_list = init_detect_model()

    # num_classes = RFCN_DCN_CONFIG['num_classes']  # 0 is background,
    classes = RFCN_DCN_CONFIG['num_classes_name_list']
    min_threshold = min(list(
        RFCN_DCN_CONFIG['need_label_thresholds'].values()))

    im_name = imagesPath
    all_can_read_image = []
    data = []
    all_can_read_image.append(im_name)
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    im, im_scale = resize(im_name,
                          target_size,
                          max_size,
                          stride=config.network.IMAGE_STRIDE)
    im_tensor = transform(im, config.network.PIXEL_MEANS)
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                       dtype=np.float32)
    data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]

    predictor = Predictor(model_params_list[0],
                          data_names,
                          label_names,
                          context=[mx.gpu(1)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=model_params_list[1],
                          aux_params=model_params_list[2])
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    for idx, im_name in enumerate(all_can_read_image):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:,
                              4:8] if config.CLASS_AGNOSTIC else boxes[:, j *
                                                                       4:(j +
                                                                          1) *
                                                                       4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > min_threshold, :]
            dets_nms.append(cls_dets)
        #print('testing {} {:.4f}s'.format(im_name, toc()))
        im = show_boxes_write_rg(im=im_name,
                                 dets=dets_nms,
                                 classes=classes,
                                 scale=1,
                                 vis=vis,
                                 fileOp=fileOp,
                                 count=count)
    return im
コード例 #12
0
ファイル: segbaby.py プロジェクト: Mschikay/FCIS-babycam
    def Seg(self):
        for i in xrange(2):
            data_batch = mx.io.DataBatch(
                data=[self.data],
                label=[],
                pad=0,
                index=0,
                provide_data=[[(k, v.shape)
                               for k, v in zip(self.data_names, self.data)]],
                provide_label=[None])
            scales = [
                data_batch.data[i][1].asnumpy()[0, 2]
                for i in xrange(len(data_batch.data))
            ]
            _, _, _, _ = im_detect(self.predictor, data_batch, self.data_names,
                                   scales, config)

        data_batch = mx.io.DataBatch(
            data=[self.data],
            label=[],
            pad=0,
            index=0,
            provide_data=[[(k, v.shape)
                           for k, v in zip(self.data_names, self.data)]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores, boxes, masks, data_dict = im_detect(self.predictor, data_batch,
                                                    self.data_names, scales,
                                                    config)
        #print masks #right
        im_shapes = [
            data_batch.data[i][0].shape[2:4]
            for i in xrange(len(data_batch.data))
        ]

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in xrange(self.num_classes)]
            all_masks = [[] for _ in xrange(self.num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, self.num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                try:
                    if config.CLASS_AGNOSTIC:
                        cls_boxes = boxes[0][indexes, :]
                    else:
                        raise Exception()
                except:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, self.num_classes)]
            masks = [all_masks[j] for j in range(1, self.num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
            im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
            im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
            print(im_height, im_width)
            boxes = clip_boxes(boxes[0], (im_height, im_width))
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes, scores[0], self.num_classes, 100, im_width,
                im_height, config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                config.BINARY_THRESH, self.ctx_id[0])

            dets = [result_dets[j] for j in range(1, self.num_classes)]
            masks = [
                result_masks[j][:, 0, :, :]
                for j in range(1, self.num_classes)
            ]

        for i in xrange(1, len(dets)):
            keep = np.where(dets[i][:, -1] > 1)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]

        keep = np.where(dets[0][:, -1] > 0.8)
        dets[0] = dets[0][keep]
        masks[0] = masks[0][keep]

        newmask = show_masks(self.fg, dets, masks, self.classes,
                             config)  #!!!!!!!! wrong mask
        self.result = newmask
        return newmask
コード例 #13
0
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_deeplab_dcn' if not args.deeplab_only else 'resnet_v1_101_deeplab'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 19

    # load demo data
    image_names = [
        'frankfurt_000001_073088_leftImg8bit.png',
        'lindau_000024_000019_leftImg8bit.png'
    ]
    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../demo/' + im_name), (
            '%s does not exist'.format('../demo/' + im_name))
        im = cv2.imread(cur_path + '/../demo/' + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data']
    label_names = ['softmax_label']
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in range(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in range(len(data))]
    provide_label = [None for i in range(len(data))]
    arg_params, aux_params = load_param(
        cur_path + '/../model/' +
        ('deeplab_dcn_cityscapes'
         if not args.deeplab_only else 'deeplab_cityscapes'),
        0,
        process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    # warm up
    for j in range(2):
        data_batch = mx.io.DataBatch(data=[data[0]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[0])
                                     ]],
                                     provide_label=[None])
        output_all = predictor.predict(data_batch)
        output_all = [
            mx.ndarray.argmax(output['softmax_output'], axis=1).asnumpy()
            for output in output_all
        ]

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])

        tic()
        output_all = predictor.predict(data_batch)
        output_all = [
            mx.ndarray.argmax(output['softmax_output'], axis=1).asnumpy()
            for output in output_all
        ]
        pallete = getpallete(256)

        segmentation_result = np.uint8(np.squeeze(output_all))
        segmentation_result = Image.fromarray(segmentation_result)
        segmentation_result.putpalette(pallete)
        print('testing {} {:.4f}s'.format(im_name, toc()))
        pure_im_name, ext_im_name = os.path.splitext(im_name)
        segmentation_result.save(cur_path + '/../demo/seg_' + pure_im_name +
                                 '.png')
        # visualize
        im_raw = cv2.imread(cur_path + '/../demo/' + im_name)
        seg_res = cv2.imread(cur_path + '/../demo/seg_' + pure_im_name +
                             '.png')
        cv2.imshow('Raw Image', im_raw)
        cv2.imshow('segmentation_result', seg_res)
        cv2.waitKey(0)
    print('done')
コード例 #14
0
ファイル: demo.py プロジェクト: pistachio0812/LSTS
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'impression_network_dynamic_offset_sparse'
    model = '/../local_run_output/impression_dynamic_offset-lr-10000-times-neighbor-4-dense-4'
    first_sym_instance = eval(config.symbol + '.' + config.symbol)()
    key_sym_instance = eval(config.symbol + '.' + config.symbol)()
    cur_sym_instance = eval(config.symbol + '.' + config.symbol)()

    first_sym = first_sym_instance.get_first_test_symbol_impression(config)
    key_sym = key_sym_instance.get_key_test_symbol_impression(config)
    cur_sym = cur_sym_instance.get_cur_test_symbol_impression(config)

    # set up class names
    num_classes = 31
    classes = [
        'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
        'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
        'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit',
        'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle',
        'watercraft', 'whale', 'zebra'
    ]

    # load demo data
    image_names = glob.glob(cur_path +
                            '/../demo/ILSVRC2015_val_00011005/*.JPEG')
    output_dir = cur_path + '/../demo/motion-prior-output-00011005/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = 10
    image_names.sort()
    data = []
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        if idx % key_frame_interval == 0:
            if idx == 0:
                data_oldkey = im_tensor.copy()
                data_newkey = im_tensor.copy()
                data_cur = im_tensor.copy()
            else:
                data_oldkey = data_newkey.copy()
                data_newkey = im_tensor
        else:
            data_cur = im_tensor
        shape = im_tensor.shape
        infer_height = int(np.ceil(shape[2] / 16.0))
        infer_width = int(np.ceil(shape[3] / 16.0))
        data.append({
            'data_oldkey':
            data_oldkey,
            'data_newkey':
            data_newkey,
            'data_cur':
            data_cur,
            'im_info':
            im_info,
            'impression':
            np.zeros(
                (1, config.network.DFF_FEAT_DIM, infer_height, infer_width)),
            'key_feat_task':
            np.zeros(
                (1, config.network.DFF_FEAT_DIM, infer_height, infer_width))
        })

    # get predictor
    data_names = [
        'data_oldkey', 'data_cur', 'data_newkey', 'im_info', 'impression',
        'key_feat_task'
    ]
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[
        ('data_oldkey', (1, 3, max([v[0] for v in config.SCALES]),
                         max([v[1] for v in config.SCALES]))),
        ('data_newkey', (1, 3, max([v[0] for v in config.SCALES]),
                         max([v[1] for v in config.SCALES]))),
        ('data_cur', (1, 3, max([v[0] for v in config.SCALES]),
                      max([v[1] for v in config.SCALES]))),
        ('impression', (1, 1024, 38, 63)), ('key_feat_task', (1, 1024, 38, 63))
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + model, 4, process=True)
    first_predictor = Predictor(first_sym,
                                data_names,
                                label_names,
                                context=[mx.gpu(0)],
                                max_data_shapes=max_data_shape,
                                provide_data=provide_data,
                                provide_label=provide_label,
                                arg_params=arg_params,
                                aux_params=aux_params)
    key_predictor = Predictor(key_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    cur_predictor = Predictor(cur_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)
    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[j]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[j])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][3].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        if j % key_frame_interval == 0:  # keyframe
            if j == 0:  # first frame
                scores, boxes, data_dict, conv_feat, _, _, _ = im_detect_impression_online(
                    first_predictor, data_batch, data_names, scales, config)
                feat_task = conv_feat
                impression = conv_feat
            else:  # keyframe
                data_batch.data[0][-2] = impression
                data_batch.provide_data[0][-2] = ('impression',
                                                  impression.shape)
                scores, boxes, data_dict, conv_feat, impression, feat_task = im_detect_impression_online(
                    key_predictor, data_batch, data_names, scales, config)
        else:  # current frame
            data_batch.data[0][-1] = feat_task
            data_batch.provide_data[0][-1] = ('key_feat_task', feat_task.shape)
            scores, boxes, data_dict, _, _, _, _ = im_detect_impression_online(
                cur_predictor, data_batch, data_names, scales, config)
    print "warmup done"
    # test
    time = 0
    count = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][3].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        tic()
        print(idx)
        if idx % key_frame_interval == 0:  # keyframe
            if idx == 0:  # first frame
                scores, boxes, data_dict, conv_feat, _, _, _ = im_detect_impression_online(
                    first_predictor, data_batch, data_names, scales, config)
                feat_task = conv_feat
                impression = conv_feat
                feat_task_numpy = feat_task.asnumpy()
                np.save("features/impression_%s.npy" % (idx), feat_task_numpy)
            else:  # keyframe
                data_batch.data[0][-2] = impression
                data_batch.provide_data[0][-2] = ('impression',
                                                  impression.shape)

                scores, boxes, data_dict, conv_feat, impression, feat_task, _ = im_detect_impression_online(
                    key_predictor, data_batch, data_names, scales, config)
                feat_task_key_numpy = feat_task.asnumpy()
                np.save("features/impression_%s.npy" % (idx),
                        feat_task_key_numpy)
        else:  # current frame
            data_batch.data[0][-1] = feat_task
            data_batch.provide_data[0][-1] = ('key_feat_task', feat_task.shape)
            scores, boxes, data_dict, _, _, _, feat_task_cur = im_detect_impression_online(
                cur_predictor, data_batch, data_names, scales, config)
            if idx >= 1:
                feat_task_cur_numpy = feat_task_cur.asnumpy()
                np.save("features/impression_%s.npy" % (idx),
                        feat_task_cur_numpy)
                #import pdb;pdb.set_trace()
        time += toc()
        count += 1
        print 'testing {} {:.4f}s'.format(im_name, time / count)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:,
                              4:8] if config.CLASS_AGNOSTIC else boxes[:, j *
                                                                       4:(j +
                                                                          1) *
                                                                       4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        # visualize
        im = cv2.imread(im_name)
        #im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        # show_boxes(im, dets_nms, classes, 1)
        out_im = draw_boxes(im, dets_nms, classes, 1)
        _, filename = os.path.split(im_name)
        cv2.imwrite(output_dir + filename, out_im)
    print 'done'
コード例 #15
0
def main():
    # get symbol

    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_fpn_dcn_rcnn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    max_per_image = config.TEST.max_per_image

    # Print the test scales
    print("Train scales: %s" % str(config.SCALES))
    print("Test scales: %s" % str(config.TEST_SCALES))

    # load demo data
    #dataBaseDir = '/b_test/pkhan/datasets/Receipts/data/'
    dataBaseDir = '/netscratch/queling/data/'
    outputBaseDir = '/netscratch/queling/Deformable/output/fpn/deep_receipt/results/' + EXPERIMENT_NAME
    #outputBaseDir = '/b_test/pkhan/Code/Deformable/output/' + EXPERIMENT_NAME

    if os.path.exists(outputBaseDir):
        shutil.rmtree(outputBaseDir)
    os.mkdir(outputBaseDir)

    outputFile = open(os.path.join(outputBaseDir, 'output.txt'), 'w')
    outputFile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    errorStatsFile = open(
        os.path.join(outputBaseDir, 'incorrect-detections.txt'), 'w')

    incorrectDetectionResultsPath = os.path.join(outputBaseDir,
                                                 'IncorrectDetections')
    if not os.path.exists(incorrectDetectionResultsPath):
        os.mkdir(incorrectDetectionResultsPath)

    detectionResultsPath = os.path.join(outputBaseDir, 'Detections')
    if not os.path.exists(detectionResultsPath):
        os.mkdir(detectionResultsPath)

    annotationResultsPath = os.path.join(outputBaseDir, 'Annotations')
    if not os.path.exists(annotationResultsPath):
        os.mkdir(annotationResultsPath)

    statistics = {}
    for cls_ind, cls in enumerate(CLASSES):
        statistics[cls] = {}
        for thresh in IoU_THRESHOLDS:
            statistics[cls][thresh] = {}
            statistics[cls][thresh]["truePositives"] = 0
            statistics[cls][thresh]["falsePositives"] = 0
            statistics[cls][thresh]["falseNegatives"] = 0
            statistics[cls][thresh]["precision"] = 0
            statistics[cls][thresh]["recall"] = 0
            statistics[cls][thresh]["fMeasure"] = 0

    im_names_file = open(os.path.join(dataBaseDir, 'ImageSets/image.txt'),
                         'r')  #test.txt for whole dataset, image.txt for one

    for im_name in im_names_file:
        im_name = im_name.strip()
        # print ("Processing file: %s" % (im_name))

        found = False
        for ext in IMAGE_EXTENSIONS:

            im_name_with_ext = im_name + ext
            im_path = os.path.join(
                dataBaseDir, 'Test',
                im_name_with_ext)  #Images for whole dataset, Test for one

            if os.path.exists(im_path):
                found = True
                break
        if not found:
            print("Error: Unable to locate file %s" % (im_name))
            exit(-1)

        # Load GT annotations

        xml_path = os.path.join(dataBaseDir, 'Annotations', im_name + '.xml')

        #gtBBoxes = loadGTAnnotationsFromXML(xml_path)

        tic()

        dets_nms = [[] for j in range(len(TOTAL_CLASSES) - 1)]

        for testScale in config.SCALES:
            data = []
            im = cv2.imread(im_path,
                            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            target_size = testScale[0]
            max_size = testScale[1]
            im, im_scale = resize(im,
                                  target_size,
                                  max_size,
                                  stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            im_info = np.array(
                [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                dtype=np.float32)
            data.append({'data': im_tensor, 'im_info': im_info})

            # get predictor
            data_names = ['data', 'im_info']
            label_names = []
            data = [[mx.nd.array(data[i][name]) for name in data_names]
                    for i in xrange(len(data))]
            max_data_shape = [[('data', (1, 3, testScale[0], testScale[1]))]]
            provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                            for i in xrange(len(data))]
            provide_label = [None for i in xrange(len(data))]
            # arg_params, aux_params = load_param(cur_path + '/../model/' + ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'), 0, process=True)
            arg_params, aux_params = load_param(MODEL_PATH,
                                                MODEL_EPOCH,
                                                process=True)
            predictor = Predictor(sym,
                                  data_names,
                                  label_names,
                                  context=[mx.gpu(0)],
                                  max_data_shapes=max_data_shape,
                                  provide_data=provide_data,
                                  provide_label=provide_label,
                                  arg_params=arg_params,
                                  aux_params=aux_params)

            # # warm up
            for j in xrange(2):
                data_batch = mx.io.DataBatch(
                    data=[data[0]],
                    label=[],
                    pad=0,
                    index=0,
                    provide_data=[[(k, v.shape)
                                   for k, v in zip(data_names, data[0])]],
                    provide_label=[None])
                scales = [
                    data_batch.data[i][1].asnumpy()[0, 2]
                    for i in xrange(len(data_batch.data))
                ]
                scores, boxes, data_dict = im_detect(predictor, data_batch,
                                                     data_names, scales,
                                                     config)

            # test
            image_names = [im_name]  # Way around
            for idx, im_name in enumerate(image_names):
                data_batch = mx.io.DataBatch(
                    data=[data[idx]],
                    label=[],
                    pad=0,
                    index=idx,
                    provide_data=[[(k, v.shape)
                                   for k, v in zip(data_names, data[idx])]],
                    provide_label=[None])
                scales = [
                    data_batch.data[i][1].asnumpy()[0, 2]
                    for i in xrange(len(data_batch.data))
                ]

                scores, boxes, data_dict = im_detect(predictor, data_batch,
                                                     data_names, scales,
                                                     config)
                boxes = boxes[0].astype('f')
                scores = scores[0].astype('f')

                # TODO: Multi-scale testing
                for j in range(1, scores.shape[1]):
                    cls_scores = scores[:, j, np.newaxis]
                    cls_boxes = boxes[:, 4:
                                      8] if config.CLASS_AGNOSTIC else boxes[:,
                                                                             j *
                                                                             4:
                                                                             (j
                                                                              +
                                                                              1
                                                                              )
                                                                             *
                                                                             4]
                    cls_dets = np.hstack((cls_boxes, cls_scores))
                    # if config.TEST.USE_SOFTNMS:
                    #     soft_nms = py_softnms_wrapper(config.TEST.SOFTNMS_THRESH, max_dets=max_per_image)
                    #     cls_dets = soft_nms(cls_dets)
                    # else:
                    #     nms = py_nms_wrapper(config.TEST.NMS)
                    #     keep = nms(cls_dets)
                    #     cls_dets = cls_dets[keep, :]
                    # cls_dets = cls_dets[cls_dets[:, -1] > confidenceThreshold, :]
                    # dets_nms.append(cls_dets)
                    if len(dets_nms[j - 1]) == 0:
                        dets_nms[j - 1] = cls_dets
                    else:
                        dets_nms[j - 1] += cls_dets

        finalDetections = []
        for clsIter in range(len(dets_nms)):
            # print ("Performing NMS on cls %d with %d boxes" % (clsIter, len(dets_nms[clsIter])))
            if config.TEST.USE_SOFTNMS:
                soft_nms = py_softnms_wrapper(config.TEST.SOFTNMS_THRESH,
                                              max_dets=max_per_image)
                # cls_dets = soft_nms(dets_nms[clsIter])
                dets_nms[clsIter] = soft_nms(dets_nms[clsIter])
            else:
                nms = py_nms_wrapper(config.TEST.NMS)
                keep = nms(dets_nms[clsIter])
                # cls_dets = dets_nms[clsIter][keep, :]
                dets_nms[clsIter] = dets_nms[clsIter][keep, :]
            dets_nms[clsIter] = dets_nms[clsIter][
                dets_nms[clsIter][:, -1] > CONFIDENCE_THRESHOLD, :]

        # if max_per_image > 0:
        #     for idx_im in range(0, num_images):
        #         image_scores = np.hstack([all_boxes[j][idx_im][:, -1]
        #                                   for j in range(1, imdb.num_classes)])
        #         if len(image_scores) > max_per_image:
        #             image_thresh = np.sort(image_scores)[-max_per_image]
        #             for j in range(1, imdb.num_classes):
        #                 keep = np.where(all_boxes[j][idx_im][:, -1] >= image_thresh)[0]
        #                 all_boxes[j][idx_im] = all_boxes[j][idx_im][keep, :]

        print 'Processing image: {} {:.4f}s'.format(im_name, toc())

        # Add detections on the image
        im = cv2.imread(
            im_path)  # Reload the image since the previous one was scaled

        item = 0
        price = 0
        asd = 0
        row = 0

        for cls_idx, cls_name in enumerate(CONCERNED_ERRORS):
            cls_dets = dets_nms[cls_idx]
            for det in cls_dets:
                predictedBBox = det[:4]
                cv2.rectangle(im,
                              (int(predictedBBox[0]), int(predictedBBox[1])),
                              (int(predictedBBox[2]), int(predictedBBox[3])),
                              (0, 0, 255), 1)
                w = predictedBBox[2] - predictedBBox[0]
                cv2.putText(im, cls_name,
                            (int(predictedBBox[0] +
                                 (w / 2.0) - 100), int(predictedBBox[1] - 5)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 255, 0), 1)

                crop_im = im[int(predictedBBox[1]):int(predictedBBox[3]),
                             int(predictedBBox[0]):int(predictedBBox[2])]
                gray = cv2.cvtColor(crop_im, cv2.COLOR_BGR2GRAY)

                if cls_name == "price":
                    asd = price + 1
                    price = price + 1
                    new_path = outputBaseDir + "/price/"
                    if not os.path.exists(new_path):
                        os.makedirs(new_path)
                    outputImagePath = os.path.join(
                        new_path, cls_name + str(asd) + ".jpg")
                    # print ("Writing image: %s" % (outputImagePath))
                    cv2.imwrite(outputImagePath, crop_im)

                elif cls_name == "item_name":
                    item = item + 1
                    asd = item
                    new_path = outputBaseDir + "/item/"
                    if not os.path.exists(new_path):
                        os.makedirs(new_path)

                    outputImagePath = os.path.join(
                        new_path, cls_name + str(asd) + ".jpg")
                    # print ("Writing image: %s" % (outputImagePath))
                    gray = cv2.medianBlur(gray, 3)

                    cv2.imwrite(outputImagePath, gray)

                elif cls_name == "row":
                    row = row + 1
                    asd = row
                    new_path = outputBaseDir + "/row/"
                    if not os.path.exists(new_path):
                        os.makedirs(new_path)

                    outputImagePath = os.path.join(
                        new_path, cls_name + str(asd) + ".jpg")
                    # print ("Writing image: %s" % (outputImagePath))
                    gray = cv2.medianBlur(gray, 3)
                    cv2.imwrite(outputImagePath, gray)

                elif cls_name == 'total_price':
                    print("Found Total")
                    new_path = outputBaseDir + "/total/"
                    if not os.path.exists(new_path):
                        os.makedirs(new_path)

                    outputImagePath = os.path.join(new_path, cls_name + ".jpg")
                    # print ("Writing image: %s" % (outputImagePath))
                    gray = cv2.medianBlur(gray, 3)
                    cv2.imwrite(outputImagePath, gray)

                elif cls_name == 'header':
                    new_path = outputBaseDir + "/header/"
                    if not os.path.exists(new_path):
                        os.makedirs(new_path)

                    outputImagePath = os.path.join(new_path, cls_name + ".jpg")
                    # print ("Writing image: %s" % (outputImagePath))
                    gray = cv2.medianBlur(gray, 3)
                    cv2.imwrite(outputImagePath, gray)

                outputImagePath = os.path.join(outputBaseDir,
                                               cls_name + str(asd) + ".jpg")
                # print ("Writing image: %s" % (outputImagePath))
                cv2.imwrite(outputImagePath, crop_im)
                text = pytesseract.image_to_string(Image.open(outputImagePath))
                #if text != "":
                # print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>")
                #print("")
                #print(cls_name+": "+text)
                #print(" ")
        items = []
        for k in range(1, item):
            path_item = outputBaseDir + "/item/item_name" + str(k) + ".jpg"
            text_item = pytesseract.image_to_string(Image.open(path_item))
            #text_item = spellCheck.main(text_item, "product")
            print(str(k) + ": " + text_item)

            if text_item == "":
                print("empty and not relevant")
                #print(type(text_item))
            else:
                import unicodedata
                #print(unicodedata.normalize('NFKD', text_item).encode('ascii','ignore'))
                #print(type(unicodedata.normalize('NFKD', text_item).encode('ascii','ignore')))
                items = items + [text_item]

        print("-------------------------------------------------------------")
        prices = []
        for k in range(1, price):
            path_item = outputBaseDir + "/price/price" + str(k) + ".jpg"
            text_item = pytesseract.image_to_string(Image.open(path_item),
                                                    config="--psm 13")
            print(str(k) + ": " + text_item)
            if text_item == "":
                print("empty and not relevant")
                #print(type(text_item))
            else:
                import unicodedata
                #print(unicodedata.normalize('NFKD', text_item).encode('ascii','ignore'))
                #print(type(unicodedata.normalize('NFKD', text_item).encode('ascii','ignore')))
                prices = prices + [text_item]

            print(
                "-------------------------------------------------------------"
            )

        rows = []
        for k in range(1, row):
            path_item = outputBaseDir + "/row/row" + str(k) + ".jpg"
            text_item = pytesseract.image_to_string(Image.open(path_item))
            if text_item == "":
                print("empty and not relevant")
                #print(type(text_item))
            else:
                import unicodedata
                #print(unicodedata.normalize('NFKD', text_item).encode('ascii','ignore'))
                #print(type(unicodedata.normalize('NFKD', text_item).encode('ascii','ignore')))
                rows = rows + [text_item]

            print(str(k) + ": " + text_item)

        # write total in result.txt
        path_item = outputBaseDir + "/total/total_price.jpg"
        text_item = pytesseract.image_to_string(Image.open(path_item))

        f = open("/netscratch/queling/Deformable/fpn/results.txt", "a")
        f.write(text_item + "\n")
        f.close()

        #path_item = outputBaseDir+"/header/header.jpg"
        #text_item = pytesseract.image_to_string(Image.open(path_item))
        #print("Header: "+text_item)

        found = False

        for k in range(0, len(items)):
            for l in range(0, len(rows)):
                #print(type(items[k]))
                #print(type(rows[l]))
                if items[k].encode('ascii', 'ignore') in rows[l].encode(
                        'ascii', 'ignore'):
                    for m in range(0, len(prices)):
                        #print(type(prices[m].encode('ascii' ,'ignore')))
                        if prices[k].encode('ascii',
                                            'ignore') in rows[l].encode(
                                                'ascii', 'ignore'):
                            #items[k] = spellCheck.main(items[k], "product")
                            f = open(
                                "/netscratch/queling/Deformable/fpn/results.txt",
                                "a")
                            f.write(items[k] + "\n")
                            f.write(str(prices[m]) + "\n")
                            f.close()
                            found = True

            # Product not found in row
            if (found == False):
                #items[k] = spellCheck.main(items[k], "product")
                f = open("/netscratch/queling/Deformable/fpn/results.txt", "a")
                f.write(items[k] + "\n")
                f.write(" " + "\n")
                f.close()

            found = False

        # Add gt annotations
        #for bbox in gtBBoxes:
        #    if bbox[5] in CONCERNED_ERRORS:
        #        cv2.rectangle(im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 1)

        # Computate the statistics for the current image
        #statistics, classificationErrorMessage = computeStatistics(dets_nms, gtBBoxes, statistics, IoU_THRESHOLDS)
        #if classificationErrorMessage is not None:
        #    print ("Writing incorrect image: %s" % (im_name))
        #    errorStatsFile.write("%s: %s\n" % (im_name, classificationErrorMessage))
        #    cv2.imwrite(os.path.join(incorrectDetectionResultsPath, im_name + '.jpg'), im)

        # Write the output in ICDAR Format
        outputFile.write(convertToXML(im_name_with_ext, dets_nms))

        if WRITE_DETECTION_RESULTS:
            # visualize
            # im = cv2.imread(im_path)
            # im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

            # # Get also the plot for saving on server
            # _, plt = show_boxes(im, dets_nms, CLASSES, 1, returnPlt=True)
            # plt.savefig(os.path.join(outputBaseDir, 'Detections', im_name[:im_name.rfind('.')] + ".png"))

            outputImagePath = os.path.join(detectionResultsPath,
                                           im_name + ".jpg")
            print("Writing image: %s" % (outputImagePath))
            cv2.imwrite(outputImagePath, im)

        if WRITE_ANNOTATION_RESULTS:
            exportToPascalVOCFormat(im_name, im_path, dets_nms,
                                    annotationResultsPath)

    outputFile.close()
    errorStatsFile.close()

    total_classes = 0
    total_F_Meausere = 0
    average_F_Meausere = 0
    # Compute final precision and recall
    outputFile = open(
        os.path.join(outputBaseDir,
                     'output-stats-' + EXPERIMENT_NAME + '.txt'), 'w')
コード例 #16
0
ファイル: demo.py プロジェクト: mrlooi/FCIS
def main():
    import argparse

    """Parse input arguments."""
    parser = argparse.ArgumentParser(description='FCIS demo')
    parser.add_argument('--cfg', dest='cfg_file', help='required config file (YAML file)', 
                        required=True, type=str)
    parser.add_argument('--model', dest='model', help='path to trained model (.params file)',
                        required=True, type=str)
    parser.add_argument('--img_dir', dest='img_dir', help='path to directory of images for demo',
                        required=True, type=str)
    parser.add_argument('--min_score', dest='min_score', help='Minimum score. Default 0.85',
                            default=0.85, type=float)
    parser.add_argument('--save', dest='save', help='Saves inference data per image as JSON files (stored in img_dir directory)',
                         action='store_true')
    parser.add_argument('--novis', dest='novis', help='Turn off visualization of inference',
                         action='store_true')
    parser.add_argument('--wait', dest='wait', help='Set the wait time in between frames in opencv waitKey() (default 0 i.e. pause until button pressed)',
                         default=0, type=int)

    args = parser.parse_args()

    # load image demo directory
    img_dir = args.img_dir
    assert osp.exists(img_dir), ("Could not find image directory %s"%(img_dir))

    image_names = nts(glob.glob(osp.join(img_dir,"*.jpg")))

    if len(image_names) == 0:
        print("No files in %s"%(img_dir))
        return

    cfg_path = args.cfg_file
    model_path = args.model

    # load net
    fcis_net = FCISNet(cfg_path, model_path)
    CLASSES = fcis_net.classes

    # test: run predictions
    CONF_THRESH = args.min_score
    print("Using min score of %.3f...\n"%(CONF_THRESH))

    for idx, im_name in enumerate(image_names):
        im = cv2.imread(im_name, cv2.IMREAD_COLOR)# | cv2.IMREAD_IGNORE_ORIENTATION)
        if im is None:
            print("Could not read %s"%(im_name))
            continue
        # im_copy = im.copy()
        
        tic()
        dets, masks = fcis_net.forward(im, conf_thresh=CONF_THRESH)
        print('inference time %s: %.4fs'%(im_name, toc()))

        if args.save:
            im_name_basename = im_name[:im_name.rfind('.')]
            json_file = osp.join(img_dir, im_name_basename + ".json")
            reformatted_data = reformat_data(dets, masks, CLASSES)
            write_reformat_data_json(reformatted_data, json_file)

        # vis
        im_out_file = "/home/vincent/hd/deep_learning/tmp/FCIS/%s"%(im_name.split("/")[-1])
        if not args.novis:
            plt_show = args.wait == 0 
            im_seg = show_masks(im, dets, masks, CLASSES, config.BINARY_THRESH, show=plt_show)
            im_seg = cv2.resize(im_seg, (960,540))
            cv2.imwrite(im_out_file,im_seg)
            print("Saved to %s"%(im_out_file))
            if not plt_show:
                cv2.imshow("seg", im_seg)
                cv2.waitKey(args.wait) 

    print('\nDONE\n')
コード例 #17
0
def main():
    # get symbol
    ctx_id = [int(i) for i in config.gpus.split(',')]
    pprint.pprint(config)
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 2
    classes = [
        '__background__',  # always index 0
        '1'
    ]

    # load demo data
    image_names = []
    names_dirs = os.listdir(cur_path + '/../' + test_dir)
    for im_name in names_dirs:
        if im_name[-4:] == '.jpg' or im_name[-4:] == '.png':
            image_names.append(im_name)

    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../' + test_dir + im_name), (
            '%s does not exist'.format('../demo/' + im_name))
        im = cv2.imread(cur_path + '/../' + test_dir + im_name,
                        cv2.IMREAD_COLOR | long(128))
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        #print "before scale: "
        #print im.shape
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        #print "after scale: "
        #print im.shape
        #im_scale = 1.0
        #print "scale ratio: "
        #print im_scale
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        #print im_tensor.shape
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../' + model_dir,
                                        0,
                                        process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(ctx_id[0])],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    # warm up
    for i in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[0])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales,
                               config)
    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        tic()
        scores, boxes, masks, data_dict = im_detect(predictor, data_batch,
                                                    data_names, [1.0], config)
        im_shapes = [
            data_batch.data[i][0].shape[2:4]
            for i in xrange(len(data_batch.data))
        ]
        #print im_shapes

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in xrange(num_classes)]
            all_masks = [[] for _ in xrange(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                try:
                    if config.CLASS_AGNOSTIC:
                        cls_boxes = boxes[0][indexes, :]
                    else:
                        raise Exception()
                except:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes[0], scores[0], num_classes, 100, im_shapes[0][1],
                im_shapes[0][0], config.TEST.NMS,
                config.TEST.MASK_MERGE_THRESH, config.BINARY_THRESH, ctx_id[0])

            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [
                result_masks[j][:, 0, :, :] for j in range(1, num_classes)
            ]
        print 'testing {} {:.4f}s'.format(im_name, toc())
        # visualize
        for i in xrange(len(dets)):
            keep = np.where(dets[i][:, -1] > 0.7)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]
        im = cv2.imread(cur_path + '/../' + test_dir + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_masks(im, dets, masks, classes, config, 1.0 / scales[0], False)

        # Save img
        cv2.imwrite(cur_path + '/../' + result_dir + im_name,
                    cv2.cvtColor(im, cv2.COLOR_BGR2RGB))

    print 'done'
コード例 #18
0
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/../model/rfcn_dff_flownet_vid'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_batch_test_symbol(config)
    sym.save('dff_rfcn.json')
    #print config.network.get_internals()
    #mx.visualization.plot_network(sym).view()
    #print sym.get_intervals()
    #x = input()
    # set up class names
    num_classes = 31
    classes = [
        'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
        'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
        'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit',
        'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle',
        'watercraft', 'whale', 'zebra'
    ]

    # load demo data
    image_names = glob.glob(cur_path + '/../demo/sample/*.JPEG')
    output_dir = cur_path + '/../demo/rfcn_dff_batch/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = 10

    #

    data = []
    key_im_tensor = None
    cur_im_tensor = []
    im_info_tensor = []
    image_names_list = []
    image_names_batch = []
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        im = cv2.imread(
            im_name)  #, cv2.IMREAD_COLOR)# | cv2.IMREAD_IGNORE_ORIENTATION)
        #im = cv2.resize(im, (176,176,3))
        #height, width, channel = img.shape
        #gray = im = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        #im = np.zeros(height * width * channel).reshape((height, width, channel))
        #im[:,:,0] = gray
        #im[:,:,1] = gray
        #im[:,:,2] = gray
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        #print im.shape
        #print im_scale.shape
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        if idx % key_frame_interval == 0:
            key_im_tensor = im_tensor
        else:
            cur_im_tensor.append(im_tensor)
        im_info_tensor.append(im_info)
        image_names_batch.append(im_name)
        if (idx + 1) % key_frame_interval == 0 or idx == len(image_names) - 1:
            data.append({
                'data_other': np.concatenate(cur_im_tensor),
                'im_info': np.concatenate(im_info_tensor),
                'data_key': key_im_tensor
            })
            key_im_tensor = None
            cur_im_tensor = []
            im_info_tensor = []
            image_names_list.append(image_names_batch)
            image_names_batch = []

    # get predictor
    data_names = ['data_other', 'im_info', 'data_key']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[
        ('data_other', (key_frame_interval - 1, 3,
                        max([v[0] for v in config.SCALES]),
                        max([v[1] for v in config.SCALES]))),
        ('data_key', (1, 3, max([v[0] for v in config.SCALES]),
                      max([v[1] for v in config.SCALES]))),
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)
    #print predictor
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(1):
        data_batch = mx.io.DataBatch(data=[data[j]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[j])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[:, 2]
            for i in xrange(len(data_batch.data))
        ]
        print scales[0].shape
        scores_all, boxes_all, data_dict = im_batch_detect(
            predictor, data_batch, data_names, scales, config)

    print "warmup done"
    # test
    time = 0
    count = 0
    for idx, im_names in enumerate(image_names_list):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[:, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores_all, boxes_all, data_dict = im_batch_detect(
            predictor, data_batch, data_names, scales, config)
        time += toc()
        count += len(scores_all)
        print 'testing {} {:.4f}s x {:d}'.format(im_names[0], time / count,
                                                 len(scores_all))
        '''
        for batch_idx in xrange(len(scores_all)):
            boxes = boxes_all[batch_idx].astype('f')
            scores = scores_all[batch_idx].astype('f')
            dets_nms = []
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                cls_dets = cls_dets[keep, :]
                cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
                dets_nms.append(cls_dets)
            # visualize
            im = cv2.imread(im_names[batch_idx])
            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            # show_boxes(im, dets_nms, classes, 1)
            out_im = draw_boxes(im, dets_nms, classes, 1)
            _, filename = os.path.split(im_names[batch_idx])
            cv2.imwrite(output_dir + filename,out_im)
	'''

    print 'done'
コード例 #19
0
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only else 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]

    # load demo data
    image_names = [
        'COCO_test2015_000000000891.jpg', 'COCO_test2015_000000001669.jpg'
    ]
    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../demo/' + im_name), (
            '%s does not exist'.format('../demo/' + im_name))
        im = cv2.imread(cur_path + '/../demo/' + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(
        cur_path + '/../model/' +
        ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'),
        0,
        process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[0])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:,
                              4:8] if config.CLASS_AGNOSTIC else boxes[:, j *
                                                                       4:(j +
                                                                          1) *
                                                                       4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.4f}s'.format(im_name, toc())
        # visualize
        im = cv2.imread(cur_path + '/../demo/' + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_boxes(im, dets_nms, classes, 1)

    print 'done'
コード例 #20
0
def process_video_frame(raw_frame_queue, bbox_frame_queue):
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    arg_params, aux_params = load_param(
        './output/rfcn/road_obj/road_train_all/all/' + 'rfcn_road',
        19,
        process=True)

    # set up class names; Don't count the background in, even we are treat the background as label '0'
    num_classes = 4
    classes = ['vehicle', 'pedestrian', 'cyclist', 'traffic lights']

    target_size = config.SCALES[0][1]
    max_size = config.SCALES[0][1]

    while True:
        tic()
        i = 0
        data = []
        frame_list = []
        while len(data) < 15:
            frame = raw_frame_queue.get()
            if frame is None:
                continue
            if i < 2:
                i += 1
                frame, im_scale = resize(frame,
                                         target_size,
                                         max_size,
                                         stride=config.network.IMAGE_STRIDE)
                bbox_frame_queue.put(frame)
                continue
            frame, im_scale = resize(frame,
                                     target_size,
                                     max_size,
                                     stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(frame, config.network.PIXEL_MEANS)
            im_info = np.array(
                [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                dtype=np.float32)
            data.append({'data': im_tensor, 'im_info': im_info})
            frame_list.append(frame)

        # get predictor
        data_names = ['data', 'im_info']
        label_names = []
        data = [[mx.nd.array(data[i][name]) for name in data_names]
                for i in xrange(len(data))]
        # print('Debug: [data] shape: {}, cont: {}'.format(type(data), data))
        max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                     max([v[1] for v in config.SCALES])))]]
        # print('Debug: [max_data_shape] shape: {}, cont: {}'.format(type(max_data_shape), max_data_shape))
        provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                        for i in xrange(len(data))]
        # print('Debug: [provide_data] shape: {}, cont: {}'.format(type(provide_data), provide_data))
        provide_label = [None for i in xrange(len(data))]
        # print('Debug: [provide_label] shape: {}, cont: {}'.format(type(provide_label), provide_label))
        predictor = Predictor(sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
        nms = gpu_nms_wrapper(config.TEST.NMS, 0)

        # Process video frame
        # image_names = ['frame']
        # for idx, frame in enumerate(frame_list):
        data_batch = mx.io.DataBatch(data=data,
                                     label=[],
                                     pad=0,
                                     provide_data=provide_data,
                                     provide_label=provide_label)
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        # print("length: {}".format(len(data_batch.data)))
        # print('Debug: [scales] cont: {}'.format(scales))
        scores_all, boxes_all, data_dict_all = im_detect(
            predictor, data_batch, data_names, scales, config)
        # print('scores_all: Type: {}, Values: {}, Length: {}'.format(type(scores_all), scores_all, len(scores_all)))
        # print('boxes_all: Type: {}, Values: {}, Length: {}'.format(type(boxes_all), boxes_all, len(boxes_all)))
        # print('data_dict_all: Type: {}, Values: {}, length: {}'.format(type(data_dict_all), data_dict_all, len(data_dict_all)))
        # print('frame_list: Type: {}, Values: {}, Length: {}'.format(type(frame_list), frame_list, len(frame_list)))

        # print('scores_all: Type: {}, Length: {}, Values: {}'.format(type(scores_all[0]), len(scores_all[0]), scores_all[0]))
        # print(scores_all[0].shape)
        # print('boxes_all: Type: {}, Length: {}'.format(type(boxes_all), len(boxes_all)))
        # print(boxes_all[0].shape)
        # print('data_dict_all: Type: {}, length: {}'.format(type(data_dict_all), len(data_dict_all)))
        # print('frame_list: Type: {}, Length: {}'.format(type(frame_list), len(frame_list)))

        for idx, frame in enumerate(frame_list):
            # print('index: {}'.format(str(idx)))
            boxes = boxes_all[0].astype('f')
            scores = scores_all[0].astype('f')
            dets_nms = []
            # print(scores.shape)
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                cls_boxes = boxes[:,
                                  4:8] if config.CLASS_AGNOSTIC else boxes[:,
                                                                           j *
                                                                           4:
                                                                           (j +
                                                                            1
                                                                            ) *
                                                                           4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                cls_dets = cls_dets[keep, :]
                cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
                dets_nms.append(cls_dets)

            bbox_frame_queue.put(
                draw_bbox_on_frame(frame, dets_nms, classes,
                                   scale=scales[idx]))
        print(toc())
コード例 #21
0
def predict_on_image_names(
        image_names,
        config,
        model_path_id="/home/data/output/resnet_v1_101_coco_fcis_end2end_ohem-nebraska/train-nebraska/e2e",
        epoch=8):
    import argparse
    import os
    import sys
    import logging
    import pprint
    import cv2
    from utils.image import resize, transform
    import numpy as np
    # get config
    os.environ['PYTHONUNBUFFERED'] = '1'
    os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0'
    os.environ['MXNET_ENABLE_GPU_P2P'] = '0'
    cur_path = os.path.abspath(".")
    sys.path.insert(
        0, os.path.join(cur_path, '../external/mxnet', config.MXNET_VERSION))
    import mxnet as mx
    print("use mxnet at", mx.__file__)
    from core.tester import im_detect, Predictor
    from symbols import *
    from utils.load_model import load_param
    from utils.show_masks import show_masks
    from utils.tictoc import tic, toc
    from nms.nms import py_nms_wrapper
    from mask.mask_transform import gpu_mask_voting, cpu_mask_voting
    # get symbol
    ctx_id = [int(i) for i in config.gpus.split(',')]
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 2
    classes = ['cp']

    # load demo data
    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]

    # loading the last epoch that was trained, 8
    arg_params, aux_params = load_param(model_path_id, epoch, process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(ctx_id[0])],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    all_classes = []
    all_configs = []
    all_masks = []
    all_dets = []
    all_ims = []

    # warm up
    for i in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[0])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales,
                               config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores, boxes, masks, data_dict = im_detect(predictor, data_batch,
                                                    data_names, scales, config)
        im_shapes = [
            data_batch.data[i][0].shape[2:4]
            for i in xrange(len(data_batch.data))
        ]

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in xrange(num_classes)]
            all_masks = [[] for _ in xrange(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                try:
                    if config.CLASS_AGNOSTIC:
                        cls_boxes = boxes[0][indexes, :]
                    else:
                        raise Exception()
                except:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
            im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
            im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
            print(im_height, im_width)
            boxes = clip_boxes(boxes[0], (im_height, im_width))
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes, scores[0], num_classes, 100, im_width, im_height,
                config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                config.BINARY_THRESH, ctx_id[0])

            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [
                result_masks[j][:, 0, :, :] for j in range(1, num_classes)
            ]
        print('testing {} {:.4f}s'.format(im_name, toc()))
        # visualize
        for i in xrange(len(dets)):
            keep = np.where(dets[i][:, -1] > 0.7)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]

        all_classes.append(classes)
        all_configs.append(config)
        all_masks.append(masks)
        all_dets.append(dets)
        im = cv2.imread(im_name)
        all_ims.append(im)
    return all_ims, all_dets, all_masks, all_configs, all_classes
コード例 #22
0
ファイル: demo.py プロジェクト: cory8249/Deep-Feature-Flow
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/../model/rfcn_dff_flownet_vid'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    key_sym = sym_instance.get_key_test_symbol(config)
    cur_sym = sym_instance.get_cur_test_symbol(config)

    # set up class names
    num_classes = 31
    classes = [
        'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
        'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
        'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit',
        'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle',
        'watercraft', 'whale', 'zebra'
    ]

    # load demo data
    image_names = glob.glob(cur_path +
                            '/../demo/ILSVRC2015_val_00007010/*.JPEG')
    image_names.sort()
    output_dir = cur_path + '/../demo/rfcn_dff/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = 10

    #

    data = []
    key_im_tensor = None
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        if idx % key_frame_interval == 0:
            key_im_tensor = im_tensor
        data.append({
            'data':
            im_tensor,
            'im_info':
            im_info,
            'data_key':
            key_im_tensor,
            'feat_key':
            np.zeros((1, config.network.DFF_FEAT_DIM, 1, 1))
        })

    # get predictor
    data_names = ['data', 'im_info', 'data_key', 'feat_key']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[
        ('data', (1, 3, max([v[0] for v in config.SCALES]),
                  max([v[1] for v in config.SCALES]))),
        ('data_key', (1, 3, max([v[0] for v in config.SCALES]),
                      max([v[1] for v in config.SCALES]))),
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    key_predictor = Predictor(key_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    cur_predictor = Predictor(cur_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[j]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[j])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        if j % key_frame_interval == 0:
            scores, boxes, data_dict, feat = im_detect(key_predictor,
                                                       data_batch, data_names,
                                                       scales, config)
        else:
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            scores, boxes, data_dict, _ = im_detect(cur_predictor, data_batch,
                                                    data_names, scales, config)

    print "warmup done"
    # test
    time = 0
    count = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        mark = ''
        if idx % key_frame_interval == 0:
            scores, boxes, data_dict, feat = im_detect(key_predictor,
                                                       data_batch, data_names,
                                                       scales, config)
            mark = '+'
        else:
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            scores, boxes, data_dict, _ = im_detect(cur_predictor, data_batch,
                                                    data_names, scales, config)
        time_elapsed = toc()
        time += time_elapsed
        count += 1
        print 'testing {} {:.4f}s {:.4f}s {}'.format(im_name, time / count,
                                                     time_elapsed, mark)

        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:,
                              4:8] if config.CLASS_AGNOSTIC else boxes[:, j *
                                                                       4:(j +
                                                                          1) *
                                                                       4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        # visualize
        im = cv2.imread(im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        # show_boxes(im, dets_nms, classes, 1)
        out_im = draw_boxes(im, dets_nms, classes, 1)
        _, filename = os.path.split(im_name)
        cv2.imwrite(output_dir + filename, out_im)
        if enable_cv2_imshow:
            cv2.imshow('out_im', out_im)
            cv2.waitKey(1)

    print 'done'
コード例 #23
0
def process_one_batch_images_fun(isUrlFlag=False,
                                 one_batch_images_list=None,
                                 init_model_param=None,
                                 fileOp=None,
                                 vis=None):
    num_classes = RFCN_DCN_CONFIG['num_classes']  # 0 is background,
    classes = RFCN_DCN_CONFIG['num_classes_name_list']
    image_names = one_batch_images_list
    if len(image_names) <= 0:
        return
    all_can_read_image = []
    data = []
    for im_name in image_names:
        #print("process : %s"%(im_name))
        im = readImage_fun(isUrlFlag=isUrlFlag, imagePath=im_name)
        # 判断 这个图片是否可读
        if np.shape(im) == ():
            print("ReadImageError : %s" % (im_name))
            continue
        if im.shape[2] != 3:
            print("%s channel is not 3" % (im_name))
            continue
        all_can_read_image.append(im_name)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]

    predictor = Predictor(init_model_param[0],
                          data_names,
                          label_names,
                          context=[mx.gpu(int(args.gpuId))],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=init_model_param[1],
                          aux_params=init_model_param[2])
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    for idx, im_name in enumerate(all_can_read_image):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:,
                              4:8] if config.CLASS_AGNOSTIC else boxes[:, j *
                                                                       4:(j +
                                                                          1) *
                                                                       4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > min_threshold, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.4f}s'.format(im_name, toc()))
        show_boxes(isUrlFlag=isUrlFlag,
                   im_name=im_name,
                   dets=dets_nms,
                   classes=classes,
                   scale=1,
                   vis=vis,
                   fileOp=fileOp,
                   flag=args.outputFileFlag)
    print('process one batch images done')
    pass
コード例 #24
0
def main():
    # get symbol
    ctx_id = [int(i) for i in config.gpus.split(',')]
    pprint.pprint(config)
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]

    # load demo data
    image_names = [
        'COCO_test2015_000000000275.jpg', 'COCO_test2015_000000001412.jpg',
        'COCO_test2015_000000073428.jpg', 'COCO_test2015_000000393281.jpg'
    ]
    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../demo/' + im_name), (
            '%s does not exist'.format('../demo/' + im_name))
        im = cv2.imread(cur_path + '/../demo/' + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/fcis_coco',
                                        convert=True,
                                        0,
                                        process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.cpu()],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores, boxes, masks, data_dict = im_detect(predictor, data_batch,
                                                    data_names, scales, config)
        im_shapes = [
            data_batch.data[i][0].shape[2:4]
            for i in xrange(len(data_batch.data))
        ]

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in xrange(num_classes)]
            all_masks = [[] for _ in xrange(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                try:
                    if config.CLASS_AGNOSTIC:
                        cls_boxes = boxes[0][indexes, :]
                    else:
                        raise Exception()
                except:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
            im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
            im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
            print(im_height, im_width)
            boxes = clip_boxes(boxes[0], (im_height, im_width))
            result_masks, result_dets = cpu_mask_voting(
                masks, boxes, scores[0], num_classes, 100, im_width, im_height,
                config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                config.BINARY_THRESH)

            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [
                result_masks[j][:, 0, :, :] for j in range(1, num_classes)
            ]

        print 'testing {} {:.4f}s'.format(im_name, toc())

        # visualize
        for i in xrange(len(dets)):
            keep = np.where(dets[i][:, -1] > 0.7)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]

        for i in range(len(dets)):
            if len(dets[i]) > 0:
                for j in range(len(dets[i])):
                    print('{name}: {score} ({loc})'.format(
                        name=classes[i],
                        score=dets[i][j][-1],
                        loc=dets[i][j][:-1].tolist()))

    print 'done'
コード例 #25
0
def Pcs_each_vdo(vid_list, sv_dir, GPU):
    # process each_video
    for order, ec_vid in enumerate(vid_list):
        ec_vid = str(ec_vid)
        tic()
        fm_lis = glob.glob(ec_vid + '/*')
        fm_num = len(fm_lis)  # OK count right
        vd_name = ec_vid.split('/')[-1]
        sv_ph = sv_dir + '/' + vd_name
        # judge whether the json exist
        json_fl = sv_ph + '.json'  # if exist and the key length is frame then PASS
        if os.path.exists(json_fl):
            with open(json_fl, 'r') as load_f:
                load_file = json.load(load_f)
                det_frm_nm = len(list(load_file.keys()))
                if det_frm_nm == fm_num:
                    print 'the file name  ', order + 1, ' ', vd_name, '  Exist OK  Passed'
                    continue
        print 'The next vid name:  ', vd_name, '  Frm num: ', fm_num
        jpg_bs_name = fm_lis[0][:-10]
        frm_list = [
            jpg_bs_name + '%06d.jpg' % No for No in range(1, fm_num + 1)
        ]
        ft_im = cv2.imread(frm_list[0], 1 | 128)
        ft_im, im_scale = resize(ft_im,
                                 target_size,
                                 max_size,
                                 stride=config.network.IMAGE_STRIDE)
        im_tenssp = transform(ft_im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tenssp.shape[2], im_tenssp.shape[3], im_scale]],
            dtype=np.float32)
        data = []
        # for fm_dir in frm_list:
        # to change back to change back to change back to change back to change back to change back to change back

        # ====================  get each video frames and generate data ====================
        for fm_dir in frm_list:
            fm = cv2.imread(fm_dir, 1 | 128)
            #target_size  #max_size
            # im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
            fm, _ = resize(fm,
                           target_size,
                           max_size,
                           stride=config.network.IMAGE_STRIDE)
            fm_sptensor = transform(fm, config.network.PIXEL_MEANS)
            # im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
            data.append({'data': fm_sptensor, 'im_info': im_info})

        for x in locals().keys():
            del locals()[x]
        Memery = gc.collect()
        del fm_sptensor, fm
        Memery = gc.collect()

        # process
        data = [[mx.nd.array(data[i][name]) for name in data_names]
                for i in xrange(len(data))]
        provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                        for i in xrange(len(data))]
        provide_label = [None for i in xrange(len(data))]

        predictor = Predictor(sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(GPU)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
        # nms  discleared  PASSED
        # test

        # ====================  process each to get bbox  ====================
        each_video_det = {}
        for idx, im_name in enumerate(frm_list):
            data_batch = mx.io.DataBatch(
                data=[data[idx]],
                label=[],
                pad=0,
                index=idx,
                provide_data=[[(k, v.shape)
                               for k, v in zip(data_names, data[idx])]],
                provide_label=[None])
            scales = [
                data_batch.data[i][1].asnumpy()[0, 2]
                for i in xrange(len(data_batch.data))
            ]
            scores, boxes, data_dict = im_detect(predictor, data_batch,
                                                 data_names, scales, config)
            boxes = boxes[0].astype('f')
            scores = scores[0].astype('f')
            dets_nms = []
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                cls_boxes = boxes[:,
                                  4:8] if config.CLASS_AGNOSTIC else boxes[:,
                                                                           j *
                                                                           4:
                                                                           (j +
                                                                            1
                                                                            ) *
                                                                           4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                cls_dets = cls_dets[keep, :]
                cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
                if len(cls_dets) > 0:
                    dets_nms.append(
                        np.insert(cls_dets, 5, values=j, axis=1).tolist())

            each_video_det[im_name.split('/')[-1][-10:-4]] = dets_nms
            # print 'testing {} {:.4f}s'.format(im_name, toc())
            # # visualize
            # im = cv2.imread(im_name)
            # im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            # show_boxes(im, dets_nms, classes, 1)
        save_each_video_det(video_det=each_video_det,
                            save_dir=sv_dir,
                            video_name=vd_name)
        print order + 1, '/', len(vid_list), 'time {:.4f}s'.format(
            toc()), vd_name
コード例 #26
0
def inference_rcnn_AICity(cfg, dataset, image_set, root_path, dataset_path,
              ctx, prefix, epoch,
              vis, ignore_cache, shuffle, has_rpn, proposal, thresh, logger=None, output_path=None):
    if not logger:
        assert False, 'require a logger'

    # print cfg
    pprint.pprint(cfg)
    logger.info('testing cfg:{}\n'.format(pprint.pformat(cfg)))

    # load symbol and testing data
    if has_rpn:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol(cfg, is_train=False)
        imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path)
        roidb = imdb.gt_roidb_Shuo()
	#roidb = imdb.gt_roidb()
    else:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol_rfcn(cfg, is_train=False)
        imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path)
        gt_roidb = imdb.gt_roidb_Shuo()
        roidb = eval('imdb.' + proposal + '_roidb')(gt_roidb)

    print 'len(roidb):',len(roidb)
    # get test data iter
    test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle, has_rpn=has_rpn)

    # load model
    arg_params, aux_params = load_param(prefix, epoch, process=True)
    print 'inferring: ',prefix,' epoch: ',epoch
    
    """# write parameters to file
    print 'type(arg_params):',type(arg_params)
    print 'type(aux_params):',type(aux_params)
    thefile1 = open('/raid10/home_ext/Deformable-ConvNets/data/data_Shuo/UADETRAC/arg_params.txt','w')
    thefile2 = open('/raid10/home_ext/Deformable-ConvNets/data/data_Shuo/UADETRAC/aux_params.txt','w')
    for item_arg in arg_params.items():
	thefile1.write(item_arg[0] + str(type(item_arg[1])) + str(item_arg[1].shape)+'\n')
    for item_aux in aux_params.items():
	thefile2.write(item_aux[0] + str(type(item_aux[1])) + str(item_aux[1].shape)+'\n')
    """
 
    # infer shape
    data_shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(data_shape_dict)

    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)

    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = None
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))]]
    if not has_rpn:
        max_data_shape.append(('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))

    # create predictor
    predictor = Predictor(sym, data_names, label_names,
                          context=ctx, max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data, provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    nms = gpu_nms_wrapper(cfg.TEST.NMS, 0)
    # start detection
    # pred_eval(predictor, test_data, imdb, cfg, vis=vis, ignore_cache=ignore_cache, thresh=thresh, logger=logger)
    print 'test_data.size',test_data.size
    print 'test_data:',test_data
    print 'data_names:',data_names
    print 'test_data.provide_data:',test_data.provide_data
    print 'test_data.provide_label:',test_data.provide_label
    nnn = 0
    #classes = ['__background','vehicle']
    classes = ['Car','SUV','SmallTruck','MediumTruck','LargeTruck','Pedestrian','Bus','Van','GroupOfPeople','Bicycle', 'Motorcycle']
    #,'Pedestrian', 'GroupOfPeople','Bicycle', 'Motorcycle','TrafficSignal-Green', 'TrafficSignal-Yellow', 'TrafficSignal-Red'
    for im_info, data_batch in test_data:
        print nnn
        #print 'roidb[nnn]:',roidb[nnn]['image']
        image_name = roidb[nnn]['image']
        tic()
        scales = [iim_info[0, 2] for iim_info in im_info]
        scores_all, boxes_all, data_dict_all = im_detect(predictor, data_batch, data_names, scales, cfg)
        boxes = boxes_all[0].astype('f')
        scores = scores_all[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if cfg.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            threshold = 0.2 # confidence thrshold between 0 and 1
            cls_dets = cls_dets[cls_dets[:, -1] > threshold, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.4f}s'.format(image_name, toc())
        # visualize
        im = cv2.imread(image_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        
        #print 'cls_dets:',cls_dets
        show_boxes(im, dets_nms, classes, 1)
        nnn = nnn + 1
        image_name_length = len(image_name.split('/'))
        magefile_name = image_name.split('/')[image_name_length-1]
        image_name_lean = image_name.split('.')[0]
        
        if not os.path.exists(os.path.join('data', 'output')):
            os.makedirs(os.path.join('data', 'output'))
        
	import datetime,time
        output_file = os.path.join('data', 'output', str(datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')) + '.txt')
        
        thefile = open(output_file,'a')
        
        #det_id = 0
        #for x_small,y_small,x_large,y_large,prob in dets_nms[0]:
        #det_id += 1

        for cls_idx, cls_name in enumerate(classes):
            cls_dets = dets_nms[cls_idx]
            for x_small,y_small,x_large,y_large,prob in cls_dets:
                thefile.write(cls_name+' '+str(x_small)+' '+str(y_small)+' '+str(max(x_small+0.01,x_large))+' '+str(max(y_small+0.01,y_large))+' '+str(prob)+'\n')
        thefile.write("----next frame----")
コード例 #27
0
ファイル: demo.py プロジェクト: jacke121/Deep-Feature-Flow
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    model = '/../model/rfcn_vid'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_test_symbol(config)

    # set up class names
    num_classes = 31
    classes = [
        'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
        'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
        'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit',
        'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle',
        'watercraft', 'whale', 'zebra'
    ]

    # load demo data
    image_names = glob.glob(cur_path +
                            '/../demo/ILSVRC2015_val_00007010/*.JPEG')
    output_dir = cur_path + '/../demo/rfcn/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    #

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        im = cv2.imread(im_name)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in range(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in range(len(data))]
    provide_label = [None for i in range(len(data))]
    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in range(2):
        data_batch = mx.io.DataBatch(data=[data[0]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[0])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in range(len(data_batch.data))
        ]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)

    # test
    time = 0
    import datetime
    count = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in range(len(data_batch.data))
        ]

        tic()
        time_old = datetime.datetime.now()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        time += toc()
        count += 1
        print('testing {} {:.4f}s'.format(im_name, time / count))
        print('requests', (datetime.datetime.now() - time_old).microseconds)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:,
                              4:8] if config.CLASS_AGNOSTIC else boxes[:, j *
                                                                       4:(j +
                                                                          1) *
                                                                       4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)

        # visualize
        im = cv2.imread(im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        # show_boxes(im, dets_nms, classes, 1)
        out_im = draw_boxes(im, dets_nms, classes, 1)
        cv2.imshow("asdf", out_im)
        cv2.waitKey(0)
        _, filename = os.path.split(im_name)
        cv2.imwrite(output_dir + filename, out_im)

    print('done')
コード例 #28
0
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only else 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
               'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
               'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
               'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
               'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
               'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
               'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

    # load demo data
    image_names = ['COCO_test2015_000000000891.jpg', 'COCO_test2015_000000001669.jpg']
    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../demo/' + im_name), ('%s does not exist'.format('../demo/' + im_name))
        im = cv2.imread(cur_path + '/../demo/' + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})


    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/' + ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'), 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.4f}s'.format(im_name, toc())
        # visualize
        im = cv2.imread(cur_path + '/../demo/' + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_boxes(im, dets_nms, classes, 1)

    print 'done'
コード例 #29
0
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_deeplab_dcn' if not args.deeplab_only else 'resnet_v1_101_deeplab'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 19

    # load demo data
    image_names = ['frankfurt_000001_073088_leftImg8bit.png', 'lindau_000024_000019_leftImg8bit.png']
    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../demo/' + im_name), ('%s does not exist'.format('../demo/' + im_name))
        im = cv2.imread(cur_path + '/../demo/' + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})


    # get predictor
    data_names = ['data']
    label_names = ['softmax_label']
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/' + ('deeplab_dcn_cityscapes' if not args.deeplab_only else 'deeplab_cityscapes'), 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        output_all = predictor.predict(data_batch)
        output_all = [mx.ndarray.argmax(output['softmax_output'], axis=1).asnumpy() for output in output_all]

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])

        tic()
        output_all = predictor.predict(data_batch)
        output_all = [mx.ndarray.argmax(output['softmax_output'], axis=1).asnumpy() for output in output_all]
        pallete = getpallete(256)

        segmentation_result = np.uint8(np.squeeze(output_all))
        segmentation_result = Image.fromarray(segmentation_result)
        segmentation_result.putpalette(pallete)
        print 'testing {} {:.4f}s'.format(im_name, toc())
        pure_im_name, ext_im_name = os.path.splitext(im_name)
        segmentation_result.save(cur_path + '/../demo/seg_' + pure_im_name + '.png')
        # visualize
        im_raw = cv2.imread(cur_path + '/../demo/' + im_name)
        seg_res = cv2.imread(cur_path + '/../demo/seg_' + pure_im_name + '.png')
        cv2.imshow('Raw Image', im_raw)
        cv2.imshow('segmentation_result', seg_res)
        cv2.waitKey(0)
    print 'done'