Ejemplo n.º 1
0
def main():
    """Rebuild the train/val and test image directories as symlink farms.

    The data list (``settings.DATA_LIST``) names the images belonging to each
    split.  Every file found under ``settings.IMAGE_SOURCES_ROOTS`` is indexed
    by file name, then each destination directory is wiped, recreated and
    filled with relative symlinks pointing at the indexed source files.
    """
    with open(settings.DATA_LIST) as list_file:
        data_list = json.load(list_file)

    # Index source files by name; if a name shows up more than once, the
    # entry visited last replaces the earlier ones.
    located = {}
    for source_root in settings.IMAGE_SOURCES_ROOTS:
        for path, _, name in common_tools.each_file_tuple(source_root):
            located[name] = path

    def populate(dest_dir, first_split, second_split):
        """Recreate ``dest_dir`` and link every image of the two splits."""
        if os.path.isdir(dest_dir):
            shutil.rmtree(dest_dir)
        os.makedirs(dest_dir)

        def link(file_name):
            source = located[file_name]
            target = os.path.join(dest_dir, file_name)
            # Relative links keep working if the whole tree is relocated.
            os.symlink(
                os.path.relpath(source, os.path.dirname(target)), target)

        metas = data_list[first_split] + data_list[second_split]
        # A set removes images that are listed in both splits.
        unique_names = list({meta['file_name'] for meta in metas})
        common_tools.multithreaded(link, unique_names, num_thread=8)

    populate(settings.TRAINVAL_IMAGE_DIR, 'train', 'val')
    populate(settings.TEST_IMAGE_DIR, 'test_cls', 'test_det')
Ejemplo n.º 2
0
    def get_feed_dict_sync(self):
        """Build one randomly sampled batch as a feed dict.

        For each of ``self.FLAGS.batch_size`` slots a class index is drawn
        uniformly from ``range(self.num_classes)``; a one-hot label vector is
        built for it and one item is drawn at random from
        ``self.labels[class_index]``.  Every drawn item is then run through
        ``self.cv_preprocess_image`` (square ``train_image_size`` target,
        last argument ``True``) on a pool of 8 threads.

        Returns:
            dict: ``{self.image_holder: images, self.label_holder: labels}``
            where both values are ``float64`` NumPy arrays.
        """
        # All class indices are drawn before any image index so that the
        # sequence of random numbers consumed matches the original order.
        class_ids = [
            random.randrange(self.num_classes)
            for _ in range(self.FLAGS.batch_size)
        ]
        labels_feed = []
        for class_id in class_ids:
            one_hot = [0.] * self.num_classes
            one_hot[class_id] = 1.
            labels_feed.append(one_hot)
        images_feed = [
            self.labels[class_ids[i]][random.randrange(
                0, len(self.labels[class_ids[i]]))]
            for i in range(self.FLAGS.batch_size)
        ]

        def job(i):
            # Each worker overwrites its own slot, so no locking is needed.
            img = images_feed[i]
            images_feed[i] = self.cv_preprocess_image(
                img, self.FLAGS.train_image_size, self.FLAGS.train_image_size,
                True)

        common_tools.multithreaded(job, range(len(images_feed)), num_thread=8)
        # ``np.float`` was only an alias of the builtin ``float`` and has been
        # removed from NumPy (deprecated in 1.20, removed in 1.24);
        # ``np.float64`` is the same dtype and works on every version.
        return {
            self.image_holder: np.array(images_feed, dtype=np.float64),
            self.label_holder: np.array(labels_feed, dtype=np.float64)
        }
Ejemplo n.º 3
0
def main():
    """Load every image named in the tags file and pickle it with its label.

    Each line of the tags file is split on whitespace: the first field is
    passed to ``misc.imread`` as the image path and the second field, which
    must be exactly one character long, is stored as the label.  The
    resulting ``[image, label]`` pairs are written to ``self_synth.pkl``.
    """
    assert six.PY3

    with open('../data/icpr_data_char_1/train_15962.tags') as tags_file:
        lines = tags_file.read().splitlines()

    # One private bucket per input line so worker threads never share a list.
    train = [[] for _ in lines]

    def load_sample(i):
        if i % 100 == 0:
            print('trainval', i, '/', len(lines))
        fields = lines[i].strip().split()
        image = misc.imread(fields[0])
        label = fields[1]
        assert len(label) == 1
        train[i].append([image, label])

    common_tools.multithreaded(load_sample, range(len(lines)), num_thread=8)
    # Merge the per-line buckets into a single collection before dumping.
    train = common_tools.reduce_sum(train)
    with open('self_synth.pkl', 'wb') as out:
        cPickle.dump(train, out)
    train = None  # release memory
Ejemplo n.º 4
0
def main():
    """Crop character images for train/val and test and pickle them.

    Train/val: every annotation line of ``settings.TRAIN`` and
    ``settings.VAL`` is parsed as JSON, its image is loaded, and each Chinese
    character is cropped and stored as ``[cropped, text]``.

    Test: every line of ``settings.TEST_CLASSIFICATION`` is parsed, and each
    proposal is cropped and stored as ``[cropped, None]``.

    Both collections are pickled and written in slices of at most
    ``2**31 - 1`` bytes per ``write`` call.
    """
    assert six.PY3

    max_bytes = 2**31 - 1

    def write_in_chunks(out, payload):
        """Write ``payload`` to ``out`` in slices of at most ``max_bytes``."""
        for start in range(0, len(payload), max_bytes):
            out.write(payload[start:start + max_bytes])

    # ---- train / val -----------------------------------------------------
    with open(settings.TRAIN) as f:
        annotations = f.read().splitlines()
    with open(settings.VAL) as f:
        annotations += f.read().splitlines()
    # One bucket per annotation line keeps the worker threads independent.
    train = [[] for _ in annotations]

    def load_train(i):
        if i % 100 == 0:
            print('trainval', i, '/', len(annotations))
        anno = json.loads(annotations[i].strip())
        image_path = os.path.join(settings.TRAINVAL_IMAGE_DIR,
                                  anno['file_name'])
        image = misc.imread(image_path)
        assert image.shape == (anno['height'], anno['width'], 3)
        for char in anno_tools.each_char(anno):
            if char['is_chinese']:
                train[i].append(
                    [crop(image, char['adjusted_bbox']), char['text']])

    common_tools.multithreaded(
        load_train, range(len(annotations)), num_thread=8)
    train = common_tools.reduce_sum(train)
    train_out = pickle.dumps(train, protocol=pickle.HIGHEST_PROTOCOL)
    with open(settings.TRAINVAL_PICKLE, 'wb') as f:
        write_in_chunks(f, train_out)
    # Drop the large objects before the test set is loaded.
    train = None
    train_out = None

    # ---- test --------------------------------------------------------------
    with open(settings.TEST_CLASSIFICATION) as f:
        annotations = f.read().splitlines()
    test = [[] for _ in annotations]

    def load_test(i):
        if i % 100 == 0:
            print('test', i, '/', len(annotations))
        anno = json.loads(annotations[i].strip())
        image_path = os.path.join(settings.TEST_IMAGE_DIR, anno['file_name'])
        image = misc.imread(image_path)
        for proposal in anno['proposals']:
            test[i].append([crop(image, proposal['adjusted_bbox']), None])

    common_tools.multithreaded(
        load_test, range(len(annotations)), num_thread=8)
    test = common_tools.reduce_sum(test)
    test_out = pickle.dumps(test, protocol=pickle.HIGHEST_PROTOCOL)
    with open(settings.TEST_PICKLE, 'wb') as f:
        write_in_chunks(f, test_out)
    test = None  # release memory
    test_out = None
Ejemplo n.º 5
0
def crop_test_images(list_file_name):
    """Tile every test image into square crops and list the crop files.

    All ``*.jpg`` and ``*.png`` files in ``settings.TEST_IMAGE_DIR`` are
    processed on 4 threads.  For each level of ``settings.TEST_CROP_LEVELS``
    the image is cut into square tiles and each tile is saved into
    ``settings.TEST_CROPPED_DIR`` with the source image's extension.  The
    paths of all tiles are finally written to ``list_file_name``, one per
    line.
    """

    with open(settings.CATES) as cates_file:
        cates = json.load(cates_file)
    # NOTE(review): this mapping is built but not used below.
    text2cate = {entry['text']: entry['cate_id'] for entry in cates}

    if not os.path.isdir(settings.TEST_CROPPED_DIR):
        os.makedirs(settings.TEST_CROPPED_DIR)

    imgPath_list = glob.glob(os.path.join(settings.TEST_IMAGE_DIR, '*.jpg'))
    imgPath_list += glob.glob(os.path.join(settings.TEST_IMAGE_DIR, '*.png'))
    testset_size = len(imgPath_list)

    def crop_once(imgPath, testName_list):
        """Write all tiles of one image and append their paths to the list."""
        image = cv2.imread(imgPath)
        imshape = image.shape
        image_id, image_type = os.path.splitext(os.path.basename(imgPath))
        tile_paths = []
        for level_id, level in enumerate(settings.TEST_CROP_LEVELS):
            cropratio, cropoverlap = level
            side = int(round(settings.TEST_IMAGE_SIZE // cropratio))
            cropshape = (side, side)
            overlap = (cropoverlap, cropoverlap)
            for o in darknet_tools.get_crop_bboxes(imshape, cropshape,
                                                   overlap):
                xlo, ylo = o['xlo'], o['ylo']
                stem = '{}_{}_{}'.format(image_id, level_id, o['name'])
                tile_path = os.path.join(settings.TEST_CROPPED_DIR,
                                         stem + image_type)
                tile_paths.append(tile_path)
                cv2.imwrite(tile_path,
                            image[ylo:ylo + side, xlo:xlo + side])
        # The shared list is extended once per image, after all its tiles.
        testName_list.extend(tile_paths)

    # A one-slot queue holds the shared progress counter: a worker takes the
    # value out, reports it, and puts the incremented value back.
    progress = queue.Queue()
    progress.put(0)

    def foo(*args):
        done = progress.get()
        if done % 100 == 0:
            print('crop test', done, '/', testset_size)
        progress.put(done + 1)
        crop_once(*args)

    testName_list = []
    jobs = [(imgPath, testName_list) for imgPath in imgPath_list]
    common_tools.multithreaded(foo, jobs, num_thread=4)
    with open(list_file_name, 'w') as out:
        for file_name in testName_list:
            out.write(file_name)
            out.write('\n')
Ejemplo n.º 6
0
def crop_test_images(list_file_name):
    """Tile the detection test images and write the list of tile files.

    Every image listed under ``test_det`` in ``settings.DATA_LIST`` must have
    shape ``(2048, 2048, 3)``.  For each level of
    ``settings.TEST_CROP_LEVELS`` the image is cut into square tiles which
    are saved as JPEG files in ``settings.TEST_CROPPED_DIR`` (4 worker
    threads).  Afterwards the tile paths are recomputed sequentially, in
    ``test_det`` order, and written to ``list_file_name`` one per line.
    """
    imshape = (2048, 2048, 3)

    with open(settings.CATES) as cates_file:
        cates = json.load(cates_file)
    # NOTE(review): this mapping is built but not used below.
    text2cate = {entry['text']: entry['cate_id'] for entry in cates}

    if not os.path.isdir(settings.TEST_CROPPED_DIR):
        os.makedirs(settings.TEST_CROPPED_DIR)

    with open(settings.DATA_LIST) as list_file:
        data_list = json.load(list_file)
    test_det = data_list['test_det']

    def crop_once(anno, write_images):
        """Return the tile paths of one image; also write them if asked."""
        image_id = anno['image_id']
        if write_images:
            image_path = os.path.join(settings.TEST_IMAGE_DIR,
                                      anno['file_name'])
            image = cv2.imread(image_path)
            assert image.shape == imshape
        tile_paths = []
        for level_id, level in enumerate(settings.TEST_CROP_LEVELS):
            cropratio, cropoverlap = level
            side = int(round(settings.TEST_IMAGE_SIZE // cropratio))
            cropshape = (side, side)
            overlap = (cropoverlap, cropoverlap)
            for o in darknet_tools.get_crop_bboxes(imshape, cropshape,
                                                   overlap):
                xlo, ylo = o['xlo'], o['ylo']
                stem = '{}_{}_{}'.format(image_id, level_id, o['name'])
                tile_path = os.path.join(settings.TEST_CROPPED_DIR,
                                         '{}.jpg'.format(stem))
                tile_paths.append(tile_path)
                if write_images:
                    cv2.imwrite(tile_path,
                                image[ylo:ylo + side, xlo:xlo + side])
        return tile_paths

    # A one-slot queue holds the shared progress counter: a worker takes the
    # value out, reports it, and puts the incremented value back.
    progress = queue.Queue()
    progress.put(0)

    def foo(*args):
        done = progress.get()
        if done % 100 == 0:
            print('crop test', done, '/', len(test_det))
        progress.put(done + 1)
        crop_once(*args)

    jobs = [(anno, True) for anno in test_det]
    common_tools.multithreaded(foo, jobs, num_thread=4)

    # Second, sequential pass: only recompute the names so the list follows
    # the order of ``test_det`` regardless of thread scheduling.
    testset = []
    for i, anno in enumerate(test_det):
        if i % 1000 == 0:
            print('list test', i, '/', len(test_det))
        testset.extend(crop_once(anno, False))
    with open(list_file_name, 'w') as out:
        for file_name in testset:
            out.write(file_name)
            out.write('\n')
Ejemplo n.º 7
0
def crop_test_images(list_file_name):
    """Tile the detection test images (any size) and list the tile files.

    For every image listed under ``test_det`` in ``settings.DATA_LIST`` and
    every level of ``settings.TEST_CROP_LEVELS`` the image is cut into square
    tiles that are saved as JPEG files in ``settings.TEST_CROPPED_DIR``.  When
    a tile would reach past the right or bottom edge, the image is first
    stretched (bicubic) so that the tile fits.  Afterwards the tile paths are
    recomputed in ``test_det`` order and written to ``list_file_name``, one
    per line.

    Progress and debug information is printed to stdout throughout.

    Args:
        list_file_name: Path of the text file receiving the tile paths.
    """

    with open(settings.CATES) as f:
        cates = json.load(f)
    # NOTE(review): this mapping is built but not used below.
    text2cate = {c['text']: c['cate_id'] for c in cates}

    if not os.path.isdir(settings.TEST_CROPPED_DIR):
        os.makedirs(settings.TEST_CROPPED_DIR)

    with open(settings.DATA_LIST) as f:
        data_list = json.load(f)
    test_det = data_list['test_det']
    print(test_det)

    def crop_once(anno, write_images):
        """Return the tile paths of one image; also write them if asked."""
        image_id = anno['image_id']
        print(image_id)
        # The image is read even when only listing, because its actual shape
        # determines the crop grid.
        image = cv2.imread(
            os.path.join(settings.TEST_IMAGE_DIR, anno['file_name']))
        imshape = image.shape  # height, width, channel
        cropped_list = []
        # Original author's note (verify against settings): with
        # TEST_CROP_LEVELS = ((1, 32), (0.5, 96), (0.25, 96)) and a base size
        # of 128 the tiles are 128x128, 256x256 and 512x512.
        for level_id, (cropratio,
                       cropoverlap) in enumerate(settings.TEST_CROP_LEVELS):
            print(cropratio, cropoverlap)
            cropshape = (int(round(settings.TEST_IMAGE_SIZE // cropratio)),
                         int(round(settings.TEST_IMAGE_SIZE // cropratio)))
            for o in darknet_tools.get_crop_bboxes(imshape, cropshape,
                                                   (cropoverlap, cropoverlap)):
                xlo = o['xlo']
                xhi = xlo + cropshape[1]
                ylo = o['ylo']
                yhi = ylo + cropshape[0]
                basename = '{}_{}_{}'.format(image_id, level_id, o['name'])
                cropped_file_name = os.path.join(settings.TEST_CROPPED_DIR,
                                                 '{}.jpg'.format(basename))
                cropped_list.append(cropped_file_name)
                if write_images:
                    # Stretch the image when the tile would overrun an edge.
                    # The interpolation flag must be passed by keyword:
                    # the third positional parameter of cv2.resize is ``dst``,
                    # so a positional flag never selects the interpolation.
                    if xhi > imshape[1]:
                        image = cv2.resize(image, (xhi, imshape[0]),
                                           interpolation=cv2.INTER_CUBIC)
                        imshape = image.shape
                    if yhi > imshape[0]:
                        image = cv2.resize(image, (imshape[1], yhi),
                                           interpolation=cv2.INTER_CUBIC)
                        imshape = image.shape
                    cropped = image[ylo:yhi, xlo:xhi]
                    cv2.imwrite(cropped_file_name, cropped)
        return cropped_list

    # A one-slot queue holds the shared progress counter: a worker takes the
    # value out, reports it, and puts the incremented value back.
    q_i = queue.Queue()
    q_i.put(0)
    print('crop')

    def foo(*args):
        i = q_i.get()
        print('crop test', i, '/', len(test_det))
        q_i.put(i + 1)
        crop_once(*args)

    # First pass: write the tiles (single worker thread).
    common_tools.multithreaded(foo, [(anno, True) for anno in test_det],
                               num_thread=1)
    # Second pass: recompute only the tile names, in ``test_det`` order.
    testset = []
    for i, anno in enumerate(test_det):
        print('list test', i, '/', len(test_det))
        testset += crop_once(anno, False)
    with open(list_file_name, 'w') as f:
        for file_name in testset:
            f.write(file_name)
            print(file_name)
            f.write('\n')
Ejemplo n.º 8
0
def crop_train_images():
    """Tile the train/val images and write tiles, label files and lists.

    Every annotation line of ``settings.TRAIN`` and ``settings.VAL`` describes
    one image that must have shape ``(2048, 2048, 3)``.  Ignore polygons are
    painted grey, the image is cut into overlapping square tiles of
    ``settings.TRAIN_IMAGE_SIZE`` pixels, and every tile containing at least
    one sufficiently visible Chinese character is saved as ``.jpg`` together
    with an ``.xml`` label file in ``settings.TRAINVAL_CROPPED_DIR``.

    Three list files are written afterwards: ``settings.TRAINVAL_LIST`` and
    ``settings.VAL_LIST`` (shuffled ``"<jpg> <xml>"`` lines) and
    ``settings.VAL_NAME_SIZE`` (``"<jpg> <size> <size>"`` lines).
    """
    imshape = (2048, 2048, 3)
    cropshape = (settings.TRAIN_IMAGE_SIZE, settings.TRAIN_IMAGE_SIZE)
    cropoverlap = (16, 16)

    with open(settings.CATES) as cates_file:
        cates = json.load(cates_file)
    text2cate = {entry['text']: entry['cate_id'] for entry in cates}

    def clamp01(value):
        """Clamp ``value`` into the closed interval [0, 1]."""
        return max(0, min(1, value))

    def in_image_ratio(bbox):
        """Fraction of a box (centre x, centre y, w, h; tile-relative) that
        lies inside the unit square."""
        xmid, ymid, w, h = bbox
        visible_w = clamp01(xmid + w / 2) - clamp01(xmid - w / 2)
        visible_h = clamp01(ymid + h / 2) - clamp01(ymid - h / 2)
        return (visible_w * visible_h) / (w * h)

    def crop_once(line, write_images):
        """Return the ``"<jpg> <xml>"`` entries for one annotation line.

        With ``write_images`` set, the tiles and label files are written too.
        """
        anno = json.loads(line.strip())
        image_id = anno['image_id']

        # Collect (bbox, category) for each Chinese character; every category
        # id at or above NUM_CHAR_CATES collapses into that single value.
        chars = []
        for char in anno_tools.each_char(anno):
            if char['is_chinese']:
                cate_id = text2cate[char['text']]
                if cate_id >= settings.NUM_CHAR_CATES:
                    cate_id = settings.NUM_CHAR_CATES
                chars.append((char['adjusted_bbox'], cate_id))

        if write_images:
            image = cv2.imread(
                os.path.join(settings.TRAINVAL_IMAGE_DIR, anno['file_name']))
            assert image.shape == imshape
            # Grey out the ignore regions before tiling.
            for region in anno['ignore']:
                poly = (np.array(region['polygon'])).astype(np.int32)
                cv2.fillConvexPoly(image, poly, (128, 128, 128))

        entries = []
        for o in darknet_tools.get_crop_bboxes(imshape, cropshape,
                                               cropoverlap):
            xlo, ylo = o['xlo'], o['ylo']
            xhi = xlo + cropshape[1]
            yhi = ylo + cropshape[0]

            labels = []
            for bbox, cate_id in chars:
                x, y, w, h = bbox
                outside = x > xhi or x + w < xlo or y > yhi or y + h < ylo
                if outside:
                    continue
                # Re-express the box relative to the tile, as centre + size.
                bbox = ((x + w / 2 - xlo) / cropshape[1],
                        (y + h / 2 - ylo) / cropshape[0], w / cropshape[1],
                        h / cropshape[0])
                # Keep only boxes that are more than half inside the tile.
                if in_image_ratio(bbox) > 0.5:
                    labels.append((bbox, cate_id))
            if not labels:
                continue

            stem = '{}_{}'.format(image_id, o['name'])
            cropped_file_name = os.path.join(settings.TRAINVAL_CROPPED_DIR,
                                             '{}.jpg'.format(stem))
            cropped_xml_name = os.path.join(settings.TRAINVAL_CROPPED_DIR,
                                            '{}.xml'.format(stem))
            entries.append('{} {}'.format(cropped_file_name,
                                          cropped_xml_name))
            if write_images:
                cv2.imwrite(cropped_file_name, image[ylo:yhi, xlo:xhi])
                with open(cropped_xml_name, 'w') as xml_file:
                    write_xml(labels, cropshape, xml_file)
        return entries

    def list_crops(label, source_lines):
        """Sequentially recompute the entries for ``source_lines``."""
        listed = []
        for i, line in enumerate(source_lines):
            if i % 1000 == 0:
                print(label, i, '/', len(source_lines))
            listed += crop_once(line, False)
        return listed

    def write_entries(path, entries):
        """Write one entry per line to ``path``."""
        with open(path, 'w') as out:
            for entry in entries:
                out.write(entry)
                out.write('\n')

    if not os.path.isdir(settings.TRAINVAL_CROPPED_DIR):
        os.makedirs(settings.TRAINVAL_CROPPED_DIR)

    trainval_lines = []
    with open(settings.TRAIN) as f:
        trainval_lines += f.read().splitlines()
    with open(settings.VAL) as f:
        trainval_lines += f.read().splitlines()

    # A one-slot queue holds the shared progress counter: a worker takes the
    # value out, reports it, and puts the incremented value back.
    progress = queue.Queue()
    progress.put(0)

    def foo(*args):
        done = progress.get()
        if done % 100 == 0:
            print('crop trainval', done, '/', len(trainval_lines))
        progress.put(done + 1)
        crop_once(*args)

    common_tools.multithreaded(foo,
                               [(line, True) for line in trainval_lines],
                               num_thread=4)

    trainset = list_crops('list trainval', trainval_lines)
    random.shuffle(trainset)
    write_entries(settings.TRAINVAL_LIST, trainset)

    with open(settings.VAL) as f:
        val_lines = f.read().splitlines()
    valset = list_crops('list val', val_lines)
    random.shuffle(valset)
    write_entries(settings.VAL_LIST, valset)

    with open(settings.VAL_NAME_SIZE, 'w') as out:
        for entry in valset:
            cropped_file_name, _ = entry.split()
            out.write('{} {} {}\n'.format(cropped_file_name,
                                          settings.TRAIN_IMAGE_SIZE,
                                          settings.TRAIN_IMAGE_SIZE))
Ejemplo n.º 9
0
def work(selected, ext):
    """Render detection results for selected test images via ``print_text``.

    Ground truths are read from ``settings.TEST_DETECTION_GT`` and detections
    from ``../detection/products/detections.jsonl``; both files must have the
    same number of lines, and line ``i`` of each is used for entry ``i`` of
    ``data_list['test_det']``.

    Args:
        selected: Iterable of ``(image_id, x, y, w, h)`` tuples naming the
            image and the crop to draw.  When ``None``, every 200th image of
            ``test_det`` is used with the crop ``(0, 0, 2048, 2048)``.
        ext: File extension used for the output file names.

    Output files are placed in ``settings.PRINTTEXT_DRAWING_DIR`` and are
    named ``<image_id>_<x>_<y>_<w>_<h>_dt.<ext>``.
    """
    with open(settings.DATA_LIST) as f:
        data_list = json.load(f)
    with open(settings.TEST_DETECTION_GT) as f:
        gts = f.read().splitlines()
    with open('../detection/products/detections.jsonl') as f:
        dts = f.read().splitlines()
    assert len(gts) == len(dts)

    def gt2array(gt, draw_ignore):
        """Convert a ground-truth annotation into a list of boxes to draw.

        Chinese characters are drawn in red; with ``draw_ignore`` set, the
        ignore regions are added in yellow with empty text.
        """
        color = '#f00'
        color_ignore = '#ff0'
        a = list()
        for char in anno_tools.each_char(gt):
            if char['is_chinese']:
                a.append({
                    'bbox': char['adjusted_bbox'],
                    'text': char['text'],
                    'color': color
                })
        if draw_ignore:
            for char in gt['ignore']:
                a.append({
                    'bbox': char['bbox'],
                    'text': '',
                    'color': color_ignore
                })
        return a

    def dt2array(dtobj, gtobj, draw_ignore, draw_proposal):
        """Match detections against ground truth and build boxes to draw.

        Each detection ends up in one of three states, which selects its
        colour: 0 (no ground truth matched, ``#ff0``), 1 (matched to a ground
        truth, ``#0f0``) or 2 (ignored, ``#0ff``).  State-2 detections are
        only drawn when ``draw_ignore`` is set.  With ``draw_proposal`` set,
        non-overlapping proposals are added in ``#00f``.

        Note that ``dtobj['detections']`` is sorted in place.  The returned
        list is in reverse order of insertion.
        """
        iou_thresh = settings.IOU_THRESH
        charset = set()
        # With ``proposal`` False a detection can only match a ground truth
        # carrying the same text.
        proposal = False

        def in_size(_):
            # Size filter placeholder: every box is accepted.
            return True

        dt = dtobj['detections']
        dt.sort(
            key=lambda o: -o['score']
        )  # sort must be stable, otherwise mAP will be slightly different
        dt = [(o['bbox'], o.get('text'), o['score']) for o in dt]

        ig = [(o['bbox'], None) for o in gtobj['ignore']]
        gt = []
        for char in anno_tools.each_char(gtobj):
            if char['is_chinese']:
                charset.add(char['text'])
                gt.append(
                    (char['adjusted_bbox'], char['text'], char['attributes']))

        # For every detection, collect the candidate ground truths whose
        # overlap exceeds the threshold, and flag detections that overlap an
        # ignore region.
        dt_matches = [[] for i in range(len(dt))]
        dt_ig = [False] * len(dt)
        for i_dt, dtchar in enumerate(dt):
            for i_gt, gtchar in enumerate(gt):
                if proposal or dtchar[1] == gtchar[1]:
                    miou = eval_tools.iou(dtchar[0], gtchar[0])
                    if miou > iou_thresh:
                        dt_matches[i_dt].append((i_gt, miou))
            for igchar in ig:
                # NOTE(review): a_in_b presumably measures how much of the
                # first box lies inside the second - confirm in eval_tools.
                miou = eval_tools.a_in_b(dtchar[0], igchar[0])
                if miou > iou_thresh:
                    dt_ig[i_dt] = True
        # Best-overlapping candidate first.
        for matches in dt_matches:
            matches.sort(
                key=lambda t: -t[1]
            )  # sort must be stable, otherwise you should use key=lambda t: (-t[1], t[0])

        # Initial states: detections start at 0 unless flagged as ignored (2);
        # ground truths start at (0, None), i.e. not yet taken.
        dt_matched = [
            0 if in_size(o[0]) and False == b else 2
            for o, b in zip(dt, dt_ig)
        ]
        gt_taken = [(0, None) if in_size(o[0]) else (2, None) for o in gt]
        # Greedy assignment in descending score order: a detection takes the
        # first candidate ground truth that is still free; a candidate in
        # state 2 marks the detection as ignored instead.
        for i_dt, matches in enumerate(dt_matches):
            for i_gt, _ in matches:
                if 1 != dt_matched[i_dt] and 1 != gt_taken[i_gt][0]:
                    if 0 == gt_taken[i_gt][0]:
                        dt_matched[i_dt] = 1
                        gt_taken[i_gt] = (1, i_dt)
                    else:
                        dt_matched[i_dt] = 2

        a = list()
        minscore = 1.
        colormap = {0: '#ff0', 1: '#0f0', 2: '#0ff'}
        for i in range(len(dt)):
            # Draw at most as many detections as there are ground truths.
            if len(a) >= len(gt):
                break
            bbox, text, score = dt[i]
            taken = dt_matched[i]
            if 2 != taken or draw_ignore:
                # Skip a box that overlaps (in either direction) a box that
                # has already been selected for drawing.
                flag = True
                for o in a:
                    if settings.IOU_THRESH < eval_tools.a_in_b(
                            bbox, o['bbox']
                    ) or settings.IOU_THRESH < eval_tools.a_in_b(
                            o['bbox'], bbox):
                        flag = False
                if flag:
                    a.append({
                        'bbox': bbox,
                        'text': text or '■',
                        'color': colormap[taken]
                    })
                    # Score of the most recently drawn detection; used below
                    # as the lower bound for proposals.
                    minscore = score
        if draw_proposal:
            for o in sorted(dtobj['proposals'], key=lambda o: -o['score']):
                bbox, score = o['bbox'], o['score']
                if score >= minscore:
                    # Accumulated overlap with ignore regions and with boxes
                    # already selected; the proposal is drawn only if the
                    # total stays within the threshold.
                    s = 0
                    for igbbox, _ in ig:
                        s += eval_tools.a_in_b(bbox, igbbox)
                    for o in a:
                        s += max(eval_tools.a_in_b(o['bbox'], bbox),
                                 eval_tools.a_in_b(bbox, o['bbox']))
                    if s <= settings.IOU_THRESH:
                        a.append({'bbox': bbox, 'text': '', 'color': '#00f'})
        return list(reversed(a))

    if selected is None:
        # Default selection: every 200th test image, with a 2048x2048 crop
        # anchored at the origin.
        selected = [(o['image_id'], 0, 0, 2048, 2048)
                    for i, o in enumerate(data_list['test_det'])
                    if i % 200 == 0]
    # Ground-truth drawings are disabled; only detection drawings are made.
    draw_gt = False

    if not os.path.isdir(settings.PRINTTEXT_DRAWING_DIR):
        os.makedirs(settings.PRINTTEXT_DRAWING_DIR)
    # Each task is (input image path, output path, drawing options).
    tasks = []
    for image_id, x, y, w, h in sorted(selected):
        # Position of the image in test_det selects the matching GT/DT lines;
        # raises ValueError for an unknown image_id.
        i = [o['image_id'] for o in data_list['test_det']].index(image_id)
        gt = json.loads(gts[i])
        dt = json.loads(dts[i])
        crop = (x, y, w, h)
        file_name = os.path.join(settings.TEST_IMAGE_DIR, gt['file_name'])
        if draw_gt:
            tasks.append(
                (file_name,
                 os.path.join(
                     settings.PRINTTEXT_DRAWING_DIR,
                     '{}_{}_{}_{}_{}_gt.{}'.format(image_id, crop[0], crop[1],
                                                   crop[2], crop[3], ext)),
                 {
                     'boxes': gt2array(gt, draw_ignore=True),
                     'crop': crop,
                     'place': 'force',
                 }))
        tasks.append(
            (file_name,
             os.path.join(
                 settings.PRINTTEXT_DRAWING_DIR,
                 '{}_{}_{}_{}_{}_dt.{}'.format(image_id, crop[0], crop[1],
                                               crop[2], crop[3], ext)),
             {
                 'boxes': dt2array(dt,
                                   gt,
                                   draw_ignore=False,
                                   draw_proposal=False),
                 'crop': crop,
                 'place': 'force',
             }))
    # Run the drawing tasks in parallel only if print_text advertises that it
    # supports it; otherwise run them one by one.
    if print_text.concurrent:
        common_tools.multithreaded(print_text, tasks, num_thread=cpu_count())
    else:
        for task in tasks:
            print_text(*task)
Ejemplo n.º 10
0
def main():
    assert six.PY3
    random.seed(0)

    polygons = get_polygons()
    print('polygons loaded')

    if not os.path.isfile(settings.DATASET_CROPPED):
        lines = []
        with open(settings.TRAIN) as f:
            lines += [('train', s) for s in f.read().splitlines()]
        with open(settings.VAL) as f:
            lines += [('val', s) for s in f.read().splitlines()]
        with open(settings.TEST_CLASSIFICATION) as f1, open(
                settings.TEST_CLASSIFICATION_GT) as f2:
            prs = f1.read().splitlines()
            gts = f2.read().splitlines()
            lines += [('test_cls', pr, gt) for pr, gt in zip(prs, gts)]
        with open(settings.TEST_DETECTION_GT) as f:
            lines += [('test_det', s) for s in f.read().splitlines()]
        all = [[] for _ in lines]

        def load_train(i):
            if i % 100 == 0:
                print('dataset', i, '/', len(lines))
            line = lines[i]
            if line[0] == 'test_cls':
                prs, gts = line[1:]
                prs, gts = json.loads(prs), json.loads(gts)
                image = cv2.imread(
                    os.path.join(settings.TEST_IMAGE_DIR, prs['file_name']))
                assert image.shape == (prs['height'], prs['width'], 3)
                for pr, gt in zip(prs['proposals'], gts['ground_truth']):
                    cropped = predictions2html.crop(image, pr['adjusted_bbox'],
                                                    64)
                    all[i].append([cropped, gt['attributes']])
            else:
                anno = json.loads(line[1].strip())
                image = cv2.imread(
                    os.path.join(
                        settings.TRAINVAL_IMAGE_DIR if line[0]
                        in {'train', 'val'} else settings.TEST_IMAGE_DIR,
                        anno['file_name']))
                assert image.shape == (anno['height'], anno['width'], 3)
                for char in anno_tools.each_char(anno):
                    if not char['is_chinese']:
                        continue
                    cropped = predictions2html.crop(image,
                                                    char['adjusted_bbox'], 64)
                    all[i].append([cropped, char['attributes']])

        common_tools.multithreaded(load_train, range(len(lines)), num_thread=8)
        all = common_tools.reduce_sum(all)
        with open(settings.DATASET_CROPPED, 'wb') as f:
            cPickle.dump(all, f)

    with open(settings.DATASET_CROPPED, 'rb') as f:
        all = cPickle.load(f)
    print('cropped loaded')

    belongs = defaultdict(list)
    for i, (_, attrs) in enumerate(all):
        for attr in settings.ATTRIBUTES:
            if attr in attrs:
                belongs[attr].append(i)
            else:
                belongs['not-{}'.format(attr)].append(i)
    x, y, cropsize = 25, 20, 64
    forbidden = {
        'bgcomplex': {
            29784, 30793, 54066, 60905, 80018, 85910, 92040, 93009, 108277,
            117829, 145939, 159277, 166330, 166891, 168897, 174142, 181156,
            181461, 185076, 197249, 197278, 197390, 197483, 197736, 233084,
            241839, 267090, 278862, 282057, 304974, 305250, 309420, 309505,
            311269, 312195, 317930, 318505, 366441, 366568, 366798, 367485,
            367889, 369698, 371721, 372093, 372993, 373129, 378209, 438060,
            438222, 451115, 451181, 452219, 476071, 494493, 511841, 537930,
            568693, 591312, 593805, 604174, 604482, 607029, 613924, 620551,
            624535, 630988, 644721, 662645, 685127, 692149, 697909, 713403,
            718308, 722478, 731465, 744738, 752377, 756651, 756983, 757500,
            838142, 840193, 853782, 868967, 870828, 895909, 897855, 914324,
            924949, 926112, 948187, 949547, 951922, 966718, 990845
        },
        'distorted': {
            4272, 11545, 31628, 66623, 77833, 80068, 82815, 101418, 101429,
            101461, 101475, 102750, 103990, 106226, 122330, 122629, 122864,
            127239, 137058, 138722, 165570, 215740, 215956, 241371, 244889,
            300272, 311629, 351641, 354914, 355965, 407871, 493919, 516472,
            520137, 531795, 545332, 560774, 564422, 568087, 568530, 580687,
            580940, 584408, 584961, 587857, 595353, 605587, 635609, 646782,
            659324, 674405, 676416, 677631, 763798, 776019, 794110, 799670,
            799845, 817479, 827616, 891523, 903911, 909307, 925630, 938309,
            942272, 951543, 971224, 974709, 993244, 999040, 1000422
        },
        'handwritten': {
            329601, 408868, 512096, 821406, 978196, 982861, 982872, 982875,
            1008997
        },
        'not-bgcomplex': {
            9246, 15606, 16655, 36299, 61621, 68809, 262313, 270993, 272949,
            282797, 310460, 314072, 352096, 399965, 403986, 405162, 606926,
            677085, 703288, 779237, 822430, 827693, 850763, 875473, 915644,
            922101, 933638, 938877, 990631, 996811, 1010739
        },
        'not-distorted': {
            89019, 288330, 289995, 303808, 316933, 420413, 489284, 534030,
            585589, 590783, 639562, 652671, 687953, 774776, 845746, 886599,
            887318, 955774, 957490
        },
        'not-handwritten': set(),
        'not-occluded': {
            4608, 5314, 209656, 242426, 288072, 323822, 434571, 500784, 567581,
            569271, 666036, 693361, 854716
        },
        'not-raised': {
            32737, 196852, 301792, 320325, 476295, 534636, 652281, 704042,
            910982, 915965, 950785
        },
        'not-wordart': {
            3645, 50260, 50296, 57502, 82294, 96621, 109453, 204976, 262595,
            269395, 284668, 350879, 382009, 414268, 509196, 513846, 516881,
            524924, 557900, 567024, 582488, 644754, 647845, 670733, 672294,
            683110, 685146, 697891, 704277, 711269, 718931, 731280, 734266,
            757216, 792544, 805942, 806092, 814184, 821115, 826304, 836793,
            881432, 882749, 882764, 887005, 890080, 898900, 918417, 941640,
            944208
        },
        'occluded': {
            873, 5730, 8228, 13395, 15541, 41004, 47143, 51186, 61060, 74182,
            123631, 124450, 135852, 147417, 157442, 172524, 184918, 185294,
            190257, 190412, 190643, 192596, 197677, 224041, 226001, 227348,
            227590, 230286, 232497, 235413, 245273, 246929, 248790, 252125,
            257097, 272903, 272904, 277566, 284177, 284181, 306556, 309870,
            310208, 310823, 312420, 313796, 315943, 320599, 324504, 325094,
            345861, 348978, 350766, 355099, 355197, 361102, 363122, 364891,
            371829, 375936, 378402, 383816, 385305, 385587, 406896, 429101,
            441511, 457760, 460850, 464921, 532963, 532972, 537419, 537840,
            566316, 567464, 570925, 575854, 576324, 580475, 580786, 582479,
            587189, 601068, 612825, 627320, 629511, 645262, 648763, 660725,
            670146, 671016, 676628, 702305, 728385, 734681, 735671, 745464,
            747664, 767248, 778261, 779184, 779315, 781154, 786504, 786792,
            789390, 797219, 799166, 810608, 836407, 837725, 843622, 843863,
            851364, 864668, 868260, 870504, 872464, 888636, 892626, 939872,
            940036, 941901, 956912, 976644, 979918, 992558, 1000251, 1006844
        },
        'raised': {
            6962, 58512, 60516, 61103, 80469, 85491, 94437, 94524, 116556,
            125124, 165233, 185106, 222793, 231913, 234829, 238321, 244816,
            253130, 264975, 275946, 275958, 282919, 293993, 294043, 302357,
            305192, 308649, 315404, 316111, 320636, 392341, 429254, 431867,
            431870, 432207, 444736, 447251, 486993, 488383, 510305, 511770,
            515521, 537062, 537275, 543490, 566084, 568212, 570926, 574094,
            575914, 576594, 580506, 583107, 586520, 701097, 703253, 735409,
            748760, 754485, 757556, 757604, 768041, 776067, 791019, 831144,
            853508, 884373, 888685, 899910, 903512, 903602, 939832, 952561,
            956100, 965107, 968079, 974788, 975073, 983868, 1010585, 1011033
        },
        'wordart': {
            31715, 39716, 44876, 74919, 75133, 104696, 108556, 110006, 113592,
            117784, 140866, 143122, 143125, 145951, 149959, 150049, 150213,
            150279, 150281, 150428, 150490, 151687, 154129, 156874, 159778,
            159895, 160120, 160247, 160276, 160283, 162846, 163105, 163216,
            164079, 164761, 166267, 168230, 171234, 171790, 172286, 172298,
            172308, 172335, 175266, 175334, 175831, 176590, 176806, 177410,
            177554, 179221, 179305, 180561, 181293, 181310, 182794, 183106,
            183209, 183290, 184192, 185667, 186031, 186403, 186514, 187030,
            187343, 190446, 194951, 197172, 198183, 198237, 200187, 201238,
            201769, 202088, 202370, 202382, 203726, 207168, 207507, 208267,
            208991, 212503, 212525, 213482, 223470, 226107, 227287, 232970,
            233894, 233940, 234845, 236981, 237720, 241485, 242131, 244615,
            244620, 244624, 245553, 246792, 247779, 247807, 251942, 254875,
            255095, 258723, 259914, 259966, 262937, 263127, 263146, 263239,
            267506, 268151, 268235, 276946, 278369, 283787, 290607, 292262,
            292299, 292365, 293734, 294791, 296802, 297370, 298140, 298342,
            305806, 306567, 306734, 307309, 310559, 310642, 312263, 312802,
            327995, 329859, 335910, 337662, 338453, 343304, 353405, 353413,
            353893, 354529, 355161, 355494, 355623, 355691, 355774, 355950,
            355954, 356051, 356179, 356242, 356497, 356706, 357194, 357264,
            373665, 373717, 376825, 404495, 419551, 422019, 422612, 427949,
            433690, 435115, 435506, 436128, 436294, 436315, 436376, 436654,
            437268, 437576, 437642, 438235, 438558, 439054, 439080, 439472,
            439674, 441572, 455623, 476376, 481154, 485066, 527590, 529010,
            531947, 538763, 539110, 540596, 542057, 542098, 545024, 546352,
            551044, 556749, 560492, 560528, 565064, 567446, 585918, 611633,
            675438, 676105, 678361, 678431, 682537, 683559, 683671, 684947,
            714507, 747645, 748163, 749913, 764697, 770430, 772118, 772585,
            774869, 774876, 776310, 777338, 796911, 798872, 817377, 826692,
            833591, 838845, 840866, 876796, 878192, 883355, 887083, 887166,
            887344, 894624, 903599, 912879, 915091, 922486, 935850, 945271,
            945286, 946310, 947397, 951044, 951139, 951591, 952854, 952928,
            953157, 954346, 959683, 960200, 962644, 968523, 977304, 981412,
            988359, 990304, 998700, 1000188, 1003561
        },
    }
    for attr, imgset in sorted(belongs.items()):
        random.shuffle(imgset)
        for id in forbidden[attr]:
            imgset.remove(id)
        imgset = list(filter(lambda id: min(all[id][0].shape[:2]) > 16,
                             imgset))
        if 'distorted' in attr:
            imgset = list(
                filter(lambda id: polygon_in_center(polygons[id], 512),
                       imgset))
        else:
            imgset = list(
                filter(lambda id: polygon_in_center(polygons[id], 10), imgset))
        imgset = imgset[:x * y]
        if not os.path.isdir(settings.ATTR_SAMPLE_DIR):
            os.makedirs(settings.ATTR_SAMPLE_DIR)
        file_path = os.path.join(settings.ATTR_SAMPLE_DIR,
                                 '{}.png'.format(attr))
        print(file_path)
        canvas = np.zeros((y * cropsize, x * cropsize, 3), dtype=np.uint8)
        for i, j in enumerate(imgset):
            cropped = all[j][0]
            resized = cv2.resize(cropped, (cropsize, cropsize))
            canvas[(i // x) * cropsize:(i // x + 1) * cropsize,
                   (i % x) * cropsize:(i % x + 1) * cropsize] = resized
        cv2.imwrite(file_path, canvas)
        with open(
                os.path.join(settings.ATTR_SAMPLE_DIR, '{}.json'.format(attr)),
                'w') as f:
            json.dump(imgset, f)
Ejemplo n.º 11
0
def encode(s1,
           s2,
           mode,
           version,
           level1,
           level2,
           image=None,
           timeout=100,
           debug=False,
           *,
           different_mask=0,
           do_reduce=1,
           merge_bound_thresh=300,
           lamb=.1,
           seed=0):
    """Run the external ``main.exe`` solver on two strings and pick the best run.

    Both strings are encoded with ``qrhelper``; the solver executable located
    next to this module is then launched once per mask-id pair and the run
    with the lowest cost (see the nested ``cost`` function) is returned.

    Args:
        s1: First payload; echoed back as ``'string_left'``.
        s2: Second payload; echoed back as ``'string_right'``.
        mode: ``'art'`` additionally feeds a binarised ``n x n`` image to the
            solver; any other value skips the image.
        version: Lower bound for the version; the version actually used is the
            maximum of this value and the minimal versions ``qrhelper``
            reports for each payload.
        level1: Level string for ``s1`` (forwarded to ``qrhelper`` and to the
            solver) -- presumably the QR error-correction level.
        level2: Level string for ``s2``.
        image: Raw bytes of an image file, used only when ``mode == 'art'``.
            When falsy, ``'qrmerge/logo.png'`` (relative to the current
            working directory) is opened instead.
        timeout: Seconds after which each solver process is killed.
        debug: When true, the solver's stderr is inherited and one line per
            mask pair is printed.
        different_mask: When truthy all 8 x 8 mask-id pairs are tried,
            otherwise only the 8 pairs with equal mask ids.
        do_reduce: Integer forwarded to the solver in its header line.
        merge_bound_thresh: Integer forwarded to the solver in its header
            line.
        lamb: Float forwarded to the solver in its header line.
        seed: Integer forwarded to the solver in its header line.

    Returns:
        A dict. When even the best-ranked run failed it is
        ``{'error': 1, 'msg': 'solver timeout', 'time_cost': ...}``;
        otherwise ``'error'`` is 0 and the dict carries the chosen version,
        levels, mask ids, block lengths, the six ``n x n`` matrices parsed
        from the solver output and timing information.

    Raises:
        AssertionError: If fewer results than tasks were collected from the
            worker threads.
    """
    mode1, encode1 = qrhelper.mode_and_encode(s1)
    mode2, encode2 = qrhelper.mode_and_encode(s2)
    v1 = qrhelper.minimal_version(mode1, level1, encode1)
    v2 = qrhelper.minimal_version(mode2, level2, encode2)
    ver = max(v1, v2, int(version))
    code1 = qrhelper.indicators(ver, mode1, len(s1)) + encode1
    code2 = qrhelper.indicators(ver, mode2, len(s2)) + encode2

    # Side length of the symbol for the chosen version.
    n = 17 + ver * 4
    groups1, ecc_num1 = qrhelper.group_and_ecc(ver, level1)
    groups2, ecc_num2 = qrhelper.group_and_ecc(ver, level2)

    _make_exe()

    if mode == 'art':
        if image:
            img = Image.open(io.BytesIO(image))
        else:
            img = Image.open('qrmerge/logo.png')
        img = img.convert('L')
        img = img.resize((n, n), resample=Image.BILINEAR)
        img = np.array(img.getdata()).reshape((n, n))
        # Pixels at or below mid-gray become 1. The builtin ``int`` replaces
        # the ``np.int`` alias, which was removed in NumPy 1.24.
        img = np.array(img <= 127, dtype=int).tolist()
    start_time = time.time()

    # Use one absolute directory for both the executable and its working
    # directory; ``os.path.dirname(__file__)`` alone may be '' for a module
    # loaded through a bare relative path.
    module_dir = os.path.abspath(os.path.dirname(__file__))
    exe_path = os.path.join(module_dir, 'main.exe')

    class Result():
        pass

    def encode_once(maskid1, maskid2):
        """Run the solver for one mask-id pair and return a ``Result``."""
        lines = [
            '{:d} {:s} {:s} {:d} {:d} {:d} {:d} {:d} {:f} {:d}'.format(
                ver, level1, level2, maskid1, maskid2, int(mode == 'art'),
                do_reduce, merge_bound_thresh, lamb, seed),
            code1,
            code2,
        ]
        if mode == 'art':
            for row in img:
                lines.append(' '.join([str(b) for b in row]))
        res = Result()
        res.maskid1 = maskid1
        res.maskid2 = maskid2

        # One-element list so the timer callback can flag the timeout.
        timed_out = [False]

        def timeout_target(proc):
            timed_out[0] = True
            proc.kill()

        # DEVNULL needs no explicit close, so nothing leaks on the early
        # return / assertion paths below. ``None`` inherits our stderr.
        stderr = None if debug else subprocess.DEVNULL
        p = subprocess.Popen([exe_path],
                             cwd=module_dir,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=stderr)
        timer = threading.Timer(timeout, timeout_target, [p])
        timer.start()
        try:
            output, _ = p.communicate('\n'.join(lines).encode())
        finally:
            # Never leave a pending timer behind, even if communicate fails.
            timer.cancel()
        if timed_out[0]:
            res.error = 1
            res.msg = 'time out'
            return res
        assert 0 == p.wait(), p.returncode
        output = output.decode()

        # Line 0: status header; lines 1-2: per-block error counts;
        # remainder: the matrices, kept as one string until a winner is known.
        output = output.split('\n', 3)
        header = output[0].split()
        retcode = int(header[0])
        if retcode != 0:
            res.error = 1
            res.msg = 'mask id not match'
            return res
        res.error = 0
        (error_format_bits_1, error_format_bits_2, max_merge_bound,
         cpp_time_cost) = header[1:]
        res.error_format_bits = [error_format_bits_1, error_format_bits_2]
        res.max_merge_bound = int(max_merge_bound)
        res.cpp_time_cost = float(cpp_time_cost)
        res.err1 = [int(x) for x in output[1].split()]
        res.err2 = [int(x) for x in output[2].split()]
        res.matrixes = output[3]
        return res

    q = queue.Queue()

    def target(maskid1, maskid2):
        q.put(encode_once(maskid1, maskid2))

    if different_mask:
        tasks = [(maskid1, maskid2) for maskid1 in range(8)
                 for maskid2 in range(8)]
    else:
        tasks = [(maskid, maskid) for maskid in range(8)]
    common_tools.multithreaded(target,
                               tasks,
                               num_thread=min(8, len(tasks),
                                              multiprocessing.cpu_count()))
    results = []
    while not q.empty():
        results.append(q.get())
    assert len(results) == len(tasks)
    # Worker completion order is arbitrary; sort for deterministic output.
    results.sort(key=lambda res: (res.maskid1, res.maskid2))
    max_merge_bounds = []
    cpp_time_costs = []
    for res in results:
        if not res.error:
            max_merge_bounds.append(res.max_merge_bound)
            cpp_time_costs.append(res.cpp_time_cost)
        if debug:
            if res.error:
                print('maskid', res.maskid1, res.maskid2, res.msg)
            else:
                print('maskid', res.maskid1, res.maskid2, '#misencoded',
                      res.err1, res.err2)

    rank = list(range(len(results)))

    def cost(result_id):
        """Sort key: failed runs last, then by worst-case recovery margin."""
        res = results[result_id]
        if res.error:
            return (1, )

        def recovery_ratio(groups, ecc_num, err):
            # Smallest remaining correction margin over all blocks, relative
            # to the block length; the first groups[0] blocks have groups[1]
            # data codewords, the rest have groups[3].
            x = fractions.Fraction(1, 1)
            for a in err[:groups[0]]:
                x = min(
                    x, fractions.Fraction(ecc_num // 2 - a,
                                          groups[1] + ecc_num))
            for a in err[groups[0]:]:
                x = min(
                    x, fractions.Fraction(ecc_num // 2 - a,
                                          groups[3] + ecc_num))
            return x

        # NOTE(review): error_format_bits holds the raw string fields from
        # the solver, so max() compares them lexicographically -- confirm
        # this is intended if the fields are multi-digit counts.
        return (0, -min(recovery_ratio(groups1, ecc_num1, res.err1),
                        recovery_ratio(groups2, ecc_num2, res.err2)),
                max(res.error_format_bits),
                0 if res.maskid1 == res.maskid2 else 1,
                sorted(res.err1 + res.err2, reverse=True))

    rank.sort(key=cost)
    res = results[rank[0]]
    time_cost = time.time() - start_time
    if res.error:
        # The best-ranked run failed, hence every run failed.
        # NOTE(review): the message is fixed even when res.msg is
        # 'mask id not match'; kept because callers may match on it.
        return {
            'error': 1,
            'msg': 'solver timeout',
            'time_cost': time_cost,
        }

    block_length1 = [
        x + ecc_num1
        for x in [groups1[1]] * groups1[0] + [groups1[3]] * groups1[2]
    ]
    block_length2 = [
        x + ecc_num2
        for x in [groups2[1]] * groups2[0] + [groups2[3]] * groups2[2]
    ]
    matrixes = res.matrixes.split('\n')

    def matrix_at(index):
        """Parse the index-th consecutive n-row integer matrix."""
        rows = matrixes[index * n:(index + 1) * n]
        return [[int(c) for c in line.split()] for line in rows]

    m1 = matrix_at(0)
    m2 = matrix_at(1)
    l1 = matrix_at(2)
    l2 = matrix_at(3)
    o1 = matrix_at(4)
    o2 = matrix_at(5)

    return {
        'error': 0,
        'version': ver,
        'level': [level1, level2],
        'mask_id': [res.maskid1, res.maskid2],
        'ecc_num': [ecc_num1, ecc_num2],
        'block_length': [block_length1, block_length2],
        'timeout': timeout,
        'is_art': mode == 'art',
        'different_mask': different_mask,
        'do_reduce': do_reduce,
        'max_merge_bounds': max_merge_bounds,
        'seed': seed,
        'cpp_time_costs': cpp_time_costs,
        'time_cost': time_cost,
        'string_left': s1,
        'string_right': s2,
        'reference_left': m1,
        'reference_right': m2,
        'layer_down': l1,
        'layer_up': l2,
        'qrcode_left': o1,
        'qrcode_right': o2,
        'error_left': res.err1,
        'error_right': res.err2,
        'error_format_bits': res.error_format_bits,
    }