Exemple #1
0
def _dump_pickle_chunked(obj, path, max_bytes=2**31 - 1):
    """Pickle *obj* and write the result to *path* in bounded pieces.

    The object is serialized in memory with ``pickle.HIGHEST_PROTOCOL`` and
    then written slice by slice, so that no single ``write`` call receives
    more than ``max_bytes`` bytes.
    NOTE(review): presumably this works around platforms where one write of
    2 GiB or more fails -- confirm before changing the chunk size.

    Args:
        obj: Any picklable object.
        path: Destination file path; opened in binary write mode.
        max_bytes: Upper bound on the size of each individual write.
    """
    payload = pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)
    # Slicing a memoryview does not copy, whereas slicing the bytes object
    # directly would duplicate up to ``max_bytes`` bytes per chunk.
    view = memoryview(payload)
    with open(path, 'wb') as f:
        for start in range(0, len(view), max_bytes):
            f.write(view[start:start + max_bytes])


def main():
    """Crop characters from the trainval and test images and pickle them.

    Trainval: every line of ``settings.TRAIN`` and ``settings.VAL`` is parsed
    as JSON, its image is read from ``settings.TRAINVAL_IMAGE_DIR`` and each
    character flagged ``is_chinese`` is cropped and stored as
    ``[cropped, text]``. The flattened list is written to
    ``settings.TRAINVAL_PICKLE``.

    Test: every line of ``settings.TEST_CLASSIFICATION`` is parsed as JSON,
    its image is read from ``settings.TEST_IMAGE_DIR`` and each proposal is
    cropped and stored as ``[cropped, None]``. The flattened list is written
    to ``settings.TEST_PICKLE``.
    """
    assert six.PY3

    with open(settings.TRAIN) as f:
        trainval_lines = f.read().splitlines()
    with open(settings.VAL) as f:
        trainval_lines += f.read().splitlines()
    # One private bucket per annotation line, so worker threads never append
    # to the same list.
    train = [[] for _ in trainval_lines]

    def load_train(i):
        """Append the Chinese-character crops of annotation ``i`` to ``train[i]``."""
        if i % 100 == 0:
            print('trainval', i, '/', len(trainval_lines))
        anno = json.loads(trainval_lines[i].strip())
        image = misc.imread(
            os.path.join(settings.TRAINVAL_IMAGE_DIR, anno['file_name']))
        assert image.shape == (anno['height'], anno['width'], 3)
        for char in anno_tools.each_char(anno):
            if not char['is_chinese']:
                continue
            cropped = crop(image, char['adjusted_bbox'])
            train[i].append([cropped, char['text']])

    common_tools.multithreaded(
        load_train, range(len(trainval_lines)), num_thread=8)
    train = common_tools.reduce_sum(train)
    _dump_pickle_chunked(train, settings.TRAINVAL_PICKLE)
    train = None  # release memory before the test set is loaded

    with open(settings.TEST_CLASSIFICATION) as f:
        test_lines = f.read().splitlines()
    test = [[] for _ in test_lines]

    def load_test(i):
        """Append the crop of every proposal of annotation ``i`` to ``test[i]``."""
        if i % 100 == 0:
            print('test', i, '/', len(test_lines))
        anno = json.loads(test_lines[i].strip())
        image = misc.imread(
            os.path.join(settings.TEST_IMAGE_DIR, anno['file_name']))
        # The shape check used for trainval images is disabled here:
        # assert image.shape == (anno['height'], anno['width'], 3)
        for char in anno['proposals']:
            cropped = crop(image, char['adjusted_bbox'])
            # No label is stored for test proposals.
            test[i].append([cropped, None])

    common_tools.multithreaded(load_test, range(len(test_lines)), num_thread=8)
    test = common_tools.reduce_sum(test)
    _dump_pickle_chunked(test, settings.TEST_PICKLE)
    test = None  # release memory
Exemple #2
0
def do_nms_sort(unmerged, nms):
    """Greedily filter overlapping proposals, per image and per category.

    For every image the proposals are grouped by ``'cate_id'``. Within a
    group they are visited in descending ``'prob'`` order; a candidate is
    kept when the running sum of ``eval_tools.a_in_b`` between its bbox and
    the bboxes of the already kept proposals does not exceed ``nms``. A group
    stops collecting once it holds ``settings.MAX_DET_PER_IMAGE`` proposals.

    Args:
        unmerged: Mapping from image id to an iterable of proposal dicts
            carrying at least ``'cate_id'``, ``'prob'`` and ``'bbox'``.
        nms: Threshold on the accumulated overlap.

    Returns:
        A ``defaultdict(list)`` mapping each image id to
        ``common_tools.reduce_sum`` of the kept per-category lists.
    """
    merged = defaultdict(list)
    for seen, (image_id, proposals) in enumerate(unmerged.items()):
        if seen % 200 == 0:
            print('nms sort', seen, '/', len(unmerged))

        # Bucket this image's proposals by category.
        by_cate = defaultdict(list)
        for proposal in proposals:
            by_cate[proposal['cate_id']].append(proposal)

        kept_by_cate = {}
        for cate_id, group in by_cate.items():
            kept = []
            for candidate in sorted(group, key=lambda o: -o['prob']):
                overlap = 0
                for earlier in kept:
                    overlap += eval_tools.a_in_b(candidate['bbox'],
                                                 earlier['bbox'])
                    if overlap > nms:
                        # Already suppressed; no need to look further.
                        break
                if overlap > nms:
                    continue
                kept.append(candidate)
                if len(kept) >= settings.MAX_DET_PER_IMAGE:
                    break
            kept_by_cate[cate_id] = kept

        merged[image_id] = common_tools.reduce_sum(kept_by_cate.values())
    return merged
def main():
    """Load every image listed in the synthetic tags file and pickle them.

    Each line of the tags file is split on whitespace: the first field is
    passed to ``misc.imread`` as the image path and the second field must be
    a single character, stored as the label. The resulting
    ``[image, label]`` pairs are flattened with ``common_tools.reduce_sum``
    and dumped to ``self_synth.pkl``.
    """
    assert six.PY3

    with open('../data/icpr_data_char_1/train_15962.tags') as tags_file:
        lines = tags_file.read().splitlines()

    # One bucket per input line; each worker call fills only its own bucket.
    samples = [[] for _ in lines]

    def load_train(i):
        """Read the image named on line ``i`` and store it with its label."""
        if i % 100 == 0:
            print('trainval', i, '/', len(lines))
        fields = lines[i].strip().split()
        image = misc.imread(fields[0])
        label = fields[1]
        assert len(label) == 1
        samples[i].append([image, label])

    indices = range(len(lines))
    common_tools.multithreaded(load_train, indices, num_thread=8)
    samples = common_tools.reduce_sum(samples)
    with open('self_synth.pkl', 'wb') as out_file:
        cPickle.dump(samples, out_file)
    samples = None  # release memory
Exemple #4
0
def main():
    """Build (or reuse) the cropped-character cache and write sample sheets.

    If ``settings.DATASET_CROPPED`` does not exist, the annotation lines of
    the train, val, test-classification (proposals paired with ground truth)
    and test-detection files are loaded, characters are cropped with
    ``predictions2html.crop(..., 64)`` and the flat list of
    ``[cropped_image, attributes]`` pairs is pickled to that path.

    The cache is then loaded. For every attribute in ``settings.ATTRIBUTES``
    and its ``not-<attr>`` complement, up to ``x * y`` sample indices are
    selected, their crops are tiled into ``<attr>.png`` and the selected
    indices are saved as ``<attr>.json``, both under
    ``settings.ATTR_SAMPLE_DIR``.
    """
    assert six.PY3
    # Fixed seed so that the random.shuffle calls below are repeatable.
    random.seed(0)

    # NOTE(review): polygons is indexed with the same sample indices as `all`
    # further down (polygons[id]) -- presumably one polygon per cropped
    # sample; confirm against get_polygons.
    polygons = get_polygons()
    print('polygons loaded')

    if not os.path.isfile(settings.DATASET_CROPPED):
        # Each entry is a tuple whose first element tags the source split.
        lines = []
        with open(settings.TRAIN) as f:
            lines += [('train', s) for s in f.read().splitlines()]
        with open(settings.VAL) as f:
            lines += [('val', s) for s in f.read().splitlines()]
        with open(settings.TEST_CLASSIFICATION) as f1, open(
                settings.TEST_CLASSIFICATION_GT) as f2:
            prs = f1.read().splitlines()
            gts = f2.read().splitlines()
            # Proposal lines and ground-truth lines are paired by position.
            lines += [('test_cls', pr, gt) for pr, gt in zip(prs, gts)]
        with open(settings.TEST_DETECTION_GT) as f:
            lines += [('test_det', s) for s in f.read().splitlines()]
        # One bucket per line; each load_train(i) call appends only to all[i].
        all = [[] for _ in lines]

        def load_train(i):
            """Load the image for ``lines[i]`` and append its crops to ``all[i]``."""
            if i % 100 == 0:
                print('dataset', i, '/', len(lines))
            line = lines[i]
            if line[0] == 'test_cls':
                prs, gts = line[1:]
                prs, gts = json.loads(prs), json.loads(gts)
                image = cv2.imread(
                    os.path.join(settings.TEST_IMAGE_DIR, prs['file_name']))
                assert image.shape == (prs['height'], prs['width'], 3)
                # Crop at the proposal's bbox; attributes come from the
                # ground-truth entry at the same position.
                for pr, gt in zip(prs['proposals'], gts['ground_truth']):
                    cropped = predictions2html.crop(image, pr['adjusted_bbox'],
                                                    64)
                    all[i].append([cropped, gt['attributes']])
            else:
                anno = json.loads(line[1].strip())
                # 'train' and 'val' images live in the trainval directory;
                # anything else ('test_det') is read from the test directory.
                image = cv2.imread(
                    os.path.join(
                        settings.TRAINVAL_IMAGE_DIR if line[0]
                        in {'train', 'val'} else settings.TEST_IMAGE_DIR,
                        anno['file_name']))
                assert image.shape == (anno['height'], anno['width'], 3)
                for char in anno_tools.each_char(anno):
                    # Only characters flagged as Chinese are kept.
                    if not char['is_chinese']:
                        continue
                    cropped = predictions2html.crop(image,
                                                    char['adjusted_bbox'], 64)
                    all[i].append([cropped, char['attributes']])

        common_tools.multithreaded(load_train, range(len(lines)), num_thread=8)
        all = common_tools.reduce_sum(all)
        with open(settings.DATASET_CROPPED, 'wb') as f:
            cPickle.dump(all, f)

    # The cache is always re-read from disk, even when it was just written.
    with open(settings.DATASET_CROPPED, 'rb') as f:
        all = cPickle.load(f)
    print('cropped loaded')

    # Map each attribute name to the indices of samples that have it, and
    # 'not-<attr>' to the indices of samples that lack it.
    belongs = defaultdict(list)
    for i, (_, attrs) in enumerate(all):
        for attr in settings.ATTRIBUTES:
            if attr in attrs:
                belongs[attr].append(i)
            else:
                belongs['not-{}'.format(attr)].append(i)
    # Sheet layout: x columns by y rows of cropsize-pixel square tiles.
    x, y, cropsize = 25, 20, 64
    # Sample indices (positions in `all`) removed from each group before
    # sampling. NOTE(review): presumably hand-picked after inspecting earlier
    # output; they only make sense for the dataset order they were chosen
    # against -- confirm before regenerating the cache.
    forbidden = {
        'bgcomplex': {
            29784, 30793, 54066, 60905, 80018, 85910, 92040, 93009, 108277,
            117829, 145939, 159277, 166330, 166891, 168897, 174142, 181156,
            181461, 185076, 197249, 197278, 197390, 197483, 197736, 233084,
            241839, 267090, 278862, 282057, 304974, 305250, 309420, 309505,
            311269, 312195, 317930, 318505, 366441, 366568, 366798, 367485,
            367889, 369698, 371721, 372093, 372993, 373129, 378209, 438060,
            438222, 451115, 451181, 452219, 476071, 494493, 511841, 537930,
            568693, 591312, 593805, 604174, 604482, 607029, 613924, 620551,
            624535, 630988, 644721, 662645, 685127, 692149, 697909, 713403,
            718308, 722478, 731465, 744738, 752377, 756651, 756983, 757500,
            838142, 840193, 853782, 868967, 870828, 895909, 897855, 914324,
            924949, 926112, 948187, 949547, 951922, 966718, 990845
        },
        'distorted': {
            4272, 11545, 31628, 66623, 77833, 80068, 82815, 101418, 101429,
            101461, 101475, 102750, 103990, 106226, 122330, 122629, 122864,
            127239, 137058, 138722, 165570, 215740, 215956, 241371, 244889,
            300272, 311629, 351641, 354914, 355965, 407871, 493919, 516472,
            520137, 531795, 545332, 560774, 564422, 568087, 568530, 580687,
            580940, 584408, 584961, 587857, 595353, 605587, 635609, 646782,
            659324, 674405, 676416, 677631, 763798, 776019, 794110, 799670,
            799845, 817479, 827616, 891523, 903911, 909307, 925630, 938309,
            942272, 951543, 971224, 974709, 993244, 999040, 1000422
        },
        'handwritten': {
            329601, 408868, 512096, 821406, 978196, 982861, 982872, 982875,
            1008997
        },
        'not-bgcomplex': {
            9246, 15606, 16655, 36299, 61621, 68809, 262313, 270993, 272949,
            282797, 310460, 314072, 352096, 399965, 403986, 405162, 606926,
            677085, 703288, 779237, 822430, 827693, 850763, 875473, 915644,
            922101, 933638, 938877, 990631, 996811, 1010739
        },
        'not-distorted': {
            89019, 288330, 289995, 303808, 316933, 420413, 489284, 534030,
            585589, 590783, 639562, 652671, 687953, 774776, 845746, 886599,
            887318, 955774, 957490
        },
        'not-handwritten': set(),
        'not-occluded': {
            4608, 5314, 209656, 242426, 288072, 323822, 434571, 500784, 567581,
            569271, 666036, 693361, 854716
        },
        'not-raised': {
            32737, 196852, 301792, 320325, 476295, 534636, 652281, 704042,
            910982, 915965, 950785
        },
        'not-wordart': {
            3645, 50260, 50296, 57502, 82294, 96621, 109453, 204976, 262595,
            269395, 284668, 350879, 382009, 414268, 509196, 513846, 516881,
            524924, 557900, 567024, 582488, 644754, 647845, 670733, 672294,
            683110, 685146, 697891, 704277, 711269, 718931, 731280, 734266,
            757216, 792544, 805942, 806092, 814184, 821115, 826304, 836793,
            881432, 882749, 882764, 887005, 890080, 898900, 918417, 941640,
            944208
        },
        'occluded': {
            873, 5730, 8228, 13395, 15541, 41004, 47143, 51186, 61060, 74182,
            123631, 124450, 135852, 147417, 157442, 172524, 184918, 185294,
            190257, 190412, 190643, 192596, 197677, 224041, 226001, 227348,
            227590, 230286, 232497, 235413, 245273, 246929, 248790, 252125,
            257097, 272903, 272904, 277566, 284177, 284181, 306556, 309870,
            310208, 310823, 312420, 313796, 315943, 320599, 324504, 325094,
            345861, 348978, 350766, 355099, 355197, 361102, 363122, 364891,
            371829, 375936, 378402, 383816, 385305, 385587, 406896, 429101,
            441511, 457760, 460850, 464921, 532963, 532972, 537419, 537840,
            566316, 567464, 570925, 575854, 576324, 580475, 580786, 582479,
            587189, 601068, 612825, 627320, 629511, 645262, 648763, 660725,
            670146, 671016, 676628, 702305, 728385, 734681, 735671, 745464,
            747664, 767248, 778261, 779184, 779315, 781154, 786504, 786792,
            789390, 797219, 799166, 810608, 836407, 837725, 843622, 843863,
            851364, 864668, 868260, 870504, 872464, 888636, 892626, 939872,
            940036, 941901, 956912, 976644, 979918, 992558, 1000251, 1006844
        },
        'raised': {
            6962, 58512, 60516, 61103, 80469, 85491, 94437, 94524, 116556,
            125124, 165233, 185106, 222793, 231913, 234829, 238321, 244816,
            253130, 264975, 275946, 275958, 282919, 293993, 294043, 302357,
            305192, 308649, 315404, 316111, 320636, 392341, 429254, 431867,
            431870, 432207, 444736, 447251, 486993, 488383, 510305, 511770,
            515521, 537062, 537275, 543490, 566084, 568212, 570926, 574094,
            575914, 576594, 580506, 583107, 586520, 701097, 703253, 735409,
            748760, 754485, 757556, 757604, 768041, 776067, 791019, 831144,
            853508, 884373, 888685, 899910, 903512, 903602, 939832, 952561,
            956100, 965107, 968079, 974788, 975073, 983868, 1010585, 1011033
        },
        'wordart': {
            31715, 39716, 44876, 74919, 75133, 104696, 108556, 110006, 113592,
            117784, 140866, 143122, 143125, 145951, 149959, 150049, 150213,
            150279, 150281, 150428, 150490, 151687, 154129, 156874, 159778,
            159895, 160120, 160247, 160276, 160283, 162846, 163105, 163216,
            164079, 164761, 166267, 168230, 171234, 171790, 172286, 172298,
            172308, 172335, 175266, 175334, 175831, 176590, 176806, 177410,
            177554, 179221, 179305, 180561, 181293, 181310, 182794, 183106,
            183209, 183290, 184192, 185667, 186031, 186403, 186514, 187030,
            187343, 190446, 194951, 197172, 198183, 198237, 200187, 201238,
            201769, 202088, 202370, 202382, 203726, 207168, 207507, 208267,
            208991, 212503, 212525, 213482, 223470, 226107, 227287, 232970,
            233894, 233940, 234845, 236981, 237720, 241485, 242131, 244615,
            244620, 244624, 245553, 246792, 247779, 247807, 251942, 254875,
            255095, 258723, 259914, 259966, 262937, 263127, 263146, 263239,
            267506, 268151, 268235, 276946, 278369, 283787, 290607, 292262,
            292299, 292365, 293734, 294791, 296802, 297370, 298140, 298342,
            305806, 306567, 306734, 307309, 310559, 310642, 312263, 312802,
            327995, 329859, 335910, 337662, 338453, 343304, 353405, 353413,
            353893, 354529, 355161, 355494, 355623, 355691, 355774, 355950,
            355954, 356051, 356179, 356242, 356497, 356706, 357194, 357264,
            373665, 373717, 376825, 404495, 419551, 422019, 422612, 427949,
            433690, 435115, 435506, 436128, 436294, 436315, 436376, 436654,
            437268, 437576, 437642, 438235, 438558, 439054, 439080, 439472,
            439674, 441572, 455623, 476376, 481154, 485066, 527590, 529010,
            531947, 538763, 539110, 540596, 542057, 542098, 545024, 546352,
            551044, 556749, 560492, 560528, 565064, 567446, 585918, 611633,
            675438, 676105, 678361, 678431, 682537, 683559, 683671, 684947,
            714507, 747645, 748163, 749913, 764697, 770430, 772118, 772585,
            774869, 774876, 776310, 777338, 796911, 798872, 817377, 826692,
            833591, 838845, 840866, 876796, 878192, 883355, 887083, 887166,
            887344, 894624, 903599, 912879, 915091, 922486, 935850, 945271,
            945286, 946310, 947397, 951044, 951139, 951591, 952854, 952928,
            953157, 954346, 959683, 960200, 962644, 968523, 977304, 981412,
            988359, 990304, 998700, 1000188, 1003561
        },
    }
    # Groups are visited in sorted name order, so the shared RNG is consumed
    # in the same sequence on every run.
    for attr, imgset in sorted(belongs.items()):
        random.shuffle(imgset)
        # list.remove raises ValueError if a forbidden index is not in this
        # group; forbidden[attr] raises KeyError for a group with no entry.
        for id in forbidden[attr]:
            imgset.remove(id)
        # Keep only crops whose first two shape dimensions both exceed 16.
        imgset = list(filter(lambda id: min(all[id][0].shape[:2]) > 16,
                             imgset))
        # The substring test matches both 'distorted' and 'not-distorted'.
        # NOTE(review): those groups pass 512 instead of 10 to
        # polygon_in_center; the meaning of that argument is defined there.
        if 'distorted' in attr:
            imgset = list(
                filter(lambda id: polygon_in_center(polygons[id], 512),
                       imgset))
        else:
            imgset = list(
                filter(lambda id: polygon_in_center(polygons[id], 10), imgset))
        # At most one sample per grid cell.
        imgset = imgset[:x * y]
        if not os.path.isdir(settings.ATTR_SAMPLE_DIR):
            os.makedirs(settings.ATTR_SAMPLE_DIR)
        file_path = os.path.join(settings.ATTR_SAMPLE_DIR,
                                 '{}.png'.format(attr))
        print(file_path)
        # Cells without a sample keep their initial zero value.
        canvas = np.zeros((y * cropsize, x * cropsize, 3), dtype=np.uint8)
        # Row-major placement: sample i goes to row i // x, column i % x.
        for i, j in enumerate(imgset):
            cropped = all[j][0]
            resized = cv2.resize(cropped, (cropsize, cropsize))
            canvas[(i // x) * cropsize:(i // x + 1) * cropsize,
                   (i % x) * cropsize:(i % x + 1) * cropsize] = resized
        cv2.imwrite(file_path, canvas)
        # Save the selected sample indices in the same order as the grid.
        with open(
                os.path.join(settings.ATTR_SAMPLE_DIR, '{}.json'.format(attr)),
                'w') as f:
            json.dump(imgset, f)