예제 #1
0
def pretrain(product):
    """Run ``pretrain_set`` over a product's entity and attribute term sets and store the results.

    All file locations come from ``Config(product)``: the Word2Vec model, the
    two input term sets, and the two output files.

    :param product: value handed to ``Config`` to select the paths to use
    """
    from CONFIG import Config
    import gensim

    paths = Config(product)
    w2v = gensim.models.Word2Vec.load(paths.WORD2VEC_PATH)

    entities = read_set(paths.WHOLE_PART_PATH)
    # Anything already present in the entity set is removed from the attribute set.
    attributes = read_set(paths.ENTITY_ATTRIBUTE_PATH) - entities

    entity_result = pretrain_set(w2v, entities)
    attribute_result = pretrain_set(w2v, attributes)

    write_dict(paths.SIMILARITY_ENTITY_PATH, entity_result)
    write_dict(paths.SIMILARITY_ATTRIBUTE_PATH, attribute_result)
예제 #2
0
from keras.models import Sequential, load_model
from keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout
from sklearn.metrics import confusion_matrix
import cv2
import numpy as np

from CONFIG import Config
# Shared configuration instance; the CNN class below copies its settings from it.
cfg = Config()


class CNN:
    """Holder for a Keras model plus the settings copied from the module-level ``cfg``."""

    def __init__(self):
        # Neither data nor a model exists yet; both start out as None.
        self.data, self.model = None, None

        # Settings taken from the shared configuration object.
        self.image_shape = cfg.image_shape
        self.class_number = cfg.class_number
        self.class_map = cfg.class_map
        self.MODEL_PATH = cfg.MODEL_PATH

    def network(self):
        """Append the layer stack to ``self.model``.

        ``self.model`` must already be an object exposing ``add``; ``__init__``
        leaves it as None, so it has to be assigned before this is called.
        """
        add_layer = self.model.add
        relu_same = {'activation': 'relu', 'padding': 'same'}

        # block 1
        add_layer(Conv2D(64, (3, 3), input_shape=self.image_shape, **relu_same))
        add_layer(Conv2D(64, (3, 3), **relu_same))
        add_layer(MaxPool2D(pool_size=(2, 2)))
        add_layer(Dropout(0.25))
        # block 2
        add_layer(Flatten())
        add_layer(Dense(128, activation='relu'))
        add_layer(Dropout(0.5))
import cv2
import numpy as np

import ip_draw as draw
import ip_detection_utils as util
import ocr_classify_text as ocr
from CONFIG import Config

# Module-level configuration instance.
C = Config()


def get_corner(boundaries):
    """
    Reduce each boundary to its top-left and bottom-right corner points
    :param boundaries: list of boundary, each boundary: [top, bottom, left, right]
                        -> top, bottom: sequences of (column_index, min/max row border)
                        -> left, right: sequences of (row_index, min/max column border)
    :return: corners: [(top_left, bottom_right)], one entry per boundary
                        -> top_left: (column_min, row_min)
                        -> bottom_right: (column_max, row_max)
    """

    def _to_corner(boundary):
        # Column extent: first point of the top edge vs. last point of the bottom edge.
        col_first = boundary[0][0][0]
        col_last = boundary[1][-1][0]
        # Row extent: first point of the left edge vs. last point of the right edge.
        row_first = boundary[2][0][0]
        row_last = boundary[3][-1][0]
        return ((min(col_first, col_last), min(row_first, row_last)),
                (max(col_first, col_last), max(row_first, row_last)))

    return [_to_corner(boundary) for boundary in boundaries]
from os.path import join as pjoin, exists
import time

import ocr_east as ocr
import ip
import merge

from CONFIG import Config

# Imported here because this script section needs them: the import list above
# provides neither `glob` nor the os.path helpers used to parse file names.
import glob
from os.path import basename, splitext

# choose functionality
is_ocr = True
is_ip = True
is_merge = True
# initialization
is_clip = False
C = Config()
C.build_output_folders(is_clip)
resize_by_height = 600


def _file_stem(path):
    """Return the file name of ``path`` without its directory and extension.

    Uses os.path so the result is the same with Windows and POSIX separators
    (splitting on a literal backslash only works for Windows-style paths).
    """
    return splitext(basename(path))[0]


# set input root directory and sort all images numerically by the index that
# forms their file name (e.g. "12.jpg" -> 12)
input_paths_img = glob.glob(pjoin(C.ROOT_INPUT, '*.jpg'))
input_paths_img = sorted(
    input_paths_img,
    key=lambda x: int(_file_stem(x)))  # sorted by index
# set the range of target inputs' indices
start_index = 24
end_index = 50
for input_path_img in input_paths_img:
    # index stays a string; it is converted only for the range comparison
    index = _file_stem(input_path_img)
    if int(index) < start_index:
        continue
예제 #5
0
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.python.platform import gfile

# Add the current working directory to the module search path before the
# lib_ctpn imports that follow (presumably so they resolve when the script is
# started from the project root -- confirm).
sys.path.append(os.getcwd())
from lib_ctpn.fast_rcnn.config import cfg, cfg_from_file
from lib_ctpn.fast_rcnn.test import _get_blobs
from lib_ctpn.text_connector.detectors import TextDetector
from lib_ctpn.text_connector.text_connect_cfg import Config as TextLineCfg
from lib_ctpn.rpn_msr.proposal_layer_tf import proposal_layer


from CONFIG import Config
# Project configuration instance; named distinctly from the lib_ctpn `cfg`
# imported above.
UI2CODECfg = Config()


def resize_im(im, scale, max_scale=None):
    """Resize ``im`` uniformly so that its shorter side becomes ``scale``.

    If ``max_scale`` is given and the longer side would exceed it after that
    resize, the factor is recomputed so the longer side equals ``max_scale``.

    :param im: image array; only ``im.shape[0]`` and ``im.shape[1]`` are read here
    :param scale: target length of the shorter of the first two dimensions
    :param max_scale: optional upper bound for the longer of the first two dimensions
    :return: (resized image, scaling factor applied to both axes)
    """
    short_side = min(im.shape[0], im.shape[1])
    long_side = max(im.shape[0], im.shape[1])

    f = float(scale) / short_side
    # Identity test: `!= None` would invoke the argument's own comparison operator.
    if max_scale is not None and f * long_side > max_scale:
        f = float(max_scale) / long_side
    return cv2.resize(im, None, None, fx=f, fy=f, interpolation=cv2.INTER_LINEAR), f


def draw_boxes(img, boxes, scale, output_path_label, output_path_img):
    with open(output_path_label, 'w') as f:
        for box in boxes:
            if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3] - box[0]) < 5:
                continue
            if box[8] >= 0.9:
import body
import ip_preprocessing as pre
import ip_draw as draw
import file_utils as file
from CONFIG import Config
from MODEL import CNN

import cv2
import time
import glob
from os.path import join as pyjoin

# os.path helpers for parsing file names independently of the path separator
from os.path import basename, splitext

# initialization
C = Config()
C.build_output_folders(False)
input_root = C.ROOT_INPUT
input_paths = glob.glob(pyjoin(input_root, '*.png'))
# Sort numerically by the index that forms the file name (e.g. "12.png" -> 12).
# basename/splitext are used instead of splitting on a literal backslash, which
# only works for Windows-style paths.
input_paths = sorted(input_paths, key=lambda x: int(splitext(basename(x))[0]))  # sorted by index


def save(index, org, binary, corners_block, corners_img, corners_compo, compos_class, corners_text):
    """
    Build the output file paths for one input and draw its detection results.

    :param index: file-name stem of the input; concatenated with '.png' / '.json'
    :param org: image handed to the ip_draw helpers as the drawing base
    :param binary: presumably the binarized image -- TODO confirm how it is used
    :param corners_block: presumably corners of detected blocks -- TODO confirm
    :param corners_img: corners handed to draw_bounding_box to be painted in white
    :param corners_compo: corners handed to draw_bounding_box_class
    :param compos_class: classes handed to draw_bounding_box_class with corners_compo
    :param corners_text: presumably corners of detected text -- TODO confirm
    """

    # Output locations, one per configured output root, all named after the index.
    out_img_gradient = pyjoin(C.ROOT_IMG_GRADIENT, index + '.png')
    out_img_draw = pyjoin(C.ROOT_IMG_DRAWN, index + '.png')
    out_img_clean = pyjoin(C.ROOT_IMG_CLEAN, index + '.png')
    out_label = pyjoin(C.ROOT_LABEL, index + '.json')

    # *** Step 7 *** post-processing: remove img elements from original image and segment into smaller size
    # White color with line=-1: presumably filled boxes that blank out the image
    # regions (OpenCV thickness convention) -- confirm in ip_draw.
    img_clean = draw.draw_bounding_box(org, corners_img, color=(255, 255, 255), line=-1)
    # draw results
    draw_bounding = draw.draw_bounding_box_class(org, corners_compo, compos_class)
import time
import glob
from os.path import join as pyjoin

# os.path helpers for parsing file names independently of the path separator
from os.path import basename, splitext

# choose functionality
is_merge_img = True
is_shrink_img = True
is_detect_compo_in_img = True
is_classify = True
is_ocr = True
is_segment = False
is_save = True
is_clip = False

# initialization
C = Config()
C.build_output_folders(is_segment)
input_root = C.ROOT_INPUT
input_paths = glob.glob(pyjoin(input_root, '*.png'))
# Sort numerically by the index that forms the file name (e.g. "12.png" -> 12).
# basename/splitext are used instead of splitting on a literal backslash, which
# only works for Windows-style paths.
# NOTE(review): the processing loop that follows still extracts the index with
# split('\\') and should be switched to the same parsing.
input_paths = sorted(
    input_paths, key=lambda x: int(splitext(basename(x))[0]))  # sorted by index
# NOTE(review): this rebinds the name CNN from the class to an instance; the
# name is kept unchanged because code further down may rely on it.
CNN = CNN()
CNN.load()

# start image and end image
start_index = 20
end_index = 100

for input_path in input_paths:
    index = input_path.split('\\')[-1][:-4]
    if int(index) < start_index: