def uied():
    start = time.perf_counter()  # time.clock() was removed in Python 3.8
    models, input_path, output_root, note_success_file, note_fail_file, uied_params = params.get_params()
    print("UIED Running on:", input_path, " Write to:", output_root)

    resized_height = resize_height_by_longest_edge(input_path)

    if is_ocr:
        import ocr_east as ocr
        os.makedirs(pjoin(output_root, 'ocr'), exist_ok=True)
        ocr.east(input_path,
                 output_root,
                 models['text'],
                 resize_by_height=resized_height,
                 show=False)

    if is_ip:
        os.makedirs(pjoin(output_root, 'ip'), exist_ok=True)
        ip.compo_detection(input_path,
                           output_root,
                           uied_params=uied_params,
                           classifier=models['compo'],
                           resize_by_height=resized_height,
                           show=False)

    if is_merge:
        # os.makedirs(pjoin(output_root, 'merge'), exist_ok=True)
        import merge
        name = input_path.split('/')[-1][:-4]
        compo_path = pjoin(output_root, 'ip', str(name) + '.json')
        ocr_path = pjoin(output_root, 'ocr', str(name) + '.json')
        merge.incorporate(input_path,
                          compo_path,
                          ocr_path,
                          output_root,
                          resize_by_height=resized_height,
                          show=False)

    with open(note_success_file, 'a') as f:
        f.write(output_root + '\n')
    print("[UIED complete in %.3fs]" % (time.clock() - start))
    print(time.ctime(), '\n')
Example #2
def run_single(input_path_img, output_dir, models):
    # input_path_img = '/Users/yixue/Documents/Research/UsageTesting/UsageTesting-Repo/video_data_examples/6pm-video-signin-1/ir_data/bbox-0189-screen.jpg'
    single_output = os.path.join(output_dir, 'result.jpg')
    if os.path.exists(single_output):
        return

    print('running', input_path_img)

    resized_height = resize_height_by_longest_edge(input_path_img)

    if is_ocr:
        print('ocr...')
        os.makedirs(pjoin(output_dir, 'ocr'), exist_ok=True)
        ocr.east(input_path_img, output_dir, models, key_params['max-word-inline-gap'],
                 resize_by_height=resized_height, show=False)

    if is_ip:
        print('ip...')
        os.makedirs(pjoin(output_dir, 'ip'), exist_ok=True)
        # switch for the classification function
        classifier = None
        if is_clf:
            classifier = {}
            # classifier['Image'] = CNN('Image')
            classifier['Elements'] = CNN('Elements')
            # classifier['Noise'] = CNN('Noise')
        ip.compo_detection(input_path_img, output_dir, key_params,
                           classifier=classifier, resize_by_height=resized_height, show=False)

    if is_merge:
        print('merge...')
        name = input_path_img.split('/')[-1][:-4]
        compo_path = pjoin(output_dir, 'ip', str(name) + '.json')
        ocr_path = pjoin(output_dir, 'ocr', str(name) + '.json')
        merge_optimized.incorporate(input_path_img, compo_path, ocr_path, output_dir, params=key_params,
                                    resize_by_height=resized_height, show=False)
Example #3
        start_time, start_clock = time.time(), time.perf_counter()
        resized_height = resize_height_by_longest_edge(input_img)
        if is_ocr:
            ocr.east(input_img,
                     output_root,
                     ocr_model,
                     key_params['max-word-inline-gap'],
                     resize_by_height=resized_height,
                     show=args.show,
                     batch=True)

        if is_ip:
            ip.compo_detection(input_img,
                               output_root,
                               key_params,
                               batch=True,
                               classifier=compo_classifier,
                               resize_by_height=resized_height,
                               show=args.show)

        if is_merge:
            name = input_img.split('/')[-1][:-4]
            compo_path = pjoin(output_root, 'ip', str(name) + '.json')
            ocr_path = pjoin(output_root, 'ocr', str(name) + '.json')
            merge.incorporate(input_img,
                              compo_path,
                              ocr_path,
                              output_root,
                              params=key_params,
                              resize_by_height=resized_height,
                              show=args.show)
Example #4
                 show=False)

    if is_ip:
        import detect_compo.ip_region_proposal as ip
        os.makedirs(pjoin(output_root, 'ip'), exist_ok=True)
        # switch for the classification function
        classifier = None
        if is_clf:
            classifier = {}
            from cnn.CNN import CNN
            # classifier['Image'] = CNN('Image')
            classifier['Elements'] = CNN('Elements')
            # classifier['Noise'] = CNN('Noise')
        ip.compo_detection(input_path_img,
                           output_root,
                           classifier=classifier,
                           resize_by_height=resized_height,
                           show=False)

    if is_merge:
        import merge
        name = input_path_img.split('/')[-1][:-4]
        compo_path = pjoin(output_root, 'ip', str(name) + '.json')
        ocr_path = pjoin(output_root, 'ocr', str(name) + '.json')
        merge.incorporate(input_path_img,
                          compo_path,
                          ocr_path,
                          output_root,
                          resize_by_height=resized_height,
                          show=True)
Example #5
                 show=False)

    if is_ip:
        import detect_compo.ip_region_proposal as ip
        os.makedirs(pjoin(output_root, 'ip'), exist_ok=True)
        # switch for the classification function
        classifier = None
        if is_clf:
            classifier = {}
            from cnn.CNN import CNN
            # classifier['Image'] = CNN('Image')
            classifier['Elements'] = CNN('Elements')
            # classifier['Noise'] = CNN('Noise')
        ip.compo_detection(input_path_img,
                           output_root,
                           key_params,
                           classifier=classifier,
                           resize_by_height=resized_height,
                           show=True)

    if is_merge:
        import merge
        name = input_path_img.split('/')[-1][:-4]
        compo_path = pjoin(output_root, 'ip', str(name) + '.json')
        ocr_path = pjoin(output_root, 'ocr', str(name) + '.json')
        merge.incorporate(input_path_img,
                          compo_path,
                          ocr_path,
                          output_root,
                          params=key_params,
                          resize_by_height=resized_height,
                          show=True)
Example #6
        if int(index) < start_index:
            continue
        if int(index) > end_index:
            break

        if is_ocr:
            ocr.east(input_img,
                     output_root,
                     ocr_model,
                     resize_by_height=resized_height,
                     show=False)

        if is_ip:
            ip.compo_detection(input_img,
                               output_root,
                               classifier=compo_classifier,
                               resize_by_height=resized_height,
                               show=True)

        if is_merge:
            import merge
            compo_path = pjoin(output_root, 'ip', str(index) + '.json')
            ocr_path = pjoin(output_root, 'ocr', str(index) + '.json')
            merge.incorporate(input_img,
                              compo_path,
                              ocr_path,
                              output_root,
                              resize_by_height=resized_height,
                              show=True)

        num += 1
Example #7

    if is_ocr:
        import detect_text_east.ocr_east as ocr
        import detect_text_east.lib_east.eval as eval
        os.makedirs(pjoin(path, 'ocr'), exist_ok=True)
        models = eval.load()
        ocr.east(input_path_img, path, models, key_params['max-word-inline-gap'],
                 resize_by_height=resized_height, show=False)

    if is_ip:
        import detect_compo.ip_region_proposal as ip
        os.makedirs(pjoin(path, 'ip'), exist_ok=True)
        # switch for the classification function
        classifier = None
        if is_clf:
            classifier = {}
            from cnn.CNN import CNN
            # classifier['Image'] = CNN('Image')
            classifier['Elements'] = CNN('Elements')
            # classifier['Noise'] = CNN('Noise')
        ip.compo_detection(input_path_img, path, key_params,
                           classifier=classifier, resize_by_height=resized_height, show=False)

    if is_merge:
        import merge
        name = input_path_img.split('/')[-1][:-4]
        #print(name)
        compo_path = pjoin(path, 'ip', str(name) + '.json')
        ocr_path = pjoin(path, 'ocr', str(name) + '.json')
Example #8
def uied(input_path,
         output_root,
         params=None,
         is_ip=True,
         is_clf=False,
         is_ocr=True,
         is_merge=True):
    '''
            ele:min-grad: gradient threshold to produce binary map
            ele:ffl-block: fill-flood threshold
            ele:min-ele-area: minimum area for selected elements
            ele:merge-contained-ele: if True, merge elements contained in others
            text:max-word-inline-gap: words with smaller distance than the gap are counted as a line
            text:max-line-gap: lines with smaller distance than the gap are counted as a paragraph

            Tips:
            1. A larger *min-grad* produces a finer-grained binary map, but is prone to over-segmenting elements into small pieces
            2. A smaller *min-ele-area* keeps tiny elements, but is prone to producing noise
            3. If *merge-contained-ele* is False, elements nested inside others are kept, but this is prone to producing noise
            4. *max-word-inline-gap* and *max-line-gap* should depend on the input image size and resolution

            mobile: {'min-grad':4, 'ffl-block':5, 'min-ele-area':25, 'max-word-inline-gap':6, 'max-line-gap':1}
            web   : {'min-grad':3, 'ffl-block':5, 'min-ele-area':25, 'max-word-inline-gap':4, 'max-line-gap':4}
        '''
    if params is None:
        params = {
            'min-grad': 4,
            'ffl-block': 5,
            'min-ele-area': 25,
            'merge-contained-ele': True,
            'max-word-inline-gap': 4,
            'max-line-gap': 4
        }
    else:
        params = json.loads(params)
        for i in params:
            if params[i] == 'True':
                params[i] = True
            elif params[i] == 'False':
                params[i] = False
            else:
                params[i] = int(params[i])
    print(params)

    start = time.perf_counter()  # time.clock() was removed in Python 3.8
    resized_height = resize_height_by_longest_edge(input_path)

    if is_ocr:
        import ocr_east as ocr
        import lib_east.eval as eval
        os.makedirs(pjoin(output_root, 'ocr'), exist_ok=True)
        models = eval.load()
        ocr.east(input_path,
                 output_root,
                 models,
                 params['max-word-inline-gap'],
                 resize_by_height=resized_height,
                 show=False)

    if is_ip:
        import detect_compo.ip_region_proposal as ip
        os.makedirs(pjoin(output_root, 'ip'), exist_ok=True)
        # switch for the classification function
        classifier = None
        if is_clf:
            classifier = {}
            from CNN import CNN
            # classifier['Image'] = CNN('Image')
            classifier['Elements'] = CNN('Elements')
            # classifier['Noise'] = CNN('Noise')
        ip.compo_detection(input_path,
                           output_root,
                           uied_params=params,
                           classifier=classifier,
                           resize_by_height=resized_height,
                           show=False)

    if is_merge:
        import merge
        # os.makedirs(pjoin(output_root, 'merge'), exist_ok=True)
        name = input_path.split('/')[-1][:-4]
        compo_path = pjoin(output_root, 'ip', str(name) + '.json')
        ocr_path = pjoin(output_root, 'ocr', str(name) + '.json')
        merge.incorporate(input_path,
                          compo_path,
                          ocr_path,
                          output_root,
                          params=params,
                          resize_by_height=resized_height,
                          show=False)

    print("[UIED complete in %.3fs]" % (time.clock() - start))
    print(time.ctime(), '\n')
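
The docstring above lists the tunable thresholds and two suggested presets. As a minimal usage sketch (hypothetical paths; the preset is passed as a JSON string of string values, matching the json.loads / 'True' / int() parsing at the top of the function):

web_preset = ('{"min-grad": "3", "ffl-block": "5", "min-ele-area": "25", '
              '"merge-contained-ele": "True", "max-word-inline-gap": "4", "max-line-gap": "4"}')
# hypothetical input image and output directory
uied('data/input/web_page.png', 'data/output', params=web_preset)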
Example #9
def fun(filename, output):
    input_path_img = filename
    output_root = output

    key_params = {
        'min-grad': 10,
        'ffl-block': 5,
        'min-ele-area': 50,
        'merge-contained-ele': True,
        'max-word-inline-gap': 4,
        'max-line-gap': 4
    }

    resized_height = resize_height_by_longest_edge(input_path_img)

    is_ip = True
    is_clf = True
    is_ocr = True
    is_merge = True
    is_html = True
    is_tes = True
    color = True
    new_html = True

    directory = 'out_' + input_path_img.split('/')[-1].split('.')[0]
    path = os.path.join(output_root, directory).replace('\\', '/')
    os.makedirs(path, exist_ok=True)

    if is_ocr:
        import detect_text_east.ocr_east as ocr
        import detect_text_east.lib_east.eval as eval
        os.makedirs(pjoin(path, 'ocr'), exist_ok=True)
        models = eval.load()
        ocr.east(input_path_img,
                 path,
                 models,
                 key_params['max-word-inline-gap'],
                 resize_by_height=resized_height,
                 show=False)

    if is_ip:
        import detect_compo.ip_region_proposal as ip
        os.makedirs(pjoin(path, 'ip'), exist_ok=True)
        # switch for the classification function
        classifier = None
        if is_clf:
            classifier = {}
            from cnn.CNN import CNN
            # classifier['Image'] = CNN('Image')
            classifier['Elements'] = CNN('Elements')
            # classifier['Noise'] = CNN('Noise')
        ip.compo_detection(input_path_img,
                           path,
                           key_params,
                           classifier=classifier,
                           resize_by_height=resized_height,
                           show=False)

    if is_merge:
        import merge
        name = input_path_img.split('/')[-1][:-4]
        #print(name)
        compo_path = pjoin(path, 'ip', str(name) + '.json')
        ocr_path = pjoin(path, 'ocr', str(name) + '.json')
        merge.incorporate(input_path_img,
                          compo_path,
                          ocr_path,
                          path,
                          params=key_params,
                          resize_by_height=resized_height,
                          show=False)

    if is_html:
        from obj.Compos_DF import ComposDF
        # from obj.Compo_HTML import *
        # from obj.List import *
        # from obj.Block import *

        # from obj.Group import *
        # from obj.React import *
        # from obj.Page import *
        # from obj.Tree import *
        import lib.draw as draw
        from lib.list_item_gethering import gather_lists_by_pair_and_group

        # add path to compos.json
        try:
            compos = ComposDF(json_file=path + '/compo.json',
                              img_file=input_path_img)
            img = compos.img.copy()
            img_shape = compos.img_shape
            img_re = cv2.resize(img, img_shape)

            # ***Step 1*** repetitive list recognition
            compos.repetitive_group_recognition()
            compos.pair_groups()
            compos.list_item_partition()

            # ***Step 2*** mark compos in same group as a single list, mark compos in same group_pair as a multiple list
            lists, non_listed_compos = gather_lists_by_pair_and_group(
                compos.compos_dataframe[1:])
            generate_lists_html_css(lists)

            # ***Step 3*** slice compos as blocks
            compos_html = [li.list_obj for li in lists] + non_listed_compos
            blocks, non_blocked_compos = slice_blocks(compos_html, 'v')

            # ***Step 4*** assembly html and css as web page, and react program
            html, css = export_html_and_css(blocks, path + '/page')
            blk, index = export_react_program(blocks, path + '/react')
            tree = export_tree(blocks, path + '/tree')

            #ADD PATH
            print("CONVERT TO HTML SAVED TO PATH:", path + '/page')
        except Exception:
            print('failed to convert to HTML')
    if is_tes:
        # get current dir
        with open(path + '/compo.json') as f:  # make this path variable
            #parse json
            data = json.load(f)
            # get clips
            for i in data['compos']:
                if i['clip_path'] == "REDUNDANT":
                    continue
                else:
                    clip_path = i['clip_path']
                    # run Tesseract on the element clip and capture its stdout via a temp file (Windows shell)
                    command = 'cmd /c ' + "tesseract " + clip_path.replace(
                        "/", "\\") + " stdout -l eng > temp.txt"
                    os.system(command)
                    with open("temp.txt", "r") as temp_f:
                        i["ocr"] = temp_f.read()
            # make new json
            with open(path + '/compo_ocr.json', 'w') as json_file:
                json.dump(data, json_file)
    if color:

        with open(path + '/compo_ocr.json') as f:
            data = json.load(f)
            for i in data['compos']:
                if i['clip_path'] == "REDUNDANT":
                    continue
                else:
                    clip_path = i['clip_path']
                    img = cv2.imread(clip_path.replace("/", "\\"))
                    # cluster the clip's pixels and use the first cluster centre as the element colour
                    all_pixels = img.reshape((-1, 3))
                    from sklearn.cluster import KMeans
                    km = KMeans(n_clusters=2)
                    km.fit(all_pixels)
                    rgb = km.cluster_centers_[0][::-1]  # BGR -> RGB
                    rgb = rgb.astype(int)
                    i["color"] = '#%02x%02x%02x' % tuple(rgb)
            with open(path + '/compo_html.json', 'w') as json_file:
                json.dump(data, json_file)

    if new_html:
        htmltext = """<!DOCTYPE html>
        <html>
        <head>
        <title>HTML, CSS and JavaScript demo</title>
        </head>
        <body>"""
        char = ['\n', '\f', '\\', '/', ']', '[', '(', ")"]
        with open(path + '/compo_html.json') as f:  # make this path variable
            #parse json
            data = json.load(f)
            # get clips
            for i in data['compos']:
                if i['clip_path'] == "REDUNDANT":
                    continue
                else:
                    div = ('<div style="background-color:' + i['color'] +
                           '; position: absolute; top:' + str(i["row_min"]) +
                           'px; left:' + str(i["column_min"]) +
                           'px; border:3px solid black; height:' + str(i["height"]) +
                           'px; width:' + str(i["width"]) + 'px;">' +
                           ''.join([c for c in i['ocr'] if c not in char]) +
                           '</div>')
                    htmltext = htmltext + div
            htmltext = htmltext + "</body></html>"
            Html_file = open(path + '/output.html', "w", encoding="utf-8")
            Html_file.write(htmltext)
            Html_file.close()
        #print(htmltext)

        #json output
    st.header("Generated Json ")
    with open(path + '/compo_html.json') as f:
        data = json.load(f)
    st.write(data)

    st.header("Generated Json with classification details")
    with open(path + '/ip/clf_' + name + '.json') as f:
        data = json.load(f)
        for i in data['compos']:
            if i['class'] == "Background":
                continue
            else:
                clas = i['class']
                fid = i['id']
                i['clip_path'] = path + '/new/' + clas + '/' + str(
                    fid) + '.jpg'
    st.write(data)

    # st.write(json.dumps(data))

    st.header("Generated Html")
    hp = path + '/output.html'
    with open(hp, 'r', encoding='utf-8') as html_file:
        source_code = html_file.read()
    st.markdown(source_code, unsafe_allow_html=True)

    #image output
    st.header("Output Classification")
    imagee = cv2.imread(path + "/ip/result.jpg")
    #cv2.imshow('Image', imagee)
    st.image(imagee, caption='annotated result', channels='BGR')  # cv2.imread returns BGR