Beispiel #1
0
    # catch label and screenshot img and segment them into smaller size
    img, label = None, None
    catch_success = False
    if is_catch_element and '.com' in links.iloc[index]:
        # set the format of libel
        libel_format = pd.read_csv(os.path.join(data_position, 'format.csv'),
                                   index_col=0)
        url = 'http://' + links.iloc[index] if 'http://' not in links.iloc[
            index] else links.iloc[index]
        try:
            img, label = catch.catch(url, label_path, img_org_path,
                                     libel_format, driver_path)
        except FunctionTimedOut:
            print('Catch Time Out')
            continue

    # segment the lengthy images
    if is_segment and img is not None:
        seg.segment_img(img, 600, img_segment_path, 0)

    # read and draw label on segment img
    if is_draw_label and img is not None and label is not None:
        draw.label(label, img, img_drawn_path)

    end_time = time.clock()
    print("*** %d Time taken:%ds ***\n" % (index, int(end_time - start_time)))

    if index > end_pos:
        break
    # Create the WebDriver selected by `browser`.
    if browser == 'PhantomJS':
        phantomjs_exe = os.path.join(driver_path, 'phantomjs.exe')
        driver = webdriver.PhantomJS(executable_path=phantomjs_exe)
    elif browser == 'Chrome':
        options = webdriver.ChromeOptions()
        # headless: do not show the browser window on every run
        options.add_argument('--headless')
        chromedriver_exe = os.path.join(driver_path, 'chromedriver.exe')
        driver = webdriver.Chrome(executable_path=chromedriver_exe,
                                  options=options)

    # Load the label format table (first CSV column is used as the index).
    label_format = pd.read_csv('data/format.csv', index_col=0)
    try:
        img, label = catch.catch(url, out_html, out_elements, out_img,
                                 label_format, driver)
        # Both post-processing steps need a captured image.
        if img is not None:
            # Draw the labels onto the captured image.
            if is_draw_label and label is not None:
                img_drawn_path = 'data/0_drawn.png'
                draw.label(label, img, img_drawn_path)
            # Segment the lengthy image.
            if is_segment:
                img_segment_dir = '/segment'
                seg.segment_img(img, 600, img_segment_dir, 0)
    except FunctionTimedOut:
        print('Catch Time Out')

# Report the elapsed time relative to start_time (set outside this excerpt),
# truncated to whole seconds.
# NOTE(review): time.clock() was removed in Python 3.8; migrate together with
# the start_time assignment so both readings use the same clock.
end_time = time.clock()
elapsed_seconds = int(end_time - start_time)
print("*** Time taken:%ds ***" % elapsed_seconds)