def devide_jpgs(jpg_dir, norm_img_size=True):
    """Sort JPEGs into color ('...c.jpg') and grayscale ('...g.jpg') copies.

    Each readable .jpg under *jpg_dir* is written into the NEW_JPG_DIR
    subdirectory: color images keep their three channels, grayscale-looking
    images are converted to a single channel first.

    Args:
        jpg_dir: Directory that holds the source .jpg files.
        norm_img_size: If True, shrink images taller than config.MAX_HEIGHT
            before saving.
    """
    out_dir = utils.make_outdir(jpg_dir, NEW_JPG_DIR)
    for jpg_path in utils.get_path_list(jpg_dir, 'jpg'):
        img = cv2.imread(jpg_path)
        if img is None:  # unreadable or corrupt file -- skip it
            continue

        # Resize if requested.
        if norm_img_size:
            img = utils.img_resize(img, max_height=config.MAX_HEIGHT)

        # Decide the suffix and color mode once, then write once
        # (the original duplicated the cv2.imwrite call in both branches).
        if is_color_img(img):
            suffix = 'c.jpg'
        else:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            suffix = 'g.jpg'

        # Only rewrite the trailing extension; str.replace('.jpg', ...)
        # would clobber every '.jpg' occurrence inside the filename.
        name = basename(jpg_path)
        stem = name[:-len('.jpg')] if name.endswith('.jpg') else name
        cv2.imwrite(join(out_dir, stem + suffix), img)
# ---- Example #2 (separate snippet; scraped-page separator) ----
if args.with_subtitle or args.shave_subtitle:
    good_cons_num_y += 1

# Prepare the output directory paths.
outdir_name = '2_paint_out'
output_path = utils.make_outdir(image_dir, outdir_name)

output_koma_path = utils.make_outdir(output_path, '0_koma')
# If a previous run left results behind (3+ entries), wipe and recreate.
if len(os.listdir(output_koma_path)) >= 3:
    shutil.rmtree(output_path)
    output_path = utils.make_outdir(image_dir, outdir_name)
    output_koma_path = utils.make_outdir(output_path, '0_koma')
output_shaved_path = utils.make_outdir(output_koma_path, '0_padding_shave')

# paint_out processing: 1st pass
img_path_list = utils.get_path_list(image_dir, args.ext)
print('pages:', len(img_path_list) - (args.start + args.end))
with utils.timer('paint_out処理: 1st 切り抜き位置が求められた画像を切り抜き'):
    odd_cp_list = []  # cut points found on odd-index pages
    even_cp_list = []  # cut points found on even-index pages
    not_cut_img_path_dict = {}
    exec_paint_out_cut(img_path_list, kind='1st')

    # Compute the average crop coordinates per page parity.
    even_page_cp = find_average_point(even_cp_list)
    odd_page_cp = find_average_point(odd_cp_list)

print('lens', len(img_path_list) - len(not_cut_img_path_dict))

# Loop that crops the remaining images using the averaged crop coordinates.
if not_cut_img_path_dict:
    # NOTE(review): snippet truncated here -- the body of this branch is not shown.
# ---- Example #3 (separate snippet; scraped-page separator) ----
# Process that cuts each panel (koma) out of a one-page image.
import os
import sys

import cv2

from cut import AverageDiffCut
import utils

if __name__ == '__main__':
    # Resolve the input image directory from the command line.
    image_dir = utils.check_argv_path(sys.argv)
    outdir_name = 'koma'
    output_path = utils.make_outdir(image_dir, outdir_name)
    img_path_list = utils.get_path_list(image_dir, 'jpg')

    adc = AverageDiffCut()

    # Loop that searches each page for the crop coordinates ("cut points", cp).
    print('切り出し座標を検出しています')
    odd_cp_list = []  # cut points found on odd-index pages
    even_cp_list = []  # cut points found on even-index pages
    not_cut_img_path_dict = {}
    for idx, img_path in enumerate(img_path_list):
        img = cv2.imread(img_path)
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Search the grayscale page for crop coordinates.
        cp_dict = adc.search_cut_point(img_gray)

        print(idx, cp_dict, len(cp_dict['x']), len(cp_dict['y']))
        # Append to the list when the expected number of coordinates came back.
        # NOTE(review): snippet truncated here -- the rest of the loop body is not shown.
            # NOTE(review): orphaned fragment -- this looks like the body of
            # an `except` clause from a retry wrapper whose header is not
            # visible in this chunk.
            # On error: log it, then sleep and retry with exponential backoff.
            print(e)
            req_count += 1
            if req_count < 5:
                # Backoff grows as 2**req_count seconds per failed attempt.
                sleep_time = 2 ** req_count
                print('retry after {} second'.format(sleep_time))
                sleep(sleep_time)
            else:
                # Give up after 5 attempts; the caller reports the error details.
                raise
    return

if __name__ == '__main__':
    # Resolve the PDF directory from the command line and gather the PDFs.
    pdf_dir = utils.check_argv_path(sys.argv)
    pdf_path_list = utils.get_path_list(pdf_dir, 'pdf')
    # Crudely skip files that were already processed (disabled for now).
    # pdf_path_list = [p for p in pdf_path_list if not os.path.basename(p).startswith('[')]
    amazon_url_list = []
    for pdf_path in pdf_path_list:
        # Extract the ISBN from the PDF; skip files where none is found.
        isbn = pdf_to_isbn(pdf_path)
        if not isbn:
            continue
        try:
            amazon_items = fetch_amazon_item(isbn)
        except HTTPError as e:
            # Include the exception so failures are diagnosable
            # (the original bound `e` but never used it).
            print('情報の取得に失敗しました:', pdf_path, isbn, e)
            continue
        if amazon_items:
            amazon_url = get_amazon_url(amazon_items)
            print(amazon_url)
            amazon_url_list.append(amazon_url)
# ---- Example #5 (separate snippet; scraped-page separator) ----
def pdf_to_page1(pdf_path):
    """Extract the first page image of *pdf_path* as TMP_FILENAME_PAGE1.

    The image is written next to the source PDF, capped at MAX_WIDTH_JPG1.

    Returns:
        Path to the extracted image file.

    Raises:
        PDFException: If the document forbids content extraction.
        StopIteration: If the PDF contains no pages.
    """
    with open(pdf_path, 'rb') as fp:
        parser, document = prev_pdf_parser(fp)

        # Check if the document allows extraction. If not, abort with
        # a message (the original raised PDFException with no context).
        if not document.is_extractable:
            raise PDFException('extraction not allowed: {}'.format(pdf_path))

        # Take only the first page; next() is the idiomatic spelling
        # of the original .__next__() call.
        page1 = next(PDFPage.create_pages(document))

        layouts = prev_pdf_layout(page1)
        for layout in layouts:
            for thing in layout:
                # Save any image object found in the page layout.
                save_image(thing,
                           dirname(pdf_path),
                           TMP_FILENAME_PAGE1,
                           max_width=MAX_WIDTH_JPG1)

    return join(dirname(pdf_path), TMP_FILENAME_PAGE1)


if __name__ == '__main__':
    # Resolve the input directory from the command line.
    pdf_dir = utils.check_argv_path(sys.argv)

    # Collect candidate PDFs, dropping anything that is actually a directory.
    candidates = utils.get_path_list(pdf_dir, 'pdf')
    pdf_path_list = [p for p in candidates if not os.path.isdir(p)]

    pdf_to_jpg_dir = utils.make_outdir(pdf_dir, PDF_TO_JPG_DIR)

    print('pdf files:', len(pdf_path_list))
    # Convert every remaining PDF into JPEG pages.
    for target in pdf_path_list:
        pdf_to_jpg(target)
# ---- Example #6 (separate snippet; scraped-page separator) ----
               # NOTE(review): fragment starts mid-expression -- the enclosing
               # function header and the start of this comprehension are not
               # visible in this chunk.
               for v in text['boundingPoly']['vertices']]
        # Draw the bounding box as a closed polyline (repeat the first vertex).
        draw.line(box + [box[0]], width=2, fill=color)
    return img


if __name__ == '__main__':
    image_dir = Path(utils.check_argv_path(sys.argv))

    # NOTE(review): hardcoded, machine-specific service-account key path --
    # should come from the environment or configuration instead.
    key_file_path = '/Users/esuji/Dropbox/program/cvtest/yonkoma2data/src/MyFirstProject-b8427fd28b8d.json'
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = key_file_path

    # Instantiates a client
    client = vision.ImageAnnotatorClient()

    # Pickle output goes into a 'pickles' directory next to image_dir.
    pickle_dir = utils.make_outdir(image_dir / '..', 'pickles')
    image_path_list = sorted(utils.get_path_list(image_dir, '.jpg'))

    ta_list = []
    # NOTE(review): only images 100..109 are processed -- looks like a
    # debugging slice; confirm before running on a full dataset.
    for idx_koma, image_path in enumerate(image_path_list[100:110]):
        print(idx_koma, image_path)

        try:
            # Fetch text annotations from the Vision API for this image.
            image = get_image(image_path)
            ta = get_response_text_annotations(image)
            ta_list.append({
                'image_path': image_path,
                'text_annotation': list(ta)
            })
            print(ta[0].description)
        except Exception as e:
            # NOTE(review): broad except swallows all errors per image;
            # snippet may be truncated here (ta_list is presumably pickled
            # afterwards -- not visible in this chunk).
            print(e)