コード例 #1
0
ファイル: fix_wordpress_images.py プロジェクト: Leicas/blog
def replace_fn_no_caption_tag(find_str: str) -> str:
    """Turn a matched image snippet into a Hugo ``figure`` shortcode.

    The caption is taken from the text between the position just after the
    result of ``power_edit.find_nth_match(find_str, '[', 2)`` and the next
    ``]``.  The URL is taken from two characters past that ``]`` up to the
    next ``)``.  Progress is printed along the way, and the module-level
    ``num_replacements`` counter is incremented once per call.

    Args:
        find_str: The matched text to convert.

    Returns:
        The shortcode string with the URL and caption filled in.
    """
    global num_replacements

    print(find_str)

    # NOTE(review): presumably locates the second '[' in the match; the +1
    # steps past that bracket so the slice starts on the caption text.
    cap_begin = power_edit.find_nth_match(find_str, '[', 2) + 1
    print(f'caption_start_pos = {cap_begin}')
    cap_end = power_edit.strict_find(find_str, ']', cap_begin)
    print(f'caption_stop_pos = {cap_end}')

    # The caption is emitted inside a double-quoted attribute, so any double
    # quotes in it are swapped for single quotes.
    caption = find_str[cap_begin:cap_end].replace('"', "'")
    print(f'caption = {caption}')

    # The URL begins two characters after the caption's closing bracket
    # (presumably skipping "](") and runs to the next ')'.
    url_end = power_edit.strict_find(find_str, ')', cap_end)
    url = find_str[cap_end + 2:url_end]
    print(f'url = {url}')

    # Fill the placeholders in order: URL first, then caption.
    shortcode = (
        '{{< figure src="<URL>" caption="<CAPTION>" caption-position="bottom" >}}'
        .replace('<URL>', url)
        .replace('<CAPTION>', caption)
    )

    num_replacements += 1
    return shortcode
コード例 #2
0
def replace_fn_single_pic(find_str: str, file_path: str) -> str:
    """Build a Hugo ``figure`` shortcode for a single-image reference.

    Steps performed:

    1. Parse the image id (after ``id=``) and, if present, the width (after
       ``w=``) out of ``find_str``.
    2. Derive the page URL on ``http://blog.mbedded.ninja/`` from
       ``file_path`` and load that page's HTML, using a copy cached under
       ``data/html`` next to this script when one exists.
    3. Find the element whose ``data-image-id`` equals the id, and read its
       ``href`` (image URL) and ``data-description`` (caption).
    4. Download the image into ``data/img`` if it is not there yet, and copy
       it into the Hugo ``static/images`` tree if it is not there yet.
    5. Return the shortcode text and increment the module-level
       ``num_replacements`` counter.

    Args:
        find_str: The matched text containing ``id=<id> `` and optionally
            ``w=<width> ``.
        file_path: Path of the page file being edited; must contain
            ``content\\pages\\``.

    Returns:
        The ``{{< figure ... >}}`` shortcode string.

    Raises:
        AssertionError: If no element with the image id is found in the HTML.
        FileNotFoundError: If the image must be copied to Hugo but is not
            available locally (e.g. because the download failed).
    """
    print(f'find_str = {find_str}, file_path = {file_path}')

    id_start_pos = power_edit.strict_find(find_str, 'id=', 0) + 3
    print(f'id_start_pos = {id_start_pos}')

    id_stop_pos = power_edit.strict_find(find_str, ' ', id_start_pos)
    print(f'id_stop_pos = {id_stop_pos}')

    # Named image_id (not id) so the builtin is not shadowed.
    image_id = find_str[id_start_pos:id_stop_pos]
    print(f'id = {image_id}')

    # The width is optional: a RuntimeError from the lookup is treated as
    # "no width given".
    try:
        width_start_pos = power_edit.strict_find(find_str, 'w=', 0) + 2
        width_stop_pos = power_edit.strict_find(find_str, ' ', width_start_pos)
        img_width = find_str[width_start_pos:width_stop_pos]
    except RuntimeError:
        img_width = None

    print(f'img_width = {img_width}')

    # Calculate the URL from the part of the path after 'content\pages\'.
    pages_marker = 'content\\pages\\'
    url_frag_start_pos = power_edit.strict_find(file_path, pages_marker,
                                                0) + len(pages_marker)
    print(f'url_frag_start_pos = {url_frag_start_pos}')

    # NOTE(review): the trailing 10 characters are dropped, presumably a
    # fixed-length file name at the end of the path -- confirm.
    url_frag = file_path[url_frag_start_pos:-10]
    print(f'url_frag = {url_frag}')

    full_url = 'http://blog.mbedded.ninja/' + url_frag.replace('\\', '/')
    print(f'full_url = {full_url}')

    #================================
    # CHECK IF HTML ALREADY SAVED
    #================================

    html_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data',
                            'html')
    # Make the URL safe to use as a file name.
    html_file_name = full_url.replace('/', '').replace(':', '') + '.html'
    html_path = os.path.join(html_dir, html_file_name)
    print(f'file_path = {html_path}')

    if os.path.isfile(html_path):
        print('HTML already downloaded.')
        with open(html_path, 'r', encoding='utf-8') as the_file:
            html = the_file.read()
    else:
        print('HTML not downloaded, so downloading...')
        response = requests.get(full_url)
        html = response.content.decode('utf-8')
        if response.status_code == 200:
            # The cache directory may not exist on a fresh checkout.
            os.makedirs(html_dir, exist_ok=True)
            with open(html_path, 'w+', encoding='utf-8') as the_file:
                the_file.write(html)
        else:
            # Do not cache error pages, otherwise every later run would keep
            # reading the bad copy instead of retrying the download.
            print('Not caching HTML, server returned status '
                  f'{response.status_code}')

    #===================================
    # EXTRACT HTML ELEMENTS
    #===================================

    parser = AdvancedHTMLParser.AdvancedHTMLParser()
    parser.parseStr(html)
    elements = parser.getElementsByAttr('data-image-id', image_id)
    print(f'elements = {elements}')
    # We could find the same image multiple times on the page, this is o.k.
    assert len(elements) >= 1, (
        f'no element with data-image-id={image_id!r} found in {full_url}')
    image_url = elements[0].href

    caption = elements[0].getAttribute('data-description')
    if caption is not None:
        # Remove any " from the caption; it is emitted inside a
        # double-quoted shortcode attribute.
        caption = caption.replace('"', "'")
    print(f'caption = {caption}')

    #===================================
    # IMAGE FINDING/DOWNLOADING
    #===================================

    img_file_name = image_url.split('/')[-1]
    print(f'img_file_name = {img_file_name}')
    # NOTE(review): 45 matches the length of
    # 'http://blog.mbedded.ninja/wp-content/uploads/', so this presumably
    # keeps the path below the uploads directory -- confirm.
    img_sec_path = image_url[45:]
    print(f'img_sec_path = {img_sec_path}')
    img_base_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                'data', 'img')
    print(f'img_base_dir = {img_base_dir}')
    img_path = os.path.join(img_base_dir, img_sec_path.replace('/', '\\'))
    print(f'img_path = {img_path}')

    if os.path.isfile(img_path):
        print('Image alredy downloaded')
    else:
        print('Image not downloaded, downloading...')
        os.makedirs(os.path.dirname(img_path), exist_ok=True)
        r = requests.get(image_url, stream=True)
        try:
            if r.status_code == 200:
                with open(img_path, 'wb') as f:
                    r.raw.decode_content = True
                    shutil.copyfileobj(r.raw, f)
            else:
                print(f'Image download failed with status {r.status_code}')
        finally:
            # A streamed response holds its connection until it is closed.
            r.close()

    #=============================
    # COPY TO HUGO IF NOT ALREADY THERE
    #=============================

    hugo_image_base_path = os.path.join(
        'C:\\Users\\gbmhu\\code\\Hugo\\quickstart\\static\\images\\')
    hugo_image_path = os.path.join(hugo_image_base_path,
                                   img_sec_path.replace('/', '\\'))

    if os.path.isfile(hugo_image_path):
        print('Image already exists in Hugo, not copying.')
    else:
        print('Copying image to hugo site.')
        if not os.path.isfile(img_path):
            # Fail with a clear message instead of an unexplained copy error.
            raise FileNotFoundError(
                f'cannot copy image to Hugo, {img_path} does not exist '
                f'(download of {image_url} failed?)')
        os.makedirs(os.path.dirname(hugo_image_path), exist_ok=True)
        shutil.copyfile(img_path, hugo_image_path)

    #===========================
    # CREATE REPLACEMENT TEXT
    #===========================

    hugo_rel_img_path = '/images/' + img_sec_path
    print(f'hugo_rel_img_path = {hugo_rel_img_path}')

    template = '{{< figure <URL> <CAPTION> caption-position="bottom" <WIDTH> >}}'
    template = template.replace('<URL>', 'src="' + hugo_rel_img_path + '"')

    if caption is not None:
        template = template.replace('<CAPTION>', 'caption="' + caption + '"')
    else:
        template = template.replace('<CAPTION>', '')

    if img_width is None:
        template = template.replace('<WIDTH>', '')
    else:
        template = template.replace('<WIDTH>', 'width="' + img_width + 'px"')

    print(f'replacment_text = {template}')

    global num_replacements
    num_replacements += 1

    return template