Exemple #1
0
def update_href_link_suffix_in_content(content: str, output_suffix: str, links_to_update: Iterable[Path]) -> str:
    """
    Update file extensions for href links in the provided content str.

    Every element that carries an href attribute is inspected.  The path component of the href (as returned by
    urlparse, so without query string or fragment) is compared against the paths in links_to_update.  On a match
    the href is replaced by that path with its suffix changed to output_suffix.  Because the replacement is built
    from the path component only, a query string or fragment on the original href is not carried over.

    Parameters
    ----------
    content : str
        string containing href links to be updated
    output_suffix :  str
        string for the new suffix including the leading '.' for example '.md'
    links_to_update : Iterable[Path]
        iterable of path objects where each path is checked as being a href link path in the provided content.
        It is consumed exactly once, so one-shot iterables such as generators are supported.

    Returns
    -------
    str:
        The updated content, serialised back from the parsed html

    """
    # Materialise once up front: a generator would be exhausted by the first membership test, and a set keeps
    # each lookup constant-time regardless of how many links the content holds.
    paths_to_update = frozenset(links_to_update)

    soup = BeautifulSoup(content, 'html.parser')
    # find_all is the current name of the method; findAll is a deprecated alias.
    for a_tag in soup.find_all(href=True):
        url_path = Path(urlparse(a_tag['href']).path)
        if url_path in paths_to_update:
            a_tag['href'] = helper_functions.path_to_posix_str(url_path.with_suffix(output_suffix))

    return str(soup)
Exemple #2
0
def update_html_link_src(content: str, old_name: str, new_name: Path) -> str:
    """
    Point every href link that targets old_name at new_name instead.

    Each element with an href attribute is checked; when the path component of its href (as returned by urlparse)
    is exactly equal to old_name the href is replaced with new_name passed through
    helper_functions.path_to_posix_str.

    Parameters
    ----------
    content : str
        html content that may contain links to the renamed file
    old_name : str
        link path to look for, compared as a plain string against the href path component
    new_name : Path
        path that replaces the old link target

    Returns
    -------
    str:
        The content serialised back from the parsed html, with matching links updated

    """
    soup = BeautifulSoup(content, 'html.parser')
    # find_all is the current name of the method; findAll is a deprecated alias.
    for a_tag in soup.find_all(href=True):
        if urlparse(a_tag['href']).path != old_name:
            continue
        # do not return early after finding link as content may have more than one link to the renamed file
        a_tag['href'] = helper_functions.path_to_posix_str(new_name)

    return str(soup)
Exemple #3
0
def update_markdown_link_src(content: str, old_name: str, new_name: Path) -> str:
    """
    Point every markdown link whose target is old_name at new_name instead.

    Links of the form ``[text](target)`` are located with a regular expression.  For each one whose target is
    exactly old_name, only the target part is rewritten; the link text is left untouched even when it happens to
    contain old_name.

    Parameters
    ----------
    content : str
        markdown content that may contain links to the renamed file
    old_name : str
        link target to look for, compared as a plain string
    new_name : Path
        path that replaces the old link target, converted with helper_functions.path_to_posix_str

    Returns
    -------
    str:
        The content with matching link targets replaced; returned unchanged when nothing matches

    """
    tags = re.findall(r'\[.*?]\(.*?\)', content)

    # dict.fromkeys drops repeated tags while keeping order; str.replace below already updates every occurrence.
    for tag in dict.fromkeys(tags):
        # Split at the last '(' so the text part, including its closing ']', is kept verbatim.
        text_part, _, target = tag.rpartition('(')
        if target.rstrip(')') != old_name:
            continue

        # Rebuild the tag rather than using tag.replace(old_name, ...), which would also rewrite the link text
        # and misbehaves for an empty old_name.
        new_tag = f'{text_part}({helper_functions.path_to_posix_str(new_name)})'
        content = content.replace(tag, new_tag)

    return content
Exemple #4
0
def test_set_pandoc_path(monkeypatch, is_linux, if_frozen, expected_ends_with):
    """
    Check that set_pandoc_path stores a path ending with expected_ends_with.

    is_linux and if_frozen are the values the patched is_system_linux and is_this_a_frozen_package report.
    The arguments are presumably supplied by a parametrize decorator that is not visible here.
    """
    # Build the converter first; the probes are only replaced afterwards.
    converter = pandoc_converter.PandocConverter(conversion_settings.ConversionSettings())

    # Patch on the class: each stand-in receives the instance as its only argument and ignores it.
    monkeypatch.setattr(pandoc_converter.PandocConverter, 'is_system_linux', lambda _ignored: is_linux)
    monkeypatch.setattr(pandoc_converter.PandocConverter, 'is_this_a_frozen_package', lambda _ignored: if_frozen)

    converter.set_pandoc_path()

    posix_path = helper_functions.path_to_posix_str(Path(converter._pandoc_path))
    assert posix_path.endswith(expected_ends_with)
Exemple #5
0
def clean_html_image_tag(tag, src_path=None):
    """
    Build a reduced attrs dictionary for an img tag.

    The returned dictionary always has a 'src' entry: src_path when a truthy value is provided, otherwise the
    tag's own src, or "" when the tag has none.  The value is passed through helper_functions.path_to_posix_str.
    'width' and 'height' are copied over only when present on the tag.  'alt' is copied only when present, with
    any '[' and ']' characters removed.  No other attributes are included.

    Parameters
    ==========
    tag : bs4.Tag
        img tag to be processed for data
    src_path : str
        path to the image content, if not provided the src from the img tag will be used

    Returns
    =======
    dict : attrs dict that can be used to replace an existing attrs dictionary of an img tag - "tag.attrs = new_attrs"

    """
    chosen_src = src_path if src_path else tag.attrs.get('src', "")
    cleaned = {'src': helper_functions.path_to_posix_str(chosen_src)}

    # Size attributes are carried over untouched when the tag has them.
    for dimension in ('width', 'height'):
        if dimension in tag.attrs:
            cleaned[dimension] = tag.attrs[dimension]

    if 'alt' in tag.attrs:
        # Square brackets are stripped from the alt text.
        cleaned['alt'] = tag.attrs['alt'].replace('[', '').replace(']', '')

    return cleaned
Exemple #6
0
def generate_obsidian_image_markdown_link(tag) -> Optional[str]:
    """
    Build an obsidian style image link from an img tag.

    The link has the form ``![alt|WIDTH](src)``, or ``![alt|WIDTHxHEIGHT](src)`` when the tag also has a height.
    Square brackets are removed from the alt text and the src is passed through
    helper_functions.path_to_posix_str.  Missing alt or src values are treated as empty strings.

    Parameters
    ==========
    tag : bs4.Tag
        an img tag element

    Returns
    =======
    str : if width in html tag returns obsidian markdown formatted image link
    None : if width is missing or empty in the image tag - no need to format for obsidian

    """
    attrs = tag.attrs

    width = attrs.get('width', '')
    if not width:
        return None

    height = attrs.get('height', '')
    size = f'{width}x{height}' if height else f'{width}'

    alt_text = attrs.get('alt', '').replace('[', '').replace(']', '')
    src = helper_functions.path_to_posix_str(attrs.get('src', ''))

    return f'![{alt_text}|{size}]({src})'
Exemple #7
0
def update_content_with_new_link(old_path, new_path, content: str) -> str:
    """
    Replace every occurrence of old_path in content with new_path.

    Both paths are first converted with helper_functions.path_to_posix_str; the replacement is a plain
    string substitution over the whole content.
    """
    old_link = helper_functions.path_to_posix_str(old_path)
    new_link = helper_functions.path_to_posix_str(new_path)
    return content.replace(old_link, new_link)
def test_path_to_posix_str(expected, path):
    """Check that path_to_posix_str converts path into the expected string."""
    assert helper_functions.path_to_posix_str(path) == expected