def update_href_link_suffix_in_content(content: str, output_suffix: str, links_to_update: Iterable[Path]) -> str:
    """
    Update file extensions for href links in the provided content str.

    Every element that carries an href attribute is inspected.  When the path component of the href equals
    one of the paths in links_to_update, the href is replaced by that path with its suffix changed to
    output_suffix.  Only the path component is written back, so a scheme, query string or fragment that
    was present on a matched href is not retained.

    Parameters
    ----------
    content : str
        string containing href links to be updated
    output_suffix : str
        string for the new suffix including the leading '.' for example '.md'
    links_to_update : Iterable[Path]
        iterable of path objects where each path is checked as being a href link path in the provided content.
        The iterable is read exactly once, so one-shot iterators such as generators may be passed.

    Returns
    -------
    str:
        The updated content, serialised back from the parsed document
    """
    # Materialise the iterable a single time: a generator would otherwise be exhausted by the first
    # membership test, and a list would be rescanned for every tag in the document.
    paths_to_update = frozenset(links_to_update)

    soup = BeautifulSoup(content, 'html.parser')
    for a_tag in soup.find_all(href=True):
        url_path = Path(urlparse(a_tag['href']).path)
        if url_path in paths_to_update:
            a_tag['href'] = helper_functions.path_to_posix_str(url_path.with_suffix(output_suffix))

    return str(soup)
def update_html_link_src(content: str, old_name: str, new_name: Path) -> str:
    """
    Re-target href links in html content from an old name to a new path.

    Parameters
    ----------
    content : str
        html content that may contain links to the renamed file
    old_name : str
        value compared against the path component of each href
    new_name : Path
        path written, as a posix style string, into every matching href

    Returns
    -------
    str:
        The content serialised back from the parsed document
    """
    parsed_html = BeautifulSoup(content, 'html.parser')

    # Every element is visited - there is deliberately no early exit on the first match because the
    # content may hold more than one link to the renamed file.
    for element in parsed_html.findAll(href=True):
        if urlparse(element['href']).path != old_name:
            continue
        element['href'] = helper_functions.path_to_posix_str(new_name)

    return str(parsed_html)
def update_markdown_link_src(content: str, old_name: str, new_name: Path) -> str:
    """
    Re-target markdown links in content from an old name to a new path.

    Spans of the form ``[text](src)`` are located and, where ``src`` is exactly old_name, the span is
    rebuilt with the posix style string of new_name as its source.  Only the parenthesised source is
    rewritten; link text that happens to contain old_name is left untouched.

    Parameters
    ----------
    content : str
        markdown content that may contain links to the renamed file
    old_name : str
        link source to look for, compared for exact equality
    new_name : Path
        path written, as a posix style string, in place of old_name

    Returns
    -------
    str:
        The content with every matching link updated, or the content unchanged if nothing matched
    """
    for tag in re.findall(r'\[.*?]\(.*?\)', content):
        # The pattern guarantees a '(' is present and that the tag ends with a single ')'.
        link_head, _, src_part = tag.rpartition('(')
        if src_part.rstrip(')') != old_name:
            continue

        # Rebuild the tag around the new source instead of str.replace on the whole tag, which would
        # also rewrite old_name inside the link text (and mangle the tag when old_name is empty).
        new_tag = f'{link_head}({helper_functions.path_to_posix_str(new_name)})'
        # Replace on the full content as it may have more than one identical link to the renamed file.
        content = content.replace(tag, new_tag)

    return content
def test_set_pandoc_path(monkeypatch, is_linux, if_frozen, expected_ends_with):
    """Check the path stored by set_pandoc_path ends as expected for the faked platform and packaging state."""
    settings = conversion_settings.ConversionSettings()
    converter = pandoc_converter.PandocConverter(settings)

    # The stand-ins are installed on the class after the converter has been built; each one receives
    # the instance as its single, ignored, argument and returns the value supplied to this test.
    monkeypatch.setattr(pandoc_converter.PandocConverter, 'is_system_linux', lambda _ignored: is_linux)
    monkeypatch.setattr(pandoc_converter.PandocConverter, 'is_this_a_frozen_package', lambda _ignored: if_frozen)

    converter.set_pandoc_path()

    # Compare in posix form so the expectation uses forward slashes whatever the host platform.
    pandoc_path = helper_functions.path_to_posix_str(Path(converter._pandoc_path))
    assert pandoc_path.endswith(expected_ends_with)
def clean_html_image_tag(tag, src_path=None):
    """
    Build a minimal attrs dictionary for an img tag

    The returned dictionary always holds 'src' and, only when they are present on the tag, 'width',
    'height' and 'alt'.  Any other attribute of the tag is left out.  The src value is src_path when a
    truthy src_path is supplied, otherwise the tag's own src, falling back to "" when the tag has none;
    it is passed through helper_functions.path_to_posix_str.  Square brackets are removed from the alt text.

    Parameters
    ==========
    tag : bs4.Tag
        img tag to be processed for data
    src_path : str
        path to the image content, if not provided the src from the img tag will be used

    Returns
    =======
    dict :
        attrs dict that can be used to replace an existing attrs dictionary of an img tag -
        "tag.attrs = new_attrs"

    """
    source = src_path if src_path else tag.attrs.get('src', "")
    cleaned = {'src': helper_functions.path_to_posix_str(source)}

    # Sizing attributes are carried over untouched, and only when the tag actually has them.
    for dimension in ('width', 'height'):
        if dimension in tag.attrs:
            cleaned[dimension] = tag.attrs[dimension]

    if 'alt' in tag.attrs:
        cleaned['alt'] = tag.attrs['alt'].replace('[', '').replace(']', '')

    return cleaned
def generate_obsidian_image_markdown_link(tag) -> Optional[str]:
    """
    Build an obsidian style image markdown link from an img tag.

    The link has the form ``![alt|width](src)``, or ``![alt|widthxheight](src)`` when the tag also has
    a height.  Square brackets are removed from the alt text and the src is passed through
    helper_functions.path_to_posix_str.  Missing alt or src values are rendered as empty strings.

    Parameters
    ==========
    tag : bs4.Tag
        an img tag element

    Returns
    =======
    str :
        the obsidian markdown formatted image link when the tag has a non-empty width
    None :
        when the tag has no width, or an empty one - no need to format for obsidian

    """
    attrs = tag.attrs

    width = attrs.get('width', '')
    if not width:
        return None

    height = attrs.get('height', '')
    alt = attrs.get('alt', '').replace('[', '').replace(']', '')
    src = helper_functions.path_to_posix_str(attrs.get('src', ''))

    size = f'{width}x{height}' if height else f'{width}'
    return f'![{alt}|{size}]({src})'
def update_content_with_new_link(old_path, new_path, content: str) -> str:
    """
    Replace every occurrence of an old path in content with a new path.

    Both paths are converted with helper_functions.path_to_posix_str before a plain substring
    replacement is made, so any text equal to the old path string is replaced wherever it appears.
    """
    old_link = helper_functions.path_to_posix_str(old_path)
    new_link = helper_functions.path_to_posix_str(new_path)
    return content.replace(old_link, new_link)
def test_path_to_posix_str(expected, path):
    """path_to_posix_str should turn the supplied path into the expected string."""
    assert helper_functions.path_to_posix_str(path) == expected