def test_dont_touch_absolute_urls():
    """Root-relative and fully-qualified link targets must come back unchanged."""
    markdown = ''' [Homepage](/) [Github](https://github.com/user/repo) [Privacy policy](/privacy) '''
    source, destination = Path('README'), Path('docs/nav.md')
    # None of the targets above is a relative path, so nothing may be rewritten.
    assert rewrite_relative_urls(markdown, source, destination) == markdown
def test_multiline_link(): input = ''' Here's a [link whose text is really long and so is broken across multiple lines](CHANGELOG.md) to the changelog. ''' output = rewrite_relative_urls(input, Path('README'), Path('docs/nav.md')) assert output == '''
def test_relative_link_down(): input = ''' Check [this link](foobar.md) for more information ''' output = rewrite_relative_urls( input, Path('docs/includes/feature_a/index.md'), Path('docs/setup.md') ) assert output == '''
def test_rewrite_relative_urls(
    markdown,
    source_path,
    destination_path,
    expected_result,
):
    """Check a single rewrite case against its expected markdown.

    The four arguments are presumably injected by a parametrize decorator
    that sits above this definition -- confirm against the full file.
    """
    rewritten = rewrite_relative_urls(
        markdown,
        source_path,
        destination_path,
    )
    assert rewritten == expected_result
def test_link_reference():
    """A reference-style link definition gets its relative target rewritten."""
    markdown = textwrap.dedent(''' Here's a [link][changelog] to the changelog. [changelog]: CHANGELOG.md ''')
    expected = textwrap.dedent(''' Here's a [link][changelog] to the changelog. [changelog]: ../CHANGELOG.md ''')

    # The content moves from the repository root into ``docs/``, hence ``../``.
    rewritten = rewrite_relative_urls(
        markdown,
        Path('README'),
        Path('docs/nav.md'),
    )

    assert rewritten == expected
def found_include_markdown_tag(match):
    """Return the text that replaces one matched include-markdown tag.

    ``page_src_path`` and ``get_file_content`` are read from the enclosing
    scope (not visible in this block).

    Args:
        match: Regex match object exposing the named groups ``filename``,
            ``_includer_indent`` and ``arguments``.

    Returns:
        The processed content of the included file.  Unless the ``comments``
        option is false, it is wrapped between ``<!-- BEGIN INCLUDE ... -->``
        and ``<!-- END INCLUDE -->`` HTML comments.

    Raises:
        FileNotFoundError: If the file named by the tag does not exist
            relative to the including page's directory.
        ValueError: If a boolean option carries a value that is not a key
            of ``TRUE_FALSE_STR_BOOL``.
    """
    # handle filename parameter and read content
    filename = match.group('filename')
    file_path_abs = page_src_path.parent / filename
    if not file_path_abs.exists():
        raise FileNotFoundError('File \'%s\' not found' % filename)
    text_to_include = file_path_abs.read_text(encoding='utf8')

    # handle options and regex modifiers
    _includer_indent = match.group('_includer_indent')
    arguments_string = match.group("arguments")

    # boolean options, each initialised with its default value
    bool_options = {
        'rewrite_relative_urls': {
            'value': True,
            'regex': ARGUMENT_REGEXES['rewrite_relative_urls'],
        },
        'comments': {
            'value': True,
            'regex': ARGUMENT_REGEXES['comments'],
        },
        'preserve_includer_indent': {
            'value': False,
            'regex': ARGUMENT_REGEXES['preserve_includer_indent'],
        },
        'dedent': {
            'value': False,
            'regex': ARGUMENT_REGEXES['dedent'],
        },
    }

    # A dedicated name is used for the per-option match so that the
    # ``match`` parameter is not rebound inside the loop.
    for opt_name, opt_data in bool_options.items():
        opt_match = re.search(opt_data['regex'], arguments_string)
        if opt_match is None:
            continue
        try:
            # An empty captured value falls back to the string form of the
            # option's current default.
            opt_data['value'] = TRUE_FALSE_STR_BOOL[
                opt_match.group(1) or TRUE_FALSE_BOOL_STR[opt_data['value']]
            ]
        except KeyError:
            raise ValueError(
                ('Unknown value for \'%s\'. Possible values '
                 'are: true, false') % opt_name
            )

    # string options
    start = None
    start_match = re.search(ARGUMENT_REGEXES['start'], arguments_string)
    if start_match is not None:
        start = process.interpret_escapes(start_match.group(1))
        # keep only what follows the first occurrence of ``start``
        _, _, text_to_include = text_to_include.partition(start)

    end = None
    end_match = re.search(ARGUMENT_REGEXES['end'], arguments_string)
    if end_match is not None:
        end = process.interpret_escapes(end_match.group(1))
        # keep only what precedes the first occurrence of ``end``
        text_to_include, _, _ = text_to_include.partition(end)

    # Relative URLs rewriting
    if bool_options['rewrite_relative_urls']['value']:
        text_to_include = process.rewrite_relative_urls(
            text_to_include,
            source_path=file_path_abs,
            destination_path=page_src_path,
        )

    # Dedent.  The option's ``value`` must be tested: the option entry itself
    # is a non-empty dict and therefore always truthy, which would dedent
    # every inclusion regardless of the argument.
    if bool_options['dedent']['value']:
        text_to_include = textwrap.dedent(text_to_include)

    # Includer indentation preservation
    if bool_options['preserve_includer_indent']['value']:
        text_to_include = ''.join(
            _includer_indent + line
            for line in text_to_include.splitlines(keepends=True)
        )
    else:
        text_to_include = _includer_indent + text_to_include

    # nested includes
    text_to_include = get_file_content(text_to_include, file_path_abs)

    if not bool_options['comments']['value']:
        return text_to_include

    return (
        _includer_indent
        + '<!-- BEGIN INCLUDE %s %s %s -->\n' % (
            filename, html.escape(start or ''), html.escape(end or ''),
        )
        + text_to_include
        + '\n' + _includer_indent + '<!-- END INCLUDE -->'
    )
def found_include_markdown_tag(match):
    """Return the text that replaces one matched ``include-markdown`` directive.

    ``markdown``, ``page_src_path``, ``docs_dir``, ``cumulative_heading_offset``
    and ``get_file_content`` are read from the enclosing scope (not visible in
    this block).

    Args:
        match: Regex match object for the directive.  Its start offset is
            used to compute the line number for diagnostics, and its named
            groups ``_includer_indent`` and ``arguments`` are read here.

    Returns:
        The concatenated, processed content of every file matched by the
        directive's path glob, wrapped between ``<!-- BEGIN INCLUDE ... -->``
        and ``<!-- END INCLUDE -->`` comments unless the ``comments`` option
        is false.  An empty string is returned, after logging an error, when
        no path was passed, when no file matches, or when a boolean argument
        has an invalid value.
    """
    directive_match_start = match.start()

    _includer_indent = match.group('_includer_indent')

    def directive_location():
        # "<page path relative to docs_dir>:<line>" suffix shared by every
        # diagnostic below; computed only on the paths that log something.
        lineno = lineno_from_content_start(
            markdown,
            directive_match_start,
        )
        return f'{os.path.relpath(page_src_path, docs_dir)}:{lineno}'

    filename, raw_filename = parse_filename_argument(match)
    if filename is None:
        logger.error(
            "Found no path passed including with 'include-markdown'"
            f' directive at {directive_location()}',
        )
        return ''

    arguments_string = match.group('arguments')

    # relative paths are resolved against the including page's directory
    if os.path.isabs(filename):
        file_path_glob = filename
    else:
        file_path_glob = os.path.join(
            os.path.abspath(os.path.dirname(page_src_path)),
            filename,
        )

    exclude_match = re.search(
        ARGUMENT_REGEXES['exclude'],
        arguments_string,
    )
    if exclude_match is None:
        ignore_paths = []
    else:
        exclude_string = parse_string_argument(exclude_match)
        if exclude_string is None:
            logger.error(
                "Invalid empty 'exclude' argument in 'include-markdown'"
                f' directive at {directive_location()}',
            )
            ignore_paths = []
        else:
            if os.path.isabs(exclude_string):
                exclude_globstr = exclude_string
            else:
                exclude_globstr = os.path.realpath(
                    os.path.join(
                        os.path.abspath(os.path.dirname(page_src_path)),
                        exclude_string,
                    ),
                )
            ignore_paths = glob.glob(exclude_globstr)

    file_paths_to_include = process.filter_paths(
        glob.iglob(file_path_glob, recursive=True),
        ignore_paths=ignore_paths,
    )

    if not file_paths_to_include:
        logger.error(
            f"No files found including '{raw_filename}' at"
            f' {directive_location()}',
        )
        return ''

    # boolean arguments, each initialised with its default value
    bool_options = {
        'rewrite-relative-urls': {
            'value': True,
            'regex': ARGUMENT_REGEXES['rewrite-relative-urls'],
        },
        'comments': {
            'value': True,
            'regex': ARGUMENT_REGEXES['comments'],
        },
        'preserve-includer-indent': {
            'value': True,
            'regex': ARGUMENT_REGEXES['preserve-includer-indent'],
        },
        'dedent': {
            'value': False,
            'regex': ARGUMENT_REGEXES['dedent'],
        },
        'trailing-newlines': {
            'value': True,
            'regex': ARGUMENT_REGEXES['trailing-newlines'],
        },
    }

    # A dedicated name is used for the per-argument match so that the
    # ``match`` parameter is not rebound inside the loop.
    for arg_name, arg in bool_options.items():
        arg_match = re.search(arg['regex'], arguments_string)
        if arg_match is None:
            continue
        try:
            # An empty captured value falls back to the string form of the
            # argument's current default.
            arg['value'] = TRUE_FALSE_STR_BOOL[
                arg_match.group(1) or TRUE_FALSE_BOOL_STR[arg['value']]
            ]
        except KeyError:
            logger.error(
                f"Invalid value for '{arg_name}' argument of"
                " 'include-markdown' directive at"
                f' {directive_location()}'
                '. Possible values are true or false.',
            )
            return ''

    # start and end arguments
    start_match = re.search(ARGUMENT_REGEXES['start'], arguments_string)
    if start_match:
        start = parse_string_argument(start_match)
        if start is None:
            logger.error(
                "Invalid empty 'start' argument in 'include-markdown'"
                f' directive at {directive_location()}',
            )
    else:
        start = None

    end_match = re.search(ARGUMENT_REGEXES['end'], arguments_string)
    if end_match:
        end = parse_string_argument(end_match)
        if end is None:
            logger.error(
                "Invalid empty 'end' argument in 'include-markdown'"
                f' directive at {directive_location()}',
            )
    else:
        end = None

    # heading offset
    offset_match = re.search(
        ARGUMENT_REGEXES['heading-offset'],
        arguments_string,
    )
    offset = int(offset_match.group(1)) if offset_match else 0

    text_to_include = ''

    # if any start or end strings are found in the included content
    # but the arguments are specified, we must raise a warning
    #
    # `True` means that no start/end strings have been found in content
    # but they have been specified, so the warning(s) must be raised
    expected_but_any_found = [start is not None, end is not None]

    for file_path in file_paths_to_include:
        with open(file_path, encoding='utf-8') as f:
            new_text_to_include = f.read()

        if start is not None or end is not None:
            new_text_to_include, *expected_not_found = (
                process.filter_inclusions(
                    start,
                    end,
                    new_text_to_include,
                )
            )
            # one file containing the delimiter is enough to clear the flag
            for i in range(2):
                if expected_but_any_found[i] and not expected_not_found[i]:
                    expected_but_any_found[i] = False

        # nested includes
        new_text_to_include = get_file_content(
            new_text_to_include,
            file_path,
            docs_dir,
        )

        # trailing newlines right stripping
        if not bool_options['trailing-newlines']['value']:
            new_text_to_include = process.rstrip_trailing_newlines(
                new_text_to_include,
            )

        # relative URLs rewriting
        if bool_options['rewrite-relative-urls']['value']:
            new_text_to_include = process.rewrite_relative_urls(
                new_text_to_include,
                source_path=file_path,
                destination_path=page_src_path,
            )

        # dedent.  The argument's ``value`` must be tested: the entry itself
        # is a non-empty dict and therefore always truthy, which would dedent
        # every inclusion regardless of the argument.
        if bool_options['dedent']['value']:
            new_text_to_include = textwrap.dedent(new_text_to_include)

        # includer indentation preservation
        if bool_options['preserve-includer-indent']['value']:
            new_text_to_include = ''.join(
                _includer_indent + line
                for line in new_text_to_include.splitlines(keepends=True)
            )
        else:
            new_text_to_include = _includer_indent + new_text_to_include

        if offset_match:
            new_text_to_include = process.increase_headings_offset(
                new_text_to_include,
                offset=offset + cumulative_heading_offset,
            )

        text_to_include += new_text_to_include

    # warn if expected start or ends haven't been found in included content
    for argname, value, not_detected in zip(
        ('start', 'end'), (start, end), expected_but_any_found,
    ):
        if not not_detected:
            continue
        readable_files_to_include = ', '.join([
            os.path.relpath(fpath, docs_dir)
            for fpath in file_paths_to_include
        ])
        plural_suffix = 's' if len(file_paths_to_include) > 1 else ''
        logger.warning(
            f"Delimiter {argname} '{value}' of 'include-markdown'"
            f' directive at {directive_location()}'
            f' not detected in the file{plural_suffix}'
            f' {readable_files_to_include}',
        )

    if not bool_options['comments']['value']:
        return text_to_include

    separator = '\n' if bool_options['trailing-newlines']['value'] else ''

    # "<start> <end> " fragment of the BEGIN comment; empty when neither
    # delimiter was given
    start_end_part = html.escape(start or '')
    if start_end_part:
        start_end_part += ' '
    start_end_part += html.escape(end or '')
    if start_end_part:
        start_end_part += ' '

    return (
        f'{_includer_indent}<!-- BEGIN INCLUDE {filename}'
        f' {start_end_part}-->{separator}{text_to_include}'
        f'{separator}{_includer_indent}<!-- END INCLUDE -->'
    )
def test_dont_touch_mailto_urls():
    """A ``mailto:`` link target is not a relative path and must be kept as is."""
    # The address uses the reserved example.com domain so that the link
    # target is a well-formed mailto URL.
    markdown = '''[contact us](mailto:hello@example.com)'''
    output = rewrite_relative_urls(
        markdown,
        Path('README'),
        Path('docs/nav.md'),
    )
    assert output == markdown
def test_relative_link(): input = ''' Here's a [link](CHANGELOG.md) to the changelog. ''' output = rewrite_relative_urls(input, Path('README'), Path('docs/nav.md')) assert output == '''
def test_image_inside_link(): input = ''' Build status: [![Build Status](badge.png)](build/) ''' output = rewrite_relative_urls(input, Path('README'), Path('docs/home.md')) assert output == '''
def test_image(): input = ''' Here's a diagram: ![diagram](assets/diagram.png) ''' output = rewrite_relative_urls(input, Path('README'), Path('docs/home.md')) assert output == '''