예제 #1
0
def correct_hyperlinks(book_dir=BOOK_PATH, dest=None, include_tags=None,
                       ext='.nlpiabak', skip_untitled=True):
    """ DEPRECATED (see translate_line_footnotes)

    Run only the `HyperlinkStyleCorrector().translate` translator over the book
    by delegating to `translate_book`; every other argument is forwarded
    unchanged and the return value of `translate_book` is returned as-is.

    Original intent: find bad footnotes (only urls), visit the page, and add
    the title to the footnote.

    >>> len(correct_hyperlinks(book_dir=BOOK_PATH, dest='cleaned_hyperlinks'))
    2
    >>> rm_rf(os.path.join(BOOK_PATH, 'cleaned_hyperlinks'))
    """
    corrector = HyperlinkStyleCorrector()
    forwarded = dict(
        book_dir=book_dir,
        dest=dest,
        include_tags=include_tags,
        ext=ext,
        skip_untitled=skip_untitled,
    )
    return translate_book(translators=corrector.translate, **forwarded)
예제 #2
0
def translate_book(translators=(HyperlinkStyleCorrector().translate,
                                translate_line_footnotes),
                   book_dir=BOOK_PATH,
                   dest=None,
                   include_tags=None,
                   ext='.nlpiabak',
                   skip_untitled=True):
    """ Apply the `translators` functions to the tagged lines of every book file

    Each selected line is passed through the translators in order, every
    translator receiving the output of the previous one. All lines of a file
    (changed or not) are then written to that file's destination path.

    Args:
      translators (callable or sequence of callables): function(s) taking a
        line and returning the (possibly modified) line. A bare callable, or
        any object without `__len__`, is wrapped in a 1-tuple.
      book_dir (str): forwarded to `get_tagged_sections`
      dest (str): selects where the translated files are written
        - falsy: overwrite each file in place, after copying the original to
          `filepath + '.' + ext.lstrip('.')`
        - contains `os.path.sep`: used as the output directory
        - otherwise: a subdirectory with that name next to each source file
      include_tags (container of str): only lines whose tag is in
        `include_tags`, or starts with one of its elements, are translated.
        `None` means every line is translated.
      ext (str): extension of the backup copy made when `dest` is falsy
      skip_untitled (bool): accepted but not used by this function

    Returns:
      list of tuples `(fileid, lineno, filepath, destpath, line, new_line)`,
      one for each time a translator changed a line

    >>> len(translate_book(book_dir=BOOK_PATH, dest='cleaned_hyperlinks'))
    3
    >>> rm_rf(os.path.join(BOOK_PATH, 'cleaned_hyperlinks'))
    """
    if callable(translators) or not hasattr(translators, '__len__'):
        translators = (translators, )

    sections = get_tagged_sections(book_dir=book_dir,
                                   include_tags=include_tags)
    file_line_maps = []

    for fileid, (filepath, tagged_lines) in enumerate(sections):
        log.info('filepath={}'.format(filepath))
        destpath = filepath
        if not dest:
            # in-place mode: keep a backup of the original next to it
            copyfile(filepath, filepath + '.' + ext.lstrip('.'))
        elif os.path.sep in dest:
            destpath = os.path.join(dest, os.path.basename(filepath))
        else:
            destpath = os.path.join(os.path.dirname(filepath), dest,
                                    os.path.basename(filepath))
        ensure_dir_exists(os.path.dirname(destpath))

        # Translate everything BEFORE opening destpath for writing: mode 'w'
        # truncates the file immediately, so a translator that raises midway
        # would otherwise leave a truncated or half-written file behind (the
        # source file itself in in-place mode). This also fully consumes
        # `tagged_lines` first, in case it is read lazily from that same file.
        translated_lines = []
        for lineno, (tag, line) in enumerate(tagged_lines):
            if (include_tags is None or tag in include_tags or any(
                    tag.startswith(t) for t in include_tags)):
                for translate in translators:
                    new_line = translate(line)
                    if line != new_line:
                        file_line_maps.append((fileid, lineno, filepath,
                                               destpath, line, new_line))
                        line = new_line
            translated_lines.append(line)

        with open(destpath, 'w') as fout:
            log.info('destpath={}'.format(destpath))
            # writelines adds no separators, matching the per-line write()
            fout.writelines(translated_lines)
    return file_line_maps