Code Example #1
File: index.py  Project: XIONGJUNHAN/ArchiveBox
def load_links_index(out_dir=OUTPUT_DIR, import_path=None):
    """parse and load existing index with any new links from import_path merged in"""

    existing_links = []
    if out_dir:
        existing_links = parse_json_links_index(out_dir)
        check_links_structure(existing_links)

    new_links = []
    if import_path:
        # parse and validate the import file
        log_parsing_started(import_path)
        raw_links, parser_name = parse_links(import_path)
        new_links = validate_links(raw_links)
        check_links_structure(new_links)

    # merge existing links in out_dir and new links
    all_links = validate_links(existing_links + new_links)
    check_links_structure(all_links)
    num_new_links = len(all_links) - len(existing_links)

    if import_path and parser_name:
        log_parsing_finished(num_new_links, parser_name)

    return all_links, new_links
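For reference, a minimal usage sketch of load_links_index (the call site and the import file name are illustrative assumptions, not taken from the project):

# Hypothetical call site: merge a bookmarks export into the existing index.
# load_links_index returns the full merged index plus just the new entries.
all_links, new_links = load_links_index(
    out_dir=OUTPUT_DIR,
    import_path='bookmarks_export.html',  # assumed name; any format parse_links accepts
)
print('{} new links out of {} total'.format(len(new_links), len(all_links)))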
Code Example #2
def merge_links(archive_path=OUTPUT_DIR, import_path=None, only_new=False):
    """get new links from file and optionally append them to links in existing archive"""
    all_links = []
    if import_path:
        # parse and validate the import file
        raw_links, parser_name = parse_links(import_path)
        all_links = validate_links(raw_links)

    # merge existing links in archive_path and new links
    existing_links = []
    if archive_path:
        existing_links = parse_json_links_index(archive_path)
        all_links = validate_links(existing_links + all_links)

    num_new_links = len(all_links) - len(existing_links)
    if num_new_links and not only_new:
        print('{green}[+] [{}] Adding {} new links to index from {} ({} format){reset}'.format(
            datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            num_new_links,
            pretty_path(import_path),
            parser_name,
            **ANSI,
        ))
    # else:
    #     print('[*] [{}] No new links added to {}/index.json{}'.format(
    #         datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    #         archive_path,
    #         ' from {}'.format(import_path) if import_path else '',
    #         **ANSI,
    #     ))

    if only_new:
        return new_links(all_links, existing_links)

    return all_links
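The only_new branch above calls a new_links helper that is not shown in this snippet. A plausible sketch of it, assuming each link is a dict keyed by 'url' (the body below is a guess, not the project's actual implementation):

def new_links(all_links, existing_links):
    # Sketch only: return links whose URL is not already in the existing
    # index. Assumes each link is a dict with a 'url' key.
    existing_urls = {link['url'] for link in existing_links}
    return [link for link in all_links if link['url'] not in existing_urls]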
Code Example #3
def merge_links(archive_path=OUTPUT_DIR, import_path=None, only_new=False):
    """get new links from file and optionally append them to links in existing archive"""
    all_links = []
    if import_path:
        # parse and validate the import file
        raw_links, parser_name = parse_links(import_path)
        all_links = validate_links(raw_links)

    # merge existing links in archive_path and new links
    existing_links = []
    if archive_path:
        existing_links = parse_json_links_index(archive_path)
        all_links = validate_links(existing_links + all_links)

    num_new_links = len(all_links) - len(existing_links)
    if SHOW_PROGRESS:
        print()
    if import_path:
        # guard: parser_name is only bound when an import file was parsed above
        print('    > Adding {} new links to index from {} (parsed as {} format)'.format(
            num_new_links,
            pretty_path(import_path),
            parser_name,
        ))

    if only_new:
        return new_links(all_links, existing_links)

    return all_links
Code Example #4
File: archive.py  Project: yyniu/ArchiveBox
def load_links(archive_path=OUTPUT_DIR, import_path=None):
    """get new links from file and optionally append them to links in existing archive"""

    existing_links = []
    if archive_path:
        existing_links = parse_json_links_index(archive_path)
        check_links_structure(existing_links)

    new_links = []
    if import_path:
        # parse and validate the import file
        raw_links, parser_name = parse_links(import_path)
        new_links = validate_links(raw_links)
        check_links_structure(new_links)

    # merge existing links in archive_path and new links
    all_links = validate_links(existing_links + new_links)
    check_links_structure(all_links)
    num_new_links = len(all_links) - len(existing_links)

    if import_path and parser_name:
        print(
            '    > Adding {} new links to index (parsed import as {})'.format(
                num_new_links,
                parser_name,
            ))

    return all_links, new_links
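Several of these examples call check_links_structure before and after merging but never show it. A minimal sketch of what such a validator might assert, assuming links are dicts with a string 'url' field (the real implementation may check more):

def check_links_structure(links):
    # Sketch only: basic sanity check that the index is a list of dicts,
    # each carrying a string 'url'. The actual validator may enforce more.
    assert isinstance(links, list)
    for link in links:
        assert isinstance(link, dict)
        assert isinstance(link.get('url'), str)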
Code Example #5
def get_links(new_links_file_path, archive_path=HTML_FOLDER):
    """get new links from file and optionally append them to links in existing archive"""
    # parse and validate the new_links_file
    raw_links = parse_links(new_links_file_path)
    valid_links = validate_links(raw_links)

    # merge existing links in archive_path and new links
    existing_links = []
    if archive_path:
        existing_links = parse_json_links_index(archive_path)
        valid_links = validate_links(existing_links + valid_links)

    num_new_links = len(valid_links) - len(existing_links)
    print('[*] [{}] Adding {} new links from {} to index'.format(
        datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        num_new_links,
        new_links_file_path,
    ))

    return valid_links
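A short usage sketch for this older single-return variant (the file name is an illustrative assumption):

# Hypothetical invocation: merge links from an exported bookmarks file
# into the index stored under HTML_FOLDER, then inspect the first few.
links = get_links('bookmarks_export.html', archive_path=HTML_FOLDER)
for link in links[:5]:
    print(link['url'])  # assumes links are dicts with a 'url' key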
Code Example #6
File: index.py  Project: maziesmith/python-resources
def load_links_index(out_dir=OUTPUT_DIR, import_path=None):
    """parse and load existing index with any new links form import_path merged in"""
    
    existing_links = []
    if out_dir:
        existing_links = parse_json_links_index(out_dir)
        check_links_structure(existing_links)

    new_links = []
    if import_path:
        # parse and validate the import file
        # (log_parsing_started is a logging helper)
        log_parsing_started(import_path)
        raw_links, parser_name = parse_links(import_path)
        new_links = validate_links(raw_links)
        check_links_structure(new_links)

    # merge existing links in out_dir and new links
    all_links = validate_links(existing_links + new_links)
    
    return all_links, new_links
Code Example #7
import os
from collections import defaultdict

from helpers import make_new_directory, arrange_resources, make_menu_pages
from settings import SRC as src
from settings import DEST as dest
from parse import parse_links

if __name__ == '__main__':
    """
    Creates dest folder, a 'names' dictionary
    and initiates recursive 'parse_links' process
    """
    if os.path.exists(src):
        print("Processing")
        make_new_directory(dest)
        # 'names' stores folder names to avoid conflicts
        names = defaultdict(int)
        menu_links = {}
        make_menu_pages(src, dest, menu_links)
        parse_links(names,
                    src,
                    menu_links,
                    input_path=os.path.join(src, "index.html"),
                    output_path=dest,
                    layer_level=0,
                    recursion_depth=3)
        arrange_resources(src, dest)
        print("Successful\n")
    else:
        print("\nError: Unable to find src location.",
              "\nEnter valid src path in settings.py.")
        quit()
else:
    print("This module is not meant to be imported.")
Code Example #8
File: run.py  Project: ellore/copy-website
import os
from collections import defaultdict

from helpers import make_new_directory, arrange_resources, make_menu_pages
from settings import SRC as src
from settings import DEST as dest
from parse import parse_links

if __name__ == '__main__':
    """
    Creates dest folder, a 'names' dictionary
    and initiates recursive 'parse_links' process
    """
    if os.path.exists(src):
        print("Processing")
        make_new_directory(dest)
        # 'names' stores folder names to avoid conflicts
        names = defaultdict(int)
        menu_links = {}
        make_menu_pages(src, dest, menu_links)
        parse_links(names, src, menu_links,
                    input_path=os.path.join(src, "index.html"),
                    output_path=dest,
                    layer_level=0,
                    recursion_depth=3)
        arrange_resources(src, dest)
        print("Successful\n")
    else:
        print("\nError: Unable to find src location.",
              "\nEnter valid src path in settings.py.")
        quit()
else:
    print("This module is not meant to be imported.")