Exemple #1
0
def load_links_index(out_dir=OUTPUT_DIR, import_path=None):
    """Load the saved links index and merge in links parsed from import_path.

    out_dir: directory holding the existing JSON index; skipped when falsy.
    import_path: optional file to parse for additional links.

    Returns a tuple ``(all_links, new_links)``: the validated merge of the
    existing and imported links, and the validated imported links alone.
    """

    stored = []
    if out_dir:
        stored = parse_json_links_index(out_dir)
        check_links_structure(stored)

    imported, parser_name = [], None
    if import_path:
        # announce, parse, then validate the import file
        log_parsing_started(import_path)
        parsed, parser_name = parse_links(import_path)
        imported = validate_links(parsed)
        check_links_structure(imported)

    # the combined list is validated again after concatenation
    merged = validate_links(stored + imported)
    check_links_structure(merged)
    added_count = len(merged) - len(stored)

    if import_path and parser_name:
        log_parsing_finished(added_count, parser_name)

    return merged, imported
Exemple #2
0
def merge_links(archive_path=OUTPUT_DIR, import_path=None, only_new=False):
    """Get new links from a file and optionally merge them into the existing archive.

    archive_path: directory holding the existing JSON index; skipped when falsy.
    import_path: optional file to parse for new links.
    only_new: when true, return the result of ``new_links(all_links,
        existing_links)`` instead of the full merged list.

    Returns the validated merged links, or only the new ones if ``only_new``.
    """
    all_links = []
    # Bound up front so the name exists even when nothing is imported.
    parser_name = None
    if import_path:
        # parse and validate the import file
        raw_links, parser_name = parse_links(import_path)
        all_links = validate_links(raw_links)

    # merge existing links in archive_path and new links
    existing_links = []
    if archive_path:
        existing_links = parse_json_links_index(archive_path)
        all_links = validate_links(existing_links + all_links)

    # The message describes an import, so it is only emitted when one took
    # place; previously a call without import_path crashed here because
    # parser_name had never been assigned.
    if import_path:
        num_new_links = len(all_links) - len(existing_links)
        if SHOW_PROGRESS:
            print()
        print('    > Adding {} new links to index from {} (parsed as {} format)'.
              format(
                  num_new_links,
                  pretty_path(import_path),
                  parser_name,
              ))

    if only_new:
        return new_links(all_links, existing_links)

    return all_links
Exemple #3
0
def merge_links(archive_path=OUTPUT_DIR, import_path=None, only_new=False):
    """Get new links from a file and optionally merge them into the existing archive.

    archive_path: directory holding the existing JSON index; skipped when falsy.
    import_path: optional file to parse for new links.
    only_new: when true, suppress the "Adding ..." message and return the
        result of ``new_links(all_links, existing_links)``.

    Returns the validated merged links, or only the new ones if ``only_new``.
    """
    all_links = []
    # Bound up front so the name exists even when nothing is imported.
    parser_name = None
    if import_path:
        # parse and validate the import file
        raw_links, parser_name = parse_links(import_path)
        all_links = validate_links(raw_links)

    # merge existing links in archive_path and new links
    existing_links = []
    if archive_path:
        existing_links = parse_json_links_index(archive_path)
        all_links = validate_links(existing_links + all_links)

    num_new_links = len(all_links) - len(existing_links)
    # Require import_path as well: the count can be non-zero without an import
    # (e.g. if validate_links returns a different number of links than it was
    # given), and the message would then read the never-assigned parser_name.
    if import_path and num_new_links and not only_new:
        print('{green}[+] [{}] Adding {} new links to index from {} ({} format){reset}'.format(
            datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            num_new_links,
            pretty_path(import_path),
            parser_name,
            **ANSI,
        ))
    # Nothing is printed when no new links were added.

    if only_new:
        return new_links(all_links, existing_links)

    return all_links
Exemple #4
0
def load_links(archive_path=OUTPUT_DIR, import_path=None):
    """Load the archive's existing links and merge in any parsed from import_path.

    archive_path: directory holding the existing JSON index; skipped when falsy.
    import_path: optional file to parse for additional links.

    Returns a tuple ``(all_links, new_links)``: the validated merge of the
    existing and imported links, and the validated imported links alone.
    """

    archived = []
    if archive_path:
        archived = parse_json_links_index(archive_path)
        check_links_structure(archived)

    imported, parser_name = [], None
    if import_path:
        # parse, then validate the import file
        parsed, parser_name = parse_links(import_path)
        imported = validate_links(parsed)
        check_links_structure(imported)

    # the combined list is validated again after concatenation
    merged = validate_links(archived + imported)
    check_links_structure(merged)
    added_count = len(merged) - len(archived)

    if import_path and parser_name:
        message = '    > Adding {} new links to index (parsed import as {})'
        print(message.format(added_count, parser_name))

    return merged, imported
def get_links(new_links_file_path, archive_path=HTML_FOLDER):
    """Parse links from a file and merge them with the archive's existing index.

    new_links_file_path: file to parse for new links.
    archive_path: directory holding the existing JSON index; skipped when falsy.

    Prints a timestamped summary line and returns the validated links.
    """
    # parse the import file and validate what it yields
    merged = validate_links(parse_links(new_links_file_path))

    # fold in whatever the archive already has, then validate the union
    indexed = []
    if archive_path:
        indexed = parse_json_links_index(archive_path)
        merged = validate_links(indexed + merged)

    added_count = len(merged) - len(indexed)
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print('[*] [{}] Adding {} new links from {} to index'.format(
        timestamp, added_count, new_links_file_path))

    return merged
Exemple #6
0
def load_links_index(out_dir=OUTPUT_DIR, import_path=None):
    """Parse and load the existing index with any new links from import_path merged in.

    out_dir: directory holding the existing JSON index; skipped when falsy.
    import_path: optional file to parse for additional links.

    Returns a tuple ``(all_links, new_links)``: the validated merge of the
    existing and imported links, and the validated imported links alone.
    """

    existing_links = []
    if out_dir:
        existing_links = parse_json_links_index(out_dir)
        # was `existing_link` (undefined name), which raised NameError
        check_links_structure(existing_links)

    new_links = []
    if import_path:
        # log that parsing has started, then parse and validate the import file
        log_parsing_started(import_path)
        # the parser name returned alongside the links is not used here
        raw_links, _ = parse_links(import_path)
        new_links = validate_links(raw_links)
        check_links_structure(new_links)

    # merge existing links in out_dir and new links
    all_links = validate_links(existing_links + new_links)

    return all_links, new_links