def load_links_index(out_dir=OUTPUT_DIR, import_path=None):
    """Load the saved links index and merge in links parsed from import_path.

    Args:
        out_dir: Directory passed to ``parse_json_links_index`` to read the
            existing index. When falsy, no existing links are loaded.
        import_path: Optional file passed to ``parse_links``. When falsy,
            nothing is imported.

    Returns:
        A tuple ``(all_links, new_links)``: the validated combination of the
        existing and imported links, and the validated imported links alone
        (an empty list when ``import_path`` is not given).
    """
    indexed = []
    if out_dir:
        indexed = parse_json_links_index(out_dir)
        check_links_structure(indexed)

    imported = []
    if import_path:
        # Parse the import file and validate what it produced.
        log_parsing_started(import_path)
        parsed, parser_name = parse_links(import_path)
        imported = validate_links(parsed)
        check_links_structure(imported)

    # Re-validate the combined list of indexed and imported links.
    merged = validate_links(indexed + imported)
    check_links_structure(merged)
    added_count = len(merged) - len(indexed)

    # parser_name only exists when an import ran; the `and` short-circuits
    # before it is read otherwise.
    if import_path and parser_name:
        log_parsing_finished(added_count, parser_name)

    return merged, imported
def merge_links(archive_path=OUTPUT_DIR, import_path=None, only_new=False):
    """Get new links from a file and optionally merge them into the existing archive index.

    Args:
        archive_path: Directory passed to ``parse_json_links_index`` to read
            the existing index. When falsy, no existing links are merged.
        import_path: Optional file passed to ``parse_links``. When falsy,
            nothing is imported.
        only_new: When true, return ``new_links(all_links, existing_links)``
            instead of the full merged list, and skip the status message.

    Returns:
        The merged, validated links, or the result of ``new_links(...)`` when
        ``only_new`` is set.
    """
    all_links = []
    # Bound up front so the status message below can never hit an unbound
    # local when no import file was given.
    parser_name = None
    if import_path:
        # parse and validate the import file
        raw_links, parser_name = parse_links(import_path)
        all_links = validate_links(raw_links)

    # merge existing links in archive_path and new links
    existing_links = []
    if archive_path:
        existing_links = parse_json_links_index(archive_path)
        all_links = validate_links(existing_links + all_links)

    num_new_links = len(all_links) - len(existing_links)
    # The message names the import file and its parser, so it is only
    # meaningful (and only safe to format) when an import actually ran.
    if import_path and num_new_links and not only_new:
        print('{green}[+] [{}] Adding {} new links to index from {} ({} format){reset}'.format(
            datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            num_new_links,
            pretty_path(import_path),
            parser_name,
            **ANSI,
        ))

    if only_new:
        return new_links(all_links, existing_links)

    return all_links
def merge_links(archive_path=OUTPUT_DIR, import_path=None, only_new=False):
    """Get new links from a file and optionally merge them into the existing archive index.

    Args:
        archive_path: Directory passed to ``parse_json_links_index`` to read
            the existing index. When falsy, no existing links are merged.
        import_path: Optional file passed to ``parse_links``. When falsy,
            nothing is imported.
        only_new: When true, return ``new_links(all_links, existing_links)``
            instead of the full merged list.

    Returns:
        The merged, validated links, or the result of ``new_links(...)`` when
        ``only_new`` is set.
    """
    all_links = []
    # Bound up front so the progress message below can never hit an unbound
    # local when no import file was given.
    parser_name = None
    if import_path:
        # parse and validate the import file
        raw_links, parser_name = parse_links(import_path)
        all_links = validate_links(raw_links)

    # merge existing links in archive_path and new links
    existing_links = []
    if archive_path:
        existing_links = parse_json_links_index(archive_path)
        all_links = validate_links(existing_links + all_links)

    num_new_links = len(all_links) - len(existing_links)
    # The message names the import file and its parser, so it is only
    # printed when an import actually ran.
    if SHOW_PROGRESS and import_path:
        print()
        print(' > Adding {} new links to index from {} (parsed as {} format)'.format(
            num_new_links,
            pretty_path(import_path),
            parser_name,
        ))

    if only_new:
        return new_links(all_links, existing_links)

    return all_links
def load_links(archive_path=OUTPUT_DIR, import_path=None):
    """Load the archive's existing links and merge in links parsed from import_path.

    Args:
        archive_path: Directory passed to ``parse_json_links_index`` to read
            the existing index. When falsy, no existing links are loaded.
        import_path: Optional file passed to ``parse_links``. When falsy,
            nothing is imported.

    Returns:
        A tuple ``(all_links, new_links)``: the validated combination of the
        existing and imported links, and the validated imported links alone
        (an empty list when ``import_path`` is not given).
    """
    archived = []
    if archive_path:
        archived = parse_json_links_index(archive_path)
        check_links_structure(archived)

    imported = []
    if import_path:
        # Parse the import file and validate what it produced.
        parsed, parser_name = parse_links(import_path)
        imported = validate_links(parsed)
        check_links_structure(imported)

    # Re-validate the combined list of archived and imported links.
    merged = validate_links(archived + imported)
    check_links_structure(merged)
    added_count = len(merged) - len(archived)

    # parser_name only exists when an import ran; the `and` short-circuits
    # before it is read otherwise.
    if import_path and parser_name:
        message = ' > Adding {} new links to index (parsed import as {})'
        print(message.format(added_count, parser_name))

    return merged, imported
def get_links(new_links_file_path, archive_path=HTML_FOLDER):
    """Parse links from a file and merge them with the archive's existing index.

    Args:
        new_links_file_path: File passed to ``parse_links``.
        archive_path: Directory passed to ``parse_json_links_index`` to read
            the existing index. When falsy, the parsed links are returned
            without merging.

    Returns:
        The validated links, merged with the existing index when
        ``archive_path`` is set.
    """
    # Parse the new-links file and validate what it produced.
    valid_links = validate_links(parse_links(new_links_file_path))

    # Fold the links already indexed under archive_path into the result.
    existing_links = []
    if archive_path:
        existing_links = parse_json_links_index(archive_path)
        valid_links = validate_links(existing_links + valid_links)

    added_count = len(valid_links) - len(existing_links)
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print('[*] [{}] Adding {} new links from {} to index'.format(
        timestamp,
        added_count,
        new_links_file_path,
    ))

    return valid_links
def load_links_index(out_dir=OUTPUT_DIR, import_path=None):
    """Parse and load the existing index with any new links from import_path merged in.

    Args:
        out_dir: Directory passed to ``parse_json_links_index`` to read the
            existing index. When falsy, no existing links are loaded.
        import_path: Optional file passed to ``parse_links``. When falsy,
            nothing is imported.

    Returns:
        A tuple ``(all_links, new_links)``: the validated combination of the
        existing and imported links, and the validated imported links alone
        (an empty list when ``import_path`` is not given).
    """
    existing_links = []
    if out_dir:
        existing_links = parse_json_links_index(out_dir)
        # Was `existing_link` (undefined), which raised NameError whenever
        # out_dir was set.
        check_links_structure(existing_links)

    new_links = []
    if import_path:
        # parse and validate import file
        # this serves as a logging function
        log_parsing_started(import_path)
        raw_links, parser_name = parse_links(import_path)
        new_links = validate_links(raw_links)
        check_links_structure(new_links)

    # merge existing links in out_dir and new links
    all_links = validate_links(existing_links + new_links)

    return all_links, new_links
from settings import DEST as dest
from parse import parse_links

# NOTE(review): `os`, `defaultdict`, `src`, `make_new_directory`,
# `make_menu_pages` and `arrange_resources` are used below but not imported
# in this excerpt -- presumably imported earlier in the file; confirm.

if __name__ == '__main__':
    # Creates the dest folder and a 'names' dictionary, then initiates the
    # recursive 'parse_links' process.
    if os.path.exists(src):
        print("Processing")
        make_new_directory(dest)
        # 'names' stores folder names to avoid conflicts
        names = defaultdict(int)
        menu_links = {}
        make_menu_pages(src, dest, menu_links)
        parse_links(
            names,
            src,
            menu_links,
            input_path=os.path.join(src, "index.html"),
            output_path=dest,
            layer_level=0,
            recursion_depth=3,
        )
        arrange_resources(src, dest)
        print("Successful\n")
    else:
        print("\nError: Unable to find src location.",
              "\nEnter valid src path in settings.py.")
        # Exit with a non-zero status so callers can detect the failure.
        # quit() is an interactive-shell helper and exited with status 0.
        raise SystemExit(1)
else:
    print("This module is not meant to be imported.")
from helpers import make_new_directory, arrange_resources, make_menu_pages
from settings import SRC as src
from settings import DEST as dest
from parse import parse_links

# NOTE(review): `os` and `defaultdict` are used below but not imported in
# this excerpt -- presumably imported earlier in the file; confirm.

if __name__ == '__main__':
    # Creates the dest folder and a 'names' dictionary, then initiates the
    # recursive 'parse_links' process.
    if os.path.exists(src):
        print("Processing")
        make_new_directory(dest)
        # 'names' stores folder names to avoid conflicts
        names = defaultdict(int)
        menu_links = {}
        make_menu_pages(src, dest, menu_links)
        parse_links(
            names,
            src,
            menu_links,
            input_path=os.path.join(src, "index.html"),
            output_path=dest,
            layer_level=0,
            recursion_depth=3,
        )
        arrange_resources(src, dest)
        print("Successful\n")
    else:
        print("\nError: Unable to find src location.",
              "\nEnter valid src path in settings.py.")
        # Exit with a non-zero status so callers can detect the failure.
        # quit() is an interactive-shell helper and exited with status 0.
        raise SystemExit(1)
else:
    print("This module is not meant to be imported.")