def archive_link(link_dir, link, overwrite=True):
    """Run every enabled archive method for `link`, saving output in `link_dir`.

    `check_link_structure(link)` is called first, outside the try block, so
    anything it raises propagates to the caller.

    If `link_dir` already exists, the index stored there is loaded and merged
    with `link` (keys in `link` take precedence); otherwise the directory is
    created.  Each archive method whose config flag is truthy is then called
    in order, and each call's return value replaces `link`.  Finally the link
    index is written back to `link_dir`.

    Any exception raised after the initial check is caught and printed rather
    than re-raised; in that case the link is returned as it stood when the
    failure happened and the index write is skipped.

    `overwrite` is passed through unchanged to every archive method.
    """
    check_link_structure(link)

    try:
        already_archived = os.path.exists(link_dir)
        if not already_archived:
            os.makedirs(link_dir)
        else:
            # caller-supplied values override whatever is stored on disk
            stored_index = parse_json_link_index(link_dir)
            link = {**stored_index, **link}

        print_link_status_line(link_dir, link, already_archived)

        # (config flag, archive method) pairs, listed in execution order
        archive_methods = (
            (FETCH_FAVICON, fetch_favicon),
            (FETCH_TITLE, fetch_title),
            (FETCH_WGET, fetch_wget),
            (FETCH_PDF, fetch_pdf),
            (FETCH_SCREENSHOT, fetch_screenshot),
            (FETCH_DOM, fetch_dom),
            (SUBMIT_ARCHIVE_DOT_ORG, archive_dot_org),
            (FETCH_GIT, fetch_git),
            (FETCH_MEDIA, fetch_media),
        )
        for enabled, run_method in archive_methods:
            if enabled:
                link = run_method(link_dir, link, overwrite=overwrite)

        write_link_index(link_dir, link)

    except Exception as err:
        failure = ' ! Failed to archive link: {}: {}'.format(
            err.__class__.__name__,
            err,
        )
        print(failure)

    return link
def load_link_index(link_dir, link):
    """Prepare `link_dir` for archiving and return the link to archive.

    If `link_dir` does not exist yet it is created and `link` is used as-is.
    If it does exist, the index stored there is parsed and merged with `link`
    into a new dict, with keys from `link` taking precedence over stored ones.

    The resulting link is passed to `check_link_structure`, a status line is
    printed for it (flagged as new when the directory had to be created), and
    the link is returned.
    """
    dir_exists = os.path.exists(link_dir)

    if dir_exists:
        # caller-supplied values override whatever is stored on disk
        stored_index = parse_json_link_index(link_dir)
        link = {**stored_index, **link}
    else:
        os.makedirs(link_dir)

    check_link_structure(link)
    print_link_status_line(link_dir, link, not dir_exists)

    return link
def archive_link(link_dir, link, overwrite=True):
    """Run every enabled archive method for `link`, saving output in `link_dir`.

    If `link_dir` already exists, the index stored there is loaded and merged
    with `link` (keys in `link` take precedence); otherwise the directory is
    created.  The archive attempt is logged, then each archive method whose
    config flag is truthy is called in order, with each call's return value
    replacing `link`.  Finally the link index is written to `link_dir` and
    the updated link is returned.

    `overwrite` is passed through unchanged to every archive method.
    Exceptions are not handled here and propagate to the caller.
    """
    already_archived = os.path.exists(link_dir)
    if not already_archived:
        os.makedirs(link_dir)
    else:
        # caller-supplied values override whatever is stored on disk
        stored_index = parse_json_link_index(link_dir)
        link = {**stored_index, **link}

    log_link_archive(link_dir, link, already_archived)

    # (config flag, archive method) pairs, listed in execution order
    # NOTE: audio/video fetching (FETCH_AUDIO / FETCH_VIDEO) is not enabled here.
    archive_methods = (
        (FETCH_WGET, fetch_wget),
        (FETCH_PDF, fetch_pdf),
        (FETCH_SCREENSHOT, fetch_screenshot),
        (FETCH_DOM, fetch_dom),
        (SUBMIT_ARCHIVE_DOT_ORG, archive_dot_org),
        (FETCH_FAVICON, fetch_favicon),
    )
    for enabled, run_method in archive_methods:
        if enabled:
            link = run_method(link_dir, link, overwrite=overwrite)

    write_link_index(link_dir, link)

    return link