def archive_links(archive_path, links, source=None, resume=None):
    """Archive each queued link in turn, printing resume hints on interruption.

    Dependencies are checked first, then the links yielded by
    ``links_after_timestamp(links, resume)`` are passed one by one to
    ``archive_link`` together with their per-link directory under
    ``ARCHIVE_DIR``.

    ``archive_path`` and ``source`` are accepted but not used in this body.

    Raises:
        SystemExit: with status 1 when the run is stopped by KeyboardInterrupt.
        Exception: any other exception (including SystemExit) is re-raised
            after the pause message has been printed.
    """
    check_dependencies()

    pending = Peekable(links_after_timestamp(links, resume))
    # Seed the progress variables from the head of the queue so the pause
    # message below has a link to report even if the first attempt fails.
    idx, link = 0, pending.peek(0)

    try:
        for idx, link in enumerate(pending):
            target_dir = os.path.join(ARCHIVE_DIR, link['timestamp'])
            archive_link(target_dir, link)
    except (KeyboardInterrupt, SystemExit, Exception) as e:
        pause_template = (
            '{lightyellow}[X] [{now}] Downloading paused on link '
            '{timestamp} ({idx}/{total}){reset}'
        )
        print(pause_template.format(
            **ANSI,
            now=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            idx=idx + 1,
            timestamp=link['timestamp'],
            total=len(links),
        ))
        print(' Continue where you left off by running:')
        resume_hint = ' {} {}'.format(pretty_path(sys.argv[0]), link['timestamp'])
        print(resume_hint)

        # Ctrl-C becomes a clean non-zero exit; everything else propagates.
        if isinstance(e, KeyboardInterrupt):
            raise SystemExit(1)
        raise e
def update_archive(archive_path, links, source=None, resume=None, append=True):
    """Archive the given links, reporting progress, pause hints and a summary.

    Original summary: update or create index.html+json given a path to an
    export file containing new links.

    Steps, as performed by this body:
      1. Print a "Resuming" or "Updating" banner depending on ``resume``.
      2. Validate ``links`` with ``check_links_structure``.
      3. Check dependencies, then call ``archive_link`` for every link yielded
         by ``links_after_timestamp(links, resume)``.
      4. Print elapsed time and the counters from ``_RESULTS_TOTALS``.

    ``archive_path``, ``source`` and ``append`` are accepted but not used in
    this body.

    Raises:
        SystemExit: with status 1 when the run is stopped by KeyboardInterrupt.
        Exception: any other exception (including SystemExit) raised while
            archiving is re-raised after the pause message has been printed.
    """
    start_ts = datetime.now().timestamp()

    if resume:
        print('{green}[▶] [{}] Resuming archive downloading from {}...{reset}'.format(
            datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            resume,
            **ANSI,
        ))
    else:
        print('{green}[▶] [{}] Updating content for {} pages in archive...{reset}'.format(
            datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            len(links),
            **ANSI,
        ))

    check_links_structure(links)

    # prefetch the first link off the generator so that if we pause or fail
    # immediately we can show that we paused on the first link and not just None
    to_archive = Peekable(links_after_timestamp(links, resume))
    idx, link = 0, to_archive.peek(0)

    # loop over links and archive them
    try:
        check_dependencies()
        for idx, link in enumerate(to_archive):
            link_dir = os.path.join(ARCHIVE_DIR, link['timestamp'])
            archive_link(link_dir, link)
    except (KeyboardInterrupt, SystemExit, Exception) as e:
        # NOTE(review): idx counts items of to_archive while total is
        # len(links); if links_after_timestamp skips entries when resuming,
        # the printed fraction will not line up -- confirm intended.
        print(
            '\n{lightyellow}[X] [{now}] Downloading paused on link {timestamp} ({idx}/{total}){reset}'
            .format(
                **ANSI,
                now=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                idx=idx + 1,
                timestamp=link['timestamp'],
                total=len(links),
            ))
        print(' To view your archive, open: {}/index.html'.format(
            OUTPUT_DIR.replace(REPO_DIR + '/', '')))
        print(' Continue where you left off by running:')
        print(' {} {}'.format(
            pretty_path(sys.argv[0]),
            link['timestamp'],
        ))
        if not isinstance(e, KeyboardInterrupt):
            print()
            # Bare raise re-raises the active exception unchanged.
            raise
        # Ctrl-C is turned into a clean non-zero exit.
        raise SystemExit(1)

    # print timing information & summary
    end_ts = datetime.now().timestamp()
    seconds = end_ts - start_ts
    # The format spec already fixes two decimals, so no extra argument is needed.
    if seconds > 60:
        duration = '{0:.2f} min'.format(seconds / 60)
    else:
        duration = '{0:.2f} sec'.format(seconds)

    print('{}[√] [{}] Update of {} pages complete ({}){}'.format(
        ANSI['green'],
        datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        len(links),
        duration,
        ANSI['reset'],
    ))
    print(' - {} entries skipped'.format(_RESULTS_TOTALS['skipped']))
    # 'succeded' (sic) is the key name this module reads; it must match
    # wherever _RESULTS_TOTALS is populated.
    print(' - {} entries updated'.format(_RESULTS_TOTALS['succeded']))
    print(' - {} errors'.format(_RESULTS_TOTALS['failed']))
    print(' To view your archive, open: {}/index.html'.format(
        OUTPUT_DIR.replace(REPO_DIR + '/', '')))