def run_once(scanner: DataPipeline, auto_push: bool):
    """Run a single scanner pass and optionally push the result.

    Calls ``scanner.update_sources()`` followed by ``scanner.process()``.
    When ``auto_push`` is truthy, ``scanner.config.base_dir`` is pushed via
    ``util_git.push`` with a message built from the log-formatted
    ``scanner.change_list.start_date`` and the value of ``get_host()``.
    """
    scanner.update_sources()
    scanner.process()

    if not auto_push:
        return

    host = get_host()
    repo_dir = scanner.config.base_dir
    stamp = udatetime.to_logformat(scanner.change_list.start_date)
    util_git.push(repo_dir, f"{stamp} on {host}")
def run_once(scanner: DataPipeline, capture: SpecializedCapture, auto_push: bool):
    """Run the scanner once.

    Steps, in order:
      1. ``scanner.update_sources()`` and ``scanner.process()``.
      2. If ``capture`` is truthy, ``do_specialized_capture(capture)``.
      3. If ``auto_push`` is truthy, push ``scanner.config.base_dir`` via
         ``util_git.push`` with a message built from the log-formatted
         ``scanner.change_list.start_date`` and the value of ``get_host()``.
    """
    scanner.update_sources()
    scanner.process()

    if capture:
        do_specialized_capture(capture)

    if not auto_push:
        return

    host = get_host()
    repo_dir = scanner.config.base_dir
    stamp = udatetime.to_logformat(scanner.change_list.start_date)
    util_git.push(repo_dir, f"{stamp} on {host}")
def run_continuous(scanner: DataPipeline, capture: SpecializedCapture, auto_push: bool):
    """Run the scanner repeatedly until ``util_git.monitor_check()`` is truthy.

    The first pass runs immediately and is not protected by the retry
    logic, so an exception from ``update_sources``/``process``/``push``
    propagates to the caller. Subsequent passes run at the times returned
    by ``next_time()``. When a scheduled pass raises, it is retried five
    minutes after its scheduled time, at most twice in a row, before the
    loop goes back to the regular schedule.

    Args:
        scanner: pipeline whose ``update_sources()`` and ``process()`` are
            invoked on every pass.
        capture: optional specialized capture; when truthy it is passed to
            ``special_cases`` on every pass and closed on exit.
        auto_push: when truthy, push ``scanner.config.base_dir`` via
            ``util_git.push`` after every pass.
    """
    # NOTE(review): this early return happens before the try/finally below,
    # so `capture` is not closed on this path - confirm that is intended.
    if util_git.monitor_check():
        return

    host = get_host()
    max_retries = 2
    try:
        print("starting continuous run")

        scanner.update_sources()
        scanner.process()

        if capture:
            # A failure in the specialized capture must not abort the first
            # pass; it is logged and the run carries on.
            try:
                special_cases(capture)
            except Exception as ex:
                logger.error(ex)
                logger.error("*** continue after exception in specialized capture")

        if auto_push:
            util_git.push(
                scanner.config.base_dir,
                f"{udatetime.to_logformat(scanner.change_list.start_date)} on {host}",
            )

        if util_git.monitor_check():
            return

        cnt = 1
        retry_cnt = 0  # consecutive failed attempts of the current cycle
        t = next_time()
        print(f"sleep until {t}")
        while True:
            # Poll in short sleeps until the scheduled time has passed.
            time.sleep(15)
            if datetime.now() < t:
                continue

            if util_git.monitor_check():
                break

            print("==================================")
            print(f"=== run {cnt} at {t}")
            print("==================================")

            try:
                scanner.update_sources()
                scanner.process()
                if capture:
                    special_cases(capture)
                if auto_push:
                    # NOTE(review): the first pass formats the date with
                    # to_logformat, this one with to_displayformat - confirm
                    # the difference is intentional.
                    util_git.push(
                        scanner.config.base_dir,
                        f"{udatetime.to_displayformat(scanner.change_list.start_date)} on {host}",
                    )
            except Exception as ex:
                logger.exception(ex)
                if retry_cnt < max_retries:
                    print("run failed, wait 5 minutes and try again")
                    t = t + timedelta(minutes=5)
                    retry_cnt += 1
                    # Skip the rescheduling below; otherwise next_time()
                    # would overwrite the retry time that was just set.
                    continue

            # Success, or retries exhausted: return to the regular schedule.
            retry_cnt = 0
            print("==================================")
            print("")
            t = next_time()
            print(f"sleep until {t}")
            cnt += 1
    finally:
        if capture:
            capture.close()
def run_continuous(scanner: DataPipeline, capture: SpecializedCapture, auto_push: bool):
    """Run in continuous mode (twice an hour, per the original author's note).

    The first pass runs immediately and outside the retry logic, so it
    fails loudly if something is really wrong. Subsequent passes run at the
    times returned by ``next_time()``. When a scheduled pass raises, it is
    retried five minutes after its scheduled time, at most twice in a row,
    before the loop goes back to sleep until the next regular cycle.

    The function returns as soon as ``util_git.monitor_check()`` is truthy.

    Args:
        scanner: pipeline whose ``update_sources()`` and ``process()`` are
            invoked on every pass.
        capture: optional specialized capture; when truthy it is passed to
            ``do_specialized_capture`` on every pass and closed on exit.
        auto_push: when truthy, push ``scanner.config.base_dir`` via
            ``util_git.push`` after every pass.
    """
    # check for new source code (return if found so watchdog can reload the main loop)
    # NOTE(review): this early return happens before the try/finally below,
    # so `capture` is not closed on this path - confirm that is intended.
    if util_git.monitor_check():
        return

    host = get_host()
    max_retries = 2
    try:
        print("starting continuous run")

        # run the first time outside of the 'retry' logic
        # so it fails if something is really wrong

        # get new external source data
        scanner.update_sources()

        # main scan/clean/extract loop
        scanner.process()

        # run a one-off capture if requested
        if capture:
            do_specialized_capture(capture)

        # push to the git repo
        if auto_push:
            util_git.push(
                scanner.config.base_dir,
                f"{udatetime.to_logformat(scanner.change_list.start_date)} on {host}",
            )

        # check for new source again
        if util_git.monitor_check():
            return

        cnt = 1
        # Consecutive failures of the current cycle. This must live outside
        # the loop: resetting it on every iteration would make the retry
        # limit unreachable.
        retry_cnt = 0
        t = next_time()

        # run on the next_time() schedule forever
        # on error, retry twice before going back to sleep until next cycle
        print(f"sleep until {t}")
        while True:
            # Poll in short sleeps until the scheduled time has passed.
            time.sleep(15)
            if datetime.now() < t:
                continue

            if util_git.monitor_check():
                break

            print("==================================")
            print(f"=== run {cnt} at {t}")
            print("==================================")

            try:
                scanner.update_sources()
                scanner.process()
                if capture:
                    do_specialized_capture(capture)
                if auto_push:
                    # NOTE(review): the first pass formats the date with
                    # to_logformat, this one with to_displayformat - confirm
                    # the difference is intentional.
                    util_git.push(
                        scanner.config.base_dir,
                        f"{udatetime.to_displayformat(scanner.change_list.start_date)} on {host}",
                    )
            except Exception as ex:
                logger.exception(ex)
                if retry_cnt < max_retries:
                    print("run failed, wait 5 minutes and try again")
                    t = t + timedelta(minutes=5)
                    retry_cnt += 1
                    continue

            # Success, or retries exhausted: return to the regular schedule.
            retry_cnt = 0
            print("==================================")
            print("")
            t = next_time()
            print(f"sleep until {t}")
            cnt += 1
    finally:
        if capture:
            capture.close()