def precompute(config: str, scheduler: sched.scheduler = None) -> None:
    """Precompute a configuration file result to serve it faster when it is requested.

    This function should be used with a scheduler to be repeated over time.

    :param config: name of the configuration file to precompute the result for
    :type config: str

    :param scheduler: scheduler used to relaunch the precomputing task in the future.
        If no scheduler is specified, the task will not be relaunched
    :type scheduler: sched.scheduler
    """
    try:
        cal = process(os.path.basename(config), False)
        # Derive the cache file name from the config file name (strip the .json suffix).
        name = os.path.splitext(os.path.basename(config))[0]
        path = "app/cache/" + name + ".ics"
        with open(path, 'w') as cache_file:
            cache_file.writelines(cal)
        print(arrow.now().format("YYYY-MM-DD HH:mm:ss"), "Precomputed", name)

    # Save the stack trace when an unknown error occurs
    except Exception as e:
        with open("error " + arrow.now().format("YYYY-MM-DD HH:mm:ss") + ".txt", 'w') as file:
            file.write(arrow.now().format("YYYY-MM-DD HH:mm:ss")
                       + "\nCould not precompute : " + str(config))
            file.write(str(e))
            file.write(str(traceback.format_exc()))

    finally:
        if scheduler is not None:
            delay = get_min_cache(config)
            delay *= 60
            scheduler.enter(delay=delay, priority=1, action=precompute,
                            argument=(config, scheduler))
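# Why the cache name above is derived with os.path.splitext rather than the more
# obvious rstrip('.json'): str.rstrip treats its argument as a set of characters,
# not a suffix, so it can over-strip names. A quick stand-alone illustration:
import os.path

assert "session.json".rstrip('.json') == "sessi"          # characters stripped, not the suffix
assert os.path.splitext("session.json")[0] == "session"   # intended behaviour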
def crawl_pastebin(sc: sched.scheduler, pbc: PastebinController):
    logger.info("crawl_pastebin: started...")
    pbc.run()
    # Re-schedule the next crawl in 20 seconds.
    sc.enter(20, 1, crawl_pastebin, (sc, pbc))
    logger.info("crawl_pastebin: done")
def timer_event(delay, priority, s: sched.scheduler, q: queue.Queue):
    print("timer event: ", time.asctime(), delay)
    argument = (delay, priority, s, q)
    if q.empty():
        s.enter(delay, priority, timer_event, argument)
    else:
        print("no more!")
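# The snippets in this collection share one pattern: a task re-enters itself on a
# sched.scheduler so it keeps firing periodically, and it stops simply by not
# re-scheduling once a stop condition is met. A minimal, self-contained sketch of
# that pattern (names, delays, and the queue-based stop signal are illustrative only):
import queue
import sched
import time

def tick(delay: float, priority: int, s: sched.scheduler, q: queue.Queue) -> None:
    print("tick:", time.asctime())
    if q.empty():
        # Re-enter ourselves so the scheduler fires again in `delay` seconds.
        s.enter(delay, priority, tick, argument=(delay, priority, s, q))

stop = queue.Queue()
s = sched.scheduler(time.time, time.sleep)
s.enter(1, 1, tick, argument=(1, 1, s, stop))
s.enter(5, 1, stop.put, argument=(None,))  # after ~5 s the queue is non-empty, so tick stops re-scheduling
s.run()  # blocks until the scheduler has no pending events left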
def keepalive_fn(scheduler: sched.scheduler, params: inputs.Inputs,
                 context: LambdaContext, keepalive_state: KeepaliveState,
                 cache: Cache):
    '''
    Each iteration of keepalive_thread runs this code. Add the next iteration of
    keepalive before exiting to continue the keepalive thread. Otherwise
    keepalives will stop.
    '''
    try:
        update_keepalive(params, keepalive_state, cache)
        keepalive_fn.num_keepalives += 1
        if keepalive_fn.num_keepalives % defaults.KEEPALIVE_PRINT_EVERY == 0:
            print("keepalive_fn: keepalive #{}: state={}".format(
                keepalive_fn.num_keepalives, keepalive_state))

        if context.invoked_function_arn and \
                context.get_remaining_time_in_millis() < defaults.RETRIGGER_BEFORE_EXPIRY_MS:
            # If invoked as lambda (not CLI), then retrigger the backing job if
            # this instance of it will expire soon.
            cache_keys = keepalive_state.cache_keys
            lastaccess_ms = int(cache.get(cache_keys.lastaccess))
            lastaccess_age_ms = utils.millitime() - lastaccess_ms
            if lastaccess_age_ms > (defaults.BACKING_JOB_LIFETIME_MS * 0.9):
                # There were no recent calls to fetch the data produced by this
                # backing job. No need to re-issue.
                print("Exiting backing job by ending keepalive thread. lastaccess_age_ms = ",
                      lastaccess_age_ms)
                return False

            if not params.is_streaming():
                # Fixed time-range jobs need not be reissued.
                print("keepalive_fn: backing job won't be restarted because it is not a streaming job",
                      params)
                return False

            # Restart this job again in another lambda invocation.
            # Before doing that, don't keepalive for a while to make it stale. Otherwise
            # the new invocation will assume there is another backing job already
            # running and will auto-exit.
            print("keepalive_fn: backing job needs to be restarted. lastaccess_age_ms =",
                  lastaccess_age_ms)
            time.sleep(defaults.KEEPALIVE_INTERVAL_SEC * defaults.KEEPALIVE_EXPIRY_MULTIPLE)
            start_backing_job_if_necessary(params, context, keepalive_state, cache)
            print("keepalive_fn: exiting current backing job after re-issuing a new one")
            return False
    except Exception as e:
        print("keepalive_fn: exception", e, traceback.format_exc())

    # Schedule the next iteration of the keepalive thread.
    scheduler.enter(defaults.KEEPALIVE_INTERVAL_SEC, 1, keepalive_fn,
                    argument=(scheduler, params, context, keepalive_state, cache))
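# keepalive_fn increments keepalive_fn.num_keepalives, so that function attribute
# must exist before the first call. A minimal bootstrap sketch, assuming the
# snippet above is in scope; the zero start value and the helper name are my
# assumptions, not taken from the original module:
import sched
import time

keepalive_fn.num_keepalives = 0

def start_keepalives(params, context, keepalive_state, cache):
    scheduler = sched.scheduler(time.time, time.sleep)
    scheduler.enter(0, 1, keepalive_fn,
                    argument=(scheduler, params, context, keepalive_state, cache))
    scheduler.run()  # blocks until keepalive_fn returns without re-scheduling itself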
def cache(entry: dict, scheduler: sched.scheduler = None) -> None:
    """Cache an .ics feed in the app/cache directory.

    Different entries with the same URL will be cached in the same file.
    The cached calendar contains a new line in the description with the current
    time when cached, prefixed by the 'Cached at' mention.

    :param entry: representation of the entry to cache. This is the Python
        representation of the corresponding entry in the config file
    :type entry: dict

    :param scheduler: scheduler used to relaunch the caching task in the future.
        If no scheduler is specified, the task will not be relaunched
    :type scheduler: sched.scheduler
    """
    try:
        if not os.path.isdir('app/cache'):
            os.mkdir('app/cache')

        url = entry['url']
        path = "app/cache/" + sha256(url.encode()).hexdigest() + ".ics"

        r = requests.get(entry["url"], allow_redirects=True)
        if "encoding" in entry:
            cal = Calendar(imports=r.content.decode(encoding=entry["encoding"]))
        else:
            cal = Calendar(imports=r.content.decode())

        cal = horodate(cal, 'Cached at')
        with open(path, 'w') as cache_file:
            cache_file.writelines(cal)
        print(arrow.now().format("YYYY-MM-DD HH:mm:ss"), "Cached", entry['name'])

    except FailedParse:
        print("Could not parse", entry['name'])

    # Save the stack trace when an unknown error occurs
    except Exception as e:
        with open("error " + arrow.now().format("YYYY-MM-DD HH:mm:ss") + ".txt", 'w') as file:
            file.write(arrow.now().format("YYYY-MM-DD HH:mm:ss")
                       + "\nCould not cache : " + str(entry))
            file.write(str(e))
            file.write(str(traceback.format_exc()))

    finally:
        if scheduler is not None:
            # Relaunch the caching task; fall back to a 10-minute delay if the
            # configured cache duration is not positive.
            delay = entry['cache'] if entry['cache'] > 0 else 10
            delay *= 60
            scheduler.enter(delay=delay, priority=1, action=cache,
                            argument=(entry, scheduler))
def __safety(self, s: sched.scheduler = None, warn: bool = True):
    with open(strings.auth_log) as fh:
        lines = fh.readlines()
    for line in lines:
        line = line.rstrip()
        ip = get_first_ip(line)
        if not self.__ip_ignored(ip):
            self.__add_line(ip, line, warn)
    if s is not None:
        s.enter(self.delay_safety, self.priority_safety, self.__safety, (s, True))
def scheduled_task(sc: scheduler):
    print('start scheduled task')
    with open(STORAGE, mode='rt', encoding='utf-8') as fp:
        storage = json.load(fp)

    for calendar in storage['calendars']:
        url = calendar['url']
        try:
            if RE_QIITA_URL.findall(url):
                new_entries = get_qiita_entries(url)
            else:
                new_entries = get_adventar_entries(url)
        except requests.exceptions.ConnectionError:
            print('ERROR ConnectionError on loading %s' % url)
            continue

        # Indexes whose stored URL differs from the freshly fetched one are new entries.
        idx = [i for i, old, new in zip(range(25), calendar['entry_urls'], new_entries)
               if old != new]
        print('found %d new entries' % len(idx))
        for i in idx:
            text = '%s Day %d %s' % (calendar['title'], i + 1, new_entries[i])
            post_slack(text)
        calendar['entry_urls'] = new_entries

    storage['last_updated'] = dt.now().strftime(TIME_FORMAT)
    with open(STORAGE, mode='wt', encoding='utf-8') as fp:
        json.dump(storage, fp)
    print('end scheduled task')

    # register self for periodic execution
    sc.enter(FETCH_INTERVAL, 1, scheduled_task, (sc,))
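# A tiny, stand-alone illustration of the diff step above: zip() pairs the
# previously stored entry URLs with the freshly fetched ones, and the indexes
# where they differ are the calendar slots that gained a new article. The URLs
# here are made up.
old_urls = ['', 'https://example.com/day2', '']
new_urls = ['https://example.com/day1', 'https://example.com/day2', '']
idx = [i for i, (old, new) in enumerate(zip(old_urls, new_urls)) if old != new]
assert idx == [0]  # only day 1 changed, so only one notification would be posted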
def schedule_note(
    scheduler: sched.scheduler,
    port: rtmidi.MidiOut,
    channel: int,
    midi_no: int,
    time: float,
    duration: float,
    volume: int,
) -> None:
    # print(OUTS.index(port) + 1, channel + 1, midi_no, time, duration, volume)
    scheduler.enter(
        delay=time + duration - Player.EPSILON,
        priority=1,
        action=port.send_message,
        argument=([NOTE_OFF | channel, midi_no, 0],),
    )
    scheduler.enter(
        delay=time,
        priority=10,
        action=port.send_message,
        argument=([NOTE_ON | channel, midi_no, volume],),
    )
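# A sketch of exercising schedule_note without real MIDI hardware, assuming the
# snippet above is defined in the same session (it needs rtmidi imported for its
# type annotation). A stub object stands in for rtmidi.MidiOut, and the
# module-level names the function relies on (NOTE_ON, NOTE_OFF, Player.EPSILON)
# are given assumed values here; 0x90/0x80 are the standard MIDI status bytes.
import sched
import time

NOTE_ON = 0x90
NOTE_OFF = 0x80

class Player:
    EPSILON = 0.001  # assumed small offset so a note-off lands just before the next note-on

class FakeMidiOut:
    def send_message(self, message):
        print(f"{time.monotonic():.3f} send_message({message})")

s = sched.scheduler(time.monotonic, time.sleep)
port = FakeMidiOut()
# Middle C (MIDI 60) on channel 0, starting at t=0.5 s, held for 0.25 s at velocity 100.
schedule_note(s, port, channel=0, midi_no=60, time=0.5, duration=0.25, volume=100)
s.run()
# Note the priorities: when a note-off and the next note-on fall on the same tick,
# the note-off (priority 1, i.e. higher priority in sched) is delivered first, so a
# re-struck note is not silenced by the previous note's release.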
def healthcheck_fn(scheduler: sched.scheduler, params: inputs.Inputs,
                   context: LambdaContext, keepalive_state: KeepaliveState,
                   cache: Cache):
    '''
    Code that is executed each time a healthcheck is performed. Schedules the
    next run before returning, otherwise healthchecks will be stopped.
    '''
    try:
        cached_result = get_cached_result(params, context, cache)
        cache_misses = set(cached_result["missing_timestamps_ms"])
        if len(cache_misses):
            print("healthcheck_fn: {} cache misses {}".format(
                len(cache_misses), sorted(cache_misses)))

        consecutive_misses = cache_misses.difference(
            healthcheck_fn.previous_cache_misses)
        if len(consecutive_misses):
            # The same data keys could not be fetched twice in a row in
            # consecutive healthcheck runs.
            print("healthcheck_fn: exiting backing job to trigger restart due to"
                  " {} consecutive cache misses: {}".format(
                      len(consecutive_misses), sorted(consecutive_misses)))
            return False

        healthcheck_fn.previous_cache_misses = cache_misses
        healthcheck_fn.consecutive_errors = 0
    except Exception as e:
        print("healthcheck_fn: exception", e, traceback.format_exc())
        healthcheck_fn.consecutive_errors += 1
        if healthcheck_fn.consecutive_errors >= defaults.HEALTHCHECK_EXIT_ERRORMULTIPLE:
            print("healthcheck_fn: exiting due to too many consecutive errors",
                  healthcheck_fn.consecutive_errors)
            return False

    # Schedule the next iteration of the healthcheck thread.
    scheduler.enter(defaults.HEALTHCHECK_INTERVAL_SEC, 1, healthcheck_fn,
                    argument=(scheduler, params, context, keepalive_state, cache))
def start_scheduler(scheduler: sched.scheduler) -> None:
    """Start the caching of every config file found in the app/config directory

    :param scheduler: scheduler object to use to schedule the caching
    :type scheduler: sched.scheduler
    """
    path = "app/config"
    files = [os.path.join(path, f) for f in os.listdir(path)
             if os.path.isfile(os.path.join(path, f)) and f.endswith('.json')]

    for file in files:
        with open(file, 'r') as config_file:
            config = json.loads(config_file.read())

        for entry in config:
            if 'cache' in entry:
                scheduler.enter(delay=0, priority=1, action=cache,
                                argument=(entry, scheduler))

        if get_min_cache(file) < float('inf'):
            scheduler.enter(delay=get_min_cache(file) * 60, priority=1,
                            action=precompute, argument=(file, scheduler))

    scheduler.run()
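# How start_scheduler is typically driven (a sketch, not the project's actual
# entry point): build a sched.scheduler and hand it over. Because
# start_scheduler() ends with scheduler.run(), which blocks, running it in a
# daemon thread keeps the rest of the application (e.g. the web server that
# serves the cached .ics files) responsive while the cache/precompute tasks
# fire in the background.
import sched
import threading
import time

scheduler = sched.scheduler(time.time, time.sleep)
threading.Thread(target=start_scheduler, args=(scheduler,), daemon=True).start()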
def main(periodic: sched.scheduler) -> None:
    # Set working variables
    s3_id, s3_key, s3_bucket, s3_input, s3_output, s3_sync = get_environment()
    path_input, path_output = ('/dev/shm/gps/input', '/dev/shm/gps/output')
    path_data = '/dev/shm/gps/data'
    #print(f"\n=== Started input processing cycle ===\n")
    s3 = B3W(s3_bucket, s3_id, s3_key)

    # Get input files from S3
    files_input = get_from_aws(s3, s3_input, path_input)
    #print("DEBUG: input files -->")
    #print("\n".join([f"DEBUG: {filename}" for filename in files_input]))
    objects_output = check_in_aws(s3, s3_output, depth=1)
    #print("DEBUG: output sets -->")
    #print("\n".join([f"DEBUG: {name}" for name in objects_output]))
    # DEBUG: list sync objects in S3, remove output test set
    #objects_sync = check_in_aws(s3, s3_sync)  # don't uncomment - dangerous!
    #print("DEBUG: sync objects -->")
    #print("\n".join([f"DEBUG: {name}" for name in objects_sync]))

    # Initialize Copernicus Open Data Access Hub search object
    config = Config.load('config.yaml')
    data_hub = DataHub(config, limit=1000)

    # Cycle through all the data input sets: a set may contain multiple
    # input areas and shapes to process. Result will be a snapshot that is
    # cut with each shape (if any)
    for data_input in glob(os.path.join(path_input, '*')):
        if not os.path.isdir(data_input):
            #print(f"DEBUG: '{data_input}' is not a valid data input!")
            #print("TODO: unzip archived input sets...")
            continue
        data_name = os.path.basename(data_input)
        #print(f"DEBUG: 'data_input' basename = {data_name}")
        if data_name in objects_output:
            #print(f"Output set for '{data_input}' already exists. Skipping...")
            continue
        #print(f"DEBUG: input directory --->\n{os.listdir(data_input)}\n")
        areas = glob(os.path.join(data_input, '*.geojson'))
        shapes = glob(os.path.join(data_input, '*.shp'))
        #print(f"DEBUG: shapes = {shapes}")
        if not shapes:
            shapes.append(None)

        for area in areas:
            try:
                print(f"\n=== Processing '{area}' ===\n")
                polygon, properties = Polygons.read_geojson(area)
            except Exception as e:
                print(f"Failed to read '{area}'!\n{str(e)}")
                continue
            #print(f"DEBUG:\n{polygon}")
            # Set config key (search area)
            #print(f"DEBUG: config.search -->\n{config.search}")
            search = config.search.copy()
            search.update(properties)
            #config.search["footprint"] = f"\"Intersects({polygon})\""
            #print(f"DEBUG: config.search -->\n{config.search}")
            #print(f"Config 'search' section:\n{config.search}")
            snapshots = data_hub.search(search, area=polygon)
            snapshots = sorted(snapshots, key=lambda item: item.begin_position)
            print(f"\n=== {len(snapshots)} snapshots found ===\n")
            # print_snapshots(snapshots)  # DEBUG
            # break  # DEBUG

            print(f"\n=== Processing snapshots and shapes ===\n")
            for index, snapshot in enumerate(snapshots):
                filename = sync_with_aws(s3, s3_sync, data_hub, snapshot, path_data)
                if not filename:
                    print(f"\n'{snapshot.uuid}' not synced. Skipping...")
                    continue
                else:
                    print(f"\n{index:8d}: {snapshot.title}")
                try:
                    # Process each superposition of an area and a shape
                    #
                    # Process a snapshot
                    #
                    #print(f"DEBUG: search keys = {search.keys()}")
                    path_target = os.path.join(path_output, data_name)
                    #print(f"DEBUG: path_data = '{path_data}'")
                    if search['platformName'] == 'Sentinel-2':
                        filenames = process_sentinel2(filename, path_target, area, shapes)
                    elif search['platformName'] == 'Sentinel-1':
                        filenames = process_sentinel1(filename, path_target, area, shapes)
                    else:
                        filenames = []
                        print(f"NOT IMPLEMENTED: {snapshot.title}",
                              f"{config.search['platformName']}")
                    #print(f"DEBUG: exporting '{data_prefix}' to S3 -->")
                    # Put processing result (for each output set) to S3
                    result = put_to_aws(s3, s3_output, path_output)  # result...
                    for outfile in filenames:
                        remove(outfile)  # all files (TODO: file or directory)
                except Exception as e:
                    print(f"FAILED: {e}")
                    raise e
                remove(filename)  # remove snapshot
                #break  # DEBUG: the first snapshot only
            print(f"\n=== Done snapshots for '{area}' ===\n")

        # Clean up output set (there should remain only logs)
        try:
            rmtree(os.path.join(path_output, data_name))  # data output - prefix
        except FileNotFoundError as e:
            pass

    # Clean up
    for path in (path_data, path_input, path_output):
        try:
            #print(f"DEBUG: removing {path}")
            rmtree(path)
        except FileNotFoundError as e:
            pass
    #print(f"\n=== Completed input processing cycle ===\n")
    periodic.enter(INTERVAL, 1, main, (periodic,))

    return None
def execute_send_delta_emails(sc: sched.scheduler, **kwargs: dict) -> None:
    gmp = kwargs.get('gmp')
    task_tag = kwargs.get('task_tag')
    interval = kwargs.get('interval')
    email_subject = kwargs.get('email_subject')
    to_addresses = kwargs.get('to_addresses')
    from_address = kwargs.get('from_address')
    mta_address = kwargs.get('mta_address')
    mta_user = kwargs.get('mta_user')
    mta_port = kwargs.get('mta_port')
    mta_password = kwargs.get('mta_password')
    report_tag_name = kwargs.get('report_tag_name')

    print('Retrieving task list ...')

    task_filter = f'tag={task_tag}'
    tasks = gmp.get_tasks(filter_string=task_filter).xpath('task')
    print(f'Found {str(len(tasks))} task(s) with tag "{task_tag}".')

    for task in tasks:
        task_id = task.xpath('@id')[0]
        task_name = task.xpath('name/text()')[0]
        print(f'Processing task "{task_name}" ({task_id})...')

        reports = gmp.get_reports(
            filter_string=f'task_id={task_id} and status=Done '
            'sort-reverse=date'
        ).xpath('report')
        print(f' Found {str(len(reports))} report(s).')
        if len(reports) < 2:
            print(' Delta-reporting requires at least 2 finished reports.')
            continue

        if reports[0].xpath(
            'report/user_tags/tag/name[text()="delta_alert_sent"]'
        ):
            print(' Delta report for latest finished report already sent')
            continue

        print(' Latest finished report not sent yet. Preparing delta report...')

        delta_report = gmp.get_report(
            report_id=reports[0].xpath('@id')[0],
            delta_report_id=reports[1].xpath('@id')[0],
            filter_string='delta_states=n',
            format_id='c1645568-627a-11e3-a660-406186ea4fc5',
        )

        csv_in_b64 = delta_report.xpath('report/text()')[0]
        csv = base64.b64decode(csv_in_b64)

        print(" Composing Email...")
        alert_email = MIMEMultipart()
        alert_email['Subject'] = email_subject
        alert_email['To'] = ', '.join(to_addresses)
        alert_email['From'] = from_address
        alert_email['Date'] = formatdate(localtime=True)

        report_attachment = MIMEBase('application', "octet-stream")
        report_attachment.add_header(
            'Content-Disposition', 'attachment', filename='delta.csv'
        )
        report_attachment.set_payload(csv)
        alert_email.attach(report_attachment)

        print(" Sending Email...")
        try:
            with smtplib.SMTP(mta_address, mta_port) as smtp:
                smtp.ehlo()
                smtp.starttls()
                smtp.ehlo()
                smtp.login(mta_user, mta_password)  # if required
                smtp.sendmail(
                    from_address, to_addresses, alert_email.as_string()
                )
                smtp.close()
                print(" Email has been sent!")

            # Tag the report so the next run knows this delta has already been sent.
            gmp.create_tag(
                name=report_tag_name,
                resource_id=reports[0].xpath('@id')[0],
                resource_type='report',
                value=datetime.datetime.now(),
            )
        except Exception:  # pylint: disable=broad-except
            print(" Unable to send the email. Error: ", sys.exc_info()[0])
            # raise  # in case an error should stop the script
            continue  # ignore the problem for the time being

    print(f"\nCheck will be repeated in {str(interval)} minutes...\n")
    sc.enter(
        interval * 60,
        1,
        execute_send_delta_emails,
        argument=(sc,),
        kwargs=kwargs,
    )
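# Sketch of how execute_send_delta_emails can be bootstrapped with sched. In
# gvm-tools scripts the authenticated 'gmp' object is provided by the gvm-script
# runner; here it is only a parameter, and every other value below is an
# illustrative placeholder, not taken from the original script.
import sched
import time

def start_delta_email_checks(gmp) -> None:
    schedule = sched.scheduler(time.time, time.sleep)
    schedule.enter(
        0,                        # run the first check immediately
        1,
        execute_send_delta_emails,
        argument=(schedule,),
        kwargs={
            'gmp': gmp,
            'task_tag': 'send_delta',
            'interval': 60,       # minutes between checks
            'email_subject': 'Delta report',
            'to_addresses': ['soc@example.com'],
            'from_address': 'gvm@example.com',
            'mta_address': 'smtp.example.com',
            'mta_port': 587,
            'mta_user': 'gvm',
            'mta_password': 'secret',
            'report_tag_name': 'delta_alert_sent',
        },
    )
    schedule.run()  # blocks; each run re-enters itself after `interval` minutes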