Example No. 1
def precompute(config: str, scheduler: sched.scheduler = None) -> None:
    """Precompute a configuration file result to serve it faster when it is requested.  This function
    should be used with a scheduler to be repeated over time.

    :param config: name of the configuration file to precompute the result for
    :type config: str

    :param scheduler: scheduler used to relaunch the precomputing task in the future.  If no scheduler
    is specified, the task will not be relaunched
    :type scheduler: sched.scheduler
    """
    try:
        cal = process(os.path.basename(config), False)
        name = os.path.splitext(os.path.basename(config))[0]  # strip the .json extension safely
        path = "app/cache/" + name + ".ics"
        with open(path, 'w') as cache_file:
            cache_file.writelines(cal)
        print(arrow.now().format("YYYY-MM-DD HH:mm:ss"), "Precomputed", name)

    except Exception as e:
        with open("error " + arrow.now().format("YYYY-MM-DD HH:mm:ss")+".txt", 'w') as file:
            file.write(arrow.now().format("YYYY-MM-DD HH:mm:ss") + "\nCould not precompute : " + str(config))
            file.write(str(e))
            file.write(str(traceback.format_exc()))
    finally:
        if scheduler is not None:
            delay = get_min_cache(config)
            delay *= 60
            scheduler.enter(delay=delay, priority=1, action=precompute, argument=(config, scheduler))
Example No. 2
def crawl_pastebin(sc: sched.scheduler, pbc: PastebinController):
    logger.info("crawl_pastebin: started...")
    pbc.run()
    sc.enter(20, 1, crawl_pastebin, (
        sc,
        pbc,
    ))
    logger.info("crawl_pastebin: done")
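
A minimal sketch of how a self-rescheduling task like this is usually started: build the scheduler, enter the first event, then call run(), which blocks and keeps executing the events that crawl_pastebin re-enters. How PastebinController is constructed is an assumption here.

# Hypothetical bootstrap for the crawler above; the controller construction is assumed.
import sched
import time

sc = sched.scheduler(time.time, time.sleep)
pbc = PastebinController()  # assumption: no-argument construction
sc.enter(0, 1, crawl_pastebin, (sc, pbc))  # run the first crawl immediately
sc.run()  # blocks; every crawl re-enters itself 20 seconds later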
Example No. 3
def timer_event(delay, priority, s: sched.scheduler, q: queue.Queue):
    print("timer event: ", time.asctime(), delay)

    argument = (delay, priority, s, q)
    if q.empty():
        s.enter(delay, priority, timer_event, argument)
    else:
        print("no more!")
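
Here the queue acts as a stop signal: timer_event keeps re-entering itself only while the queue is empty. A sketch of driving it, with the stop request coming from a timer thread; the 5-second tick and the 30-second stop are arbitrary values for illustration.

# Sketch only: delays are arbitrary illustration values.
import queue
import sched
import threading
import time

s = sched.scheduler(time.time, time.sleep)
q = queue.Queue()

s.enter(5, 1, timer_event, (5, 1, s, q))          # first tick after 5 seconds
threading.Timer(30, lambda: q.put(None)).start()  # request a stop after roughly 30 seconds
s.run()                                           # returns once timer_event stops re-entering itself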
Example No. 4
def keepalive_fn(scheduler: sched.scheduler, params: inputs.Inputs,
                 context: LambdaContext, keepalive_state: KeepaliveState,
                 cache: Cache):
    ''' Each iteration of the keepalive thread runs this code. It schedules the next keepalive iteration before
    returning; otherwise keepalives would stop. '''
    try:
        update_keepalive(params, keepalive_state, cache)
        keepalive_fn.num_keepalives += 1
        if keepalive_fn.num_keepalives % defaults.KEEPALIVE_PRINT_EVERY == 0:
            print("keepalive_fn: keepalive #{}: state={}".format(
                keepalive_fn.num_keepalives, keepalive_state))

        if context.invoked_function_arn and context.get_remaining_time_in_millis(
        ) < defaults.RETRIGGER_BEFORE_EXPIRY_MS:
            # if invoked as lambda (not CLI), then retrigger backing job if this instance of it will expire soon
            cache_keys = keepalive_state.cache_keys
            lastaccess_ms = int(cache.get(cache_keys.lastaccess))
            lastaccess_age_ms = utils.millitime() - lastaccess_ms

            if lastaccess_age_ms > (defaults.BACKING_JOB_LIFETIME_MS * 0.9):
                # There were no recent calls to fetch the data produced by this backing job. No need to re-issue
                print(
                    "Exiting backing job by ending keepalive thread. lastaccess_age_ms = ",
                    lastaccess_age_ms)
                return False

            if not params.is_streaming():
                ''' Fixed time-range jobs need not be reissued '''
                print(
                    "keepalive_fn: backing job won't be restarted because it is not a streaming job",
                    params)
                return False

            # Restart this job in another lambda invocation.
            # Before doing that, skip keepalives for a while so the state goes stale. Otherwise the new
            # invocation will assume another backing job is already running and will auto-exit
            print(
                "keepalive_fn: backing job needs to be restarted. lastaccess_age_ms =",
                lastaccess_age_ms)
            time.sleep(defaults.KEEPALIVE_INTERVAL_SEC *
                       defaults.KEEPALIVE_EXPIRY_MULTIPLE)
            start_backing_job_if_necessary(params, context, keepalive_state,
                                           cache)
            print(
                "keepalive_fn: exiting current backing job after re-issuing a new one"
            )
            return False
    except Exception as e:
        print("keepalive_fn: exception", e, traceback.format_exc())

    # schedule the next iteration of keepalive thread
    scheduler.enter(defaults.KEEPALIVE_INTERVAL_SEC,
                    1,
                    keepalive_fn,
                    argument=(scheduler, params, context, keepalive_state,
                              cache))
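
keepalive_fn counts its iterations on a function attribute, so that attribute has to exist before the first scheduled call; otherwise the += 1 raises inside the try block and the counter never advances. A sketch of initializing it and entering the first iteration; params, context, keepalive_state and cache are assumed to be built by the surrounding code.

# Sketch only: the objects passed in are assumed to come from the surrounding module.
keepalive_fn.num_keepalives = 0

keepalive_scheduler = sched.scheduler(time.time, time.sleep)
keepalive_scheduler.enter(defaults.KEEPALIVE_INTERVAL_SEC, 1, keepalive_fn,
                          argument=(keepalive_scheduler, params, context,
                                    keepalive_state, cache))
keepalive_scheduler.run()  # blocks for as long as keepalive_fn keeps re-entering itself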
Example No. 5
def cache(entry: dict, scheduler: sched.scheduler = None) -> None:
    """Cache an .ics feed in the app/cache directory.
    Different entries with the same URL will be cached in the same file.
    The cached calendar gets an extra line in its description containing the caching time, prefixed by the
    'Cached at' mention.

    :param entry: representation of the entry to cache.  This is the Python representation of the corresponding entry
    in the config file
    :type entry: dict

    :param scheduler: scheduler used to relaunch the caching task in the future.  If no scheduler is specified,
    the task will not be relaunched
    :type scheduler: sched.scheduler
    """

    try:
        if not os.path.isdir('app/cache'):
            os.mkdir('app/cache')

        url = entry['url']
        path = "app/cache/" + sha256(url.encode()).hexdigest() + ".ics"

        r = requests.get(entry["url"], allow_redirects=True)

        if "encoding" in entry:
            cal = Calendar(imports=r.content.decode(encoding=entry["encoding"]))
        else:
            cal = Calendar(imports=r.content.decode())

        cal = horodate(cal, 'Cached at')
        with open(path, 'w') as cache_file:
            cache_file.writelines(cal)
        print(arrow.now().format("YYYY-MM-DD HH:mm:ss"), "Cached", entry['name'])

    except FailedParse:
        print("Could not parse", entry['name'])

    # Save stack trace when an unknown error occurs
    except Exception as e:
        with open("error " + arrow.now().format("YYYY-MM-DD HH:mm:ss")+".txt", 'w') as file:
            file.write(arrow.now().format("YYYY-MM-DD HH:mm:ss") + "\nCould not cache : " + str(entry))
            file.write(str(e))
            file.write(str(traceback.format_exc()))
    finally:
        if scheduler is not None:
            delay = entry['cache'] if entry['cache'] > 0 else 10
            delay *= 60
            scheduler.enter(delay=delay, priority=1, action=cache, argument=(entry, scheduler))
Example No. 6
    def __safety(self, s: sched.scheduler = None, warn: bool = True):
        with open(strings.auth_log) as fh:
            lines = fh.readlines()

        for line in lines:

            line = line.rstrip()
            ip = get_first_ip(line)

            if not self.__ip_ignored(ip):
                self.__add_line(ip, line, warn)

        if s is not None:
            s.enter(self.delay_safety, self.priority_safety, self.__safety,
                    (s, True))
Example No. 7
def scheduled_task(sc: scheduler):
    print('start scheduled task')

    with open(STORAGE, mode='rt', encoding='utf-8') as fp:
        storage = json.load(fp)

    for calendar in storage['calendars']:
        url = calendar['url']

        try:
            if RE_QIITA_URL.findall(url):
                new_entries = get_qiita_entries(url)
            else:
                new_entries = get_adventar_entries(url)
        except requests.exceptions.ConnectionError:
            print('ERROR ConnectionError on loading %s' % url)
            continue

        idx = [
            i
            for i, old, new
            in zip(range(25), calendar['entry_urls'], new_entries)
            if old != new]

        print('found %d new entries' % len(idx))

        for i in idx:
            text = '%s %d日目 %s' % (calendar['title'], i + 1, new_entries[i])
            post_slack(text)

        calendar['entry_urls'] = new_entries

    storage['last_updated'] = dt.now().strftime(TIME_FORMAT)

    with open(STORAGE, mode='wt', encoding='utf-8') as fp:
        json.dump(storage, fp)

    print('end scheduled task')

    # register self for periodic execution
    sc.enter(FETCH_INTERVAL, 1, scheduled_task, (sc,))
Example No. 8
File: midi.py Project: vug/cac
def schedule_note(
    scheduler: sched.scheduler,
    port: rtmidi.MidiOut,
    channel: int,
    midi_no: int,
    time: float,
    duration: float,
    volume: int,
) -> None:
    # print(OUTS.index(port) + 1, channel + 1, midi_no, time, duration, volume)
    scheduler.enter(
        delay=time + duration - Player.EPSILON,
        priority=1,
        action=port.send_message,
        argument=([NOTE_OFF | channel, midi_no, 0],),
    )
    scheduler.enter(
        delay=time,
        priority=10,
        action=port.send_message,
        argument=([NOTE_ON | channel, midi_no, volume],),
    )
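
sched runs events due at the same time in ascending priority order, so a note-off (priority 1) is delivered before a note-on (priority 10) scheduled for the same instant, which keeps an ending note from cutting off the next one. A sketch of queuing a short phrase and playing it with a single run(); the port index and note data are made up, and the NOTE_ON/NOTE_OFF constants and Player.EPSILON are assumed to be defined in this module as the function implies.

# Hypothetical usage; port index and note data are illustration values only.
import sched
import time
import rtmidi

port = rtmidi.MidiOut()
port.open_port(0)  # assumes at least one MIDI output port is available

scheduler = sched.scheduler(time.time, time.sleep)
for beat, note in enumerate([60, 64, 67]):  # a C major arpeggio, one note per second
    schedule_note(scheduler, port, channel=0, midi_no=note,
                  time=float(beat), duration=0.9, volume=100)
scheduler.run()  # plays the whole phrase, then returns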
Example No. 9
def healthcheck_fn(scheduler: sched.scheduler, params: inputs.Inputs,
                   context: LambdaContext, keepalive_state: KeepaliveState,
                   cache: Cache):
    ''' Code that is executed each time a healthcheck is performed. Schedules the next run before returning,
    otherwise healthchecks will stop '''
    try:
        cached_result = get_cached_result(params, context, cache)
        cache_misses = set(cached_result["missing_timestamps_ms"])
        if len(cache_misses):
            print("healthcheck_fn: {} cache misses {}".format(
                len(cache_misses), sorted(cache_misses)))
        consecutive_misses = cache_misses.difference(
            healthcheck_fn.previous_cache_misses)
        if len(consecutive_misses):
            # The same data keys could not be fetched twice in a row in consecutive healthcheck runs
            print(
                "healthcheck_fn: exiting backing job to trigger restart due to {} consecutive cache misses: {}"
                .format(len(consecutive_misses),
                        sorted(consecutive_misses)))
            return False
        healthcheck_fn.previous_cache_misses = cache_misses
        healthcheck_fn.consecutive_errors = 0
    except Exception as e:
        print("healthcheck_fn: exception", e, traceback.format_exc())
        healthcheck_fn.consecutive_errors += 1
        if healthcheck_fn.consecutive_errors >= defaults.HEALTHCHECK_EXIT_ERRORMULTIPLE:
            print(
                "healthcheck_fn: exiting due to too many consecutive errors",
                healthcheck_fn.consecutive_errors)
            return False
    # schedule the next iteration of healthcheck thread
    scheduler.enter(defaults.HEALTHCHECK_INTERVAL_SEC,
                    1,
                    healthcheck_fn,
                    argument=(scheduler, params, context, keepalive_state,
                              cache))
Example No. 10
def start_scheduler(scheduler: sched.scheduler) -> None:
    """Start the caching of every config file found in the app/config directory


    :param scheduler: scheduler object to use to schedule the caching
    :type scheduler: sched.scheduler
    """

    path = "app/config"
    files = [os.path.join(path, f) for f in os.listdir(path)
             if os.path.isfile(os.path.join(path, f)) and f.endswith('.json')]

    for file in files:
        with open(file, 'r') as config_file:
            config = json.loads(config_file.read())

        for entry in config:
            if 'cache' in entry:
                scheduler.enter(delay=0, priority=1, action=cache, argument=(entry, scheduler))

        if get_min_cache(file) < float('inf'):
            scheduler.enter(delay=get_min_cache(file)*60, priority=1, action=precompute, argument=(file, scheduler))

    scheduler.run()
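
start_scheduler enters the initial events and then blocks inside scheduler.run(), so a typical entry point only has to build the scheduler and hand it over. A minimal sketch, assuming the module already imports sched and time alongside cache and precompute.

# Minimal sketch of an entry point for the caching service.
import sched
import time

if __name__ == '__main__':
    start_scheduler(sched.scheduler(time.time, time.sleep))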
Example No. 11
def main(periodic: sched.scheduler) -> None:
    # Set working variables
    s3_id, s3_key, s3_bucket, s3_input, s3_output, s3_sync = get_environment()
    path_input, path_output = ('/dev/shm/gps/input', '/dev/shm/gps/output')
    path_data = '/dev/shm/gps/data'

    #print(f"\n=== Started input processing cycle ===\n")
    s3 = B3W(s3_bucket, s3_id, s3_key)

    # Get input files from S3
    files_input = get_from_aws(s3, s3_input, path_input)
    #print("DEBUG: input files -->")
    #print("\n".join([f"DEBUG: {filename}" for filename in files_input]))
    objects_output = check_in_aws(s3, s3_output, depth=1)
    #print("DEBUG: output sets -->")
    #print("\n".join([f"DEBUG: {name}" for name in objects_output]))
    # DEBUG: list sync objects in S3, remove output test set
    #objects_sync = check_in_aws(s3, s3_sync) # don't uncomment - dangerous!
    #print("DEBUG: sync objects -->")
    #print("\n".join([f"DEBUG: {name}" for name in objects_sync]))

    # Initialize Copernicus Open Data Access Hub search object
    config = Config.load('config.yaml')
    data_hub = DataHub(config, limit=1000)

    # Cycle through all the data input sets: a set may contain multiple
    # input areas and shapes to process. Result will be a snapshot that is
    # cut with each shape (if any)
    for data_input in glob(os.path.join(path_input, '*')):
        if not os.path.isdir(data_input):
            #print(f"DEBUG: '{data_input}' is not a valid data input!")
            #print("TODO: unzip archived input sets...")
            continue
        data_name = os.path.basename(data_input)
        #print(f"DEBUG: 'data_input' basename = {data_name}")
        if data_name in objects_output:
            #print(f"Output set for '{data_input}' already exists. Skipping...")
            continue
        #print(f"DEBUG: input directory --->\n{os.listdir(data_input)}\n")
        areas = glob(os.path.join(data_input, '*.geojson'))
        shapes = glob(os.path.join(data_input, '*.shp'))
        #print(f"DEBUG: shapes = {shapes}")
        if not shapes:
            shapes.append(None)
        for area in areas:
            try:
                print(f"\n=== Processing '{area}' ===\n")
                polygon, properties = Polygons.read_geojson(area)
            except Exception as e:
                print(f"Failed to read '{area}'!\n{str(e)}")
                continue
            #print(f"DEBUG:\n{polygon}")

            # Set config key (search area)
            #print(f"DEBUG: config.search -->\n{config.search}")
            search = config.search.copy()
            search.update(properties)
            #config.search["footprint"] = f"\"Intersects({polygon})\""
            #print(f"DEBUG: config.search -->\n{config.search}")
            #print(f"Config 'search' section:\n{config.search}")

            snapshots = data_hub.search(search, area=polygon)
            snapshots = sorted(snapshots,
                               key=lambda item: item.begin_position)

            print(f"\n=== {len(snapshots)} snapshots found ===\n")
            # print_snapshots(snapshots) # DEBUG
            # break # DEBUG

            print(f"\n=== Processing snapshots and shapes ===\n")
            for index, snapshot in enumerate(snapshots):
                filename = sync_with_aws(s3, s3_sync, data_hub, snapshot,
                                         path_data)
                if not filename:
                    print(f"\n'{snapshot.uuid}' not synced. Skipping...")
                    continue
                else:
                    print(f"\n{index:8d}: {snapshot.title}")
                try:
                    # Process each superposition of an area and a shape
                    #
                    # Process a snapshot
                    #
                    #print(f"DEBUG: search keys = {search.keys()}")
                    path_target = os.path.join(path_output, data_name)
                    #print(f"DEBUG: path_data = '{path_data}'")
                    if search['platformName'] == 'Sentinel-2':
                        filenames = process_sentinel2(filename, path_target,
                                                      area, shapes)
                    elif search['platformName'] == 'Sentinel-1':
                        filenames = process_sentinel1(filename, path_target,
                                                      area, shapes)
                    else:
                        filenames = []
                        print(f"NOT IMPLEMENTED: {snapshot.title}",
                              f"{config.search['platformName']}")
                    #print(f"DEBUG: exporting '{data_prefix}' to S3 -->")
                    # Put processing result (for each output set) to S3
                    result = put_to_aws(s3, s3_output, path_output) # result...
                    for outfile in filenames:
                        remove(outfile) # all files (TODO: file or directory)
                except Exception as e:
                    print(f"FAILED: {e}")
                    raise e
                remove(filename) # remove snapshot
                #break # DEBUG: the first snapshot only
            print(f"\n=== Done snapshots for '{area}' ===\n")
        # Clean up output set (there should remain only logs)
        try:
            rmtree(os.path.join(path_output, data_name)) # data output - prefix
        except FileNotFoundError as e:
            pass
    # Clean up
    for path in (path_data, path_input, path_output):
        try:
            #print(f"DEBUG: removing {path}")
            rmtree(path)
        except FileNotFoundError as e:
            pass

    #print(f"\n=== Completed input processing cycle ===\n")
    periodic.enter(INTERVAL, 1, main, (periodic,))

    return None
Example No. 12
def execute_send_delta_emails(sc: sched.scheduler, **kwargs: dict) -> None:
    gmp = kwargs.get('gmp')
    task_tag = kwargs.get('task_tag')
    interval = kwargs.get('interval')
    email_subject = kwargs.get('email_subject')
    to_addresses = kwargs.get('to_addresses')
    from_address = kwargs.get('from_address')
    mta_address = kwargs.get('mta_address')
    mta_user = kwargs.get('mta_user')
    mta_port = kwargs.get('mta_port')
    mta_password = kwargs.get('mta_password')
    report_tag_name = kwargs.get('report_tag_name')

    print('Retrieving task list ...')

    task_filter = f'tag={task_tag}'
    tasks = gmp.get_tasks(filter_string=task_filter).xpath('task')
    print(f'Found {str(len(tasks))} task(s) with tag "{task_tag}".')

    for task in tasks:
        task_id = task.xpath('@id')[0]
        task_name = task.xpath('name/text()')[0]
        print(f'Processing task "{task_name}" ({task_id})...')

        reports = gmp.get_reports(
            filter_string=f'task_id={task_id} and status=Done '
            'sort-reverse=date'
        ).xpath('report')
        print(f'  Found {str(len(reports))} report(s).')
        if len(reports) < 2:
            print('  Delta-reporting requires at least 2 finished reports.')
            continue

        if reports[0].xpath(
            'report/user_tags/tag/' 'name[text()="delta_alert_sent"]'
        ):
            print('  Delta report for latest finished report already sent')
            continue

        print(
            '  Latest finished report not sent yet. Preparing delta '
            'report...'
        )

        delta_report = gmp.get_report(
            report_id=reports[0].xpath('@id')[0],
            delta_report_id=reports[1].xpath('@id')[0],
            filter_string='delta_states=n',
            format_id='c1645568-627a-11e3-a660-406186ea4fc5',
        )

        csv_in_b64 = delta_report.xpath('report/text()')[0]
        csv = base64.b64decode(csv_in_b64)

        print("  Composing Email...")
        alert_email = MIMEMultipart()
        alert_email['Subject'] = email_subject
        alert_email['To'] = ', '.join(to_addresses)
        alert_email['From'] = from_address
        alert_email['Date'] = formatdate(localtime=True)

        report_attachment = MIMEBase('application', "octet-stream")
        report_attachment.add_header(
            'Content-Disposition', 'attachment', filename='delta.csv'
        )
        report_attachment.set_payload(csv)
        alert_email.attach(report_attachment)

        print("  Sending Email...")
        try:
            with smtplib.SMTP(mta_address, mta_port) as smtp:
                smtp.ehlo()
                smtp.starttls()
                smtp.ehlo()
                smtp.login(mta_user, mta_password)  # if required
                smtp.sendmail(
                    from_address, to_addresses, alert_email.as_string()
                )
                smtp.close()
                print("  Email has been sent!")

                gmp.create_tag(
                    name=report_tag_name,
                    resource_id=reports[0].xpath('@id')[0],
                    resource_type='report',
                    value=datetime.datetime.now(),
                )
        except Exception:  # pylint: disable=broad-except
            print("  Unable to send the email. Error: ", sys.exc_info()[0])
            # raise # in case an error should stop the script
            continue  # ignore the problem for the time being

    print(f"\nCheck will be repeated in {str(interval)} minutes...\n")
    sc.enter(
        interval * 60,
        1,
        execute_send_delta_emails,
        argument=(sc,),
        kwargs=kwargs,
    )
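
execute_send_delta_emails reads its settings from **kwargs and re-enters itself with the same kwargs, so the first call only has to seed the scheduler with them. A sketch of that seeding; every value below is a placeholder, and the main(gmp, args) entry point is an assumption about how the surrounding script receives its authenticated GMP connection.

# Hypothetical bootstrap; all values are placeholders.
import sched
import time

def main(gmp, args):  # assumption: gvm-script style entry point providing an authenticated gmp
    sc = sched.scheduler(time.time, time.sleep)
    sc.enter(0, 1, execute_send_delta_emails, argument=(sc,), kwargs={
        'gmp': gmp,
        'task_tag': 'send_delta',             # placeholder tag selecting the tasks to check
        'interval': 60,                       # minutes between checks
        'email_subject': 'Delta report',
        'to_addresses': ['soc@example.com'],
        'from_address': 'gvm@example.com',
        'mta_address': 'smtp.example.com',
        'mta_port': 587,
        'mta_user': 'gvm',
        'mta_password': 'secret',
        'report_tag_name': 'delta_alert_sent',
    })
    sc.run()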