def create_scraper_monitor_flow():
    # Run every day at 15:00 UTC (10:00 US Eastern during standard time)
    schedule = CronSchedule("0 15 * * *")
    with Flow("MonitorFailingScrapers", schedule) as flow:
        slack_webhook_url = EnvVarSecret("SLACK_WEBHOOK_URL")
        run_monitor_scrapers(slack_webhook_url)

    return flow


def create_flow_for_table(table_name):
    sched = CronSchedule("50 */2 * * *")
    tn = f"data.{table_name}"
    sn = f"{tn}_id_seq"
    with Flow(f"clean-sql-{table_name}", sched) as flow:
        connstr = EnvVarSecret("COVID_DB_CONN_URI")
        ready = truncate_table(connstr, tn)
        reset_sequence(connstr, sn, ready)

    return flow
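Both factories above return flows without registering them. A minimal sketch of wiring them up against a Prefect 1.x backend; the table name is a placeholder, and the project name "can-scrape" is borrowed from the later examples:

# Hypothetical registration step for the factory-built flows above.
# "us_covid_sources" is an illustrative table name only.
if __name__ == "__main__":
    create_scraper_monitor_flow().register(project_name="can-scrape")
    create_flow_for_table("us_covid_sources").register(project_name="can-scrape")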
Example #3
def main(project_name):
    schedule = CronSchedule('0 * * * *')
    config = Config()
    with Flow('Purple Air hourly download flow', schedule) as flow:
        run_time = get_run_time()
        fetch_result = fetch_results(config)
        transform_result = transform_results(config, fetch_result)
        write_results(config, transform_result, run_time)
    print(flow.register(project_name=project_name))
    flow.run_agent()
Example #4
def create_cdc_all_states_flow():
    """Creates a flow that runs the CDC data update on all states."""
    sched = CronSchedule("17 */4 * * *")

    flow = Flow("CDCAllStatesDataUpdate", sched)
    for state in ALL_STATES_PLUS_DC:
        task = StartFlowRun(
            flow_name=CDCCovidDataTracker.__name__,
            project_name="can-scrape",
            wait=True,
            parameters={"state": state.abbr},
        )
        flow.add_task(task)

    return flow
def create_main_flow(flows: List[Flow], project_name):
    schedule = CronSchedule("0 */3 * * *")

    with Flow("MainFlow", schedule) as main_flow:
        tasks = []
        for flow in flows:
            task = StartFlowRun(flow_name=flow.name,
                                project_name=project_name,
                                wait=True)
            tasks.append(task)

        parquet_flow = StartFlowRun(flow_name="UpdateParquetFiles",
                                    project_name=project_name,
                                    wait=True)

        for task in tasks:
            task.set_downstream(parquet_flow)

    return main_flow
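Because StartFlowRun launches child flows by name through the Prefect backend, every flow passed to create_main_flow (and the "UpdateParquetFiles" flow) must already be registered under the same project. A minimal sketch, with the flow list and project name assumed for illustration:

# Sketch only: the child flows and project name are placeholders.
child_flows = [create_flow_for_table("us_covid_sources")]
for child in child_flows:
    child.register(project_name="can-scrape")
create_main_flow(child_flows, project_name="can-scrape").register(project_name="can-scrape")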
Example #6
def test_run_workflow_ignores_schedule(test_logger):
    """
    Test that run_workflow ignores the workflow's schedule.
    """
    function_mock = create_autospec(lambda dummy_param: None)
    # Flow with no more scheduled runs
    with prefect.Flow(
            "Dummy_workflow",
            schedule=CronSchedule("0 0 * * *",
                                  end_date=pendulum.now().subtract(days=2)),
    ) as dummy_workflow:
        dummy_param = prefect.Parameter("dummy_param")
        FunctionTask(function_mock)(dummy_param=dummy_param)

    with prefect.context(logger=test_logger):
        run_workflow.run(
            parametrised_workflow=(dummy_workflow,
                                   dict(dummy_param="DUMMY_VALUE")))
    function_mock.assert_called_once_with(dummy_param="DUMMY_VALUE")
Example #7
def create_flow_for_scraper(ix: int, cls: Type[DatasetBase]):
    sched = CronSchedule(f"{ix % 60} */4 * * *")

    with Flow(cls.__name__, sched) as flow:
        connstr = EnvVarSecret("COVID_DB_CONN_URI")
        sentry_dsn = EnvVarSecret("SENTRY_DSN")
        sentry_sdk_task = initialize_sentry(sentry_dsn)

        d = create_scraper(cls)
        fetched = fetch(d)
        normalized = normalize(d)
        validated = validate(d)
        done = put(d, connstr)

        d.set_upstream(sentry_sdk_task)
        normalized.set_upstream(fetched)
        validated.set_upstream(normalized)
        done.set_upstream(validated)

    return flow
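The minute field ix % 60 staggers each scraper flow across the hour so they do not all start at once while keeping the same four-hour cadence. A small sketch of the intended call pattern; the list of scraper classes is hypothetical:

# Hypothetical: scraper_classes would be the DatasetBase subclasses to schedule.
scraper_classes = [CDCCovidDataTracker]
flows = [create_flow_for_scraper(ix, cls) for ix, cls in enumerate(scraper_classes)]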
Example #8
    def _deserialize(self, value, attr, data,
                     **kwargs) -> "prefect.schedules.schedules.Schedule":
        """
        Deserialise a cron string as a cron schedule.

        Returns
        -------
        Schedule
            Prefect CronSchedule to run a flow according to the schedule
            defined by the input string.
        
        Raises
        ------
        ValidationError
            if the input value is not a valid cron string or None
        """
        cron_string = super()._deserialize(value, attr, data, **kwargs)
        try:
            schedule = CronSchedule(cron_string)
        except ValueError:
            raise ValidationError(f"Invalid cron string: '{cron_string}'.")
        return schedule
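This _deserialize follows the marshmallow Field interface. A hedged sketch of how such a field might be used in a schema; the wrapper class and schema below are assumptions, not taken from the original:

from marshmallow import Schema, ValidationError, fields
from prefect.schedules import CronSchedule


class CronScheduleField(fields.String):
    # Hypothetical field class; the body mirrors the _deserialize shown above.
    def _deserialize(self, value, attr, data, **kwargs):
        cron_string = super()._deserialize(value, attr, data, **kwargs)
        try:
            return CronSchedule(cron_string)
        except ValueError:
            raise ValidationError(f"Invalid cron string: '{cron_string}'.")


class WorkflowSchema(Schema):
    schedule = CronScheduleField()


# Loading the schema yields a ready-to-use Prefect CronSchedule.
schedule = WorkflowSchema().load({"schedule": "0 0 * * *"})["schedule"]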
Example #9
@task
def frontfill():
    now = datetime.datetime.now()
    pm.execute_notebook(
        "etl/hn_etl_front_fill.ipynb",
        f"s3://python-portfolio-notebooks/hn_updates/frontfill{now.year}-{now.month}-{now.day}.ipynb",
    )


@task
def test_changes():

    now = datetime.datetime.now()
    pm.execute_notebook(
        "etl/hn_data_test.ipynb",
        f"s3://python-portfolio-notebooks/hn_updates/test{now.year}-{now.month}-{now.day}.ipynb",
    )


with Flow("ETL", schedule=CronSchedule("0 9 * * *")) as flow:

    frontfill = frontfill()
    backfill = backfill()
    test_changes = test_changes()

if __name__ == "__main__":
    flow.run()
Example #10
@task(
    retry_delay=timedelta(minutes=1),
    nout=2,
    trigger=triggers.all_finished,
)
def create_parquet(_success):
    ts = prefect.context.scheduled_start_time
    dt_str = pd.to_datetime(ts).strftime("%Y-%m-%dT%H")
    vintage_fn = FN_STR.format(dt_str) + ".parquet"
    fn = FN_STR.format("") + ".parquet"

    df = pd.read_csv(CSV_FN, parse_dates=["dt"])
    df.to_parquet(DATA_PATH / vintage_fn, index=False)
    df.to_parquet(DATA_PATH / fn, index=False)
    return vintage_fn, fn


@task
def get_gcs_cmd(fn):
    return f"gsutil acl ch -u AllUsers:R gs://can-scrape-outputs/final/{fn}"


shell = ShellTask()
with Flow("UpdateParquetFiles", CronSchedule("10 */2 * * *")) as f:
    connstr = EnvVarSecret("COVID_DB_CONN_URI")
    success = export_to_csv(connstr)
    vintage_fn, fn = create_parquet(success)
    shell(get_gcs_cmd(vintage_fn))
    shell(get_gcs_cmd(fn))

f.register(project_name="can-scrape")
Example #11
from datetime import datetime, timedelta
import pendulum
import prefect
from prefect import task, Flow
from prefect.schedules import IntervalSchedule, CronSchedule
import pandas as pd
from io import BytesIO
import zipfile
import requests

schedule = CronSchedule(cron="*/10 * * * *",
                        start_date=pendulum.datetime(2020,
                                                     11,
                                                     25,
                                                     18,
                                                     40,
                                                     tz="America/Sao_Paulo"))


@task
def get_raw_data():
    url = "http://download.inep.gov.br/microdados/Enade_Microdados/microdados_enade_2019.zip"
    filebytes = BytesIO(requests.get(url).content)

    myzip = zipfile.ZipFile(filebytes)
    myzip.extractall()
    path = './microdados_enade_2019/2019/3.DADOS/'
    return path


@task
Example #12
    info_keys=["stargazers_count", "subscribers_count"],
    max_retries=1,
    retry_delay=datetime.timedelta(minutes=1),
)


@task
def process_stats(stats):
    data = {
        "Stars": stats["stargazers_count"],
        "Watchers": stats["subscribers_count"],
        "Date": pendulum.now("utc").isoformat(),
    }
    return data


airtable = WriteAirtableRow(
    base_key="XXXXXXX",
    table_name="Stars",
    max_retries=1,
    retry_delay=datetime.timedelta(minutes=1),
)
daily_schedule = CronSchedule("*/1 * * * *")


with Flow("Collect Repo Stats", schedule=daily_schedule) as flow:
    data = process_stats(repo_stats)
    final = airtable(data)


flow.run()
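This snippet begins mid-call: the info_keys block at the top is the tail of the task that produces repo_stats. Judging from the fuller variant in Example #18 below, the missing opening is presumably something like the GetRepoInfo task from prefect.tasks.github; the class name and import path are an inference, not shown in the original:

import datetime
from prefect.tasks.github import GetRepoInfo

# Presumed opening of the truncated call above (repo value taken from Example #18).
repo_stats = GetRepoInfo(
    repo="PrefectHQ/prefect",
    info_keys=["stargazers_count", "subscribers_count"],
    max_retries=1,
    retry_delay=datetime.timedelta(minutes=1),
)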
Example #13
    fleet_segments,
    infractions,
    init_species_groups,
    last_positions,
    ports,
    species,
    vessels,
)

################################ Define flow schedules ################################
control_anteriority.flow.schedule = IntervalSchedule(interval=timedelta(hours=1))
current_segments.flow.schedule = IntervalSchedule(interval=timedelta(minutes=10))
ers.flow.schedule = IntervalSchedule(interval=timedelta(minutes=1))
fishing_gear_codes.flow.schedule = CronSchedule("0 3 * * *")
last_positions.flow.schedule = IntervalSchedule(interval=timedelta(minutes=1))
species.flow.schedule = CronSchedule("0 4 * * *")
vessels.flow.schedule = CronSchedule("0 2 * * *")

###################### List flows to register with prefect server #####################
flows_to_register = [
    controllers.flow,
    controls.flow,
    control_anteriority.flow,
    current_segments.flow,
    ers.flow,
    fishing_gear_codes.flow,
    fleet_segments.flow,
    infractions.flow,
    init_species_groups.flow,
Example #14
from datetime import datetime, timedelta
import pendulum
import prefect
from prefect import task, Flow
from prefect.schedules import CronSchedule
import pandas as pd
from io import BytesIO
import zipfile
import requests
import sqlalchemy
import pyodbc

schedule = CronSchedule(cron="*/10 * * * *",
                        start_date=pendulum.datetime(2020,
                                                     12,
                                                     1,
                                                     13,
                                                     45,
                                                     tz='America/Sao_Paulo'))


@task
def get_raw_data():
    url = "http://download.inep.gov.br/microdados/Enade_Microdados/microdados_enade_2019.zip",
    filebytes = BytesIO(requests.get(url).content)

    #extrair conteudo do zip
    myzip = zipfile.ZipeFile(filebytes)
    myzip.extractall()
    path = './microdados_enade_2019/2019/3.DADOS/'
    return path
Example #15
from datetime import datetime, timedelta
import pendulum
import prefect
from prefect import task, Flow
from prefect.schedules import CronSchedule
import pandas as pd
from io import BytesIO
import zipfile
import requests

schedule = CronSchedule(cron="*/30 * * * *",
                        start_date=pendulum.datetime(2021,
                                                     3,
                                                     12,
                                                     17,
                                                     00,
                                                     tz='America/Sao_Paulo'))


@task
def get_raw_data():
    url = 'http://download.inep.gov.br/microdados/microdados_enem_2019.zip'
    filebytes = BytesIO(requests.get(url).content)

    logger = prefect.context.get('logger')
    logger.info('Data obtained')

    # Extract the zip file contents
    myzip = zipfile.ZipFile(filebytes)
    myzip.extractall()
    path = './DADOS/'
Example #16
from prefect import task, Flow
from prefect.schedules import CronSchedule


@task
def extract():
    return [1, 2, 3, 50]


@task
def transform(x):
    return [i * 10 for i in x]


@task
def load(y):
    print("Received y: {}".format(y))


with Flow("ETL") as flow:
    e = extract()
    t = transform(e)
    l = load(t)
    schedule = CronSchedule("0 0 * * *")  # setup a cron scheduler

flow_state = flow.run()  # set the flow run to an object to track state

flow.visualize(
    flow_state=flow_state)  # visualize how the data moves throughout the DAG

Example #17
from datetime import datetime, timedelta
import pendulum
import prefect
from prefect import task, Flow
from prefect.schedules import CronSchedule
import pandas as pd
from io import BytesIO
import zipfile
import requests
import mysql
import pymysql
import sqlalchemy
from sqlalchemy import create_engine

schedule = CronSchedule(
    cron="*/10 * * * *",  # minute, hour, day of month, month, day of week
    start_date=pendulum.datetime(2020, 11, 26, 14, 25, tz='America/Sao_Paulo'))


@task
def get_raw_data():
    # Assign the download link to 'url'
    url = 'http://download.inep.gov.br/microdados/Enade_Microdados/microdados_enade_2019.zip'

    # Download the content
    filebytes = BytesIO(requests.get(url).content)

    # Extract the zip file contents
    myzip = zipfile.ZipFile(filebytes)
    myzip.extractall()
    path = './microdados_enade_2019/2019/3.DADOS/'
Example #18
    repo="PrefectHQ/prefect",
    info_keys=["stargazers_count", "subscribers_count"],
    max_retries=1,
    retry_delay=datetime.timedelta(minutes=1),
)


@task
def process_stats(stats):
    data = {
        "Stars": stats["stargazers_count"],
        "Watchers": stats["subscribers_count"],
        "Date": pendulum.now("utc").isoformat(),
    }
    return data


airtable = WriteAirtableRow(
    base_key="XXXXXXX",
    table_name="Stars",
    max_retries=1,
    retry_delay=datetime.timedelta(minutes=1),
)
daily_schedule = CronSchedule("0 8 * * *")

with Flow("Collect Repo Stats", schedule=daily_schedule) as flow:
    data = process_stats(repo_stats)
    final = airtable(data)

flow.run()

Example #19
    'labels': ['ecs-agent', 'ooi', 'prod'],
    'run_task_kwargs': {
        'cluster': 'prefectECSCluster',
        'launchType': 'FARGATE',
    },
}

project_name = "ooi-harvest"
data_org = "ooi-data"
config_json = yaml.safe_load(CONFIG_PATH.open())
flow_run_name = "-".join([
    config_json['instrument'],
    config_json['stream']['method'],
    config_json['stream']['name'],
])
schedule = CronSchedule(config_json['workflow_config']['schedule'])
run_config = ECSRun(**RUN_OPTIONS)

parent_run_opts = dict(**copy.deepcopy(RUN_OPTIONS))
parent_run_opts.update({'cpu': '0.5 vcpu', 'memory': '2 GB'})
parent_run_config = ECSRun(**parent_run_opts)

with Flow(flow_run_name, schedule=schedule,
          run_config=parent_run_config) as parent_flow:
    flow_run = create_flow_run(
        flow_name="stream_harvest",
        run_name=flow_run_name,
        project_name=project_name,
        parameters={
            'config': config_json,
            'error_test': False,
Example #20
from prefect import Client
from prefect.schedules import CronSchedule
from reddit_daily import flow

c = Client()
s = CronSchedule("0 * * * *")

flow.schedule = s

flow.register(project_name="Dylan's Project")

Example #21
from datetime import datetime, timedelta
import pendulum
import prefect
from prefect import task, Flow
from prefect.schedules import CronSchedule
import pandas as pd
from io import BytesIO
import zipfile
import requests

schedule = CronSchedule(
    cron='*/10 * * * *',
    start_date=pendulum.datetime(2020, 12, 5, 14, tz="America/Sao_Paulo"))


@task
def get_raw_data():
    url = 'http://download.inep.gov.br/microdados/Enade_Microdados/microdados_enade_2019.zip'
    filebytes = BytesIO(requests.get(url).content)

    zipped = zipfile.ZipFile(filebytes)
    zipped.extractall()
    return './microdados_enade_2019/2019/3.DADOS/'


@task
def apply_filters(path):
    interested_cols = [
Example #22
import sys
import prefect
from prefect import task, Flow, Parameter
from prefect.schedules import CronSchedule

sys.path.append('../pyoilfundy')

from pyoilfundy import fundyproducts as p

daily_7_sched = CronSchedule('0 7 * * 1-5')


def register_products_dash_flow():
    with Flow('product_by_region', schedule=daily_7_sched) as f:
        commods = ['lpg', 'naphtha', 'gasoline', 'diesel', 'jet', 'fueloil']
        p.make_specified_product_dash.map(commods)

    f.register(project_name='pyoilfundy')
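As shown, register_products_dash_flow builds and registers the flow but nothing in this excerpt calls it; a minimal, assumed entry point would be:

# Hypothetical entry point; the original excerpt does not show how the function is invoked.
if __name__ == '__main__':
    register_products_dash_flow()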