def create_scraper_monitor_flow():
    # Run every day at 15 UTC (or 10 ET)
    schedule = CronSchedule("0 15 * * *")
    with Flow("MonitorFailingScrapers", schedule) as flow:
        slack_webhook_url = EnvVarSecret("SLACK_WEBHOOK_URL")
        run_monitor_scrapers(slack_webhook_url)
    return flow
def create_flow_for_table(table_name):
    sched = CronSchedule("50 */2 * * *")
    tn = f"data.{table_name}"
    sn = f"{tn}_id_seq"
    with Flow(f"clean-sql-{table_name}", sched) as flow:
        connstr = EnvVarSecret("COVID_DB_CONN_URI")
        ready = truncate_table(connstr, tn)
        reset_sequence(connstr, sn, ready)
    return flow
def main(project_name):
    schedule = CronSchedule('0 * * * *')
    config = Config()
    with Flow('Purple Air hourly download flow', schedule) as flow:
        run_time = get_run_time
        fetch_result = fetch_results(config)
        transform_result = transform_results(config, fetch_result)
        write_results(config, transform_result, run_time)
    print(flow.register(project_name=project_name))
    flow.run_agent()
def create_cdc_all_states_flow():
    """Creates a flow that runs the CDC data update on all states."""
    sched = CronSchedule("17 */4 * * *")
    flow = Flow("CDCAllStatesDataUpdate", sched)
    for state in ALL_STATES_PLUS_DC:
        task = StartFlowRun(
            flow_name=CDCCovidDataTracker.__name__,
            project_name="can-scrape",
            wait=True,
            parameters={"state": state.abbr},
        )
        flow.add_task(task)
    return flow
def create_main_flow(flows: List[Flow], project_name):
    schedule = CronSchedule("0 */3 * * *")
    with Flow("MainFlow", schedule) as main_flow:
        tasks = []
        for flow in flows:
            task = StartFlowRun(flow_name=flow.name, project_name=project_name, wait=True)
            tasks.append(task)
        parquet_flow = StartFlowRun(
            flow_name="UpdateParquetFiles", project_name=project_name, wait=True
        )
        for task in tasks:
            task.set_downstream(parquet_flow)
    return main_flow
def test_run_workflow_ignores_schedule(test_logger):
    """
    Test that run_workflow ignores the workflow's schedule.
    """
    function_mock = create_autospec(lambda dummy_param: None)

    # Flow with no more scheduled runs
    with prefect.Flow(
        "Dummy_workflow",
        schedule=CronSchedule("0 0 * * *", end_date=pendulum.now().subtract(days=2)),
    ) as dummy_workflow:
        dummy_param = prefect.Parameter("dummy_param")
        FunctionTask(function_mock)(dummy_param=dummy_param)

    with prefect.context(logger=test_logger):
        run_workflow.run(
            parametrised_workflow=(dummy_workflow, dict(dummy_param="DUMMY_VALUE"))
        )
    function_mock.assert_called_once_with(dummy_param="DUMMY_VALUE")
def create_flow_for_scraper(ix: int, cls: Type[DatasetBase]):
    sched = CronSchedule(f"{ix % 60} */4 * * *")
    with Flow(cls.__name__, sched) as flow:
        connstr = EnvVarSecret("COVID_DB_CONN_URI")
        sentry_dsn = EnvVarSecret("SENTRY_DSN")
        sentry_sdk_task = initialize_sentry(sentry_dsn)

        d = create_scraper(cls)
        fetched = fetch(d)
        normalized = normalize(d)
        validated = validate(d)
        done = put(d, connstr)

        d.set_upstream(sentry_sdk_task)
        normalized.set_upstream(fetched)
        validated.set_upstream(normalized)
        done.set_upstream(validated)
    return flow
def _deserialize(self, value, attr, data, **kwargs) -> "prefect.schedules.schedules.Schedule":
    """
    Deserialise a cron string as a cron schedule.

    Returns
    -------
    Schedule
        Prefect CronSchedule to run a flow according to the schedule defined
        by the input string.

    Raises
    ------
    ValidationError
        if the input value is not a valid cron string or None
    """
    cron_string = super()._deserialize(value, attr, data, **kwargs)
    try:
        schedule = CronSchedule(cron_string)
    except ValueError:
        raise ValidationError(f"Invalid cron string: '{cron_string}'.")
    return schedule
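# The _deserialize above is a marshmallow field hook. A minimal sketch of how such a
# cron-string field might be wired into a schema -- ScheduleField, WorkflowSchema and
# the load example are assumptions for illustration, not taken from the source.
from marshmallow import Schema, ValidationError, fields
from prefect.schedules import CronSchedule


class ScheduleField(fields.String):
    """Hypothetical field: parse a cron string into a prefect CronSchedule."""

    def _deserialize(self, value, attr, data, **kwargs):
        cron_string = super()._deserialize(value, attr, data, **kwargs)
        try:
            return CronSchedule(cron_string)
        except ValueError:
            raise ValidationError(f"Invalid cron string: '{cron_string}'.")


class WorkflowSchema(Schema):
    """Hypothetical schema using the field above."""
    schedule = ScheduleField(allow_none=True)


# "0 0 * * *" loads into a CronSchedule; an invalid string raises ValidationError.
parsed = WorkflowSchema().load({"schedule": "0 0 * * *"})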
@task
def frontfill():
    now = datetime.datetime.now()
    pm.execute_notebook(
        "etl/hn_etl_front_fill.ipynb",
        "s3://python-portfolio-notebooks/hn_updates/frontfill"
        + str(now.year) + "-" + str(now.month) + "-" + str(now.day) + ".ipynb",
    )


@task
def test_changes():
    now = datetime.datetime.now()
    pm.execute_notebook(
        "etl/hn_data_test.ipynb",
        "s3://python-portfolio-notebooks/hn_updates/test"
        + str(now.year) + "-" + str(now.month) + "-" + str(now.day) + ".ipynb",
    )


with Flow("ETL", schedule=CronSchedule("0 9 * * *")) as flow:
    frontfill = frontfill()
    backfill = backfill()
    test_changes = test_changes()

if __name__ == "__main__":
    flow.run()
    retry_delay=timedelta(minutes=1),
    nout=2,
    trigger=triggers.all_finished,
)
def create_parquet(_success):
    ts = prefect.context.scheduled_start_time
    dt_str = pd.to_datetime(ts).strftime("%Y-%m-%dT%H")
    vintage_fn = FN_STR.format(dt_str) + ".parquet"
    fn = FN_STR.format("") + ".parquet"

    df = pd.read_csv(CSV_FN, parse_dates=["dt"])
    df.to_parquet(DATA_PATH / vintage_fn, index=False)
    df.to_parquet(DATA_PATH / fn, index=False)
    return vintage_fn, fn


@task
def get_gcs_cmd(fn):
    return f"gsutil acl ch -u AllUsers:R gs://can-scrape-outputs/final/{fn}"


shell = ShellTask()

with Flow("UpdateParquetFiles", CronSchedule("10 */2 * * *")) as f:
    connstr = EnvVarSecret("COVID_DB_CONN_URI")
    success = export_to_csv(connstr)
    vintage_fn, fn = create_parquet(success)
    shell(get_gcs_cmd(vintage_fn))
    shell(get_gcs_cmd(fn))

f.register(project_name="can-scrape")
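# The nout=2 in the (truncated) @task(...) decorator above is what lets the flow unpack
# "vintage_fn, fn = create_parquet(success)" into two separate task results. A minimal,
# self-contained sketch of that pattern -- the names here are illustrative only.
from prefect import task, Flow


@task(nout=2)
def split(value):
    # A task with nout=2 returns a tuple that can be unpacked inside the flow context.
    return value, value * 2


with Flow("nout-demo") as demo_flow:
    small, big = split(21)  # each name becomes its own upstream result for later tasks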
from datetime import datetime, timedelta
import pendulum
import prefect
from prefect import task, Flow
from prefect.schedules import IntervalSchedule, CronSchedule
import pandas as pd
from io import BytesIO
import zipfile
import requests

schedule = CronSchedule(
    cron="*/10 * * * *",
    start_date=pendulum.datetime(2020, 11, 25, 18, 40, tz="America/Sao_Paulo"),
)


@task
def get_raw_data():
    url = "http://download.inep.gov.br/microdados/Enade_Microdados/microdados_enade_2019.zip"
    filebytes = BytesIO(requests.get(url).content)
    myzip = zipfile.ZipFile(filebytes)
    myzip.extractall()
    path = './microdados_enade_2019/2019/3.DADOS/'
    return path


@task
    info_keys=["stargazers_count", "subscribers_count"],
    max_retries=1,
    retry_delay=datetime.timedelta(minutes=1),
)


@task
def process_stats(stats):
    data = {
        "Stars": stats["stargazers_count"],
        "Watchers": stats["subscribers_count"],
        "Date": pendulum.now("utc").isoformat(),
    }
    return data


airtable = WriteAirtableRow(
    base_key="XXXXXXX",
    table_name="Stars",
    max_retries=1,
    retry_delay=datetime.timedelta(minutes=1),
)

# Note: despite the name, "*/1 * * * *" fires every minute, not once a day.
daily_schedule = CronSchedule("*/1 * * * *")

with Flow("Collect Repo Stats", schedule=daily_schedule) as flow:
    data = process_stats(repo_stats)
    final = airtable(data)

flow.run()
    fleet_segments,
    infractions,
    init_species_groups,
    last_positions,
    ports,
    species,
    vessels,
)

################################ Define flow schedules ################################

control_anteriority.flow.schedule = IntervalSchedule(interval=timedelta(hours=1))
current_segments.flow.schedule = IntervalSchedule(interval=timedelta(minutes=10))
ers.flow.schedule = IntervalSchedule(interval=timedelta(minutes=1))
fishing_gear_codes.flow.schedule = CronSchedule("0 3 * * *")
last_positions.flow.schedule = IntervalSchedule(interval=timedelta(minutes=1))
species.flow.schedule = CronSchedule("0 4 * * *")
vessels.flow.schedule = CronSchedule("0 2 * * *")

###################### List flows to register with prefect server #####################

flows_to_register = [
    controllers.flow,
    controls.flow,
    control_anteriority.flow,
    current_segments.flow,
    ers.flow,
    fishing_gear_codes.flow,
    fleet_segments.flow,
    infractions.flow,
    init_species_groups.flow,
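# The excerpt stops before flows_to_register is used. A hedged sketch of the usual
# next step -- looping over the list and registering each flow with the server; the
# project name "fisheries" is an assumption, not taken from the source.
for flow in flows_to_register:
    flow.register(project_name="fisheries")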
from datetime import datetime, timedelta
import pendulum
import prefect
from prefect import task, Flow
from prefect.schedules import CronSchedule
import pandas as pd
from io import BytesIO
import zipfile
import requests
import sqlalchemy
import pyodbc

schedule = CronSchedule(
    cron="*/10 * * * *",
    start_date=pendulum.datetime(2020, 12, 1, 13, 45, tz='America/Sao_Paulo'),
)


@task
def get_raw_data():
    url = "http://download.inep.gov.br/microdados/Enade_Microdados/microdados_enade_2019.zip"
    filebytes = BytesIO(requests.get(url).content)
    # Extract the zip contents
    myzip = zipfile.ZipFile(filebytes)
    myzip.extractall()
    path = './microdados_enade_2019/2019/3.DADOS/'
    return path
from datetime import datetime, timedelta
import pendulum
import prefect
from prefect import task, Flow
from prefect.schedules import CronSchedule
import pandas as pd
from io import BytesIO
import zipfile
import requests

schedule = CronSchedule(
    cron="*/30 * * * *",
    start_date=pendulum.datetime(2021, 3, 12, 17, 00, tz='America/Sao_Paulo'),
)


@task
def get_raw_data():
    url = 'http://download.inep.gov.br/microdados/microdados_enem_2019.zip'
    filebytes = BytesIO(requests.get(url).content)
    logger = prefect.context.get('logger')
    logger.info('Data obtained')
    # Extract the contents of the zipfile
    myzip = zipfile.ZipFile(filebytes)
    myzip.extractall()
    path = './DADOS/'
from prefect import task, Flow
from prefect.schedules import CronSchedule


@task
def extract():
    return [1, 2, 3, 50]


@task
def transform(x):
    return [i * 10 for i in x]


@task
def load(y):
    print("Received y: {}".format(y))


with Flow("ETL") as flow:
    e = extract()
    t = transform(e)
    l = load(t)

schedule = CronSchedule("0 0 * * *")  # setup a cron scheduler

flow_state = flow.run()  # set the flow run to an object to track state
flow.visualize(flow_state=flow_state)  # visualize how the data moves throughout the DAG

#%%
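# In the snippet above the CronSchedule is built but never attached, so flow.run()
# executes once, immediately. A minimal sketch of attaching it so the flow actually
# waits for each midnight tick (assuming the same extract/transform/load tasks):
with Flow("ETL", schedule=CronSchedule("0 0 * * *")) as scheduled_flow:
    e = extract()
    t = transform(e)
    l = load(t)

# With a schedule set, run() blocks and launches a new run at every cron tick
# instead of returning after a single execution.
scheduled_flow.run()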
from datetime import datetime, timedelta
import pendulum
import prefect
from prefect import task, Flow
from prefect.schedules import CronSchedule
import pandas as pd
from io import BytesIO
import zipfile
import requests
import mysql
import pymysql
import sqlalchemy
from sqlalchemy import create_engine

schedule = CronSchedule(
    cron="*/10 * * * *",  # minute, hour, day of month, month, day of week
    start_date=pendulum.datetime(2020, 11, 26, 14, 25, tz='America/Sao_Paulo'))


@task
def get_raw_data():
    # Assign the link to a 'url' object
    url = 'http://download.inep.gov.br/microdados/Enade_Microdados/microdados_enade_2019.zip'
    # Download the content
    filebytes = BytesIO(requests.get(url).content)
    # Extract the contents of the 'zipfile'
    myzip = zipfile.ZipFile(filebytes)
    myzip.extractall()
    path = './microdados_enade_2019/2019/3.DADOS/'
repo="PrefectHQ/prefect", info_keys=["stargazers_count", "subscribers_count"], max_retries=1, retry_delay=datetime.timedelta(minutes=1), ) @task def process_stats(stats): data = { "Stars": stats["stargazers_count"], "Watchers": stats["subscribers_count"], "Date": pendulum.now("utc").isoformat(), } return data airtable = WriteAirtableRow( base_key="XXXXXXX", table_name="Stars", max_retries=1, retry_delay=datetime.timedelta(minutes=1), ) daily_schedule = CronSchedule("0 8 * * *") with Flow("Collect Repo Stats", schedule=daily_schedule) as flow: data = process_stats(repo_stats) final = airtable(data) flow.run()
    'labels': ['ecs-agent', 'ooi', 'prod'],
    'run_task_kwargs': {
        'cluster': 'prefectECSCluster',
        'launchType': 'FARGATE',
    },
}

project_name = "ooi-harvest"
data_org = "ooi-data"

config_json = yaml.safe_load(CONFIG_PATH.open())
flow_run_name = "-".join([
    config_json['instrument'],
    config_json['stream']['method'],
    config_json['stream']['name'],
])
schedule = CronSchedule(config_json['workflow_config']['schedule'])
run_config = ECSRun(**RUN_OPTIONS)

parent_run_opts = dict(**copy.deepcopy(RUN_OPTIONS))
parent_run_opts.update({'cpu': '0.5 vcpu', 'memory': '2 GB'})
parent_run_config = ECSRun(**parent_run_opts)

with Flow(flow_run_name, schedule=schedule, run_config=parent_run_config) as parent_flow:
    flow_run = create_flow_run(
        flow_name="stream_harvest",
        run_name=flow_run_name,
        project_name=project_name,
        parameters={
            'config': config_json,
            'error_test': False,
from prefect import Client
from prefect.schedules import CronSchedule

from reddit_daily import flow

c = Client()

s = CronSchedule("0 * * * *")
flow.schedule = s

flow.deploy(project="Dylan's Project")
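# Flow.deploy appears to come from an older Prefect Core release; on later 0.x/1.x
# versions the equivalent call is flow.register. A hedged sketch of the same script
# against that newer API (project name carried over from the snippet above):
from prefect.schedules import CronSchedule

from reddit_daily import flow

flow.schedule = CronSchedule("0 * * * *")  # hourly, at minute 0
flow.register(project_name="Dylan's Project")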
from datetime import datetime, timedelta
import pendulum
import prefect
from prefect import task, Flow
from prefect.schedules import CronSchedule
import pandas as pd
from io import BytesIO
import zipfile
import requests

schedule = CronSchedule(
    cron='*/10 * * * *',
    start_date=pendulum.datetime(2020, 12, 5, 14, tz="America/Sao_Paulo"),
)


@task
def get_raw_date():
    url = 'http://download.inep.gov.br/microdados/Enade_Microdados/microdados_enade_2019.zip'
    filebytes = BytesIO(requests.get(url).content)
    zipped = zipfile.ZipFile(filebytes)
    zipped.extractall()
    return './microdados_enade_2019/2019/3.DADOS/'


@task
def apply_filters(path):
    interested_cols = [
import sys

import prefect
from prefect import task, Flow, Parameter
from prefect.schedules import CronSchedule

sys.path.append('../pyoilfundy')
from pyoilfundy import fundyproducts as p

daily_7_sched = CronSchedule('0 7 * * 1-5')


def register_products_dash_flow():
    with Flow('product_by_region', schedule=daily_7_sched) as f:
        commods = ['lpg', 'naphtha', 'gasoline', 'diesel', 'jet', 'fueloil']
        p.make_specified_product_dash.map(commods)
    f.register(project_name='pyoilfundy')