Example #1
0
def main(cmd):
    """Build the two NWP download flows and act on them according to ``cmd``.

    * ``"register"``: register both flows, then attach an hourly cron
      schedule to each and register them a second time.
    * ``"trigger"``: everything ``"register"`` does, followed by a manual
      trigger of each flow.
    * ``"run"``: clear the schedule of the run-00 flow and run it in the
      current process.

    Any other value only builds the flow objects.
    """
    # The concurrency setting is written after the first build, so the
    # run-00 flow is created with whatever `settings` held beforehand.
    flow_nwp_00 = create_flow_download(run=00, **settings)
    settings['max_concurrent_download'] = 5
    flow_nwp_12 = create_flow_download(run=12, **settings)
    flows = (flow_nwp_00, flow_nwp_12)

    if cmd in ("register", "trigger"):
        # First pass: make sure both flows are known to the prefect server
        for flow in flows:
            print(flow.register(project_name=prefect_project_name))

        # Second pass: for demo purposes, schedule a download each hour
        from prefect.schedules import Schedule
        from prefect.schedules.clocks import CronClock
        for flow in flows:
            flow.schedule = Schedule(clocks=[CronClock("0 * * * *")])
            print(flow.register(project_name=prefect_project_name))

    if cmd == "trigger":
        # Start one manual run per flow
        for flow in flows:
            manual_run_name = f"{flow.name}-manually_triggered"
            trigger_prefect_flow(flow_name=flow.name, run_name=manual_run_name)

    if cmd == "run":
        # In-process run: drop the schedule so run() executes a single time
        flow_nwp_00.schedule = None
        flow_nwp_00.run()
Example #2
0
def main(register, run):
    """Assemble the "numerai-reports" flow, then optionally register and/or run it.

    Args:
        register: when truthy, the flow gets a cron schedule ("1 19 * * *")
            and is registered in the "numerai" project with the "docker" label.
        run: when truthy, ``flow.run()`` is called in this process.
    """
    # A schedule is only attached when the flow is going to be registered
    schedule = Schedule(clocks=[CronClock("1 19 * * *")]) if register else None

    gcs_result = GCSResult(bucket='uuazed-prefect')
    with Flow("numerai-reports", schedule, result=gcs_result) as flow:
        upload_to_gcs(fetch())

    # Local helper modules are copied into the image under numerai_reports/
    bundled_files = {
        os.path.abspath(module): f"numerai_reports/{module}"
        for module in ("data.py", "settings.py", "utils.py")
    }
    flow.storage = Docker(
        registry_url="gcr.io/numerai-171710",
        python_dependencies=['pandas', 'numerapi', 'pyarrow'],
        files=bundled_files,
        env_vars={"PYTHONPATH": "$PYTHONPATH:/"},
        secrets=["GCP_CREDENTIALS"],
    )

    if register:
        flow.register(project_name="numerai", labels=["docker"])
    if run:
        flow.run()
Example #3
0
def init_schedule(start, stop, dates: Tuple, **interval: int) -> Schedule:
    """Create a single-clock interval schedule carrying start/stop defaults.

    Args:
        start: object whose ``name`` is the key for the start-date default.
        stop: object whose ``name`` is the key for the stop-date default.
        dates: exactly five items; the first is the clock's start date, the
            fourth is the stop default and the fifth is the start default.
        **interval: keyword arguments forwarded to ``timedelta``.

    Returns:
        A ``Schedule`` wrapping one ``IntervalClock``.
    """
    clock_start, _unused_a, _unused_b, stop_default, start_default = dates
    step = timedelta(**interval)
    defaults = {
        start.name: start_default,
        stop.name: stop_default,
    }
    interval_clock = IntervalClock(
        start_date=clock_start,
        interval=step,
        parameter_defaults=defaults,
    )
    return Schedule(clocks=[interval_clock])
Example #4
0
def on_schedule(path_to_schedule):
    """Read the reminder schedule CSV and run the reminder flow on it.

    One cron clock is built per CSV row: its cron string comes from
    ``chronstr_from_row(row)``, its ``row_index`` default is the row's
    position, and its start date is "now" in ``America/<time_zone_city>``.

    Args:
        path_to_schedule: path handed to ``pd.read_csv``.
    """
    reminders = pd.read_csv(path_to_schedule)

    clocks = []
    rows_and_cities = zip(reminders.iterrows(), reminders.time_zone_city)
    for position, ((_, row), city) in enumerate(rows_and_cities):
        clocks.append(
            CronClock(
                chronstr_from_row(row),
                parameter_defaults={"row_index": position},
                start_date=pendulum.now(f"America/{city}"),
            )
        )

    # NOTE(review): `row_index` is not defined inside this function —
    # presumably a module-level Parameter; confirm.
    with Flow("Send Habit Reminders", Schedule(clocks=clocks)) as flow:
        prefect_send_email(reminders, row_index)

    flow.run(parameters={'row_index': None})
from datetime import timedelta
from prefect import Flow
from prefect.schedules import Schedule
from prefect.schedules.clocks import IntervalClock

# One clock that fires every 65 seconds
schedule = Schedule(clocks=[IntervalClock(interval=timedelta(seconds=65))])

# The flow has no tasks; it only carries the interval schedule
my_flow = Flow(name="Interval Test", schedule=schedule)

my_flow.register(project_name="experimental")
Example #6
0
    job_config.skip_leading_rows = 1
    job_config.autodetect = True
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

    with open(filename, "rb") as source_file:
        job = client.load_table_from_file(source_file,
                                          table_ref,
                                          job_config=job_config)

    job.result()  # Waits for table load to complete.

    print("Loaded {} rows into {}:{}.".format(job.output_rows, dataset_id,
                                              table_id))


# Cron fields: minute 00, hour 1, every day of every month -> once a day at 01:00.
daily_schedule = Schedule(clocks=[CronClock("00 1 * * *")])


@task
def create_final_table():
    global master_table_path, today_table_path

    death, confirm, recover = get_data()
    date_range = confirm.columns[4:]

    df_loc = confirm[[
        'Province/State',
        'Country/Region',
        'Lat',
        'Long',
    ]].reset_index().rename(columns={'index': 'loc_index'})
                    },
                },
                {
                    "type": "section",
                    "text": {
                        "type":
                        "mrkdwn",
                        "text":
                        f"See all the new stargazers <https://github.com/{owner}/{repository}/stargazers|here>",
                    },
                },
            ]
        }


# A five-minute interval clock whose events are passed through the
# `is_weekday` filter.
schedule = Schedule(clocks=[IntervalClock(timedelta(minutes=5))],
                    filters=[is_weekday])
with Flow("Stargazers", schedule=schedule) as flow:
    """
    Tasks:
        Repository [Parameter]
        Owner [Parameter]
        GetStars
        ShouldNotify
        NotificationMessage
        Notify [SlackTask]
    """
    repository = Parameter("repository", default="prefect")
    owner = Parameter("owner", default="PrefectHQ")

    stars = GetStars(name="Get Stars",
                     max_retries=2,
    convos = episode.find_all('b') or episode.find_all('span',
                                                       {'class': 'char'})
    dialogue = []
    for item in convos:
        who = item.text.rstrip(': ').rstrip(' *').replace("'", "''")
        what = str(item.next_sibling).rstrip(' *').replace("'", "''")
        dialogue.append((who, what))
    return (title, dialogue)


with Flow(
        name="xfiles",
        schedule=Schedule(clocks=[
            # TODO: specify the schedule you want this to run, and with what parameters
            #  https://docs.prefect.io/core/concepts/schedules.html
            CronClock(cron='0 0 * * *',
                      parameter_defaults=dict(
                          url='http://www.insidethex.co.uk/')),
        ]),
        storage=Docker(
            # TODO: change to your docker registry:
            #  https://docs.prefect.io/cloud/recipes/configuring_storage.html
            registry_url='szelenka',
            # TODO: 'pin' the exact versions you used on your development machine
            python_dependencies=[
                'requests==2.23.0', 'beautifulsoup4==4.8.2',
                'sqlalchemy==1.3.15'
            ],
        ),
        # TODO: specify how you want to handle results
        #  https://docs.prefect.io/core/concepts/results.html#results-and-result-handlers
Example #9
0
import prefect
from prefect import task, Flow, Parameter
from prefect.schedules import clocks, Schedule
from prefect.environments.storage import GitHub


@task
def hello_world():
    """Print a fixed greeting to stdout."""
    greeting = "Hello, World!"
    print(greeting)


# Cron "0 0 * * *" (minute 0, hour 0, every day) wrapped in a one-clock schedule.
clock = clocks.CronClock("0 0 * * *")
schedule = Schedule(clocks=[clock])
with Flow("Star GitHub Repositories", schedule=schedule) as flow:
    hello_world()

# Store the flow in GitHub: `repo` and `path` locate the flow file, and
# `secrets` names the secret passed to the storage.
flow.storage = GitHub(
    repo="znicholasbrown/star-repos",
    path="app.py",
    secrets=["GITHUB_AUTH_TOKEN"
             ],  # Change this to your own GitHub auth token secret
)

# Registration happens at import time; a local run is left disabled below.
flow.register(project_name="SOME PROJECT")
# flow.run()
        rating=rating,
        release_date=release_date,
        source_url=url
    )
    return data


with Flow(
        name="example-selenium",
        schedule=Schedule(
            clocks=[
                # TODO: specify the schedule you want this to run, and with what parameters
                #  https://docs.prefect.io/core/concepts/schedules.html
                CronClock(
                    cron='0 0 * * *',
                    parameter_defaults=dict(
                        home_page='https://www.metacritic.com/',
                        gaming_platform='Switch'
                    )
                ),
            ]
        ),
        storage=Docker(
            # TODO: change to your docker registry:
            #  https://docs.prefect.io/cloud/recipes/configuring_storage.html
            registry_url='szelenka',
            # TODO: need to specify a base Docker image which has the chromedriver dependencies already installed
            base_image='szelenka/python-selenium-chromium:3.7.4',
            # TODO: 'pin' the exact versions you used on your development machine
            python_dependencies=[
                'selenium==3.141.0',
Example #11
0
    return {'table_name':'daily_covid', 'table_data':filt_df}

@task
def cleanup_files():
    """Delete the downloaded source files so the run directory stays clean.

    Every value of ``DATASOURCE_JHU_GITHUB`` is treated as a file path;
    paths that exist are removed and logged, missing ones are ignored.
    """
    log = prefect.context.get("logger")
    for key in DATASOURCE_JHU_GITHUB:
        target = DATASOURCE_JHU_GITHUB[key]
        if not path.exists(target):
            continue
        os.remove(target)
        log.info(f'Removed:{target}')
    log.info('Flow completed')

# Fires every day at minute 30 of hours 1 and 2 (cron "30 1,2 * * *").
# NOTE(review): this was previously described as "4pm and 5pm daily" —
# presumably the local-time equivalent in another timezone; confirm.
daily_schedule = Schedule(clocks=[CronClock("30 1,2 * * *")])

# Set up a prefect flow and run it on a schedule
with Flow('COVID 19 flow', schedule=daily_schedule) as flow:

    # extract
    # daily_covid_us_states_data = extract_us_covid19sheets_data()
    file_dfs = extract_gh_global_covid_ts_data()

    # transform
    daily_covid_data = transform_daily_covid_data(file_dfs)

    # load - add a load task and include it here

    # cleanup_files takes no data input, so without an explicit upstream
    # dependency nothing orders it after the tasks that read the source
    # files. Tie it to the last data task so it always runs at the end.
    cleanup_files(upstream_tasks=[daily_covid_data])

flow.run()
from prefect import Flow, task
from prefect.schedules import Schedule
from prefect.schedules.clocks import CronClock
import logging

# Configure the root logger with an empty date format string.
logging.basicConfig(datefmt="")

# Set the TZ environment variable for this process.
# NOTE(review): `os` is not among the imports visible here — confirm it is imported.
os.environ["TZ"] = "US/Eastern"


@task
def DoSpyStuff():
    """Return a fixed "spy stuff done" status message."""
    report = "🕵🏽  Spy stuff done 🕵🏽"
    return report


# One every-minute cron clock per agent; each clock is given that agent's label.
agent_1_clock = CronClock(cron="* * * * *", labels=["Derek Flint"])
agent_2_clock = CronClock(cron="* * * * *", labels=["Evenlyn Salt"])
agent_3_clock = CronClock(cron="* * * * *", labels=["George Smiley"])

schedule = Schedule(clocks=[
    agent_1_clock,
    agent_2_clock,
    agent_3_clock,
])

# The schedule has to be handed to the flow; previously it was built but
# never attached, so none of the labelled clocks had any effect.
with Flow("Mission: Possible (with labels)", schedule=schedule) as flow:
    DoSpyStuff()

flow.run()
Example #13
0
from prefect import Flow, Parameter
from prefect.schedules import Schedule
from prefect.schedules.clocks import CronClock

from iris.tasks import read_column, concatenate_columns, features_extraction, target_extraction, train

# Two clocks on the same hourly cron; they differ only in the column files
# they supply as the default for the `files` parameter.
sepal_schedule = CronClock(
    '0 * * * *',
    parameter_defaults={'files': ['col1.csv', 'col2.csv', 'target.csv']},
)  # Sepal DAG
petal_schedule = CronClock(
    '0 * * * *',
    parameter_defaults={'files': ['col3.csv', 'col4.csv', 'target.csv']},
)  # Petal DAG
schedule = Schedule(clocks=[sepal_schedule, petal_schedule])

with Flow(name='Training', schedule=schedule) as flow:
    # Default matches the sepal clock's file list
    files = Parameter('files', default=['col1.csv', 'col2.csv', 'target.csv'])

    # Read each file (mapped), merge the columns, then split into
    # features/target for training
    columns = read_column.map(files)
    dataset = concatenate_columns(columns)
    features = features_extraction(dataset)
    target = target_extraction(dataset)
    train(features, target)

if __name__ == '__main__':
    flow.register(project_name='airflow_prefect_contest')
Example #14
0
# print(f"Working Dir: {working_dir_path}")
# docker_flpth = os.path.join(working_dir_path, "Dockerfile")
# print(f"Docker flpth: {docker_flpth}")

# AWS settings: the ECR repository is named after the docker image
aws_ecr_repo_name = dckr_image_name
aws_region = "us-east-2"


############## Schedule when to run the script ##############
schedule = Schedule(
    # One event per day, anchored at 2020-04-22 17:30 Toronto time
    clocks=[
        clocks.IntervalClock(
            interval=timedelta(days=1),
            start_date=pendulum.datetime(2020, 4, 22, 17, 30, tz="America/Toronto"),
        )
    ],
    # Keep weekdays only
    filters=[filters.is_weekday],
    # Exclude 1 January through 31 January. TODO: Add TSX Holidays
    not_filters=[filters.between_dates(1, 1, 1, 31)],
)

#tsx_imb_fl.schedule = schedule

############## Storage ecr docker flow ##############
# Resolve the "docker_ecr_login" secret right away by calling the task's
# run() directly (this happens at import time, outside any flow).
dkr_ecr_scrt = PrefectSecret("docker_ecr_login").run()

# Run the secret's value as the shell command, again by calling run() directly.
# NOTE(review): presumably the secret holds the ECR login command — confirm.
get_ecr_auth_token = ShellTask(helper_script="cd ~")
ecr_auth_token = get_ecr_auth_token.run(command=dkr_ecr_scrt)

Example #15
0
from prefect import Flow, Parameter
from prefect.schedules import Schedule
from prefect.schedules.clocks import CronClock

# Two optional parameters. They are never attached to the flow below: the
# add_task calls further down are commented out.
a = Parameter('a', default=None, required=False)
b = Parameter('b', default=None, required=False)

# Two cron clocks (18:00 on weekday 6 and 12:00 on weekday 0), each supplying
# defaults for parameters 'a' and 'b'.
schedule = Schedule(clocks=[
    CronClock(' 0 18  *  *  6', parameter_defaults={
        'a': 'a',
        'b': 'b'
    }),
    CronClock(' 0 12  *  *  0', parameter_defaults={
        'a': 'a',
        'b': 'b'
    })
])

# The flow is created empty, with only the schedule attached.
flow = Flow(name='test flow', schedule=schedule)

# flow.add_task(a)
# flow.add_task(b)

flow.register(project_name="Demo")
Example #16
0
@task
def is_snowing_this_week(data):
    """
    Raise SKIP unless this week's forecast contains enough snow.

    Looks at every entry of ``data["list"]`` that has a "snow" key, reads
    its "3h" amount (0 when missing) and counts the entries whose amount
    is at least 1. When fewer than 8 entries qualify the task raises
    ``SKIP``; otherwise it finishes without returning a value.
    """
    snowy_periods = 0
    for forecast in data["list"]:
        if "snow" in forecast and forecast["snow"].get("3h", 0) >= 1:
            snowy_periods += 1

    if snowy_periods < 8:
        raise SKIP("There is not much snow in the forecast.")


# Slack message sent when the snow check does not skip
notification = SlackTask(
    webhook_secret="DAVID_SLACK_URL",
    message="There is snow in the forecast for this week - it might be time to hit the slopes!",
)


storage = Docker(image_name="flows", registry_url="joshmeek18")

# Cron "0 18 * * 1-5": 18:00 on weekdays 1-5; the clock starts at the
# current US/Pacific time.
with Flow(
    name="Snow Flow",
    schedule=Schedule(
        clocks=[
            CronClock("0 18 * * 1-5", start_date=pendulum.now(tz="US/Pacific")),
        ],
    ),
    storage=storage,
) as flow:
    forecast = pull_forecast(city=city, api_key=api_key)
    snow = is_snowing_this_week(forecast)
    # The notification carries no data from the check, only a dependency on it
    notification.set_upstream(snow)

flow.register(project_name="Snow Flow")
Example #17
0
        repo.remote("origin").push("develop")
    else:
        repo.remote("origin").push("master")


@task
def get_projects(config):
    """Return the full path of every configured repository.

    Reads ``basepath`` and the JSON-encoded ``repos`` list from the
    ``general`` section of ``config`` and joins each repository name onto
    the base path.
    """
    general = config["general"]
    root = general["basepath"]
    repo_names = json.loads(general["repos"])
    return [os.path.join(root, name) for name in repo_names]


# Cron "0 4 * * *": once a day at 04:00.
schedule = Schedule(clocks=[CronClock("0 4 * * *")])

with Flow("Git-Push", schedule=schedule) as flow:

    # Load config
    config = load_config()

    # Get projects to sync
    projects = get_projects(config)

    # Commit and push every project (one mapped task run per project path)
    commit_and_push.map(projects)

flow.register(project_name="Git-Sync")
Example #18
0
# docker_run_config = DockerRun(
#     env={"sample_key": "sample_value"},
#     labels=None,
# )
ecs_run_config = ECSRun(
    env={"sample_key": "sample_value"},
    labels=["fargate"],
    task_role_arn="arn:aws:iam::853106079885:role/ECSTaskS3ECRRole",
)

# One clock per Redshift table: same cron ("0 12 * * 1-5") and same start
# (current US/Pacific time), differing only in the table-name default.
schedule = Schedule(
    clocks=[
        CronClock(
            "0 12 * * 1-5",
            start_date=pendulum.now(tz="US/Pacific"),
            parameter_defaults={"Redshift Table Name": table_name},
        )
        for table_name in ("users", "events")
    ]
)


with Flow(
    "S3 to Redshift - ECS",
    storage=storage,
    schedule=schedule,
    run_config=ecs_run_config,
) as flow:
    # ----STAGE 1----
Example #19
0
@task(log_stdout=True)
def show_file():
    """Print the contents of ``/tmp/flow.me`` to stdout."""
    # Path.read_text() opens and closes the file itself. Using a Path as a
    # context manager did nothing useful, is deprecated, and fails on
    # Python 3.13+, so the `with` wrapper is dropped.
    print(Path("/tmp/flow.me").read_text())


# Three shell steps that append to /tmp/flow.me, followed by a task that
# prints the file.
with Flow("three_little_tasks_flow") as flow:
    t1 = shelltask(command="echo '====== start' >> /tmp/flow.me")
    t2 = shelltask(command="date >> /tmp/flow.me; sleep 3")
    t3 = shelltask(command="echo '====== stop' >> /tmp/flow.me")

    # No data flows between the tasks, so the order t1 -> t2 -> t3 -> show_file
    # is set with explicit downstream dependencies.
    t1.set_downstream(t2)
    t2.set_downstream(t3)
    t3.set_downstream(show_file)

if __name__ == "__main__":
    # The first CLI argument picks the action; with none given, do a single run
    cmd = sys.argv[1] if len(sys.argv) > 1 else "run"

    if cmd == "run":
        # One-off local run, no schedule
        flow.run()
    elif cmd == "schedule":
        # Local run driven by an every-minute cron clock
        flow.schedule = Schedule(clocks=[CronClock("* * * * *")])
        flow.run()
    elif cmd == "register":
        # Register the flow with the same every-minute schedule
        flow.schedule = Schedule(clocks=[CronClock("* * * * *")])
        r = flow.register(project_name="demo")
Example #20
0
@task
def say_hello():
    """Print a fixed greeting to stdout."""
    message = "hello world"
    print(message)


curr_schedule = Schedule(
    # A candidate event every minute, counted from 2019-01-01 New York time
    clocks=[
        schedules.clocks.IntervalClock(
            start_date=pendulum.datetime(2019, 1, 1, tz='America/New_York'),
            interval=timedelta(minutes=1),
        )
    ],
    # Weekdays only
    filters=[filters.is_weekday],
    # ...and only at 8:15, 9:30, 10:37 and 16:00; each entry is a
    # between_times window whose start and end are the same time
    or_filters=[
        filters.between_times(moment, moment)
        for moment in (
            pendulum.time(hour=8, minute=15),
            pendulum.time(hour=9, minute=30),
            pendulum.time(hour=10, minute=37),
            pendulum.time(hour=16),
        )
    ],
    # Never on 25 December
    not_filters=[filters.between_dates(12, 25, 12, 25)],
)

with Flow('Sounds alerts', schedule=curr_schedule) as flow:
    say_hello()

flow.run()
Example #21
0
    random_index = random.randint(0, 499)
    return quote_list[random_index]


@task
def post_to_slack(quote):
    """Print ``quote`` to stdout.

    The actual Slack webhook call is disabled; the lines that would
    perform it are kept below for reference.
    """
    # WEBHOOK = Secret("SLACK_WEBHOOK_URL").get()
    # r = requests.post(WEBHOOK, json={"text": quote})
    # r.raise_for_status()
    print(quote)


# Cron "0 8 * * 1-5": 08:00 on weekdays 1-5; the clock starts at the current
# US/Pacific time. The flow is packaged into a Docker image with its
# scraping dependencies.
with Flow(
    name="motivational-flow",
    schedule=Schedule(
        clocks=[
            CronClock("0 8 * * 1-5", start_date=pendulum.now(tz="US/Pacific")),
        ],
    ),
    storage=Docker(
        registry_url="joshmeek18",
        image_name="flows",
        python_dependencies=["bs4", "lxml", "requests"],
    ),
) as flow:
    # fetch -> parse quotes -> pick one -> post it
    content = get_content()
    quote_list = get_quote_list(content)
    random_quote = get_random_quote(quote_list)
    post_to_slack(random_quote)

# Run once immediately instead of waiting on the schedule
flow.run(run_on_schedule=False)
# flow.register(project_name="Motivation")
# print(datetime.fromtimestamp(pendulum.now(tz="US/Pacific").timestamp()))
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    if ats is not np.nan:
        tweet = TWEET_ATS.format(title=title,
                                 medium_link=medium_link,
                                 blog_link=blog_link,
                                 ats=ats.strip('"'),
                                 hashtags=hashtags)
    else:
        tweet = TWEET_NO_ATS.format(title=title,
                                    medium_link=medium_link,
                                    blog_link=blog_link,
                                    hashtags=hashtags)

    api.update_status(tweet)


# Tuesday at 8pm
# NOTE(review): cron "0 0 * * 3" is 00:00 on weekday 3 (Wednesday) —
# presumably Tuesday 8pm in the author's local timezone; confirm.
schedule = Schedule(clocks=[CronClock("0 0 * * 3")])

with Flow("Send Tweet", schedule=schedule) as flow:

    # Fetch the post details, then tweet them
    title, medium_link, blog_link, hashtags, ats = get_blob_info()

    send_tweet(title, medium_link, blog_link, hashtags, ats)

flow.register(project_name="Blog-Tweeter")
            payment_temp = payments[payment]
            payment_temp["loan_id"] = payment
            payment_list.append(payment_temp)

        return sorted(payment_list, key=itemgetter("payment"), reverse=True)


class LogResult(Task):
    """Task that writes a pretty-printed view of its input to the task logger."""

    def run(self, res):
        """Log ``res`` at INFO level.

        Args:
            res: any object; it is rendered with ``pprint.pformat``.

        Returns:
            Whatever ``logger.info`` returns (``None``).
        """
        # pprint.pprint() prints to stdout and returns None, so the logger
        # used to record the string "None". pformat() returns the formatted
        # text so that the data itself ends up in the log.
        return self.logger.info(pprint.pformat(res))


# One event per week, counted from the moment this module is loaded
# (datetime.utcnow() is evaluated once, here), filtered by a between_times
# window of 10:00 to 23:00.
schedule = Schedule(
    clocks=[
        IntervalClock(interval=timedelta(weeks=1),
                      start_date=datetime.utcnow())
    ],
    filters=[between_times(time(hour=10), time(hour=23))],
)
with Flow(name="Loan Payoff Reminder", schedule=schedule) as flow:
    budget = Parameter("budget", default=3000)
    phone_number = Parameter("phone_number", default="+15707306535")

    # Google Parameters
    scopes = Parameter(
        "SCOPES", default=["https://www.googleapis.com/auth/spreadsheets"])
    spreadsheet_id = Parameter(
        "SPREADHSEET_ID",
        default="1Wb5Anty3nvaa0jHY7NEYz7i6jyIOQ5Q9z2GLqmLjmPA")
    project_id = Parameter("project_id", default="flows-270323")
    auth_provider_x509_cert_url = Parameter(
Example #24
0
    print(f"\nHere's your data: {data}")

from prefect.schedules import Schedule
from prefect.schedules.clocks import IntervalClock

# Two hourly clocks, starting 5 and 15 seconds after this module is loaded;
# each supplies a different default for the "length" parameter.
clock1 = IntervalClock(
    start_date=datetime.now() + timedelta(seconds=5),
    interval=timedelta(hours=1),
    parameter_defaults={"length": 6}
)
clock2 = IntervalClock(
    start_date=datetime.now() + timedelta(seconds=15),
    interval=timedelta(hours=1),
    parameter_defaults={"length": 50}
)

schedule = Schedule(clocks=[clock1, clock2])

# NOTE(review): `length` is defined outside this excerpt — presumably a
# Parameter named "length"; confirm.
with Flow("Evolving ETL", schedule=schedule) as flow:
    # Branch for length == 6: a single mapped transform
    with case(length, 6):
        e = extract(length)
        t = transform.map(e)
        l = load(t)

    # Branch for length == 50: the mapped transform is applied twice
    with case(length, 50):
        e = extract(length)
        t = transform.map(e)
        t2 = transform.map(t)
        l = load(t2)

flow.run()
Example #25
0
from prefect import task, Flow, Task, Parameter
from prefect.schedules import clocks, Schedule

# Animal lists used as the `animals` parameter default of each clock
diurnal = ['rooster', 'dog']
nocturnal = ['owl', 'hampster']

# Clocks: minute 51 of every hour for the diurnal list, minute 53 for the
# nocturnal list
diurnal_clock = clocks.CronClock("51 * * * *",
                                 parameter_defaults={"animals": diurnal})
nocturnal_clock = clocks.CronClock("53 * * * *",
                                   parameter_defaults={"animals": nocturnal})

# the full schedule
schedule = Schedule(clocks=[diurnal_clock, nocturnal_clock])


@task
def wakeup(animals):
    """Print one "waking up" line for each entry of ``animals``."""
    for creature in animals:
        line = "Waking up animal %s" % creature
        print(line)


# Flow is common to both types, though with different schedules.
with Flow(name="wakuptime", schedule=schedule) as this_flow:
    # Defaults to an empty list; each clock supplies its own default value
    animals = Parameter("animals", default=[])
    wakeup(animals)

# will run on the schedule with varying parameter values
this_flow.register("Teste")
Example #26
0

@task(name="Relógio", log_stdout=True)
def imprimir_agendamento(alterado: bool = False):
    """Print how often (in minutes) this run is said to occur.

    Args:
        alterado: when truthy the message reports 4 minutes, otherwise 13.
    """
    # NOTE(review): the reported values (4 / 13) differ from the 3- and
    # 11-minute clock intervals used by this file's schedule — confirm
    # which is intended.
    minutos = 4 if alterado else 13
    print(f"Essa execução ocorre a cada {minutos} minutos")


# Default clock: every 11 minutes from 2021-05-27 19:30 Sao Paulo time,
# with no parameter overrides.
relogio_padrao = IntervalClock(
    start_date=pendulum.datetime(2021, 5, 27, 19, 30, tz="America/Sao_Paulo"),
    interval=timedelta(minutes=11),
)

# Alternate clock: every 3 minutes from the same start, setting the
# "Alterado" parameter to True for its runs.
relogio_alterado = IntervalClock(
    start_date=pendulum.datetime(2021, 5, 27, 19, 30, tz="America/Sao_Paulo"),
    interval=timedelta(minutes=3),
    parameter_defaults={"Alterado": True},
)

# Both clocks pass through the is_weekday filter and a between_times window
# of 19:00 to 23:00.
agendamento = Schedule(
    clocks=[relogio_padrao, relogio_alterado],
    filters=[is_weekday,
             between_times(pendulum.time(19), pendulum.time(23))],
)

with Flow("agendamento-avançado", schedule=agendamento) as flow:
    # Optional flag, False unless a clock overrides it
    alterado = Parameter("Alterado", default=False, required=False)
    imprimir_agendamento(alterado)

if __name__ == "__main__":
    flow.register(project_name="extras")
Example #27
0
        if not r.ok:
            raise ValueError(
                "Slack notification for {} failed".format(tracked_obj))

    return new_state


@task
def get_start_date():
    """Return the fixed reference date, 17 January 2018, built with ``pendulum.naive``."""
    reference = pendulum.naive(2018, 1, 17)
    return reference


@task(state_handlers=[test_slack])
def calculate_weeks_since(start_date):
    """Return the span between now and ``start_date``, expressed in weeks.

    "Now" is taken from ``pendulum.now()`` with its timezone stripped via
    ``naive()``; the result is whatever ``in_weeks()`` reports for the
    difference.
    """
    today = pendulum.now().naive()
    elapsed = today.diff(start_date)
    return elapsed.in_weeks()


# Cron "30 11 * * 1": 11:30 on weekday 1 (Monday); the clock starts at the
# current America/Toronto time.
with Flow(
        "weeks-since",
        schedule=Schedule(clocks=[
            CronClock("30 11 * * 1",
                      start_date=pendulum.now("America/Toronto"))
        ]),
) as flow:
    start_date = get_start_date()
    calculate_weeks_since(start_date)

# flow.run(run_on_schedule=False)
flow.register(project_name="Demo")
Example #28
0
        axis="columns")

    stocks_html = (stocks.style.applymap(color_gains_loss,
                                         subset=["Difference"]).format({
                                             "Bought At":
                                             "${:20,.2f}",
                                             "Current Price":
                                             "${:20,.2f}",
                                             "Difference":
                                             "${:20,.2f}"
                                         }).hide_index())

    return message.format(stocks_html.render())


# Cron "0 21 * * 5": 21:00 on weekday 5 (Friday).
schedule = Schedule(clocks=[CronClock("0 21 * * 5")])

# Email task with a fixed subject line.
email_task = EmailTask(subject="Weekly Holdings Update")

with Flow("Stock-API", schedule=schedule) as flow:

    # Load config
    config = load_config()

    # Load the stocks + the initial value
    # CSV SCHEMA: stock,initial_value
    stocks = load_stocks()

    # Split the stocks into even groups of 5 as the Vantage API
    # only allows 5 api calls per minute
    split_stocks = split_stocks(stocks["stock"], 5)