Exemple #1
0
def main(cmd):
    """Build the two NWP download flows and act on them according to *cmd*.

    Supported values of *cmd*:

    * ``"register"`` - register both flows, then attach an hourly cron
      schedule to each and register them again.
    * ``"trigger"`` - same as ``"register"``, followed by a manual trigger
      of each flow.
    * ``"run"`` - clear the schedule of the run-00 flow and run it in the
      current process.

    Any other value only builds the flow objects.
    """
    # The concurrency limit is written into `settings` between the two
    # calls, so it is only guaranteed to be present for the run-12 flow.
    flow_nwp_00 = create_flow_download(run=0, **settings)
    settings['max_concurrent_download'] = 5
    flow_nwp_12 = create_flow_download(run=12, **settings)
    flows = (flow_nwp_00, flow_nwp_12)

    if cmd in ("register", "trigger"):
        # First pass: make sure each flow is registered in the project.
        for flow in flows:
            print(flow.register(project_name=prefect_project_name))

        from prefect.schedules import Schedule
        from prefect.schedules.clocks import CronClock

        # Second pass: for demo purposes, schedule a download every hour
        # and register the scheduled version.
        for flow in flows:
            flow.schedule = Schedule(clocks=[CronClock("0 * * * *")])
            print(flow.register(project_name=prefect_project_name))

    if cmd == "trigger":
        # Kick off one run of each flow by hand.
        for flow in flows:
            trigger_prefect_flow(
                flow_name=flow.name,
                run_name=f"{flow.name}-manually_triggered",
            )
    elif cmd == "run":
        # Run a single download from the current process, unscheduled.
        flow_nwp_00.schedule = None
        flow_nwp_00.run()
Exemple #2
0
def main(register, run):
    """Build the "numerai-reports" flow, then optionally register and/or run it.

    Args:
        register: when truthy, the flow gets a cron schedule ("1 19 * * *")
            and is registered in the "numerai" project with the "docker"
            label. When falsy, the flow has no schedule.
        run: when truthy, the flow is run in the current process.
    """
    schedule = Schedule(clocks=[CronClock("1 19 * * *")]) if register else None
    gcs_result = GCSResult(bucket='uuazed-prefect')

    with Flow("numerai-reports", schedule, result=gcs_result) as flow:
        upload_to_gcs(fetch())

    # Local helper modules copied into the image under numerai_reports/.
    bundled_files = {
        os.path.abspath(module): f"numerai_reports/{module}"
        for module in ("data.py", "settings.py", "utils.py")
    }
    flow.storage = Docker(
        registry_url="gcr.io/numerai-171710",
        python_dependencies=['pandas', 'numerapi', 'pyarrow'],
        files=bundled_files,
        env_vars={"PYTHONPATH": "$PYTHONPATH:/"},
        secrets=["GCP_CREDENTIALS"],
    )

    if register:
        flow.register(project_name="numerai", labels=["docker"])
    if run:
        flow.run()
Exemple #3
0
def on_schedule(path_to_schedule):
    """Run the "Send Habit Reminders" flow on a schedule read from a CSV file.

    One ``CronClock`` is created per CSV row. Its cron string comes from
    ``chronstr_from_row(row)``, its ``row_index`` parameter default is the
    row's position, and its start date is "now" in the ``America/<city>``
    timezone taken from the row's ``time_zone_city`` column.

    Args:
        path_to_schedule: path passed to ``pandas.read_csv``.
    """
    schedule_df = pd.read_csv(path_to_schedule)

    clocks = []
    rows_with_city = zip(schedule_df.iterrows(), schedule_df.time_zone_city)
    for position, ((_, row), city) in enumerate(rows_with_city):
        cron_str = chronstr_from_row(row)
        clocks.append(
            CronClock(
                cron_str,
                parameter_defaults={"row_index": position},
                start_date=pendulum.now(f"America/{city}"),
            )
        )

    # NOTE(review): `row_index` is not defined in this function; presumably
    # it is a module-level prefect Parameter - confirm.
    with Flow("Send Habit Reminders", Schedule(clocks=clocks)) as flow:
        prefect_send_email(schedule_df, row_index)

    flow.run(parameters={'row_index': None})
                                                       {'class': 'char'})
    dialogue = []
    for item in convos:
        who = item.text.rstrip(': ').rstrip(' *').replace("'", "''")
        what = str(item.next_sibling).rstrip(' *').replace("'", "''")
        dialogue.append((who, what))
    return (title, dialogue)


with Flow(
        name="xfiles",
        schedule=Schedule(clocks=[
            # TODO: specify the schedule you want this to run, and with what parameters
            #  https://docs.prefect.io/core/concepts/schedules.html
            CronClock(cron='0 0 * * *',
                      parameter_defaults=dict(
                          url='http://www.insidethex.co.uk/')),
        ]),
        storage=Docker(
            # TODO: change to your docker registry:
            #  https://docs.prefect.io/cloud/recipes/configuring_storage.html
            registry_url='szelenka',
            # TODO: 'pin' the exact versions you used on your development machine
            python_dependencies=[
                'requests==2.23.0', 'beautifulsoup4==4.8.2',
                'sqlalchemy==1.3.15'
            ],
        ),
        # TODO: specify how you want to handle results
        #  https://docs.prefect.io/core/concepts/results.html#results-and-result-handlers
        result_handler=GCSResultHandler(bucket='prefect_results')) as flow:
Exemple #5
0
    # only an extreme poweruser should use this ^
)
# docker_run_config = DockerRun(
#     env={"sample_key": "sample_value"},
#     labels=None,
# )
# ECS run configuration: one environment variable, the "fargate" label and
# an explicit task role.
ecs_run_config = ECSRun(
    env={"sample_key": "sample_value"},
    labels=["fargate"],
    task_role_arn="arn:aws:iam::853106079885:role/ECSTaskS3ECRRole",
)
# One clock per Redshift table; both share the same cron expression and
# differ only in the "Redshift Table Name" parameter default.
schedule = Schedule(
    clocks=[
        CronClock(
            "0 12 * * 1-5",
            start_date=pendulum.now(tz="US/Pacific"),
            parameter_defaults={"Redshift Table Name": table_name},
        )
        for table_name in ("users", "events")
    ]
)


with Flow(
    "S3 to Redshift - ECS",
    storage=storage,
    schedule=schedule,
Exemple #6
0
    random_index = random.randint(0, 499)
    return quote_list[random_index]


@task
def post_to_slack(quote):
    """Print *quote* to stdout.

    The actual Slack webhook call is currently disabled (see the
    commented-out lines below), so nothing is sent over the network.
    """
    print(quote)
    # WEBHOOK = Secret("SLACK_WEBHOOK_URL").get()
    # r = requests.post(WEBHOOK, json={"text": quote})
    # r.raise_for_status()


# Flow definition: fetch content, extract the quote list, pick one quote
# and hand it to post_to_slack.
with Flow(
        "motivational-flow",
        # Cron "0 8 * * 1-5": minute 0 of hour 8, Monday through Friday.
        schedule=Schedule(clocks=[
            CronClock("0 8 * * 1-5", start_date=pendulum.now(tz="US/Pacific"))
        ], ),
        # Docker storage with the Python packages listed below installed.
        storage=Docker(
            registry_url="joshmeek18",
            image_name="flows",
            python_dependencies=["bs4", "lxml", "requests"],
        ),
) as flow:
    content = get_content()
    quote_list = get_quote_list(content)
    random_quote = get_random_quote(quote_list)
    post_to_slack(random_quote)

# Run once immediately instead of waiting for the schedule.
flow.run(run_on_schedule=False)
# flow.register(project_name="Motivation")
# print(datetime.fromtimestamp(pendulum.now(tz="US/Pacific").timestamp()))
        release_date=release_date,
        source_url=url
    )
    return data


with Flow(
        name="example-selenium",
        schedule=Schedule(
            clocks=[
                # TODO: specify the schedule you want this to run, and with what parameters
                #  https://docs.prefect.io/core/concepts/schedules.html
                CronClock(
                    cron='0 0 * * *',
                    parameter_defaults=dict(
                        home_page='https://www.metacritic.com/',
                        gaming_platform='Switch'
                    )
                ),
            ]
        ),
        storage=Docker(
            # TODO: change to your docker registry:
            #  https://docs.prefect.io/cloud/recipes/configuring_storage.html
            registry_url='szelenka',
            # TODO: need to specify a base Docker image which has the chromedriver dependencies already installed
            base_image='szelenka/python-selenium-chromium:3.7.4',
            # TODO: 'pin' the exact versions you used on your development machine
            python_dependencies=[
                'selenium==3.141.0',
                'sqlalchemy==1.3.15'
Exemple #8
0
@task(log_stdout=True)
def show_file():
    """Print the text content of ``/tmp/flow.me`` to stdout.

    The path is read directly with ``Path.read_text``. ``Path`` objects are
    not used as context managers here: that protocol did nothing since
    Python 3.9, was deprecated in 3.11 and was removed in 3.13.

    Raises:
        OSError: if the file cannot be read (e.g. it does not exist).
    """
    print(Path("/tmp/flow.me").read_text())


# Three shell tasks append to /tmp/flow.me; show_file prints the file last.
with Flow("three_little_tasks_flow") as flow:
    t1 = shelltask(command="echo '====== start' >> /tmp/flow.me")
    t2 = shelltask(command="date >> /tmp/flow.me; sleep 3")
    t3 = shelltask(command="echo '====== stop' >> /tmp/flow.me")

    # Chain the tasks explicitly: t1 -> t2 -> t3 -> show_file.
    t1.set_downstream(t2)
    t2.set_downstream(t3)
    t3.set_downstream(show_file)

if __name__ == "__main__":
    # The first command-line argument selects the action; default is "run".
    cmd = sys.argv[1] if len(sys.argv) > 1 else "run"

    if cmd == "run":
        # Single, unscheduled local run.
        flow.run()
    elif cmd == "schedule":
        # Local run driven by an every-minute cron schedule.
        flow.schedule = Schedule(clocks=[CronClock("* * * * *")])
        flow.run()
    elif cmd == "register":
        # Register the flow, scheduled every minute, in the "demo" project.
        flow.schedule = Schedule(clocks=[CronClock("* * * * *")])
        r = flow.register(project_name="demo")
Exemple #9
0
        repo.remote("origin").push("develop")
    else:
        repo.remote("origin").push("master")


@task
def get_projects(config):
    """Return the path of every configured repository.

    Reads ``basepath`` and ``repos`` (a JSON-encoded list of names) from the
    ``general`` section of *config* and joins each name onto the base path.
    """
    general = config["general"]
    root = general["basepath"]
    repo_names = json.loads(general["repos"])
    return [os.path.join(root, name) for name in repo_names]


# Cron "0 4 * * *": every day at minute 0 of hour 4.
schedule = Schedule(clocks=[CronClock("0 4 * * *")])

with Flow("Git-Push", schedule=schedule) as flow:

    # Load config
    config = load_config()

    # Get projects to sync
    projects = get_projects(config)

    # Commit and push every project (one mapped task run per project path)
    commit_and_push.map(projects)

# Register the flow in the "Git-Sync" project when this module is executed.
flow.register(project_name="Git-Sync")
Exemple #10
0
    job_config.skip_leading_rows = 1
    job_config.autodetect = True
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

    with open(filename, "rb") as source_file:
        job = client.load_table_from_file(source_file,
                                          table_ref,
                                          job_config=job_config)

    job.result()  # Waits for table load to complete.

    print("Loaded {} rows into {}:{}.".format(job.output_rows, dataset_id,
                                              table_id))


# Cron "00 1 * * *": once a day at minute 0 of hour 1.
daily_schedule = Schedule(clocks=[CronClock("00 1 * * *")])


@task
def create_final_table():
    global master_table_path, today_table_path

    death, confirm, recover = get_data()
    date_range = confirm.columns[4:]

    df_loc = confirm[[
        'Province/State',
        'Country/Region',
        'Lat',
        'Long',
    ]].reset_index().rename(columns={'index': 'loc_index'})
    api = tweepy.API(auth,
                     wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    if ats is not np.nan:
        tweet = TWEET_ATS.format(title=title,
                                 medium_link=medium_link,
                                 blog_link=blog_link,
                                 ats=ats.strip('"'),
                                 hashtags=hashtags)
    else:
        tweet = TWEET_NO_ATS.format(title=title,
                                    medium_link=medium_link,
                                    blog_link=blog_link,
                                    hashtags=hashtags)

    api.update_status(tweet)


# Cron "0 0 * * 3": 00:00 on day-of-week 3 (Wednesday).
# NOTE(review): the original comment said "Tuesday at 8pm"; presumably that
# is the same instant expressed in the author's local timezone - confirm.
schedule = Schedule(clocks=[CronClock("0 0 * * 3")])

with Flow("Send Tweet", schedule=schedule) as flow:

    # Fetch the pieces of the tweet, then send it.
    title, medium_link, blog_link, hashtags, ats = get_blob_info()

    send_tweet(title, medium_link, blog_link, hashtags, ats)

# Register the flow in the "Blog-Tweeter" project when this module is executed.
flow.register(project_name="Blog-Tweeter")
Exemple #12
0
from prefect import Flow, Parameter
from prefect.schedules import Schedule
from prefect.schedules.clocks import CronClock

from iris.tasks import read_column, concatenate_columns, features_extraction, target_extraction, train

# Two hourly clocks (cron '0 * * * *') that differ only in the default value
# they supply for the 'files' parameter. Despite the *_schedule names, both
# objects are clocks; the Schedule is built from them below.
sepal_schedule = CronClock(
    '0 * * * *',
    parameter_defaults=dict(
        files=['col1.csv', 'col2.csv', 'target.csv']))  # Sepal DAG
petal_schedule = CronClock(
    '0 * * * *',
    parameter_defaults=dict(
        files=['col3.csv', 'col4.csv', 'target.csv']))  # Petal DAG
schedule = Schedule(clocks=[sepal_schedule, petal_schedule])

with Flow('Training', schedule=schedule) as flow:
    # Default matches the sepal clock's file list.
    files = Parameter('files', default=['col1.csv', 'col2.csv', 'target.csv'])

    # Read each file as a column, merge them, then split into features and
    # target before training.
    columns = read_column.map(files)
    dataset = concatenate_columns(columns)
    features = features_extraction(dataset)
    target = target_extraction(dataset)
    train(features, target)

if __name__ == '__main__':
    flow.register(project_name='airflow_prefect_contest')
import logging
import os

from prefect import Flow, task
from prefect.schedules import Schedule
from prefect.schedules.clocks import CronClock

logging.basicConfig(datefmt="")

os.environ["TZ"] = "US/Eastern"


@task
def DoSpyStuff():
    """Return a fixed confirmation message."""
    return "🕵🏽  Spy stuff done 🕵🏽"


# One every-minute clock per agent; each clock carries its own label.
agent_1_clock = CronClock(cron="* * * * *", labels=["Derek Flint"])
agent_2_clock = CronClock(cron="* * * * *", labels=["Evenlyn Salt"])
agent_3_clock = CronClock(cron="* * * * *", labels=["George Smiley"])

schedule = Schedule(clocks=[
    agent_1_clock,
    agent_2_clock,
    agent_3_clock,
])

# The schedule was previously built but never handed to the flow, so the
# labelled clocks had no effect. Attach it so the flow actually uses them.
with Flow("Mission: Possible (with labels)", schedule=schedule) as flow:
    DoSpyStuff()

flow.run()
Exemple #14
0
    return {'table_name':'daily_covid', 'table_data':filt_df}

@task
def cleanup_files():
    """Delete the downloaded source files so the run directory stays clean.

    Every value of ``DATASOURCE_JHU_GITHUB`` that exists on disk is removed
    and logged; missing files are silently skipped.
    """
    logger = prefect.context.get("logger")
    for fn in DATASOURCE_JHU_GITHUB:
        local_file = DATASOURCE_JHU_GITHUB[fn]
        if not path.exists(local_file):
            continue
        os.remove(local_file)
        logger.info(f'Removed:{local_file}')
    logger.info('Flow completed')

# Cron "30 1,2 * * *": every day at 01:30 and 02:30.
# NOTE(review): the original comment said "4pm and 5pm daily", which does not
# match this cron expression - confirm the intended times.
daily_schedule = Schedule(clocks=[CronClock("30 1,2 * * *")])

# Set up a prefect flow and run it on a schedule
with Flow('COVID 19 flow', schedule=daily_schedule) as flow:

    # extract tasks
    #daily_covid_us_states_data = extract_us_covid19sheets_data()
    file_dfs = extract_gh_global_covid_ts_data()
    
    # transform
    daily_covid_data = transform_daily_covid_data(file_dfs)
    
    # load - add a load task and include it here
    # (daily_covid_data is not consumed by any task yet)
    cleanup_files()

flow.run()
Exemple #15
0
from prefect import Flow, Parameter
from prefect.schedules import Schedule
from prefect.schedules.clocks import CronClock

# Two optional parameters, both defaulting to None.
a = Parameter('a', default=None, required=False)
b = Parameter('b', default=None, required=False)

# Two weekly clocks; each supplies the same defaults for 'a' and 'b'.
# Cron day-of-week 6 is Saturday (hour 18), 0 is Sunday (hour 12).
schedule = Schedule(clocks=[
    CronClock(' 0 18  *  *  6', parameter_defaults={
        'a': 'a',
        'b': 'b'
    }),
    CronClock(' 0 12  *  *  0', parameter_defaults={
        'a': 'a',
        'b': 'b'
    })
])

flow = Flow(name='test flow', schedule=schedule)

# NOTE(review): with these two lines commented out, the parameters are never
# added to the flow even though the clocks provide defaults for them -
# confirm whether that is intentional.
# flow.add_task(a)
# flow.add_task(b)

flow.register(project_name="Demo")
Exemple #16
0
        if not r.ok:
            raise ValueError(
                "Slack notification for {} failed".format(tracked_obj))

    return new_state


@task
def get_start_date():
    """Return the fixed reference date 2018-01-17 as a naive datetime."""
    return pendulum.naive(2018, 1, 17)


@task(state_handlers=[test_slack])
def calculate_weeks_since(start_date):
    """Return the number of whole weeks between now and *start_date*.

    "Now" is taken from ``pendulum.now()`` with its timezone stripped, so it
    is compared against *start_date* as a naive datetime.
    """
    today = pendulum.now().naive()
    elapsed = today.diff(start_date)
    return elapsed.in_weeks()


# Flow definition: compute the weeks elapsed since the fixed start date.
with Flow(
        "weeks-since",
        # Cron "30 11 * * 1": 11:30 every Monday.
        schedule=Schedule(clocks=[
            CronClock("30 11 * * 1",
                      start_date=pendulum.now("America/Toronto"))
        ]),
) as flow:
    start_date = get_start_date()
    calculate_weeks_since(start_date)

# flow.run(run_on_schedule=False)
# Register the flow in the "Demo" project when this module is executed.
flow.register(project_name="Demo")
Exemple #17
0
        axis="columns")

    stocks_html = (stocks.style.applymap(color_gains_loss,
                                         subset=["Difference"]).format({
                                             "Bought At":
                                             "${:20,.2f}",
                                             "Current Price":
                                             "${:20,.2f}",
                                             "Difference":
                                             "${:20,.2f}"
                                         }).hide_index())

    return message.format(stocks_html.render())


schedule = Schedule(clocks=[CronClock("0 21 * * 5")])

email_task = EmailTask(subject="Weekly Holdings Update")

with Flow("Stock-API", schedule=schedule) as flow:

    # Load config
    config = load_config()

    # Load the stocks + the initial value
    # CSV SCHEMA: stock,initial_value
    stocks = load_stocks()

    # Split the stocks into even groups of 5 as the Vantage API
    # only allows 5 api calls per minute
    split_stocks = split_stocks(stocks["stock"], 5)