def main(cmd):
    # Create the prefect flow objects with some configuration
    flow_nwp_00 = create_flow_download(run=0, **settings)
    settings['max_concurrent_download'] = 5
    flow_nwp_12 = create_flow_download(run=12, **settings)

    if cmd in ("register", "trigger"):
        # Ensure the flows are registered in the prefect server.
        # For demo purposes, schedule a download each hour.
        from prefect.schedules import Schedule
        from prefect.schedules.clocks import CronClock

        for flow in (flow_nwp_00, flow_nwp_12):
            flow.schedule = Schedule(clocks=[CronClock("0 * * * *")])
            r = flow.register(project_name=prefect_project_name)
            print(r)

    if cmd == "trigger":
        # Trigger the flows manually
        for flow in (flow_nwp_00, flow_nwp_12):
            trigger_prefect_flow(
                flow_name=flow.name,
                run_name=f"{flow.name}-manually_triggered",
            )

    if cmd == "run":
        # Run a download from the current process
        flow_nwp_00.schedule = None
        flow_nwp_00.run()
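# `trigger_prefect_flow` is not shown above. A minimal sketch of what it might
# look like, assuming Prefect 1.x and its built-in `create_flow_run` task;
# the helper name and arguments mirror the call above, the body is an assumption:
from prefect.tasks.prefect import create_flow_run


def trigger_prefect_flow(flow_name: str, run_name: str):
    # Calling .run() executes the task immediately in the current process;
    # it asks the Prefect backend to create a run of the registered flow.
    flow_run_id = create_flow_run.run(
        flow_name=flow_name,
        project_name=prefect_project_name,  # assumed module-level setting, as above
        run_name=run_name,
    )
    print(f"Created flow run {flow_run_id}")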
def main(register, run):
    if register:
        schedule = Schedule(clocks=[CronClock("1 19 * * *")])
    else:
        schedule = None

    result = GCSResult(bucket='uuazed-prefect')
    with Flow("numerai-reports", schedule, result=result) as flow:
        filenames = fetch()
        upload_to_gcs(filenames)

    flow.storage = Docker(
        registry_url="gcr.io/numerai-171710",
        python_dependencies=['pandas', 'numerapi', 'pyarrow'],
        files={
            os.path.abspath("data.py"): "numerai_reports/data.py",
            os.path.abspath("settings.py"): "numerai_reports/settings.py",
            os.path.abspath("utils.py"): "numerai_reports/utils.py",
        },
        env_vars={"PYTHONPATH": "$PYTHONPATH:/"},
        secrets=["GCP_CREDENTIALS"])

    if register:
        flow.register(project_name="numerai", labels=["docker"])
    if run:
        flow.run()
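# `fetch` and `upload_to_gcs` are defined elsewhere. A minimal sketch of the
# upload task, assuming google-cloud-storage and the same bucket used for
# results above; the task body and defaults are assumptions, not the author's code:
import os

from google.cloud import storage
from prefect import task


@task
def upload_to_gcs(filenames, bucket_name="uuazed-prefect"):
    client = storage.Client()
    bucket = client.bucket(bucket_name)
    for filename in filenames:
        # Upload each local file under its basename in the bucket
        bucket.blob(os.path.basename(filename)).upload_from_filename(filename)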
import pandas as pd
import pendulum
from prefect import Flow, Parameter
from prefect.schedules import Schedule
from prefect.schedules.clocks import CronClock


def on_schedule(path_to_schedule):
    schedule_df = pd.read_csv(path_to_schedule)
    clocks = [
        CronClock(
            cron_str,
            parameter_defaults={"row_index": i},
            start_date=pendulum.now(f"America/{city}"),
        )
        for i, (cron_str, city) in enumerate(
            zip(
                map(lambda x: chronstr_from_row(x[1]), schedule_df.iterrows()),
                schedule_df.time_zone_city,
            ))
    ]
    schedule = Schedule(clocks=clocks)

    with Flow("Send Habit Reminders", schedule) as flow:
        # The parameter each clock overrides via parameter_defaults above
        row_index = Parameter("row_index", default=None)
        prefect_send_email(schedule_df, row_index)

    flow.run(parameters={'row_index': None})
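# `chronstr_from_row` builds a cron string from one schedule row. A hypothetical
# sketch, assuming the CSV carries `minute` and `hour` columns; only the function
# name comes from the snippet above, the column names are assumptions:
def chronstr_from_row(row):
    # e.g. minute=30, hour=9 -> "30 9 * * *" (daily at 9:30 local time)
    return f"{int(row.minute)} {int(row.hour)} * * *"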
                          {'class': 'char'})
    dialogue = []
    for item in convos:
        who = item.text.rstrip(': ').rstrip(' *').replace("'", "''")
        what = str(item.next_sibling).rstrip(' *').replace("'", "''")
        dialogue.append((who, what))
    return (title, dialogue)


with Flow(
        name="xfiles",
        schedule=Schedule(clocks=[
            # TODO: specify the schedule you want this to run, and with what parameters
            # https://docs.prefect.io/core/concepts/schedules.html
            CronClock(cron='0 0 * * *',
                      parameter_defaults=dict(url='http://www.insidethex.co.uk/')),
        ]),
        storage=Docker(
            # TODO: change to your docker registry:
            # https://docs.prefect.io/cloud/recipes/configuring_storage.html
            registry_url='szelenka',
            # TODO: 'pin' the exact versions you used on your development machine
            python_dependencies=[
                'requests==2.23.0', 'beautifulsoup4==4.8.2', 'sqlalchemy==1.3.15'
            ],
        ),
        # TODO: specify how you want to handle results
        # https://docs.prefect.io/core/concepts/results.html#results-and-result-handlers
        result_handler=GCSResultHandler(bucket='prefect_results')) as flow:
    # only an extreme poweruser should use this ^
)

# docker_run_config = DockerRun(
#     env={"sample_key": "sample_value"},
#     labels=None,
# )

ecs_run_config = ECSRun(
    env={"sample_key": "sample_value"},
    labels=["fargate"],
    task_role_arn="arn:aws:iam::853106079885:role/ECSTaskS3ECRRole",
)

schedule = Schedule(
    clocks=[
        CronClock(
            "0 12 * * 1-5",
            start_date=pendulum.now(tz="US/Pacific"),
            parameter_defaults={"Redshift Table Name": "users"},
        ),
        CronClock(
            "0 12 * * 1-5",
            start_date=pendulum.now(tz="US/Pacific"),
            parameter_defaults={"Redshift Table Name": "events"},
        ),
    ]
)

with Flow(
    "S3 to Redshift - ECS",
    storage=storage,
    schedule=schedule,
    random_index = random.randint(0, 499)
    return quote_list[random_index]


@task
def post_to_slack(quote):
    print(quote)
    # WEBHOOK = Secret("SLACK_WEBHOOK_URL").get()
    # r = requests.post(WEBHOOK, json={"text": quote})
    # r.raise_for_status()


with Flow(
    "motivational-flow",
    schedule=Schedule(clocks=[
        CronClock("0 8 * * 1-5", start_date=pendulum.now(tz="US/Pacific"))
    ]),
    storage=Docker(
        registry_url="joshmeek18",
        image_name="flows",
        python_dependencies=["bs4", "lxml", "requests"],
    ),
) as flow:
    content = get_content()
    quote_list = get_quote_list(content)
    random_quote = get_random_quote(quote_list)
    post_to_slack(random_quote)

flow.run(run_on_schedule=False)
# flow.register(project_name="Motivation")
# print(datetime.fromtimestamp(pendulum.now(tz="US/Pacific").timestamp()))
        release_date=release_date,
        source_url=url
    )
    return data


with Flow(
        name="example-selenium",
        schedule=Schedule(
            clocks=[
                # TODO: specify the schedule you want this to run, and with what parameters
                # https://docs.prefect.io/core/concepts/schedules.html
                CronClock(
                    cron='0 0 * * *',
                    parameter_defaults=dict(
                        home_page='https://www.metacritic.com/',
                        gaming_platform='Switch'
                    )
                ),
            ]
        ),
        storage=Docker(
            # TODO: change to your docker registry:
            # https://docs.prefect.io/cloud/recipes/configuring_storage.html
            registry_url='szelenka',
            # TODO: need to specify a base Docker image which has the chromedriver dependencies already installed
            base_image='szelenka/python-selenium-chromium:3.7.4',
            # TODO: 'pin' the exact versions you used on your development machine
            python_dependencies=[
                'selenium==3.141.0',
                'sqlalchemy==1.3.15'
@task(log_stdout=True)
def show_file():
    # Path.read_text opens and closes the file itself; no context manager needed
    print(Path("/tmp/flow.me").read_text())


with Flow("three_little_tasks_flow") as flow:
    t1 = shelltask(command="echo '====== start' >> /tmp/flow.me")
    t2 = shelltask(command="date >> /tmp/flow.me; sleep 3")
    t3 = shelltask(command="echo '====== stop' >> /tmp/flow.me")
    t1.set_downstream(t2)
    t2.set_downstream(t3)
    t3.set_downstream(show_file)

if __name__ == "__main__":
    cmd = "run"
    if len(sys.argv) > 1:
        cmd = sys.argv[1]
    if cmd == "run":
        flow.run()
    if cmd == "schedule":
        flow.schedule = Schedule(clocks=[CronClock("* * * * *")])
        flow.run()
    if cmd == "register":
        flow.schedule = Schedule(clocks=[CronClock("* * * * *")])
        r = flow.register(project_name="demo")
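# `shelltask` and the imports are not shown above. Presumably it is an instance
# of Prefect's built-in ShellTask; a minimal sketch of the missing setup:
import sys
from pathlib import Path

from prefect import Flow, task
from prefect.schedules import Schedule
from prefect.schedules.clocks import CronClock
from prefect.tasks.shell import ShellTask

# One ShellTask instance can be called repeatedly with different commands
shelltask = ShellTask()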
repo.remote("origin").push("develop") else: repo.remote("origin").push("master") @task def get_projects(config): base_path = config["general"]["basepath"] projects = json.loads(config["general"]["repos"]) project_paths = [os.path.join(base_path, project) for project in projects] return project_paths schedule = Schedule(clocks=[CronClock("0 4 * * *")]) with Flow("Git-Push", schedule=schedule) as flow: # Load config config = load_config() # Get projects to sync projects = get_projects(config) # Commit and push every project commit_and_push.map(projects) flow.register(project_name="Git-Sync")
    job_config.skip_leading_rows = 1
    job_config.autodetect = True
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

    with open(filename, "rb") as source_file:
        job = client.load_table_from_file(source_file, table_ref, job_config=job_config)

    job.result()  # Waits for table load to complete.
    print("Loaded {} rows into {}:{}.".format(job.output_rows, dataset_id, table_id))


daily_schedule = Schedule(clocks=[CronClock("00 1 * * *")])


@task
def create_final_table():
    global master_table_path, today_table_path
    death, confirm, recover = get_data()
    date_range = confirm.columns[4:]
    df_loc = confirm[[
        'Province/State',
        'Country/Region',
        'Lat',
        'Long',
    ]].reset_index().rename(columns={'index': 'loc_index'})
    api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
    if ats is not np.nan:
        tweet = TWEET_ATS.format(title=title,
                                 medium_link=medium_link,
                                 blog_link=blog_link,
                                 ats=ats.strip('"'),
                                 hashtags=hashtags)
    else:
        tweet = TWEET_NO_ATS.format(title=title,
                                    medium_link=medium_link,
                                    blog_link=blog_link,
                                    hashtags=hashtags)
    api.update_status(tweet)


# Tuesday at 8pm ET (Wednesday 00:00 UTC)
schedule = Schedule(clocks=[CronClock("0 0 * * 3")])

with Flow("Send Tweet", schedule=schedule) as flow:
    title, medium_link, blog_link, hashtags, ats = get_blob_info()
    send_tweet(title, medium_link, blog_link, hashtags, ats)

flow.register(project_name="Blog-Tweeter")
from prefect import Flow, Parameter
from prefect.schedules import Schedule
from prefect.schedules.clocks import CronClock

from iris.tasks import read_column, concatenate_columns, features_extraction, target_extraction, train

# Sepal DAG
sepal_schedule = CronClock(
    '0 * * * *',
    parameter_defaults=dict(files=['col1.csv', 'col2.csv', 'target.csv']))

# Petal DAG
petal_schedule = CronClock(
    '0 * * * *',
    parameter_defaults=dict(files=['col3.csv', 'col4.csv', 'target.csv']))

schedule = Schedule(clocks=[sepal_schedule, petal_schedule])

with Flow('Training', schedule=schedule) as flow:
    files = Parameter('files', default=['col1.csv', 'col2.csv', 'target.csv'])
    columns = read_column.map(files)
    dataset = concatenate_columns(columns)
    features = features_extraction(dataset)
    target = target_extraction(dataset)
    train(features, target)

if __name__ == '__main__':
    flow.register(project_name='airflow_prefect_contest')
import logging
import os

from prefect import Flow, task
from prefect.schedules import Schedule
from prefect.schedules.clocks import CronClock

logging.basicConfig(datefmt="")
os.environ["TZ"] = "US/Eastern"


@task
def DoSpyStuff():
    return "🕵🏽 Spy stuff done 🕵🏽"


agent_1_clock = CronClock(cron="* * * * *", labels=["Derek Flint"])
agent_2_clock = CronClock(cron="* * * * *", labels=["Evelyn Salt"])
agent_3_clock = CronClock(cron="* * * * *", labels=["George Smiley"])

schedule = Schedule(clocks=[
    agent_1_clock,
    agent_2_clock,
    agent_3_clock,
])

# The schedule must be attached to the flow for the clock labels to matter
with Flow("Mission: Possible (with labels)", schedule=schedule) as flow:
    DoSpyStuff()

flow.run(run_on_schedule=False)
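# Clock labels only take effect once the flow is registered with a backend:
# a run created by `agent_1_clock` will only be claimed by an agent carrying
# the "Derek Flint" label. A minimal sketch of starting such an agent
# in-process (registration and project setup not shown):
from prefect.agent.local import LocalAgent

# Blocks and polls the backend for runs whose labels match
LocalAgent(labels=["Derek Flint"]).start()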
    return {'table_name': 'daily_covid', 'table_data': filt_df}


@task
def cleanup_files():
    """Remove the source files to avoid cluttering the run directory"""
    logger = prefect.context.get("logger")
    for fn in DATASOURCE_JHU_GITHUB:
        if path.exists(DATASOURCE_JHU_GITHUB[fn]):
            os.remove(DATASOURCE_JHU_GITHUB[fn])
            logger.info(f'Removed: {DATASOURCE_JHU_GITHUB[fn]}')
    logger.info('Flow completed')


# Run twice daily, at 01:30 and 02:30 UTC
daily_schedule = Schedule(clocks=[CronClock("30 1,2 * * *")])

# Set up a prefect flow and run it on a schedule
with Flow('COVID 19 flow', schedule=daily_schedule) as flow:
    # Extract tasks
    # daily_covid_us_states_data = extract_us_covid19sheets_data()
    file_dfs = extract_gh_global_covid_ts_data()
    # Transform
    daily_covid_data = transform_daily_covid_data(file_dfs)
    # Load - add a load task and include it here
    cleanup_files()

flow.run()
from prefect import Flow, Parameter
from prefect.schedules import Schedule
from prefect.schedules.clocks import CronClock

a = Parameter('a', default=None, required=False)
b = Parameter('b', default=None, required=False)

schedule = Schedule(clocks=[
    CronClock('0 18 * * 6', parameter_defaults={
        'a': 'a',
        'b': 'b'
    }),
    CronClock('0 12 * * 0', parameter_defaults={
        'a': 'a',
        'b': 'b'
    })
])

flow = Flow(name='test flow', schedule=schedule)
# The parameters must belong to the flow for the clocks' defaults to apply
flow.add_task(a)
flow.add_task(b)
flow.register(project_name="Demo")
    if not r.ok:
        raise ValueError(
            "Slack notification for {} failed".format(tracked_obj))
    return new_state


@task
def get_start_date():
    return pendulum.naive(2018, 1, 17)


@task(state_handlers=[test_slack])
def calculate_weeks_since(start_date):
    current_date = pendulum.now().naive()
    return current_date.diff(start_date).in_weeks()


with Flow(
    "weeks-since",
    schedule=Schedule(clocks=[
        CronClock("30 11 * * 1", start_date=pendulum.now("America/Toronto"))
    ]),
) as flow:
    start_date = get_start_date()
    calculate_weeks_since(start_date)

# flow.run(run_on_schedule=False)
flow.register(project_name="Demo")
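# The head of `test_slack` is cut off above. Prefect 1.x state handlers take
# (tracked_obj, old_state, new_state) and return the state to use; a plausible
# reconstruction of the full handler, assuming a Slack webhook URL secret:
import requests
from prefect.client import Secret


def test_slack(tracked_obj, old_state, new_state):
    if new_state.is_finished():
        url = Secret("SLACK_WEBHOOK_URL").get()
        # Post the state change to Slack and fail loudly if the post fails
        r = requests.post(url, json={"text": f"{tracked_obj} entered {new_state}"})
        if not r.ok:
            raise ValueError(
                "Slack notification for {} failed".format(tracked_obj))
    return new_state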
axis="columns") stocks_html = (stocks.style.applymap(color_gains_loss, subset=["Difference"]).format({ "Bought At": "${:20,.2f}", "Current Price": "${:20,.2f}", "Difference": "${:20,.2f}" }).hide_index()) return message.format(stocks_html.render()) schedule = Schedule(clocks=[CronClock("0 21 * * 5")]) email_task = EmailTask(subject="Weekly Holdings Update") with Flow("Stock-API", schedule=schedule) as flow: # Load config config = load_config() # Load the stocks + the initial value # CSV SCHEMA: stock,initial_value stocks = load_stocks() # Split the stocks into even groups of 5 as the Vantage API # only allows 5 api calls per minute split_stocks = split_stocks(stocks["stock"], 5)