def main(cmd):
    """Build the two NWP download flows and act on them as directed by ``cmd``.

    Args:
        cmd: ``"register"`` registers both flows, ``"trigger"`` registers
            them and then triggers a manual run of each, ``"run"`` executes
            the run-00 flow in the current process. Any other value only
            builds the flow objects.
    """
    # Create the prefect flow objects with some configuration.
    flow_nwp_00 = create_flow_download(run=0, **settings)
    # This mutates the ``settings`` mapping in place, so the new limit is
    # what the run-12 flow (and any later reader of ``settings``) sees.
    settings['max_concurrent_download'] = 5
    flow_nwp_12 = create_flow_download(run=12, **settings)
    both_flows = (flow_nwp_00, flow_nwp_12)

    if cmd in ("register", "trigger"):
        # Ensure the flows are registered in the prefect server.
        for flow in both_flows:
            print(flow.register(project_name=prefect_project_name))

        # Imported once here instead of on every loop iteration.
        from prefect.schedules import Schedule
        from prefect.schedules.clocks import CronClock

        # For demo purposes, schedule a download each hour and register again.
        # NOTE(review): every flow is therefore registered twice per call --
        # confirm the first, schedule-less registration is still wanted.
        for flow in both_flows:
            flow.schedule = Schedule(clocks=[CronClock("0 * * * *")])
            print(flow.register(project_name=prefect_project_name))

    if cmd == "trigger":
        # Trigger the flows manually.
        for flow in both_flows:
            trigger_prefect_flow(
                flow_name=flow.name,
                run_name=f"{flow.name}-manually_triggered",
            )

    if cmd == "run":
        # Run a single download from the current process, without a schedule.
        flow_nwp_00.schedule = None
        flow_nwp_00.run()
def main(register, run):
    """Build the ``numerai-reports`` flow, then optionally register and/or run it.

    Args:
        register: When truthy, the flow gets a cron schedule (``1 19 * * *``)
            and is registered in the ``numerai`` project with the ``docker``
            label. When falsy, the flow has no schedule.
        run: When truthy, the flow is executed in the current process.
    """
    # Only a flow that is going to be registered needs a schedule.
    schedule = Schedule(clocks=[CronClock("1 19 * * *")]) if register else None
    gcs_result = GCSResult(bucket='uuazed-prefect')

    with Flow("numerai-reports", schedule, result=gcs_result) as flow:
        upload_to_gcs(fetch())

    # Local helper modules that are copied into the Docker image.
    bundled_files = {
        os.path.abspath("data.py"): "numerai_reports/data.py",
        os.path.abspath("settings.py"): "numerai_reports/settings.py",
        os.path.abspath("utils.py"): "numerai_reports/utils.py",
    }
    flow.storage = Docker(
        registry_url="gcr.io/numerai-171710",
        python_dependencies=['pandas', 'numerapi', 'pyarrow'],
        files=bundled_files,
        env_vars={"PYTHONPATH": "$PYTHONPATH:/"},
        secrets=["GCP_CREDENTIALS"],
    )

    if register:
        flow.register(project_name="numerai", labels=["docker"])
    if run:
        flow.run()
def init_schedule(start, stop, dates: Tuple, **interval: int) -> Schedule:
    """Create a single-clock interval schedule with start/stop parameter defaults.

    Args:
        start: Object whose ``name`` keys the start-date parameter default.
        stop: Object whose ``name`` keys the stop-date parameter default.
        dates: Five-element tuple. Element 0 is the clock's start date,
            element 3 the default for ``stop`` and element 4 the default for
            ``start``; elements 1 and 2 are ignored.
        **interval: Keyword arguments forwarded to ``timedelta`` to define
            the clock interval.

    Returns:
        A ``Schedule`` holding one ``IntervalClock``.
    """
    first_run, _, _, stop_default, start_default = dates
    period = timedelta(**interval)
    defaults = {start.name: start_default, stop.name: stop_default}
    interval_clock = IntervalClock(
        start_date=first_run,
        interval=period,
        parameter_defaults=defaults,
    )
    return Schedule(clocks=[interval_clock])
def on_schedule(path_to_schedule):
    """Run the habit-reminder flow on a schedule described by a CSV file.

    One ``CronClock`` is created per CSV row: its cron string comes from
    ``chronstr_from_row``, its ``row_index`` parameter default is the row's
    position, and its start date is "now" in the ``America/<city>`` zone
    named by the row's ``time_zone_city`` column.

    Args:
        path_to_schedule: Path of the CSV file, as accepted by ``pd.read_csv``.
    """
    schedule_df = pd.read_csv(path_to_schedule)
    cron_strings = [chronstr_from_row(row) for _, row in schedule_df.iterrows()]

    reminder_clocks = []
    for position, (cron_str, city) in enumerate(
        zip(cron_strings, schedule_df.time_zone_city)
    ):
        reminder_clocks.append(
            CronClock(
                cron_str,
                parameter_defaults={"row_index": position},
                start_date=pendulum.now(f"America/{city}"),
            )
        )
    schedule = Schedule(clocks=reminder_clocks)

    with Flow("Send Habit Reminders", schedule) as flow:
        # NOTE(review): ``row_index`` is not defined in this function; it is
        # presumably a module-level Parameter -- confirm.
        prefect_send_email(schedule_df, row_index)

    flow.run(parameters={'row_index': None})
from datetime import timedelta

from prefect import Flow
from prefect.schedules import Schedule
from prefect.schedules.clocks import IntervalClock

# A task-less flow used to exercise interval scheduling: one clock that
# fires every 65 seconds.
schedule = Schedule(
    clocks=[
        IntervalClock(interval=timedelta(seconds=65)),
    ]
)

my_flow = Flow("Interval Test", schedule=schedule)

# Registration happens at import time of this script.
my_flow.register(project_name="experimental")
job_config.skip_leading_rows = 1 job_config.autodetect = True job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE with open(filename, "rb") as source_file: job = client.load_table_from_file(source_file, table_ref, job_config=job_config) job.result() # Waits for table load to complete. print("Loaded {} rows into {}:{}.".format(job.output_rows, dataset_id, table_id)) daily_schedule = Schedule(clocks=[CronClock("00 1 * * *")]) @task def create_final_table(): global master_table_path, today_table_path death, confirm, recover = get_data() date_range = confirm.columns[4:] df_loc = confirm[[ 'Province/State', 'Country/Region', 'Lat', 'Long', ]].reset_index().rename(columns={'index': 'loc_index'})
}, }, { "type": "section", "text": { "type": "mrkdwn", "text": f"See all the new stargazers <https://github.com/{owner}/{repository}/stargazers|here>", }, }, ] } schedule = Schedule(clocks=[IntervalClock(timedelta(minutes=5))], filters=[is_weekday]) with Flow("Stargazers", schedule=schedule) as flow: """ Tasks: Repository [Parameter] Owner [Parameter] GetStars ShouldNotify NotificationMessage Notify [SlackTask] """ repository = Parameter("repository", default="prefect") owner = Parameter("owner", default="PrefectHQ") stars = GetStars(name="Get Stars", max_retries=2,
convos = episode.find_all('b') or episode.find_all('span', {'class': 'char'}) dialogue = [] for item in convos: who = item.text.rstrip(': ').rstrip(' *').replace("'", "''") what = str(item.next_sibling).rstrip(' *').replace("'", "''") dialogue.append((who, what)) return (title, dialogue) with Flow( name="xfiles", schedule=Schedule(clocks=[ # TODO: specify the schedule you want this to run, and with what parameters # https://docs.prefect.io/core/concepts/schedules.html CronClock(cron='0 0 * * *', parameter_defaults=dict( url='http://www.insidethex.co.uk/')), ]), storage=Docker( # TODO: change to your docker registry: # https://docs.prefect.io/cloud/recipes/configuring_storage.html registry_url='szelenka', # TODO: 'pin' the exact versions you used on your development machine python_dependencies=[ 'requests==2.23.0', 'beautifulsoup4==4.8.2', 'sqlalchemy==1.3.15' ], ), # TODO: specify how you want to handle results # https://docs.prefect.io/core/concepts/results.html#results-and-result-handlers
import prefect
from prefect import task, Flow, Parameter
from prefect.schedules import clocks, Schedule
from prefect.environments.storage import GitHub


@task
def hello_world():
    """Print a greeting; the only task of the flow below."""
    print("Hello, World!")


# One clock driven by the cron expression "0 0 * * *".
clock = clocks.CronClock(cron="0 0 * * *")
schedule = Schedule(clocks=[clock])

with Flow("Star GitHub Repositories", schedule=schedule) as flow:
    hello_world()

# The flow's code is stored in a GitHub repository.
flow.storage = GitHub(
    repo="znicholasbrown/star-repos",
    path="app.py",
    # Change this to your own GitHub auth token secret
    secrets=["GITHUB_AUTH_TOKEN"],
)

flow.register(project_name="SOME PROJECT")

# flow.run()
rating=rating, release_date=release_date, source_url=url ) return data with Flow( name="example-selenium", schedule=Schedule( clocks=[ # TODO: specify the schedule you want this to run, and with what parameters # https://docs.prefect.io/core/concepts/schedules.html CronClock( cron='0 0 * * *', parameter_defaults=dict( home_page='https://www.metacritic.com/', gaming_platform='Switch' ) ), ] ), storage=Docker( # TODO: change to your docker registry: # https://docs.prefect.io/cloud/recipes/configuring_storage.html registry_url='szelenka', # TODO: need to specify a base Docker image which has the chromedriver dependencies already installed base_image='szelenka/python-selenium-chromium:3.7.4', # TODO: 'pin' the exact versions you used on your development machine python_dependencies=[ 'selenium==3.141.0',
return {'table_name':'daily_covid', 'table_data':filt_df} @task def cleanup_files(): """Remove the source files to avoid cluttering the run directory""" logger = prefect.context.get("logger") for fn in DATASOURCE_JHU_GITHUB: if path.exists(DATASOURCE_JHU_GITHUB[fn]): os.remove(DATASOURCE_JHU_GITHUB[fn]) logger.info(f'Removed:{DATASOURCE_JHU_GITHUB[fn]}') logger.info('Flow completed') #run 4pm and 5pm daily daily_schedule = Schedule(clocks=[CronClock("30 1,2 * * *")]) #Set up a prefect flow and run it on a schedule with Flow('COVID 19 flow', schedule=daily_schedule) as flow: #extract tasks #daily_covid_us_states_data = extract_us_covid19sheets_data() file_dfs = extract_gh_global_covid_ts_data() #transform daily_covid_data = transform_daily_covid_data(file_dfs) #load - add a load task and include it here cleanup_files() flow.run()
import logging
import os

from prefect import Flow, task
from prefect.schedules import Schedule
from prefect.schedules.clocks import CronClock

logging.basicConfig(datefmt="")
# ``os`` is imported above because this line needs it and no import of it
# was visible in the original snippet.
os.environ["TZ"] = "US/Eastern"


@task
def DoSpyStuff():
    """Return a fixed confirmation message."""
    return "🕵🏽 Spy stuff done 🕵🏽"


# Three clocks firing every minute; they differ only in their label.
agent_1_clock = CronClock(cron="* * * * *", labels=["Derek Flint"])
agent_2_clock = CronClock(cron="* * * * *", labels=["Evenlyn Salt"])
agent_3_clock = CronClock(cron="* * * * *", labels=["George Smiley"])

schedule = Schedule(clocks=[
    agent_1_clock,
    agent_2_clock,
    agent_3_clock,
])

# The schedule was previously built but never attached to the flow, so the
# labelled clocks had no effect. Passing it here puts them to use.
with Flow("Mission: Possible (with labels)", schedule=schedule) as flow:
    DoSpyStuff()

flow.run()
from prefect import Flow, Parameter
from prefect.schedules import Schedule
from prefect.schedules.clocks import CronClock
from iris.tasks import read_column, concatenate_columns, features_extraction, target_extraction, train

# Both clocks fire on the same cron expression; each supplies its own
# ``files`` parameter default, so the same flow trains on two column sets.

# Sepal DAG
sepal_schedule = CronClock(
    '0 * * * *',
    parameter_defaults={'files': ['col1.csv', 'col2.csv', 'target.csv']},
)

# Petal DAG
petal_schedule = CronClock(
    '0 * * * *',
    parameter_defaults={'files': ['col3.csv', 'col4.csv', 'target.csv']},
)

schedule = Schedule(clocks=[sepal_schedule, petal_schedule])

with Flow('Training', schedule=schedule) as flow:
    files = Parameter('files', default=['col1.csv', 'col2.csv', 'target.csv'])

    # One mapped read per file, then merge the columns into a single dataset.
    dataset = concatenate_columns(read_column.map(files))

    features = features_extraction(dataset)
    target = target_extraction(dataset)
    train(features, target)

if __name__ == '__main__':
    flow.register(project_name='airflow_prefect_contest')
# print(f"Working Dir: {working_dir_path}")
# docker_flpth = os.path.join(working_dir_path, "Dockerfile")
# print(f"Docker flpth: {docker_flpth}")

# aws
aws_ecr_repo_name = dckr_image_name
aws_region = "us-east-2"

############## Schedule when to run the script ##############
schedule = Schedule(
    clocks=[
        # fire every day
        clocks.IntervalClock(
            interval=timedelta(days=1),
            start_date=pendulum.datetime(2020, 4, 22, 17, 30, tz="America/Toronto"),
        ),
    ],
    # but only on weekdays
    filters=[filters.is_weekday],
    # and not in January TODO: Add TSX Holidays
    not_filters=[filters.between_dates(1, 1, 1, 31)],
)

#tsx_imb_fl.schedule = schedule

############## Storage ecr docker flow ##############
# The secret's value is used as the shell command below.
dkr_ecr_scrt = PrefectSecret("docker_ecr_login").run()

get_ecr_auth_token = ShellTask(helper_script="cd ~")
ecr_auth_token = get_ecr_auth_token.run(command=dkr_ecr_scrt)
from prefect import Flow, Parameter
from prefect.schedules import Schedule
from prefect.schedules.clocks import CronClock

a = Parameter('a', default=None, required=False)
b = Parameter('b', default=None, required=False)

# Two cron clocks that carry identical parameter defaults. The cron strings
# keep their leading space exactly as written.
_CRON_EXPRESSIONS = (' 0 18 * * 6', ' 0 12 * * 0')

schedule = Schedule(clocks=[
    CronClock(cron_expression, parameter_defaults={'a': 'a', 'b': 'b'})
    for cron_expression in _CRON_EXPRESSIONS
])

flow = Flow(name='test flow', schedule=schedule)

# NOTE(review): the two parameters are never added to the flow (the calls
# below are commented out), yet the clocks supply defaults for them -- confirm
# this is intended.
# flow.add_task(a)
# flow.add_task(b)

flow.register(project_name="Demo")
@task
def is_snowing_this_week(data):
    """
    Given a forecast payload, skip the run unless enough snow is expected.

    Every entry of ``data["list"]`` that has a ``"snow"`` key contributes its
    ``"3h"`` value (0 when absent). The task raises ``SKIP`` unless at least
    eight of those values are >= 1; otherwise it completes and returns None.
    """
    snowfall = [
        entry["snow"].get("3h", 0)
        for entry in data["list"]
        if "snow" in entry
    ]
    snowy_periods = sum(amount >= 1 for amount in snowfall)
    if snowy_periods < 8:
        raise SKIP("There is not much snow in the forecast.")


notification = SlackTask(
    message="There is snow in the forecast for this week - it might be time to hit the slopes!",
    webhook_secret="DAVID_SLACK_URL",
)

storage = Docker(registry_url="joshmeek18", image_name="flows")

# Cron "0 18 * * 1-5", starting from the moment this script is executed
# (US/Pacific).
snow_schedule = Schedule(
    clocks=[CronClock("0 18 * * 1-5", start_date=pendulum.now(tz="US/Pacific"))],
)

with Flow("Snow Flow", storage=storage, schedule=snow_schedule) as flow:
    # NOTE(review): ``city`` and ``api_key`` are defined outside this snippet.
    forecast = pull_forecast(city=city, api_key=api_key)
    snow = is_snowing_this_week(forecast)
    # The Slack message is only sent after the snow check has run.
    notification.set_upstream(snow)

flow.register(project_name="Snow Flow")
repo.remote("origin").push("develop") else: repo.remote("origin").push("master") @task def get_projects(config): base_path = config["general"]["basepath"] projects = json.loads(config["general"]["repos"]) project_paths = [os.path.join(base_path, project) for project in projects] return project_paths schedule = Schedule(clocks=[CronClock("0 4 * * *")]) with Flow("Git-Push", schedule=schedule) as flow: # Load config config = load_config() # Get projects to sync projects = get_projects(config) # Commit and push every project commit_and_push.map(projects) flow.register(project_name="Git-Sync")
# docker_run_config = DockerRun( # env={"sample_key": "sample_value"}, # labels=None, # ) ecs_run_config = ECSRun( env={"sample_key": "sample_value"}, labels=["fargate"], task_role_arn="arn:aws:iam::853106079885:role/ECSTaskS3ECRRole", ) schedule = Schedule( clocks=[ CronClock( "0 12 * * 1-5", start_date=pendulum.now(tz="US/Pacific"), parameter_defaults={"Redshift Table Name": "users"} ), CronClock( "0 12 * * 1-5", start_date=pendulum.now(tz="US/Pacific"), parameter_defaults={"Redshift Table Name": "events"} ), ] ) with Flow( "S3 to Redshift - ECS", storage=storage, schedule=schedule, run_config=ecs_run_config, ) as flow: # ----STAGE 1----
@task(log_stdout=True)
def show_file():
    """Print the contents of ``/tmp/flow.me``, the file the shell tasks append to."""
    # Read the file directly: using a Path as a context manager has been a
    # no-op since Python 3.9 and raises on Python 3.13+, where the support
    # was removed.
    print(Path("/tmp/flow.me").read_text())


with Flow("three_little_tasks_flow") as flow:
    t1 = shelltask(command="echo '====== start' >> /tmp/flow.me")
    t2 = shelltask(command="date >> /tmp/flow.me; sleep 3")
    t3 = shelltask(command="echo '====== stop' >> /tmp/flow.me")

    # Force strictly sequential execution: t1 -> t2 -> t3 -> show_file.
    t1.set_downstream(t2)
    t2.set_downstream(t3)
    t3.set_downstream(show_file)

if __name__ == "__main__":
    # The first CLI argument selects the mode; the default is a one-off run.
    cmd = "run"
    if len(sys.argv) > 1:
        cmd = sys.argv[1]

    # The modes are mutually exclusive, so an elif chain is used.
    if cmd == "run":
        flow.run()
    elif cmd == "schedule":
        # Run in-process on an every-minute cron schedule.
        flow.schedule = Schedule(clocks=[CronClock("* * * * *")])
        flow.run()
    elif cmd == "register":
        # Register the flow with the same every-minute schedule.
        flow.schedule = Schedule(clocks=[CronClock("* * * * *")])
        r = flow.register(project_name="demo")
@task
def say_hello():
    """Print a fixed greeting."""
    print("hello world")


# (hour, minute) pairs at which the flow may fire: 8:15am, 9:30am, 10:37am, 4pm.
_ALERT_TIMES = ((8, 15), (9, 30), (10, 37), (16, 0))

curr_schedule = Schedule(
    # Fire every minute
    clocks=[
        schedules.clocks.IntervalClock(
            start_date=pendulum.datetime(2019, 1, 1, tz='America/New_York'),
            interval=timedelta(minutes=1),
        )
    ],
    # Only on weekdays
    filters=[filters.is_weekday],
    # and only at the alert times: each window starts and ends on the same
    # minute.
    or_filters=[
        filters.between_times(
            pendulum.time(hour=hour, minute=minute),
            pendulum.time(hour=hour, minute=minute),
        )
        for hour, minute in _ALERT_TIMES
    ],
    # do not run on Christmas
    not_filters=[filters.between_dates(12, 25, 12, 25)],
)

with Flow('Sounds alerts', curr_schedule) as flow:
    say_hello()

flow.run()
random_index = random.randint(0, 499) return quote_list[random_index] @task def post_to_slack(quote): print(quote) # WEBHOOK = Secret("SLACK_WEBHOOK_URL").get() # r = requests.post(WEBHOOK, json={"text": quote}) # r.raise_for_status() with Flow( "motivational-flow", schedule=Schedule(clocks=[ CronClock("0 8 * * 1-5", start_date=pendulum.now(tz="US/Pacific")) ], ), storage=Docker( registry_url="joshmeek18", image_name="flows", python_dependencies=["bs4", "lxml", "requests"], ), ) as flow: content = get_content() quote_list = get_quote_list(content) random_quote = get_random_quote(quote_list) post_to_slack(random_quote) flow.run(run_on_schedule=False) # flow.register(project_name="Motivation") # print(datetime.fromtimestamp(pendulum.now(tz="US/Pacific").timestamp()))
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True) if ats is not np.nan: tweet = TWEET_ATS.format(title=title, medium_link=medium_link, blog_link=blog_link, ats=ats.strip('"'), hashtags=hashtags) else: tweet = TWEET_NO_ATS.format(title=title, medium_link=medium_link, blog_link=blog_link, hashtags=hashtags) api.update_status(tweet) # Tuesday at 8pm schedule = Schedule(clocks=[CronClock("0 0 * * 3")]) with Flow("Send Tweet", schedule=schedule) as flow: title, medium_link, blog_link, hashtags, ats = get_blob_info() send_tweet(title, medium_link, blog_link, hashtags, ats) flow.register(project_name="Blog-Tweeter")
payment_temp = payments[payment] payment_temp["loan_id"] = payment payment_list.append(payment_temp) return sorted(payment_list, key=itemgetter("payment"), reverse=True) class LogResult(Task): def run(self, res): return self.logger.info(pprint.pprint(res)) schedule = Schedule( clocks=[ IntervalClock(interval=timedelta(weeks=1), start_date=datetime.utcnow()) ], filters=[between_times(time(hour=10), time(hour=23))], ) with Flow(name="Loan Payoff Reminder", schedule=schedule) as flow: budget = Parameter("budget", default=3000) phone_number = Parameter("phone_number", default="+15707306535") # Google Parameters scopes = Parameter( "SCOPES", default=["https://www.googleapis.com/auth/spreadsheets"]) spreadsheet_id = Parameter( "SPREADHSEET_ID", default="1Wb5Anty3nvaa0jHY7NEYz7i6jyIOQ5Q9z2GLqmLjmPA") project_id = Parameter("project_id", default="flows-270323") auth_provider_x509_cert_url = Parameter(
print(f"\nHere's your data: {data}") from prefect.schedules import Schedule from prefect.schedules.clocks import IntervalClock clock1 = IntervalClock( start_date=datetime.now() + timedelta(seconds=5), interval=timedelta(hours=1), parameter_defaults={"length": 6} ) clock2 = IntervalClock( start_date=datetime.now() + timedelta(seconds=15), interval=timedelta(hours=1), parameter_defaults={"length": 50} ) schedule = Schedule(clocks=[clock1, clock2]) with Flow("Evolving ETL", schedule=schedule) as flow: with case(length, 6): e = extract(length) t = transform.map(e) l = load(t) with case(length, 50): e = extract(length) t = transform.map(e) t2 = transform.map(t) l = load(t2) flow.run()
from prefect import task, Flow, Task, Parameter
from prefect.schedules import clocks, Schedule

diurnal = ['rooster', 'dog']
nocturnal = ['owl', 'hampster']

# Clocks: one per animal group, each supplying its own ``animals`` default.
diurnal_clock = clocks.CronClock(
    "51 * * * *",
    parameter_defaults={"animals": diurnal},
)
nocturnal_clock = clocks.CronClock(
    "53 * * * *",
    parameter_defaults={"animals": nocturnal},
)

# The full schedule combines both clocks.
schedule = Schedule(clocks=[diurnal_clock, nocturnal_clock])


@task
def wakeup(animals):
    """Print one wake-up line for every entry of ``animals``."""
    for animal in animals:
        print("Waking up animal %s" % animal)


# The flow is common to both groups; only the scheduled parameter value differs.
with Flow(name="wakuptime", schedule=schedule) as this_flow:
    animals = Parameter("animals", default=[])
    wakeup(animals)

# Registered so it runs on the schedule with varying parameter values.
this_flow.register("Teste")
@task(name="Relógio", log_stdout=True)
def imprimir_agendamento(alterado: bool = False):
    """Print which interval the current run belongs to.

    Args:
        alterado: True when the run was started with the ``Alterado``
            parameter set, i.e. by the altered clock.
    """
    # NOTE(review): the message reports 4 / 13 minutes while the clocks below
    # use 3 / 11 minutes -- confirm which pair is intended.
    print(f"Essa execução ocorre a cada {4 if alterado else 13} minutos")


# Both clocks share the same start instant (Sao Paulo time).
_INICIO = pendulum.datetime(2021, 5, 27, 19, 30, tz="America/Sao_Paulo")

# Default clock: every 11 minutes, no parameter overrides.
relogio_padrao = IntervalClock(
    interval=timedelta(minutes=11),
    start_date=_INICIO,
)

# Altered clock: every 3 minutes, runs receive Alterado=True.
relogio_alterado = IntervalClock(
    interval=timedelta(minutes=3),
    start_date=_INICIO,
    parameter_defaults={"Alterado": True},
)

# Weekdays only, and only between 19:00 and 23:00.
agendamento = Schedule(
    clocks=[relogio_padrao, relogio_alterado],
    filters=[
        is_weekday,
        between_times(pendulum.time(19), pendulum.time(23)),
    ],
)

with Flow("agendamento-avançado", schedule=agendamento) as flow:
    alterado = Parameter("Alterado", default=False, required=False)
    imprimir_agendamento(alterado)

if __name__ == "__main__":
    flow.register(project_name="extras")
if not r.ok: raise ValueError( "Slack notification for {} failed".format(tracked_obj)) return new_state @task def get_start_date(): return pendulum.naive(2018, 1, 17) @task(state_handlers=[test_slack]) def calculate_weeks_since(start_date): current_date = pendulum.now().naive() return current_date.diff(start_date).in_weeks() with Flow( "weeks-since", schedule=Schedule(clocks=[ CronClock("30 11 * * 1", start_date=pendulum.now("America/Toronto")) ]), ) as flow: start_date = get_start_date() calculate_weeks_since(start_date) # flow.run(run_on_schedule=False) flow.register(project_name="Demo")
axis="columns") stocks_html = (stocks.style.applymap(color_gains_loss, subset=["Difference"]).format({ "Bought At": "${:20,.2f}", "Current Price": "${:20,.2f}", "Difference": "${:20,.2f}" }).hide_index()) return message.format(stocks_html.render()) schedule = Schedule(clocks=[CronClock("0 21 * * 5")]) email_task = EmailTask(subject="Weekly Holdings Update") with Flow("Stock-API", schedule=schedule) as flow: # Load config config = load_config() # Load the stocks + the initial value # CSV SCHEMA: stock,initial_value stocks = load_stocks() # Split the stocks into even groups of 5 as the Vantage API # only allows 5 api calls per minute split_stocks = split_stocks(stocks["stock"], 5)