# Placeholder project id used to build the dataset and external table name.
PROJECT_ID = 'put-you-project-id-here'

# Dataset with one external table alias and one internal table.
dataset = Dataset(
    project_id=PROJECT_ID,
    dataset_name='bigflow_cheatsheet',
    external_tables={
        '311_requests': f'{PROJECT_ID}.external_data.311_requests',
    },
    internal_tables=['request_aggregate'],
)

# Sensor component over the '311_requests' table alias.
# NOTE(review): '{dt}' is passed through as a literal placeholder here;
# presumably bigflow substitutes the runtime date -- confirm.
wait_for_requests = bf.bigquery.sensor_component(
    '311_requests',
    where_clause="DATE(TIMESTAMP(created_date)) = DATE(TIMESTAMP_ADD(TIMESTAMP('{dt}'), INTERVAL -24 HOUR))",
    ds=dataset,
)

# Module-level log of the ids of every ExampleJob that has been run.
started_jobs = []


class ExampleJob:
    """Minimal job that records its own id in ``started_jobs`` when run."""

    def __init__(self, id):
        """Store the job identifier on the instance."""
        self.id = id

    def run(self, runtime):
        """Append this job's id to ``started_jobs``; ``runtime`` is unused."""
        started_jobs.append(self.id)


# Two sensor jobs, scheduled with "@once".
workflow_1 = bf.Workflow(
    workflow_id="ID_1",
    definition=[
        wait_for_requests.to_job(),
        wait_for_requests.to_job(),
    ],
    schedule_interval="@once",
)

# A single sensor job, no explicit schedule.
workflow_2 = bf.Workflow(
    workflow_id="ID_2",
    definition=[wait_for_requests.to_job()],
)

# Two plain ExampleJob instances.
workflow_3 = bf.Workflow(
    workflow_id="ID_3",
    definition=[
        ExampleJob("J_ID_3"),
        ExampleJob("J_ID_4"),
    ],
)

# One plain ExampleJob instance.
workflow_4 = bf.Workflow(
    workflow_id="ID_4",
    definition=[ExampleJob("J_ID_5")],
)

# Printed as a side effect of importing this module.
print("AAA")
import bigflow as bf

# Workflow with no jobs, scheduled with "@once", with an explicit log config.
workflow_1 = bf.Workflow(
    workflow_id="ID_1",
    definition=[],
    schedule_interval="@once",
    log_config=dict(gcp_project_id='some-project-id', log_level='INFO'),
)

# Workflow with no jobs and no explicit schedule; same log config values.
workflow_2 = bf.Workflow(
    workflow_id="ID_2",
    definition=[],
    log_config=dict(gcp_project_id='some-project-id', log_level='INFO'),
)
import bigflow as bf

from .job import ExampleJob

# Workflow made of a single ExampleJob constructed with the id 'job1'.
workflow2 = bf.Workflow(
    workflow_id='workflow2',
    definition=[
        ExampleJob('job1'),
    ],
)
import datetime

import bigflow


class DailyJob(bigflow.Job):
    """Job that prints the time window it is expected to process."""

    id = 'daily_job'

    def execute(self, context):
        """Print the window from ``context.runtime`` to one second short of a day later."""
        window_start = context.runtime
        # One day minus one second after the start of the window.
        window_end = window_start + (
            datetime.timedelta(days=1) - datetime.timedelta(seconds=1)
        )
        print(f'I should process data with timestamps from: {window_start} to {window_end}')


# Workflow with a '@daily' schedule interval containing the single daily job.
daily_workflow = bigflow.Workflow(
    workflow_id='daily_workflow',
    schedule_interval='@daily',
    definition=[DailyJob()],
)


if __name__ == '__main__':
    # Run the workflow once for a fixed date when executed as a script.
    daily_workflow.run(datetime.datetime(2020, 1, 1))
'311_requests': '{}.external_data.311_requests'.format(PROJECT_ID) }, internal_tables=['request_aggregate']) wait_for_requests = bf.bigquery.sensor( '311_requests', where_clause="DATE(TIMESTAMP(created_date)) = DATE(TIMESTAMP_ADD(TIMESTAMP('{dt}'), INTERVAL -24 HOUR))", ds=dataset) started_jobs = [] class ExampleJob: def __init__(self, id): self.id = id def execute(self, context): started_jobs.append(self.id) workflow_1 = bf.Workflow(workflow_id="ID_1", definition=[wait_for_requests.to_job(), wait_for_requests.to_job()], schedule_interval="@once", log_config={ 'gcp_project_id': 'some-project-id', 'log_level': 'INFO', }) workflow_2 = bf.Workflow(workflow_id="ID_2", definition=[wait_for_requests.to_job()], log_config={ 'gcp_project_id': 'another-project-id', 'log_level': 'INFO', }) workflow_3 = bf.Workflow(workflow_id="ID_3", definition=[ExampleJob("J_ID_3"), ExampleJob("J_ID_4")]) workflow_4 = bf.Workflow(workflow_id="ID_4", definition=[ExampleJob("J_ID_5")]) print("AAA")
from pathlib import Path

import bigflow
from bigflow.resources import get_resource_absolute_path


class PrintResourceJob(bigflow.Job):
    """Job that prints the contents of the 'example_resource.txt' resource."""

    id = 'print_resource_job'

    def execute(self, context: bigflow.JobContext):
        """Resolve the resource path relative to this file and print its text."""
        resource_path = get_resource_absolute_path(
            'example_resource.txt',
            Path(__file__),
        )
        with open(resource_path) as resource_file:
            contents = resource_file.read()
            print(contents)


# Workflow containing only the resource-printing job.
resources_workflow = bigflow.Workflow(
    workflow_id='resources_workflow',
    definition=[PrintResourceJob()],
)
import bigflow


class TheJob(bigflow.Job):
    """Job whose ``execute`` does nothing."""

    def execute(self, context: bigflow.JobContext):
        """No-op: the context is accepted and ignored."""
        return None


# NOTE(review): TheJob defines no __init__ of its own, so the positional
# 'the_job' argument is handled by bigflow.Job -- confirm it is accepted there.
the_workflow = bigflow.Workflow(
    workflow_id='workflow_one',
    definition=[TheJob('the_job')],
)
import bigflow


class HelloWorldJob(bigflow.Job):
    """Job that prints a greeting including the runtime from its context."""

    id = 'hello_world'

    def execute(self, context: bigflow.JobContext):
        """Print 'Hello world on <runtime>!' using ``context.runtime``."""
        print(f'Hello world on {context.runtime}!')


class SayGoodbyeJob(bigflow.Job):
    """Job that prints a fixed farewell message."""

    id = 'say_goodbye'

    def execute(self, context: bigflow.JobContext):
        """Print 'Goodbye!'; the context is not used."""
        # Plain literal: the original f-string had no placeholders.
        print('Goodbye!')


# Workflow listing the greeting job first and the farewell job second.
hello_world_workflow = bigflow.Workflow(
    workflow_id='hello_world_workflow',
    definition=[
        HelloWorldJob(),
        SayGoodbyeJob(),
    ],
)
import bigflow as bf

from .Unused1 import ExampleJob

# Plain integer constant defined before the workflow.
int_1 = 123

# The only Workflow object defined in this module.
workflow_1 = bf.Workflow(
    workflow_id="ID_5",
    definition=[
        ExampleJob("J_ID_6"),
    ],
)

# Plain integer constants defined after the workflow.
int_2 = 456
int_3 = 789
import datetime

import bigflow
from bigflow.workflow import hourly_start_time


class HourlyJob(bigflow.Job):
    """Job that prints the time window it is expected to process."""

    id = 'hourly_job'

    def execute(self, context):
        """Print the window from ``context.runtime`` to 59 min 59 s later."""
        window_start = context.runtime
        window_end = context.runtime + datetime.timedelta(minutes=59, seconds=59)
        print(f'I should process data with timestamps from: {window_start} to {window_end}')


# Workflow with an '@hourly' schedule interval; start times come from
# bigflow's hourly_start_time factory.
hourly_workflow = bigflow.Workflow(
    workflow_id='hourly_workflow',
    schedule_interval='@hourly',
    start_time_factory=hourly_start_time,
    definition=[
        HourlyJob(),
    ],
)


if __name__ == '__main__':
    # Run the workflow once for a fixed hour when executed as a script.
    hourly_workflow.run(datetime.datetime(2020, 1, 1, 10))
# Configuration named 'dev' ...
config = bigflow.Config(
    name='dev',
    properties={'message_to_print': 'Message to print on DEV'},
)
# ... extended with a second configuration named 'prod'.
config = config.add_configuration(
    name='prod',
    properties={'message_to_print': 'Message to print on PROD'},
)


class HelloConfigJob(bigflow.Job):
    """Job that prints the message it was constructed with."""

    id = 'hello_config_job'

    def __init__(self, message_to_print):
        """Remember the message to print on execution."""
        self.message_to_print = message_to_print

    def execute(self, context: bigflow.JobContext):
        """Print the stored message; the context is not used."""
        print(self.message_to_print)


# The message is resolved from the config once, when this module is loaded.
hello_world_workflow = bigflow.Workflow(
    workflow_id='hello_config_workflow',
    definition=[
        HelloConfigJob(config.resolve_property('message_to_print')),
    ],
)