qphiavg_plot_task = CMSTask(name="qphiavg_plot_task", callback=qphiavg_plot_func)

img_dag_dict = {
    input_task: {
        to_thumb_task: None,
        parse_attributes_task: None,
        circavg_task: 'image',
        qphiavg_task: None,
    },
    parse_attributes_task: [make_calibration_task, generate_mask_task],
    # parse_attributes_task: generate_mask_task,
    # TODO : Adding these seems to affect keys that make_calibration_task gets
    make_calibration_task: {
        circavg_task: 'calibration',
        qphiavg_task: 'calibration'
    },
    generate_mask_task: {
        circavg_task: 'mask',
        qphiavg_task: 'mask'
    },
    # circavg_task: circavg_plot_task,
    circavg_task: [circavg_plot_task, peakfind_task],
    peakfind_task: peakfind_plot_task,
    qphiavg_task: qphiavg_plot_task
}

one_image_dag = Dag("img_dag", autostart=False, queue='cms-oneimage')
one_image_dag.define(img_dag_dict)
from lightflow.models import Dag
from lightflow.tasks import PythonTask
from lightflow_filesystem import GlobTask


# the callback function that handles the returned files from the glob task. In this
# example it stores them as a list into the data under the key 'files'.
def store_files(files, data, store, signal, context):
    data['files'] = files


# the callback for the task that prints the filenames that were returned by the glob task.
def print_filenames(data, store, signal, context):
    print('\n'.join(data['files']))


# create a GlobTask to find all files with the '.file' extension and a PythonTask to
# print the result.
glob_task = GlobTask(name='glob_task',
                     paths=['/tmp/lightflow_test/'],
                     callback=store_files,
                     pattern='**/*.file',
                     recursive=True)

print_task = PythonTask(name='print_task', callback=print_filenames)

# create a DAG that runs the glob task first and then the print task.
list_dag = Dag('list_dag')
list_dag.define({glob_task: print_task})
# the callback function for the task that adds a nested list to the list of filenames and
# then extends the list of filenames with two more entries.
def add_more_filenames(data, store, signal, context):
    store.push('filenames', ['nested_a', 'nested_b'])
    store.extend('filenames', ['file_c.spec', 'file_d.spec'])


# create the main DAG
d = Dag('main_dag')

# create the tasks that call the functions above
store_task = PythonTask(name='store_task', callback=store_data)
modify_task = PythonTask(name='modify_task', callback=modify_data)
add_filename_task = PythonTask(name='add_filename_task', callback=add_filename)
add_more_filename_task = PythonTask(name='add_more_filename_task',
                                    callback=add_more_filenames)

# set up the graph of the DAG, in which the store_task and modify_task are called
# in sequence while the add_filename_task and add_more_filename_task are run in parallel.
d.define({
    store_task: modify_task,
    modify_task: [add_filename_task, add_more_filename_task]
})
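
# NOTE: the store_data, modify_data and add_filename callbacks referenced above are not
# part of this fragment (in the original module they would be defined before the tasks).
# A minimal, illustrative sketch of what they could look like, assuming the persistent
# store API (store.set/get/push) used in this example; the concrete keys and values are
# hypothetical.
def store_data(data, store, signal, context):
    # seed the persistent store with an initial list of filenames (hypothetical values)
    store.set('filenames', ['file_a.spec', 'file_b.spec'])


def modify_data(data, store, signal, context):
    # read the list back and store a derived value (purely illustrative)
    store.set('num_filenames', len(store.get('filenames')))


def add_filename(data, store, signal, context):
    # append a single filename to the stored list
    store.push('filenames', 'file_e.spec')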
    # give it some provenance and data
    new_data = dict(img=img)
    new_data['md'] = md.copy()
    new_data = TaskData(data=new_data)
    new_data = MultiTaskData(dataset=new_data)

    good_attr = filter_attributes(new_data['md'])
    if good_attr:
        print("got a good image")
        # one image dags should go here
        dag_name = signal.start_dag(one_image_dag, data=new_data)
        print("primary node, dag name: {}".format(dag_name))
        dag_names.append(dag_name)
    else:
        print("Bad attributes!")

    signal.join_dags(dag_names)


# create the main DAG that spawns others
# img_dag = Dag('img_dag')
primary_task = PythonTask(name="primary_task", callback=primary_func,
                          queue='cms-primary-task')

primary_dag_dict = {
    primary_task: None,
}

primary_dag = Dag("primary_dag", autostart=True, queue='cms-primary')
primary_dag.define(primary_dag_dict)
            interp_base=request['processing_info']['interp_base'])
    elif process_type == 'bin':
        logger.info("binning (not performed yet)")
        processor.bin(start_doc,
                      requester=request['requester'],
                      proc_info=request['processing_info'],
                      filepath=request['processing_info']['filepath'])
    elif process_type == 'request_interpolated_data':
        logger.info("returning interpolated data (not done yet)")
        processor.return_interp_data(start_doc,
                                     requester=request['requester'],
                                     filepath=request['processing_info']['filepath'])

    t2 = ttime.time()
    print(f"total processing took {t2-t1} sec")


# don't create the request anymore
# create_req_task = PythonTask(name="create_req_func", callback=create_req_func,
#                              queue='qas-task')
process_run_task = PythonTask(name="process_run_func", callback=process_run_func,
                              queue='qas-task')

d = Dag("interpolation", queue="qas-dag")
d.define({
    process_run_task: None,
})
    Option('recursive', default=True, help='Run recursively', type=bool),
    Option('iterations', default=1, help='The number of iterations', type=int),
    Option('threshold', default=0.4, help='The threshold value', type=float)
])


# the callback function that prints the value of the filepath parameter
def print_filepath(data, store, signal, context):
    print('The filepath is:', store.get('filepath'))


# the callback function that prints the value of the iterations parameter
def print_iterations(data, store, signal, context):
    print('Number of iterations:', store.get('iterations'))


# create the main DAG
d = Dag('main_dag')

# task for printing the value of the filepath parameter
print_filepath_task = PythonTask(name='print_filepath_task', callback=print_filepath)

# task for printing the value of the iterations parameter
print_iterations_task = PythonTask(name='print_iterations_task', callback=print_iterations)

# set up the graph of the DAG, in which the print_filepath_task has to be executed first,
# followed by the print_iterations_task.
d.define({print_filepath_task: print_iterations_task})
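
# NOTE: the Option entries at the top of this fragment are the tail of a parameters
# declaration whose beginning is not shown. A minimal, illustrative sketch of how such a
# declaration typically looks, assuming Parameters is importable from lightflow.models
# alongside Option; the 'filepath' option is inferred from the callbacks above, and the
# name 'example_parameters' is hypothetical (the original module would expose it as
# 'parameters' before the options shown above).
from lightflow.models import Parameters, Option

example_parameters = Parameters([
    Option('filepath', help='The path to a file', type=str),
    Option('recursive', default=True, help='Run recursively', type=bool),
    Option('iterations', default=1, help='The number of iterations', type=int),
    Option('threshold', default=0.4, help='The threshold value', type=float)
])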
    data['image'] = np.ones((100, 100))
    started_dag = signal.start_dag(sub_dag, data=data)
    dag_names.append(started_dag)

    signal.join_dags(dag_names)


# this callback function prints the dimensions of the received numpy array
def sub_dag_print(data, store, signal, context):
    print('Received an image with dimensions: {}'.format(data['image'].shape))


init_task = PythonTask(name='init_task', callback=print_name)
call_dag_task = PythonTask(name='call_dag_task', callback=start_sub_dag)

# create the main dag that runs the init task first, followed by the call_dag task.
main_dag = Dag('main_dag')
main_dag.define({init_task: call_dag_task})

# create the tasks for the sub dag that simply prints the shape of the numpy array
# passed down from the main dag.
print_task = PythonTask(name='print_task', callback=sub_dag_print)

# create the sub dag that is being called by the main dag. In order to stop the
# system from automatically starting the dag when the workflow is run, set the autostart
# parameter to false.
sub_dag = Dag('sub_dag', autostart=False)
sub_dag.define({print_task: None})
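
# NOTE: the print_name callback referenced by init_task, and the beginning of the
# start_sub_dag callback, are not part of this fragment (they would precede the code
# above in the original module). A minimal, illustrative sketch of print_name, assuming
# it simply reports the task context like the print callbacks in the other examples.
def print_name(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id})'.format(**context.to_dict()))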
# acquire some basic statistics for each file as long as it is not a symbolic link
def acquire_stats(entry, data, store, signal, context):
    if not entry.is_symlink():
        data['count'] += 1
        data['size'] += entry.stat(follow_symlinks=False).st_size


# print the acquired statistics
def print_stats(data, store, signal, context):
    print('Statistics for folder: {}'.format(store.get('path')))
    print('Number files: {}'.format(data['count']))
    print('Total size (bytes): {}'.format(data['size']))


# the task for setting up the data for the workflow
setup_task = PythonTask(name='setup_task', callback=setup)

# traverse a directory and call the statistics callable for each file
walk_task = WalkTask(name='walk_task',
                     path=lambda data, store: store.get('path'),
                     callback=acquire_stats,
                     recursive=True)

# print the acquired statistics
print_task = PythonTask(name='print_task', callback=print_stats)

# create a DAG that runs the setup, walk and print task consecutively.
main_dag = Dag('main_dag')
main_dag.define({setup_task: walk_task, walk_task: print_task})
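
# NOTE: the setup callback referenced by setup_task is not part of this fragment (it
# would precede the code above in the original module). A minimal, illustrative sketch,
# assuming it stores the folder to traverse and initialises the counters that
# acquire_stats increments; the concrete path is hypothetical.
def setup(data, store, signal, context):
    store.set('path', '/tmp/lightflow_test/')
    data['count'] = 0
    data['size'] = 0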
    lightflow worker start -q special

"""
from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback function for the tasks that simply prints the context
def print_text(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id})'.format(
           **context.to_dict()))


# create the main DAG
d = Dag('main_dag')

# create the two tasks, where the first task is executed on the 'task' queue and the
# second task on the 'special' queue
print_task = PythonTask(name='print_task', callback=print_text)
print_special = PythonTask(name='print_special', callback=print_text, queue='special')

# set up the graph of the DAG, in which the print_task has to be executed first,
# followed by the print_special task.
d.define({print_task: print_special})
array, and as this is not the case aborts the workflow gracefully. The abort is
accomplished by raising the AbortWorkflow exception.

"""
from lightflow.models import Dag, AbortWorkflow
from lightflow.tasks import PythonTask


# the callback function for the task that stores the array of three image file names
def collect_data(data, store, signal, context):
    data['images'] = ['img_001.tif', 'img_002.tif', 'img_003.tif']


# the callback function for the task that checks the number of stored file names
def check_data(data, store, signal, context):
    if len(data['images']) < 5:
        raise AbortWorkflow('At least 5 images are required')


# create the main DAG
d = Dag('main_dag')

# create the two tasks for storing and checking data
collect_task = PythonTask(name='collect_task', callback=collect_data)
check_task = PythonTask(name='check_task', callback=check_data)

# set up the graph of the DAG
d.define({collect_task: check_task})
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id}) '
          'on {worker_hostname}'.format(**context.to_dict()))

    if 'value' not in data:
        data['value'] = 0

    data['value'] = data['value'] + 1
    print('This is task #{}'.format(data['value']))


# create the main DAG
d = Dag('main_dag')

# create the 3 tasks that increment a number
task_1 = PythonTask(name='task_1', callback=inc_number)
task_2 = PythonTask(name='task_2', callback=inc_number)
task_3 = PythonTask(name='task_3', callback=inc_number)

# set up the graph of the DAG as a linear sequence of tasks
d.define({
    task_1: task_2,
    task_2: task_3
})
                                          'second')['value']


# create the main DAG based on the diagram above
d = Dag('main_dag')

put_task = PythonTask(name='put_task', callback=put_data)
square_task = PythonTask(name='square_task', callback=square_data)
multiply_task = PythonTask(name='multiply_task', callback=multiply_data)
subtract_task = PythonTask(name='subtract_task', callback=subtract_data)

print_task_1 = PythonTask(name='print_task_1', callback=print_data)
print_task_2 = PythonTask(name='print_task_2', callback=print_data)
print_task_3 = PythonTask(name='print_task_3', callback=print_data)
print_task_4 = PythonTask(name='print_task_4', callback=print_data)

d.define({
    put_task: {
        print_task_1: None,
        square_task: None,
        multiply_task: None,
        subtract_task: 'first'
    },
    square_task: [print_task_2, multiply_task],
    multiply_task: {
        print_task_3: None,
        subtract_task: 'second'
    },
    subtract_task: print_task_4
})
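
# NOTE: the put_data, square_data, multiply_data, subtract_data and print_data callbacks
# are not part of this fragment (they would precede the code above in the original
# module). A minimal, illustrative sketch, assuming the data object exposes
# get_by_alias() for the 'first' and 'second' aliases declared in d.define() above, as
# the truncated line at the top of this fragment suggests; the arithmetic is purely
# illustrative.
def put_data(data, store, signal, context):
    data['value'] = 5


def square_data(data, store, signal, context):
    data['value'] = data['value'] ** 2


def multiply_data(data, store, signal, context):
    data['value'] = data['value'] * 3


def subtract_data(data, store, signal, context):
    # combine the two aliased inputs defined in the DAG graph above
    data['value'] = (data.get_by_alias('first')['value'] -
                     data.get_by_alias('second')['value'])


def print_data(data, store, signal, context):
    print('The value is: {}'.format(data['value']))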
worker consuming the 'main' queue, the DAG and the print_task on the second worker,
and the print_memory task on the third worker.

"""
from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback function for the tasks that simply prints the context
def print_text(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id})'.format(
           **context.to_dict()))


# create the main DAG and have it scheduled on the 'graph' queue
d = Dag('main_dag', queue='graph')

# create the two tasks, where the first task is executed on the default 'task' queue
# while the second task is processed on the 'high_memory' queue
print_task = PythonTask(name='print_task', callback=print_text)
print_memory = PythonTask(name='print_memory', callback=print_text, queue='high_memory')

# set up the graph of the DAG, in which the print_task has to be executed first,
# followed by the print_memory task.
d.define({print_task: print_memory})
    ''' Store the image.'''
    plot_path = "/home/xf11bm/test.png"
    img = data['image']
    print(img)
    print(np)

    plt.figure(0)
    plt.clf()
    plt.imshow(np.log(img))
    plt.savefig(plot_path)


# create the main DAG that spawns others
main_dag = Dag('main_dag')
main_task = PythonTask(name="main", callback=main)

main_dag.define({
    main_task: None,
})

from SciStreams.XS_Streams import filter_attributes, pick_allowed_detectors

# the secondary circular average task
# create the two tasks for storing and retrieving data
put_task = PythonTask(name='put_task', callback=put_data)
grab_image_task = PythonTask(name='grab_image', callback=grab_image)
plot_image_task = PythonTask(name='plot_image', callback=plot_image)

# set up the graph of the DAG, in which the put_task has to be executed first,
# followed by the print_task.
circavg_dag_dict = {
    put_task: grab_image_task,
    grab_image_task: plot_image_task,
                       'blah_0001.dat', '|',
                       'blah_0002.dat', 'blah2_0001.dat']


def print_list(data, store, signal, context):
    print(context.task_name)
    print('==================================')
    print(data['my_list'])
    print('==================================')


print_dag = Dag('print_dag', autostart=False)
print_list_task = PythonTask(name='print_list', callback=print_list)
print_dag.define({print_list_task: None})

chunk_dag = Dag('chunk_dag')
make_list_task = PythonTask(name='make_list', callback=make_list)
chunk_task = ChunkingTask(name='chunk_me', dag_name='print_dag',
                          force_consecutive=True, flush_on_end=False,
                          match_pattern='(?P<match>[0-9A-Za-z]*)_', in_key='my_list')
chunk_task2 = ChunkingTask(name='chunk_me', dag_name='print_dag',
                           force_consecutive=True, flush_on_end=False,
                           match_pattern='[0-9A-Za-z]*_', in_key='my_list')

chunk_dag.define({make_list_task: [chunk_task, chunk_task2]})
# the callback function that prints the new PV value after it got changed.
def pv_printout(data, store, signal, context):
    print('PV {} has value: {}'.format(data['pv_name'], data['pv_value']))


# set up the PV monitoring dag.
pv_monitor_dag = Dag('pv_monitor_dag')

startup_task = PythonTask(name='startup_task', callback=startup)

monitor_task = PvTriggerTask(name='monitor_task',
                             pv_name=lambda data, data_store: data_store.get('pvname'),
                             callback=pv_callback,
                             event_trigger_time=0.1,
                             stop_polling_rate=2,
                             skip_initial_callback=True)

pv_monitor_dag.define({startup_task: monitor_task})

# set up the PV action dag.
pv_action_dag = Dag('pv_action_dag', autostart=False)
printout_task = PythonTask(name='printout_task', callback=pv_printout)
pv_action_dag.define({printout_task: None})
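
# NOTE: the startup and pv_callback callbacks referenced above are not part of this
# fragment (they would precede the code above in the original module). A minimal,
# illustrative sketch of the startup callback only, assuming it stores the name of the
# PV to monitor under the 'pvname' key that the pv_name lambda reads; the PV name itself
# is hypothetical.
def startup(data, store, signal, context):
    store.set('pvname', 'BEAMLINE:EXAMPLE:PV')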
           **context.to_dict()))


# create the main DAG
d = Dag('main_dag')

start_task = PythonTask(name='start_task', callback=start_all)

bash_task = BashTask(name='bash_task',
                     command='for i in `seq 1 10`; do echo "$i"; done',
                     callback_stdout=bash_stdout)

stop_noskip_task = PythonTask(name='stop_noskip_task', callback=stop_noskip)
stop_task = PythonTask(name='stop_task', callback=stop)

print_task_1 = PythonTask(name='print_task_1', callback=print_context)
print_task_2 = PythonTask(name='print_task_2', callback=print_context)
print_task_3 = PythonTask(name='print_task_3', callback=print_context)

# set up the graph of the DAG with a start task and three paths with different stop
# conditions.
d.define({
    start_task: [bash_task, stop_noskip_task, stop_task],
    bash_task: print_task_1,
    stop_noskip_task: print_task_2,
    stop_task: print_task_3
})
# create the main DAG
d = Dag('main_dag')

# task for storing the data
put_task = PythonTask(name='put_task', callback=put_data)

# task that limits the branching to certain successor tasks
branch_task = PythonTask(name='branch_task', callback=branch_with_limit)

# first task, first lane, simply prints the value stored in the put_task
lane1_print_task = PythonTask(name='lane1_print_task', callback=print_value)

# first task, second lane, simply prints the value stored in the put_task
lane2_print_task = PythonTask(name='lane2_print_task', callback=print_value)

# first task, third lane, simply prints the value stored in the put_task
lane3_print_task = PythonTask(name='lane3_print_task', callback=print_value)

# joins all three lanes together and waits for the predecessor tasks to finish processing
join_task = PythonTask(name='t_join_me', callback=print_value)

# set up the graph of the DAG as illustrated above. Please note how a list of tasks
# defines tasks that are run in parallel (branched out).
d.define({
    put_task: branch_task,
    branch_task: [lane1_print_task, lane2_print_task, lane3_print_task],
    lane1_print_task: join_task,
    lane2_print_task: join_task,
    lane3_print_task: join_task
})
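
# NOTE: the put_data, branch_with_limit and print_value callbacks referenced above are
# not part of this fragment (they would precede the code above in the original module).
# A minimal, illustrative sketch, assuming the Action(limit=...) mechanism used in the
# decision example elsewhere in this collection; the stored value and the chosen lanes
# are hypothetical.
from lightflow.models import Action


def put_data(data, store, signal, context):
    data['value'] = 5


def branch_with_limit(data, store, signal, context):
    # only pass the data on to the first and third lane
    return Action(data, limit=[lane1_print_task, lane3_print_task])


def print_value(data, store, signal, context):
    print('The value is: {}'.format(data['value']))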
# the callback function that is called as soon as new lines were appended to the text
# file. It stores the new lines into the data and starts the 'print_dag' dag.
def start_print_dag(lines, data, store, signal, context):
    data['lines'] = lines
    signal.start_dag('print_dag', data=data)


# the callback for printing the new lines from the 'print_dag' dag.
def print_lines(data, store, signal, context):
    print('\n'.join(data['lines']))


# create the task that watches for newly appended lines and the associated dag.
new_line_task = NewLineTriggerTask(name='new_line_task',
                                   path='/tmp/lightflow_test/watch_lines.txt',
                                   callback=start_print_dag,
                                   aggregate=None,
                                   use_existing=False,
                                   flush_existing=False)

list_dag = Dag('line_dag')
list_dag.define({new_line_task: None})

# create the print dag and set its autostart value to false.
print_task = PythonTask(name='print_task', callback=print_lines)

print_dag = Dag('print_dag', autostart=False)
print_dag.define({print_task: None})
    data['number'] = random()
    if data['number'] < 0.5:
        return Action(data, limit=[small_number_task])
    else:
        return Action(data, limit=[large_number_task])


# the callback function for the small number route
def print_small_number(data, store, signal, context):
    print('Small number: {}'.format(data['number']))


# the callback function for the large number route
def print_large_number(data, store, signal, context):
    print('Large number: {}'.format(data['number']))


# task definitions
decision_task = PythonTask(name='decision_task', callback=decide_on_successor)
small_number_task = PythonTask(name='small_number_task', callback=print_small_number)
large_number_task = PythonTask(name='large_number_task', callback=print_large_number)

# create the main DAG
d = Dag('main_dag')
d.define({decision_task: [small_number_task, large_number_task]})
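
# NOTE: the imports and the definition line of the decide_on_successor callback precede
# this fragment and are not shown. The code above appears to assume something along the
# lines of the following (listed here as an assumption, not taken from the fragment):
#
#     from random import random
#     from lightflow.models import Dag, Action
#     from lightflow.tasks import PythonTask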
def random_sleep(data, store, signal, context):
    sleep(random() * 4)


# the callback function for the task that prints the run times
def print_times(data, store, signal, context):
    dag_log = store.get(key='log.{}'.format(context.dag_name),
                        section=DataStoreDocumentSection.Meta)
    for task, fields in dag_log.items():
        print(task, 'on', fields['worker'], 'took', fields['duration'], 'seconds')


# create the main DAG
d = Dag('main_dag')

# create the sleep tasks
sleep_task_1 = PythonTask(name='sleep_task_1', callback=random_sleep)
sleep_task_2 = PythonTask(name='sleep_task_2', callback=random_sleep)
sleep_task_3 = PythonTask(name='sleep_task_3', callback=random_sleep)

# create the print task
print_task = PythonTask(name='print_task', callback=print_times)

# set up the DAG
d.define({
    sleep_task_1: sleep_task_2,
    sleep_task_2: sleep_task_3,
    sleep_task_3: print_task
})
from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback function for the task that stores the value 5
def put_data(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id}) '
          'on {worker_hostname}'.format(**context.to_dict()))

    data['value'] = 5


# the callback function for the task that prints the data
def print_value(data, store, signal, context):
    print('The value is: {}'.format(data['value']))


# create the main DAG
d = Dag('main_dag')

# create the two tasks for storing and retrieving data
put_task = PythonTask(name='put_task', callback=put_data)
print_task = PythonTask(name='print_task', callback=print_value)

# set up the graph of the DAG, in which the put_task has to be executed first,
# followed by the print_task.
d.define({put_task: print_task})
])

notify_task = NotifyTriggerTask(name='notify_task',
                                path='/tmp/lightflow_test/input',
                                callback=start_file_dag,
                                recursive=True,
                                aggregate=5,
                                use_existing=False,
                                skip_duplicate=False,
                                on_file_create=False,
                                on_file_close=True,
                                on_file_delete=False,
                                on_file_move=False)

notify_dag = Dag('notify_dag')
notify_dag.define({mkdir_task: notify_task})

# define the file handling dag.
copy_backup_task = CopyTask(name='copy_backup_task',
                            sources=lambda data, store: data['files'],
                            destination='/tmp/lightflow_test/backup')

chmod_backup_task = ChmodTask(name='chmod_backup_task',
                              paths=lambda data, store: [
                                  os.path.join('/tmp/lightflow_test/backup',
                                               os.path.basename(f))
                                  for f in data['files']],
                              permission='400')

move_output_task = MoveTask(name='move_output_task',
# the callback function that prints the content of the temporary directory.
def list_files(files, data, store, signal, context):
    for file in files:
        print('{}\n'.format(file))


mkdir_task = MakeDirTask(name='mkdir_task',
                         paths='/tmp/lightflow_test/delete_me')

files_task = BashTask(name='create_files_task',
                      command='touch a; touch b; touch c',
                      cwd='/tmp/lightflow_test/delete_me')

list_task = GlobTask(name='list_task',
                     paths='/tmp/lightflow_test/delete_me',
                     callback=list_files)

remove_task = RemoveTask(name='remove_task',
                         paths='/tmp/lightflow_test/delete_me')

# create a DAG that creates the temporary directory and adds three files,
# then lists the names of the files and removes the whole directory.
rm_dag = Dag('remove_dag')
rm_dag.define({
    mkdir_task: files_task,
    files_task: list_task,
    list_task: remove_task
})
    # signal.join_dags(dag_names)


def subsub_func(data, store, signal, context):
    print("completed")


main_task = PythonTask(name="main_task", callback=main_func, queue='cms-main-task')

main_dag_dict = {
    main_task: None,
}

main_dag = Dag("main_dag", autostart=True, queue='cms-main')
main_dag.define(main_dag_dict)

sub_task = PythonTask(name="test_task", callback=sub_func, queue='cms-primary-task')

sub_dag_dict = {
    sub_task: None,
}

sub_dag = Dag("test_dag", autostart=False, queue='cms-primary')
sub_dag.define(sub_dag_dict)

from functools import partial
OneImageTask = partial(PythonTask, queue='cms-oneimage-task')
from lightflow.models.task_data import TaskData, MultiTaskData

# TODO : make callback something else callback
# from databroker import Broker

import matplotlib.pyplot as plt
import numpy as np

from SciStreams.config import config

config['foo'] = 'bar'


def test_func(data, store, signal, context):
    print("printing config\n\n")
    print(config['foo'])
    print("done\n\n")
    config['foo'] = 'far'


# create the main DAG that spawns others
# img_dag = Dag('img_dag')
test_task = PythonTask(name="main", callback=test_func)
test_task2 = PythonTask(name="main2", callback=test_func)

test_dag_dict = {
    test_task: test_task2,
}

test_dag = Dag("test", autostart=True)
test_dag.define(test_dag_dict)
# this callback is called after the process completed. Print the line counter and the
# full output of stdout and stderr.
def proc_end(return_code, stdout_file, stderr_file, data, store, signal, context):
    print('\n')
    print('Process return code: {}'.format(return_code))
    print('Number lines: {}'.format(data['num_lines']))
    print('\n')
    print('stdout:\n{}\n'.format(stdout_file.read().decode()))
    print('stderr:\n{}\n'.format(stderr_file.read().decode()))


# create the main DAG and the bash task. Please note how the output of stderr is being
# handled by the stdout callback.
d = Dag('main_dag')

proc_task = BashTask(name='proc_task',
                     command='for i in `seq 1 10`; do echo "This is line $i"; done',
                     capture_stdout=True,
                     capture_stderr=True,
                     callback_stdout=proc_stdout,
                     callback_stderr=proc_stdout,
                     callback_process=proc_start,
                     callback_end=proc_end)

# this DAG has only a single task
d.define({proc_task: None})
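
# NOTE: the proc_start and proc_stdout callbacks referenced above are not part of this
# fragment (they would precede the code above in the original module). A minimal,
# illustrative sketch of proc_stdout only, assuming the per-line callback receives the
# captured output line first, followed by the usual data, store, signal and context
# arguments; it maintains the 'num_lines' counter that proc_end prints. The exact
# signature of proc_start is not shown in the fragment and is therefore not sketched.
def proc_stdout(line, data, store, signal, context):
    if 'num_lines' not in data:
        data['num_lines'] = 0
    data['num_lines'] += 1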