Example #1
qphiavg_plot_task = CMSTask(name="qphiavg_plot_task",
                            callback=qphiavg_plot_func)

img_dag_dict = {
    input_task: {
        to_thumb_task: None,
        parse_attributes_task: None,
        circavg_task: 'image',
        qphiavg_task: None,
    },
    parse_attributes_task: [make_calibration_task, generate_mask_task],
    #parse_attributes_task: generate_mask_task,
    # TODO : Adding these seems to affect keys that make_calibration_task gets
    make_calibration_task: {
        circavg_task: 'calibration',
        qphiavg_task: 'calibration'
    },
    generate_mask_task: {
        circavg_task: 'mask',
        qphiavg_task: 'mask'
    },
    #circavg_task: circavg_plot_task,
    circavg_task: [circavg_plot_task, peakfind_task],
    peakfind_task: peakfind_plot_task,
    qphiavg_task: qphiavg_plot_task
}

one_image_dag = Dag("img_dag", autostart=False, queue='cms-oneimage')
one_image_dag.define(img_dag_dict)
Example #2

from lightflow.models import Dag
from lightflow.tasks import PythonTask
from lightflow_filesystem import GlobTask


# the callback function that handles the returned files from the glob task. In this
# example it stores them as a list into the data under the key 'files'.
def store_files(files, data, store, signal, context):
    data['files'] = files


# the callback for the task that prints the filenames that were returned by the glob task.
def print_filenames(data, store, signal, context):
    print('\n'.join(data['files']))


# create a GlobTask to find all files with the '.file' extension and a PythonTask to
# print the result.
glob_task = GlobTask(name='glob_task',
                     paths=['/tmp/lightflow_test/'],
                     callback=store_files,
                     pattern='**/*.file',
                     recursive=True)

print_task = PythonTask(name='print_task', callback=print_filenames)

# create a DAG that runs the glob task first and then the print task.
list_dag = Dag('list_dag')
list_dag.define({glob_task: print_task})
Example #3
# the callback function for the task that adds a nested list to the list of filenames and
# then extends the list of filenames with two more entries.
def add_more_filenames(data, store, signal, context):
    store.push('filenames', ['nested_a', 'nested_b'])
    store.extend('filenames', ['file_c.spec', 'file_d.spec'])

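# store_data, modify_data and add_filename are referenced below but missing
# from this truncated snippet. The stand-ins here are assumptions that only
# illustrate the persistent data store API used above (set/get/push/extend).
from lightflow.models import Dag
from lightflow.tasks import PythonTask


def store_data(data, store, signal, context):
    # seed the persistent store with an initial list of filenames (assumed)
    store.set('filenames', ['file_a.spec', 'file_b.spec'])


def modify_data(data, store, signal, context):
    # read the list back to show that it persists across tasks (assumed)
    print(store.get('filenames'))


def add_filename(data, store, signal, context):
    # append a single filename to the stored list (assumed)
    store.push('filenames', 'file_e.spec')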

# create the main DAG
d = Dag('main_dag')

# create the tasks that call the functions above
store_task = PythonTask(name='store_task',
                        callback=store_data)

modify_task = PythonTask(name='modify_task',
                         callback=modify_data)

add_filename_task = PythonTask(name='add_filename_task',
                               callback=add_filename)

add_more_filename_task = PythonTask(name='add_more_filename_task',
                                    callback=add_more_filenames)

# set up the graph of the DAG, in which the store_task and modify_task are called
# in sequence while the add_filename_task and add_more_filename_task are run in parallel.
d.define({
    store_task: modify_task,
    modify_task: [add_filename_task, add_more_filename_task]
})
Example #4
        # give it some provenance and data
        new_data = dict(img=img)
        new_data['md'] = md.copy()
        new_data = TaskData(data=new_data)
        new_data = MultiTaskData(dataset=new_data)
        good_attr = filter_attributes(new_data['md'])
        if good_attr:
            print("got a good image")
            # one image dags should go here
            dag_name = signal.start_dag(one_image_dag, data=new_data)
            print("primary node, dag name: {}".format(dag_name))
            dag_names.append(dag_name)
        else:
            print("Bad attributes!")

    signal.join_dags(dag_names)


# create the main DAG that spawns others
#img_dag = Dag('img_dag')
primary_task = PythonTask(name="primary_task",
                          callback=primary_func,
                          queue='cms-primary-task')
primary_dag_dict = {
    primary_task: None,
}

primary_dag = Dag("primary_dag", autostart=True, queue='cms-primary')
primary_dag.define(primary_dag_dict)
Example #5
                interp_base=request['processing_info']['interp_base'])

        elif process_type == 'bin':
            logger.info("binning (not performed yet)")
            processor.bin(start_doc,
                          requester=request['requester'],
                          proc_info=request['processing_info'],
                          filepath=request['processing_info']['filepath'])

        elif process_type == 'request_interpolated_data':
            logger.info("returning interpolated data (not done yet)")
            processor.return_interp_data(
                start_doc,
                requester=request['requester'],
                filepath=request['processing_info']['filepath'])
    t2 = ttime.time()
    print(f"total processing took {t2-t1} sec")


# don't create the request anymore
#create_req_task = PythonTask(name="create_req_func", callback=create_req_func,
#queue='qas-task')
process_run_task = PythonTask(name="process_run_func",
                              callback=process_run_func,
                              queue='qas-task')

d = Dag("interpolation", queue="qas-dag")
d.define({
    process_run_task: None,
})
Example #6

from lightflow.models import Dag, Option, Parameters
from lightflow.tasks import PythonTask


# the workflow parameters; the 'filepath' option used below is reconstructed
# here, as the original snippet is truncated at this point
parameters = Parameters([
    Option('filepath', help='The path to a file', type=str),
    Option('recursive', default=True, help='Run recursively', type=bool),
    Option('iterations', default=1, help='The number of iterations', type=int),
    Option('threshold', default=0.4, help='The threshold value', type=float)
])


# the callback function that prints the value of the filepath parameter
def print_filepath(data, store, signal, context):
    print('The filepath is:', store.get('filepath'))


# the callback function that prints the value of the iterations parameter
def print_iterations(data, store, signal, context):
    print('Number of iterations:', store.get('iterations'))


# create the main DAG
d = Dag('main_dag')

# task for printing the value of the filepath parameter
print_filepath_task = PythonTask(name='print_filepath_task',
                                 callback=print_filepath)

# task for printing the value of the iterations parameter
print_iterations_task = PythonTask(name='print_iterations_task',
                                   callback=print_iterations)

# set up the graph of the DAG, in which the print_filepath_task has to be executed first,
# followed by the print_iterations_task.
d.define({print_filepath_task: print_iterations_task})
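
# The parameter values can be supplied when the workflow is started; the exact
# CLI syntax below is an assumption based on lightflow's command line client:
#
#     lightflow workflow start <workflow_name> filepath=/tmp/test iterations=3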
Example #7

import numpy as np

from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback that builds a dataset and starts the sub dag for each run; the
# def line and the loop are reconstructed, as the original snippet is truncated
def start_sub_dag(data, store, signal, context):
    dag_names = []
    for _ in range(3):  # assumed number of sub dag runs
        data['image'] = np.ones((100, 100))
        started_dag = signal.start_dag(sub_dag, data=data)
        dag_names.append(started_dag)

    signal.join_dags(dag_names)


# this callback function prints the dimensions of the received numpy array
def sub_dag_print(data, store, signal, context):
    print('Received an image with dimensions: {}'.format(data['image'].shape))


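# print_name is referenced below but not defined in this truncated snippet;
# a minimal stand-in (assumption):
def print_name(data, store, signal, context):
    print('Running task {}'.format(context.task_name))
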
init_task = PythonTask(name='init_task', callback=print_name)

call_dag_task = PythonTask(name='call_dag_task', callback=start_sub_dag)

# create the main dag that runs the init task first, followed by the call_dag task.
main_dag = Dag('main_dag')
main_dag.define({init_task: call_dag_task})

# create the tasks for the sub dag that simply prints the shape of the numpy array
# passed down from the main dag.
print_task = PythonTask(name='print_task', callback=sub_dag_print)

# create the sub dag that is being called by the main dag. In order to stop the
# system from automatically starting the dag when the workflow is run, set the autostart
# parameter to false.
sub_dag = Dag('sub_dag', autostart=False)

sub_dag.define({print_task: None})
Example #8
# acquire some basic statistics for each file as long as it is not a symbolic link
def acquire_stats(entry, data, store, signal, context):
    if not entry.is_symlink():
        data['count'] += 1
        data['size'] += entry.stat(follow_symlinks=False).st_size

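# the setup callback and the imports are missing from this truncated snippet;
# the following minimal stand-ins are assumptions
from lightflow.models import Dag
from lightflow.tasks import PythonTask
from lightflow_filesystem import WalkTask


def setup(data, store, signal, context):
    data['count'] = 0
    data['size'] = 0
    store.set('path', '/tmp/lightflow_test')  # assumed folder to traverse
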

# print the acquired statistics
def print_stats(data, store, signal, context):
    print('Statistics for folder: {}'.format(store.get('path')))
    print('Number of files: {}'.format(data['count']))
    print('Total size (bytes): {}'.format(data['size']))


# the task for setting up the data for the workflow
setup_task = PythonTask(name='setup_task', callback=setup)

# traverse a directory and call the statistics callable for each file
walk_task = WalkTask(name='walk_task',
                     path=lambda data, store: store.get('path'),
                     callback=acquire_stats,
                     recursive=True)

# print the acquired statistics
print_task = PythonTask(name='print_task', callback=print_stats)

# create a DAG that runs the setup, walk and print task consecutively.
main_dag = Dag('main_dag')
main_dag.define({setup_task: walk_task, walk_task: print_task})
Example #9
    lightflow worker start -q special

"""

from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback function for the tasks, which simply prints the context
def print_text(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id})'.format(
              **context.to_dict()))


# create the main DAG
d = Dag('main_dag')

# create the two tasks, where the first task is executed on the 'task' queue and
# the second on the 'special' queue
print_task = PythonTask(name='print_task', callback=print_text)

print_special = PythonTask(name='print_special',
                           callback=print_text,
                           queue='special')

# set up the graph of the DAG, in which the print_task has to be executed first,
# followed by the print_special.
d.define({print_task: print_special})
Example #10
array, and as this is not the case, aborts the workflow gracefully. The abort is
accomplished by raising the AbortWorkflow exception.

"""

from lightflow.models import Dag, AbortWorkflow
from lightflow.tasks import PythonTask


# the callback function for the task that stores the array of three image file names
def collect_data(data, store, signal, context):
    data['images'] = ['img_001.tif', 'img_002.tif', 'img_003.tif']


# the callback function for the task that checks the number of stored file names
def check_data(data, store, signal, context):
    if len(data['images']) < 5:
        raise AbortWorkflow('At least 5 images are required')


# create the main DAG
d = Dag('main_dag')

# create the two tasks for storing and checking data
collect_task = PythonTask(name='collect_task', callback=collect_data)

check_task = PythonTask(name='check_task', callback=check_data)

# set up the graph of the DAG
d.define({collect_task: check_task})
Example #11

from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback that prints the task context and increments a counter in the
# task data; the def line is reconstructed, as the original snippet is truncated
def inc_number(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id}) '
          'on {worker_hostname}'.format(**context.to_dict()))

    if 'value' not in data:
        data['value'] = 0

    data['value'] = data['value'] + 1
    print('This is task #{}'.format(data['value']))


# create the main DAG
d = Dag('main_dag')

# create the 3 tasks that increment a number
task_1 = PythonTask(name='task_1',
                    callback=inc_number)

task_2 = PythonTask(name='task_2',
                    callback=inc_number)

task_3 = PythonTask(name='task_3',
                    callback=inc_number)


# set up the graph of the DAG as a linear sequence of tasks
d.define({
    task_1: task_2,
    task_2: task_3
})
Example #12

from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback that subtracts its two aliased inputs; the def line and the
# first half of the expression are reconstructed, and get_by_alias is an
# assumed part of the MultiTaskData API
def subtract_data(data, store, signal, context):
    data['value'] = data.get_by_alias('first')['value'] - data.get_by_alias(
        'second')['value']


# create the main DAG based on the diagram above
d = Dag('main_dag')

put_task = PythonTask(name='put_task', callback=put_data)
square_task = PythonTask(name='square_task', callback=square_data)
multiply_task = PythonTask(name='multiply_task', callback=multiply_data)
subtract_task = PythonTask(name='subtract_task', callback=subtract_data)

print_task_1 = PythonTask(name='print_task_1', callback=print_data)
print_task_2 = PythonTask(name='print_task_2', callback=print_data)
print_task_3 = PythonTask(name='print_task_3', callback=print_data)
print_task_4 = PythonTask(name='print_task_4', callback=print_data)

d.define({
    put_task: {
        print_task_1: None,
        square_task: None,
        multiply_task: None,
        subtract_task: 'first'
    },
    square_task: [print_task_2, multiply_task],
    multiply_task: {
        print_task_3: None,
        subtract_task: 'second'
    },
    subtract_task: print_task_4
})
Example #13
worker consuming the 'main' queue, the DAG and the print_task on the second worker, and
the print_memory task on the third worker.

"""

from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback function for the tasks, which simply prints the context
def print_text(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id})'.format(
              **context.to_dict()))


# create the main DAG and have it scheduled on the 'graph' queue
d = Dag('main_dag', queue='graph')

# create the two tasks, where the first task is executed on the default 'task'
# queue while the second is processed on the 'high_memory' queue
print_task = PythonTask(name='print_task', callback=print_text)

print_memory = PythonTask(name='print_memory',
                          callback=print_text,
                          queue='high_memory')

# set up the graph of the DAG, in which the print_task has to be executed first,
# followed by the print_memory task.
d.define({print_task: print_memory})
Example #14

import matplotlib.pyplot as plt
import numpy as np

from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback that plots the image on a log scale and saves it to disk; the
# def line is reconstructed, as the original snippet is truncated
def plot_image(data, store, signal, context):
    ''' Store the image.'''
    plot_path = "/home/xf11bm/test.png"
    img = data['image']
    print(img)
    print(np)
    plt.figure(0)
    plt.clf()
    plt.imshow(np.log(img))
    plt.savefig(plot_path)


# create the main DAG that spawns others
main_dag = Dag('main_dag')
main_task = PythonTask(name="main", callback=main)
main_dag.define({
    main_task: None,
})

from SciStreams.XS_Streams import filter_attributes, pick_allowed_detectors

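# put_data and grab_image are referenced below but missing from this truncated
# snippet; hedged stand-ins:
def put_data(data, store, signal, context):
    data['image'] = np.ones((10, 10))  # placeholder image (assumption)


def grab_image(data, store, signal, context):
    # in the original workflow this presumably fetched an image, e.g. from
    # databroker; here the placeholder image is simply passed through
    pass
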
# the secondary circular average task
# create the two tasks for storing and retrieving data
put_task = PythonTask(name='put_task', callback=put_data)
grab_image_task = PythonTask(name='grab_image', callback=grab_image)
plot_image_task = PythonTask(name='plot_image', callback=plot_image)

# set up the graph of the DAG, in which the put_task is executed first,
# followed by the grab_image and plot_image tasks.
circavg_dag_dict = {
    put_task: grab_image_task,
    grab_image_task: plot_image_task,
}
Example #15

from lightflow.models import Dag
from lightflow.tasks import PythonTask
# the import for ChunkingTask is not shown in the original snippet


# the callback that builds the list of entries to be chunked; the def line and
# the leading list entries are reconstructed, as the snippet is truncated
def make_list(data, store, signal, context):
    data['my_list'] = [
        'blah_0001.dat', '|', 'blah_0002.dat', 'blah2_0001.dat']


def print_list(data, store, signal, context):
    print(context.task_name)
    print('==================================')
    print(data['my_list'])
    print('==================================')


print_dag = Dag('print_dag', autostart=False)

print_list_task = PythonTask(name='print_list',
                             callback=print_list)

print_dag.define({print_list_task: None})


chunk_dag = Dag('chunk_dag')

make_list_task = PythonTask(name='make_list',
                            callback=make_list)

chunk_task = ChunkingTask(name='chunk_me',
                          dag_name='print_dag',
                          force_consecutive=True,
                          flush_on_end=False,
                          match_pattern='(?P<match>[0-9A-Za-z]*)_',
                          in_key='my_list')

# a second chunking task with a distinct name and a pattern without a
# capture group
chunk_task2 = ChunkingTask(name='chunk_me2',
                           dag_name='print_dag',
                           force_consecutive=True,
                           flush_on_end=False,
                           match_pattern='[0-9A-Za-z]*_',
                           in_key='my_list')

chunk_dag.define({make_list_task: [chunk_task, chunk_task2]})

Example #16

# the callback function that prints the new PV value after it has changed.
def pv_printout(data, store, signal, context):
    print('PV {} has value: {}'.format(data['pv_name'], data['pv_value']))

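# startup and pv_callback are referenced below but missing from this truncated
# snippet. These stand-ins are assumptions; in particular, the import path for
# PvTriggerTask and the pv_callback signature are assumed, not confirmed.
from lightflow.models import Dag
from lightflow.tasks import PythonTask
from lightflow_epics import PvTriggerTask  # assumed import path


def startup(data, store, signal, context):
    store.set('pvname', 'XF:EXAMPLE:PV')  # hypothetical PV name


def pv_callback(data, store, signal, context, **kwargs):
    # forward the PV name and value to the action dag (assumed payload keys)
    data['pv_name'] = kwargs.get('pvname')
    data['pv_value'] = kwargs.get('value')
    signal.start_dag('pv_action_dag', data=data)
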

# set up the PV monitoring dag.
pv_monitor_dag = Dag('pv_monitor_dag')

startup_task = PythonTask(name='startup_task',
                          callback=startup)

monitor_task = PvTriggerTask(name='monitor_task',
                             pv_name=lambda data, data_store: data_store.get('pvname'),
                             callback=pv_callback,
                             event_trigger_time=0.1,
                             stop_polling_rate=2,
                             skip_initial_callback=True)

pv_monitor_dag.define({startup_task: monitor_task})


# set up the PV action dag.
pv_action_dag = Dag('pv_action_dag', autostart=False)

printout_task = PythonTask(name='printout_task',
                           callback=pv_printout)

pv_action_dag.define({printout_task: None})
Example #17

# the callback that prints the task context; the def line is reconstructed,
# as the original snippet is truncated
def print_context(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id})'.format(
              **context.to_dict()))

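# start_all, bash_stdout, stop_noskip and stop are missing from this truncated
# snippet. The stand-ins below are assumptions; StopTask and its
# skip_successors flag in particular are assumed API details.
from lightflow.models import Dag, StopTask
from lightflow.tasks import PythonTask, BashTask


def start_all(data, store, signal, context):
    print('starting all three paths')


def bash_stdout(line, data, store, signal, context):
    print(line)


def stop_noskip(data, store, signal, context):
    # stop this path but do not skip the successor tasks (assumed flag)
    raise StopTask('stop without skipping', skip_successors=False)


def stop(data, store, signal, context):
    # stop this path and skip its successor tasks
    raise StopTask('stop and skip successors')
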

# create the main DAG
d = Dag('main_dag')

start_task = PythonTask(name='start_task', callback=start_all)

bash_task = BashTask(name='bash_task',
                     command='for i in `seq 1 10`; do echo "$i"; done',
                     callback_stdout=bash_stdout)

stop_noskip_task = PythonTask(name='stop_noskip_task', callback=stop_noskip)

stop_task = PythonTask(name='stop_task', callback=stop)

print_task_1 = PythonTask(name='print_task_1', callback=print_context)

print_task_2 = PythonTask(name='print_task_2', callback=print_context)

print_task_3 = PythonTask(name='print_task_3', callback=print_context)

# set up the graph of the DAG with a start task and three paths with different stop
# conditions.
d.define({
    start_task: [bash_task, stop_noskip_task, stop_task],
    bash_task: print_task_1,
    stop_noskip_task: print_task_2,
    stop_task: print_task_3
})
Example #18
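# put_data, branch_with_limit and print_value are missing from this truncated
# snippet; the stand-ins below are assumptions. branch_with_limit follows the
# Action(limit=...) pattern shown in Example #20.
from lightflow.models import Dag, Action
from lightflow.tasks import PythonTask


def put_data(data, store, signal, context):
    data['value'] = 5


def branch_with_limit(data, store, signal, context):
    # only continue with two of the three lanes (illustrative choice); the
    # lane tasks are defined further down and resolved at call time
    return Action(data, limit=[lane1_print_task, lane3_print_task])


def print_value(data, store, signal, context):
    print('The value is: {}'.format(data['value']))
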
# create the main DAG
d = Dag('main_dag')

# task for storing the data
put_task = PythonTask(name='put_task', callback=put_data)

# task that limits the branching to certain successor tasks
branch_task = PythonTask(name='branch_task', callback=branch_with_limit)

# first task, first lane, simply prints the value stored in the put_task
lane1_print_task = PythonTask(name='lane1_print_task', callback=print_value)

# first task, second lane, simply prints the value stored in the put_task
lane2_print_task = PythonTask(name='lane2_print_task', callback=print_value)

# first task, third lane, simply prints the value stored in the put_task
lane3_print_task = PythonTask(name='lane3_print_task', callback=print_value)

# joins all three lanes together and waits for the predecessor tasks to finish processing
join_task = PythonTask(name='t_join_me', callback=print_value)

# set up the graph of the DAG as illustrated above. Please note how a list of tasks
# defines tasks that are run in parallel (branched out).
d.define({
    put_task: branch_task,
    branch_task: [lane1_print_task, lane2_print_task, lane3_print_task],
    lane1_print_task: join_task,
    lane2_print_task: join_task,
    lane3_print_task: join_task
})

Example #19

# the callback function that is called as soon as new lines are appended to the
# text file. It stores the new lines in the data and starts the 'print_dag' dag.
def start_print_dag(lines, data, store, signal, context):
    data['lines'] = lines
    signal.start_dag('print_dag', data=data)


# the callback for printing the new lines from the 'print_dag' dag.
def print_lines(data, store, signal, context):
    print('\n'.join(data['lines']))


# create the task that watches for newly appended lines and the associated dag.
new_line_task = NewLineTriggerTask(name='new_line_task',
                                   path='/tmp/lightflow_test/watch_lines.txt',
                                   callback=start_print_dag,
                                   aggregate=None,
                                   use_existing=False,
                                   flush_existing=False)

list_dag = Dag('line_dag')
list_dag.define({new_line_task: None})

# create the print dag and set its autostart value to false.
print_task = PythonTask(name='print_task', callback=print_lines)

print_dag = Dag('print_dag', autostart=False)
print_dag.define({print_task: None})
Example #20

from random import random

from lightflow.models import Dag, Action
from lightflow.tasks import PythonTask


# the callback that draws a random number and routes execution to exactly one
# successor by returning an Action whose limit names the allowed task
def decide_on_successor(data, store, signal, context):
    data['number'] = random()
    if data['number'] < 0.5:
        return Action(data, limit=[small_number_task])
    else:
        return Action(data, limit=[large_number_task])


# the callback function for the small number route
def print_small_number(data, store, signal, context):
    print('Small number: {}'.format(data['number']))


# the callback function for the large number route
def print_large_number(data, store, signal, context):
    print('Large number: {}'.format(data['number']))


# task definitions
decision_task = PythonTask(name='decision_task', callback=decide_on_successor)

small_number_task = PythonTask(name='small_number_task',
                               callback=print_small_number)

large_number_task = PythonTask(name='large_number_task',
                               callback=print_large_number)

# create the main DAG
d = Dag('main_dag')

d.define({decision_task: [small_number_task, large_number_task]})
Example #21

from random import random
from time import sleep

from lightflow.models import Dag
from lightflow.tasks import PythonTask
# the import path for DataStoreDocumentSection is an assumption
from lightflow.models.datastore import DataStoreDocumentSection


# the callback function for tasks that sleep for a random amount of time
def random_sleep(data, store, signal, context):
    sleep(random() * 4)


# the callback function for the task that prints the run times
def print_times(data, store, signal, context):
    dag_log = store.get(key='log.{}'.format(context.dag_name),
                        section=DataStoreDocumentSection.Meta)
    for task, fields in dag_log.items():
        print(task, 'on', fields['worker'], 'took', fields['duration'],
              'seconds')


# create the main DAG
d = Dag('main_dag')

# create the sleep tasks
sleep_task_1 = PythonTask(name='sleep_task_1', callback=random_sleep)
sleep_task_2 = PythonTask(name='sleep_task_2', callback=random_sleep)
sleep_task_3 = PythonTask(name='sleep_task_3', callback=random_sleep)

# create the print task
print_task = PythonTask(name='print_task', callback=print_times)

# set up the DAG
d.define({
    sleep_task_1: sleep_task_2,
    sleep_task_2: sleep_task_3,
    sleep_task_3: print_task
})
Example #22
from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback function for the task that stores the value 5
def put_data(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id}) '
          'on {worker_hostname}'.format(**context.to_dict()))

    data['value'] = 5


# the callback function for the task that prints the data
def print_value(data, store, signal, context):
    print('The value is: {}'.format(data['value']))


# create the main DAG
d = Dag('main_dag')

# create the two tasks for storing and retrieving data
put_task = PythonTask(name='put_task', callback=put_data)

print_task = PythonTask(name='print_task', callback=print_value)

# set up the graph of the DAG, in which the put_task has to be executed first,
# followed by the print_task.
d.define({put_task: print_task})
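
# To run this minimal example, add its module to a workflow directory listed
# in the lightflow configuration and start it by name with lightflow's CLI:
#
#     lightflow workflow start <workflow_name>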
Example #23
                         ])

notify_task = NotifyTriggerTask(name='notify_task',
                                path='/tmp/lightflow_test/input',
                                callback=start_file_dag,
                                recursive=True,
                                aggregate=5,
                                use_existing=False,
                                skip_duplicate=False,
                                on_file_create=False,
                                on_file_close=True,
                                on_file_delete=False,
                                on_file_move=False)

notify_dag = Dag('notify_dag')
notify_dag.define({mkdir_task: notify_task})

# define the file handling dag.
copy_backup_task = CopyTask(name='copy_backup_task',
                            sources=lambda data, store: data['files'],
                            destination='/tmp/lightflow_test/backup')

chmod_backup_task = ChmodTask(
    name='chmod_backup_task',
    paths=lambda data, store: [
        os.path.join('/tmp/lightflow_test/backup', os.path.basename(f))
        for f in data['files']
    ],
    permission='400')

move_output_task = MoveTask(name='move_output_task',
                            # the remaining arguments are truncated in the
                            # original snippet; these values are assumptions
                            sources=lambda data, store: data['files'],
                            destination='/tmp/lightflow_test/output')
Example #24

# the callback function that prints the content of the temporary directory.
def list_files(files, data, store, signal, context):
    for file in files:
        print('{}\n'.format(file))


mkdir_task = MakeDirTask(name='mkdir_task',
                         paths='/tmp/lightflow_test/delete_me')

files_task = BashTask(name='create_files_task',
                      command='touch a; touch b; touch c',
                      cwd='/tmp/lightflow_test/delete_me')

list_task = GlobTask(name='list_task',
                     paths='/tmp/lightflow_test/delete_me',
                     callback=list_files)

remove_task = RemoveTask(name='remove_task',
                         paths='/tmp/lightflow_test/delete_me')

# create a DAG that creates a temporary directory and adds three files, then
# lists the names of the files and removes the whole directory.
rm_dag = Dag('remove_dag')
rm_dag.define({
    mkdir_task: files_task,
    files_task: list_task,
    list_task: remove_task
})
Example #25
    #signal.join_dags(dag_names)


def subsub_func(data, store, signal, context):
    print("completed")


main_task = PythonTask(name="main_task",
                       callback=main_func,
                       queue='cms-main-task')
main_dag_dict = {
    main_task: None,
}

main_dag = Dag("main_dag", autostart=True, queue='cms-main')
main_dag.define(main_dag_dict)

sub_task = PythonTask(name="test_task",
                      callback=sub_func,
                      queue='cms-primary-task')

sub_dag_dict = {
    sub_task: None,
}

sub_dag = Dag("test_dag", autostart=False, queue='cms-primary')
sub_dag.define(sub_dag_dict)

from functools import partial

OneImageTask = partial(PythonTask, queue='cms-oneimage-task')
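
# OneImageTask now behaves exactly like PythonTask with the queue preset; a
# hypothetical usage:
one_image_task = OneImageTask(name='one_image_task', callback=subsub_func)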
Example #26

from lightflow.models import Dag
from lightflow.models.task_data import TaskData, MultiTaskData
from lightflow.tasks import PythonTask

# TODO : make callback something else callback
#
from databroker import Broker
import matplotlib.pyplot as plt
import numpy as np

from SciStreams.config import config

config['foo'] = 'bar'


def test_func(data, store, signal, context):
    print("printing config\n\n")
    print(config['foo'])
    print("done\n\n")
    config['foo'] = 'far'


# create the main DAG that spawns others
#img_dag = Dag('img_dag')
test_task = PythonTask(name="main", callback=test_func)
test_task2 = PythonTask(name="main2", callback=test_func)
test_dag_dict = {
    test_task: test_task2,
}

test_dag = Dag("test", autostart=True)
test_dag.define(test_dag_dict)
Example #27

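# proc_start and proc_stdout are referenced below but missing from this
# truncated snippet. The stand-ins are assumptions; their signatures are
# inferred from the proc_end signature below.
from lightflow.models import Dag
from lightflow.tasks import BashTask


def proc_start(pid, data, store, signal, context):
    # initialise the line counter when the process starts
    data['num_lines'] = 0


def proc_stdout(line, data, store, signal, context):
    # count every line written to stdout (or stderr, see below)
    data['num_lines'] += 1
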
# this callback is called after the process completed. Print the line counter and the
# full output of stdout and stderr.
def proc_end(return_code, stdout_file, stderr_file, data, store, signal,
             context):
    print('\n')
    print('Process return code: {}'.format(return_code))
    print('Number lines: {}'.format(data['num_lines']))
    print('\n')
    print('stdout:\n{}\n'.format(stdout_file.read().decode()))
    print('stderr:\n{}\n'.format(stderr_file.read().decode()))


# create the main DAG and the bash task. Please note how the output of stderr is being
# handled by the stdout callback.
d = Dag('main_dag')

proc_task = BashTask(
    name='proc_task',
    command='for i in `seq 1 10`; do echo "This is line $i"; done',
    capture_stdout=True,
    capture_stderr=True,
    callback_stdout=proc_stdout,
    callback_stderr=proc_stdout,
    callback_process=proc_start,
    callback_end=proc_end)

# this DAG has only a single task
d.define({proc_task: None})