Example #1
def plot_image(data, store, signal, context):
    """Plot the image and save it to a file."""
    plot_path = "/home/xf11bm/test.png"
    img = data['image']
    print(img)
    print(np)
    plt.figure(0)
    plt.clf()
    plt.imshow(np.log(img))
    plt.savefig(plot_path)


# create the main DAG that spawns others
main_dag = Dag('main_dag')
main_task = PythonTask(name="main", callback=main)
main_dag.define({
    main_task: None,
})

from SciStreams.XS_Streams import filter_attributes, pick_allowed_detectors

# the secondary circular average task
# create the tasks for storing, retrieving, and plotting the data
put_task = PythonTask(name='put_task', callback=put_data)
grab_image_task = PythonTask(name='grab_image', callback=grab_image)
plot_image_task = PythonTask(name='plot_image', callback=plot_image)

# set up the graph of the DAG, in which the put_task has to be executed first,
# followed by the image retrieval and plotting tasks.
circavg_dag_dict = {
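# NOTE: the original snippet is truncated mid-definition. A minimal sketch of
# how the dictionary and DAG might be completed, assuming put_task runs first,
# followed by grab_image_task and then plot_image_task (the exact wiring is an
# assumption, not part of the source):
#     put_task: grab_image_task,
#     grab_image_task: plot_image_task,
# }
# circavg_dag = Dag('circavg_dag')
# circavg_dag.define(circavg_dag_dict)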
Example #2
# multiply two values by using get_by_index() to reference the second dataset
# in the list of all datasets. The second dataset is referenced by its index==1.
def multiply_data(data, store, signal, context):
    data['value'] = data['value'] * data.get_by_index(1)['value']


# subtract two values by using the aliases of the two datasets and, for
# illustration purposes, two different access methods: get_by_alias() and the
# call shorthand data(alias)
def subtract_data(data, store, signal, context):
    data['value'] = data.get_by_alias('first')['value'] - data(
        'second')['value']


# create the main DAG based on the diagram above
d = Dag('main_dag')

put_task = PythonTask(name='put_task', callback=put_data)
square_task = PythonTask(name='square_task', callback=square_data)
multiply_task = PythonTask(name='multiply_task', callback=multiply_data)
subtract_task = PythonTask(name='subtract_task', callback=subtract_data)

print_task_1 = PythonTask(name='print_task_1', callback=print_data)
print_task_2 = PythonTask(name='print_task_2', callback=print_data)
print_task_3 = PythonTask(name='print_task_3', callback=print_data)
print_task_4 = PythonTask(name='print_task_4', callback=print_data)

d.define({
    put_task: {
        print_task_1: None,
        square_task: None,
        multiply_task: None,
        subtract_task: 'first'
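# NOTE: the original snippet is truncated mid-definition. A minimal sketch of how
# the graph might continue, assuming the squared value reaches subtract_task under
# the alias 'second', multiply_task receives a second dataset from square_task,
# and each result is printed (the wiring below is an assumption, not the source):
#     },
#     square_task: {print_task_2: None, multiply_task: None, subtract_task: 'second'},
#     multiply_task: print_task_3,
#     subtract_task: print_task_4
# })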
Example #3
from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback function for the tasks
def print_info(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id})'.format(
              **context.to_dict()))


# create the main DAG
d = Dag('main_dag')

# task from which the three parallel lanes branch out
branch_task = PythonTask(name='branch_task', callback=print_info)

# first task, first lane
lane1_print_task = PythonTask(name='lane1_print_task', callback=print_info)

# first task, second lane
lane2_print_task = PythonTask(name='lane2_print_task', callback=print_info)

# first task, third lane
lane3_print_task = PythonTask(name='lane3_print_task', callback=print_info)

# joins all three lanes together and waits for the predecessor tasks to finish processing
join_task = PythonTask(name='t_join_me', callback=print_info)

# set up the graph of the DAG as illustrated above. Please note how a list of tasks
# defines tasks that are run in parallel (branched out).
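# NOTE: the original snippet is truncated before the graph definition. A minimal
# sketch, assuming branch_task fans out to the three lanes and all three lanes
# join in join_task (the wiring is an assumption based on the comments above):
d.define({
    branch_task: [lane1_print_task, lane2_print_task, lane3_print_task],
    lane1_print_task: join_task,
    lane2_print_task: join_task,
    lane3_print_task: join_task
})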
Example #4
from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback function for the task that stores the value 5
def put_data(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id}) '
          'on {worker_hostname}'.format(**context.to_dict()))

    data['value'] = 5


# the callback function for the task that prints the data
def print_value(data, store, signal, context):
    print('The value is: {}'.format(data['value']))


# create the main DAG
d = Dag('main_dag')

# create the two tasks for storing and retrieving data
put_task = PythonTask(name='put_task', callback=put_data)

print_task = PythonTask(name='print_task', callback=print_value)

# set up the graph of the DAG, in which the put_task has to be executed first,
# followed by the print_task.
d.define({put_task: print_task})
Example #5
        # give it some provenance and data
        new_data = dict(img=img)
        new_data['md'] = md.copy()
        new_data = TaskData(data=new_data)
        new_data = MultiTaskData(dataset=new_data)
        good_attr = filter_attributes(new_data['md'])
        if good_attr:
            print("got a good image")
            # one image dags should go here
            dag_name = signal.start_dag(one_image_dag, data=new_data)
            print("primary node, dag name: {}".format(dag_name))
            dag_names.append(dag_name)
        else:
            print("Bad attributes!")

    signal.join_dags(dag_names)


# create the main DAG that spawns others
#img_dag = Dag('img_dag')
primary_task = PythonTask(name="primary_task",
                          callback=primary_func,
                          queue='cms-primary-task')
primary_dag_dict = {
    primary_task: None,
}

primary_dag = Dag("primary_dag", autostart=True, queue='cms-primary')
primary_dag.define(primary_dag_dict)
Example #6
# the callback function for the branch task; it limits the branching to the
# print tasks in lane 1 and lane 2. The successor tasks can be specified by either their
# name or the task object itself. Both methods are shown here.
def branch_with_limit(data, store, signal, context):
    return Action(data, limit=[lane1_print_task, 'lane2_print_task'])


# the callback function for tasks that print the data
def print_value(data, store, signal, context):
    print('Task {} and value {}'.format(context.task_name, data['value']))


# create the main DAG
d = Dag('main_dag')

# task for storing the data
put_task = PythonTask(name='put_task', callback=put_data)

# task that limits the branching to certain successor tasks
branch_task = PythonTask(name='branch_task', callback=branch_with_limit)

# first task, first lane, simply prints the value stored in the put_task
lane1_print_task = PythonTask(name='lane1_print_task', callback=print_value)

# first task, second lane, simply prints the value stored in the put_task
lane2_print_task = PythonTask(name='lane2_print_task', callback=print_value)

# first task, third lane, simply prints the value stored in the put_task
lane3_print_task = PythonTask(name='lane3_print_task', callback=print_value)

# joins all three lanes together and waits for the predecessor tasks to finish processing
join_task = PythonTask(name='t_join_me', callback=print_value)
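
# NOTE: the original snippet is truncated before the graph definition. A minimal
# sketch, assuming put_task feeds branch_task, which fans out to the three lanes
# that are then joined in join_task (the wiring is an assumption):
d.define({
    put_task: branch_task,
    branch_task: [lane1_print_task, lane2_print_task, lane3_print_task],
    lane1_print_task: join_task,
    lane2_print_task: join_task,
    lane3_print_task: join_task
})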
Example #7
    data['number'] = random()
    if data['number'] < 0.5:
        return Action(data, limit=[small_number_task])
    else:
        return Action(data, limit=[large_number_task])


# the callback function for the small number route
def print_small_number(data, store, signal, context):
    print('Small number: {}'.format(data['number']))


# the callback function for the large number route
def print_large_number(data, store, signal, context):
    print('Large number: {}'.format(data['number']))


# task definitions
decision_task = PythonTask(name='decision_task', callback=decide_on_successor)

small_number_task = PythonTask(name='small_number_task',
                               callback=print_small_number)

large_number_task = PythonTask(name='large_number_task',
                               callback=print_large_number)

# create the main DAG
d = Dag('main_dag')

d.define({decision_task: [small_number_task, large_number_task]})
Example #8
def stop(data, store, signal, context):
    raise StopTask('Stop task {} and all successor tasks'.format(
        context.task_name))


# callback for printing the current task context
def print_context(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id})'.format(
              **context.to_dict()))


# create the main DAG
d = Dag('main_dag')

start_task = PythonTask(name='start_task', callback=start_all)

bash_task = BashTask(name='bash_task',
                     command='for i in `seq 1 10`; do echo "$i"; done',
                     callback_stdout=bash_stdout)

stop_noskip_task = PythonTask(name='stop_noskip_task', callback=stop_noskip)

stop_task = PythonTask(name='stop_task', callback=stop)

print_task_1 = PythonTask(name='print_task_1', callback=print_context)

print_task_2 = PythonTask(name='print_task_2', callback=print_context)

print_task_3 = PythonTask(name='print_task_3', callback=print_context)
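
# NOTE: the original snippet is truncated before the graph definition. One
# plausible wiring, consistent with the task names above but purely an
# assumption, would place the stop tasks between the start task and the print
# tasks, so that raising StopTask prevents the affected successors from running:
# d.define({
#     start_task: [bash_task, stop_noskip_task, stop_task],
#     bash_task: print_task_1,
#     stop_noskip_task: print_task_2,
#     stop_task: print_task_3
# })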
Example #9
from lightflow.models import Dag, Parameters, Option
from lightflow.tasks import PythonTask
from lightflow.models.task_data import TaskData, MultiTaskData


def test_func(data, store, signal, context):
    import logging
    logging.basicConfig(filename='/home/xf11bm/SciStreams/SciStreams/test.log',
                        level=logging.DEBUG)
    logging.debug('Testing a log write again')
    #logging.info('So should this')
    #logging.warning('And this, too')


test_task = PythonTask(name="test func", callback=test_func, queue='test')

test_dag_dict = {
    test_task: None,
}

test_dag = Dag("test", autostart=True)
test_dag.define(test_dag_dict)
Example #10
    dag_names = []
    for i in range(5):
        sleep(1)
        data['image'] = np.ones((100, 100))
        started_dag = signal.start_dag(sub_dag, data=data)
        dag_names.append(started_dag)

    signal.join_dags(dag_names)


# this callback function prints the dimensions of the received numpy array
def sub_dag_print(data, store, signal, context):
    print('Received an image with dimensions: {}'.format(data['image'].shape))


init_task = PythonTask(name='init_task', callback=print_name)

call_dag_task = PythonTask(name='call_dag_task', callback=start_sub_dag)

# create the main dag that runs the init task first, followed by the call_dag task.
main_dag = Dag('main_dag')
main_dag.define({init_task: call_dag_task})

# create the task for the sub dag, which simply prints the shape of the numpy array
# passed down from the main dag.
print_task = PythonTask(name='print_task', callback=sub_dag_print)

# create the sub dag that is being called by the main dag. In order to stop the
# system from automatically starting the dag when the workflow is run, set the autostart
# parameter to false.
sub_dag = Dag('sub_dag', autostart=False)
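
# NOTE: the original snippet is truncated here; the graph definition of the sub
# dag is missing. A minimal sketch, assuming the sub dag only runs the print task:
sub_dag.define({print_task: None})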
Example #11
def make_list(data, store, signal, context):
    print(context.task_name)
    data['my_list'] = ['asdf_0001.dat', 'asdf_0002.dat', 'sdfa_0001.dat', 'sdfa_0002.dat', 'sdfa_0003.dat',
                       'blah_0001.dat', '|', 'blah_0002.dat', 'blah2_0001.dat']


def print_list(data, store, signal, context):
    print(context.task_name)
    print('==================================')
    print(data['my_list'])
    print('==================================')


print_dag = Dag('print_dag', autostart=False)

print_list_task = PythonTask(name='print_list',
                             callback=print_list)

print_dag.define({print_list_task: None})


chunk_dag = Dag('chunk_dag')

make_list_task = PythonTask(name='make_list',
                            callback=make_list)

chunk_task = ChunkingTask(name='chunk_me', dag_name='print_dag', force_consecutive=True, flush_on_end=False,
                          match_pattern='(?P<match>[0-9A-Za-z]*)_', in_key='my_list')

chunk_task2 = ChunkingTask(name='chunk_me', dag_name='print_dag', force_consecutive=True, flush_on_end=False,
                           match_pattern='[0-9A-Za-z]*_', in_key='my_list')
Example #12
array, and as this is not the case aborts the workflow gracefully. The abort is
accomplished by raising the AbortWorkflow exception.

"""

from lightflow.models import Dag, AbortWorkflow
from lightflow.tasks import PythonTask


# the callback function for the task that stores the array of three image file names
def collect_data(data, store, signal, context):
    data['images'] = ['img_001.tif', 'img_002.tif', 'img_003.tif']


# the callback function for the task that checks the number of stored file names
def check_data(data, store, signal, context):
    if len(data['images']) < 5:
        raise AbortWorkflow('At least 5 images are required')


# create the main DAG
d = Dag('main_dag')

# create the two tasks for storing and checking data
collect_task = PythonTask(name='collect_task', callback=collect_data)

check_task = PythonTask(name='check_task', callback=check_data)

# set up the graph of the DAG
d.define({collect_task: check_task})
Example #13
    lightflow worker start -q special

"""

from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback function for the tasks that simply print the context
def print_text(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id})'.format(
              **context.to_dict()))


# create the main DAG
d = Dag('main_dag')

# create the two tasks, where the first task is executed on the 'task' queue and the
# second task on the 'special' queue
print_task = PythonTask(name='print_task', callback=print_text)

print_special = PythonTask(name='print_special',
                           callback=print_text,
                           queue='special')

# set up the graph of the DAG, in which the print_task has to be executed first,
# followed by the print_special.
d.define({print_task: print_special})
Example #14
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id}) '
          'on {worker_hostname}'.format(**context.to_dict()))

    if 'value' not in data:
        data['value'] = 0

    data['value'] = data['value'] + 1
    print('This is task #{}'.format(data['value']))


# create the main DAG
d = Dag('main_dag')

# create the 3 tasks that increment a number
task_1 = PythonTask(name='task_1',
                    callback=inc_number)

task_2 = PythonTask(name='task_2',
                    callback=inc_number)

task_3 = PythonTask(name='task_3',
                    callback=inc_number)


# set up the graph of the DAG as a linear sequence of tasks
d.define({
    task_1: task_2,
    task_2: task_3
})
Example #15
        data['pv_name'] = pvname
        data['pv_value'] = value

        signal.start_dag('pv_action_dag', data=data)
        return data


# the callback function that prints the new PV value after it has changed.
def pv_printout(data, store, signal, context):
    print('PV {} has value: {}'.format(data['pv_name'], data['pv_value']))


# set up the PV monitoring dag.
pv_monitor_dag = Dag('pv_monitor_dag')

startup_task = PythonTask(name='startup_task',
                          callback=startup)

monitor_task = PvTriggerTask(name='monitor_task',
                             pv_name=lambda data, data_store: data_store.get('pvname'),
                             callback=pv_callback,
                             event_trigger_time=0.1,
                             stop_polling_rate=2,
                             skip_initial_callback=True)

pv_monitor_dag.define({startup_task: monitor_task})


# set up the PV action dag.
pv_action_dag = Dag('pv_action_dag', autostart=False)

printout_task = PythonTask(name='printout_task',
                           callback=pv_printout)  # callback assumed: pv_printout defined above
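
# NOTE: the original snippet is truncated here; the graph definition of the
# action dag is missing. A minimal sketch, assuming it only runs the printout task:
pv_action_dag.define({printout_task: None})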

Example #16
# the callback function that is called as soon as new lines are appended to the
# text file. It stores the new lines into the data and starts the 'print_dag' dag.
def start_print_dag(lines, data, store, signal, context):
    data['lines'] = lines
    signal.start_dag('print_dag', data=data)


# the callback for printing the new lines from the 'print_dag' dag.
def print_lines(data, store, signal, context):
    print('\n'.join(data['lines']))


# create the task that watches for newly appended lines and the associated dag.
new_line_task = NewLineTriggerTask(name='new_line_task',
                                   path='/tmp/lightflow_test/watch_lines.txt',
                                   callback=start_print_dag,
                                   aggregate=None,
                                   use_existing=False,
                                   flush_existing=False)

list_dag = Dag('line_dag')
list_dag.define({new_line_task: None})

# create the print dag and set its autostart value to false.
print_task = PythonTask(name='print_task', callback=print_lines)

print_dag = Dag('print_dag', autostart=False)
print_dag.define({print_task: None})
Example #17
# acquire some basic statistics for each file as long as it is not a symbolic link
def acquire_stats(entry, data, store, signal, context):
    if not entry.is_symlink():
        data['count'] += 1
        data['size'] += entry.stat(follow_symlinks=False).st_size


# print the acquired statistics
def print_stats(data, store, signal, context):
    print('Statistics for folder: {}'.format(store.get('path')))
    print('Number files: {}'.format(data['count']))
    print('Total size (bytes): {}'.format(data['size']))


# the task for setting up the data for the workflow
setup_task = PythonTask(name='setup_task', callback=setup)

# traverse a directory and call the statistics callable for each file
walk_task = WalkTask(name='walk_task',
                     path=lambda data, store: store.get('path'),
                     callback=acquire_stats,
                     recursive=True)

# print the acquired statistics
print_task = PythonTask(name='print_task', callback=print_stats)

# create a DAG that runs the setup, walk and print task consecutively.
main_dag = Dag('main_dag')
main_dag.define({setup_task: walk_task, walk_task: print_task})
Example #18
def random_sleep(data, store, signal, context):
    sleep(random() * 4)


# the callback function for the task that prints the run times
def print_times(data, store, signal, context):
    dag_log = store.get(key='log.{}'.format(context.dag_name),
                        section=DataStoreDocumentSection.Meta)
    for task, fields in dag_log.items():
        print(task, 'on', fields['worker'], 'took', fields['duration'],
              'seconds')


# create the main DAG
d = Dag('main_dag')

# create the sleep tasks
sleep_task_1 = PythonTask(name='sleep_task_1', callback=random_sleep)
sleep_task_2 = PythonTask(name='sleep_task_2', callback=random_sleep)
sleep_task_3 = PythonTask(name='sleep_task_3', callback=random_sleep)

# create the print task
print_task = PythonTask(name='print_task', callback=print_times)

# set up the DAG
d.define({
    sleep_task_1: sleep_task_2,
    sleep_task_2: sleep_task_3,
    sleep_task_3: print_task
})
Example #19
                interp_base=request['processing_info']['interp_base'])

        elif process_type == 'bin':
            logger.info("binning (not performed yet)")
            processor.bin(start_doc,
                          requester=request['requester'],
                          proc_info=request['processing_info'],
                          filepath=request['processing_info']['filepath'])

        elif process_type == 'request_interpolated_data':
            logger.info("returning interpolated data (not done yet)")
            processor.return_interp_data(
                start_doc,
                requester=request['requester'],
                filepath=request['processing_info']['filepath'])
    t2 = ttime.time()
    print(f"total processing took {t2-t1} sec")


# don't create the request anymore
#create_req_task = PythonTask(name="create_req_func", callback=create_req_func,
#queue='qas-task')
process_run_task = PythonTask(name="process_run_func",
                              callback=process_run_func,
                              queue='qas-task')

d = Dag("interpolation", queue="qas-dag")
d.define({
    process_run_task: None,
})
Example #20
                data['descriptor'] = descriptor_dict[event['descriptor']]
                dag_name = signal.start_dag(primary_dag, data=data)
                # the
                print("dag name: {}".format(dag_name))
                #dag_names.append(dag_name)
                # I will join after every send for debugging
                signal.join_dags([dag_name])
        # ignore maxnum for now
        #if MAXNUM is not None and cnt > MAXNUM:
        #break
    print("Main job submission finished, found {} images".format(cnt))

    #signal.join_dags(dag_names)


def make_descriptor_dict(descriptors):
    desc_dict = dict()
    for descriptor in descriptors:
        desc_dict[descriptor['uid']] = descriptor
    return desc_dict


# create the main DAG that spawns others
main_dag = Dag('main_dag', queue='cms-main')
main_task = PythonTask(name="main_task",
                       callback=main_func,
                       queue="cms-main-task")
main_dag.define({
    main_task: None,
})
Example #21
    Option('recursive', default=True, help='Run recursively', type=bool),
    Option('iterations', default=1, help='The number of iterations', type=int),
    Option('threshold', default=0.4, help='The threshold value', type=float)
])


# the callback function that prints the value of the filepath parameter
def print_filepath(data, store, signal, context):
    print('The filepath is:', store.get('filepath'))


# the callback function that prints the value of the iterations parameter
def print_iterations(data, store, signal, context):
    print('Number of iterations:', store.get('iterations'))


# create the main DAG
d = Dag('main_dag')

# task for printing the value of the filepath parameter
print_filepath_task = PythonTask(name='print_filepath_task',
                                 callback=print_filepath)

# task for printing the value of the iterations parameter
print_iterations_task = PythonTask(name='print_iterations_task',
                                   callback=print_iterations)

# set up the graph of the DAG, in which the print_filepath_task has to be executed first,
# followed by the print_iterations_task.
d.define({print_filepath_task: print_iterations_task})
Example #22
            dag_name = dag_names.popleft()

            if len(dag_names) == 0:
                stopped = True
        else:
            time.sleep(.1)

    #signal.join_dags(dag_names)


def subsub_func(data, store, signal, context):
    print("completed")


main_task = PythonTask(name="main_task",
                       callback=main_func,
                       queue='cms-main-task')
main_dag_dict = {
    main_task: None,
}

main_dag = Dag("main_dag", autostart=True, queue='cms-main')
main_dag.define(main_dag_dict)

sub_task = PythonTask(name="test_task",
                      callback=sub_func,
                      queue='cms-primary-task')

sub_dag_dict = {
    sub_task: None,
}
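
# NOTE: the original snippet is truncated here. A minimal sketch of the missing
# sub dag creation, mirroring the pattern used for main_dag above (the dag name,
# queue name, and autostart value are assumptions):
sub_dag = Dag("sub_dag", autostart=False, queue='cms-primary')
sub_dag.define(sub_dag_dict)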
Example #23
def add_filename(data, store, signal, context):
    store.push('filenames', 'file_b.spec')


# the callback function for the task that adds a nested list to the list of filenames and
# then extends the list of filenames with two more entries.
def add_more_filenames(data, store, signal, context):
    store.push('filenames', ['nested_a', 'nested_b'])
    store.extend('filenames', ['file_c.spec', 'file_d.spec'])


# create the main DAG
d = Dag('main_dag')

# create the tasks that call the functions above
store_task = PythonTask(name='store_task',
                        callback=store_data)

modify_task = PythonTask(name='modify_task',
                         callback=modify_data)

add_filename_task = PythonTask(name='add_filename_task',
                               callback=add_filename)

add_more_filename_task = PythonTask(name='add_more_filename_task',
                                    callback=add_more_filenames)

# set up the graph of the DAG, in which the store_task and modify_task are called
# in sequence while the add_filename_task and add_more_filename_task are run in parallel.
d.define({
    store_task: modify_task,
    modify_task: [add_filename_task, add_more_filename_task]
})
Example #24
from lightflow.models import Dag
from lightflow.tasks import PythonTask
from lightflow_filesystem import GlobTask


# the callback function that handles the returned files from the glob task. In this
# example it stores them as a list into the data under the key 'files'.
def store_files(files, data, store, signal, context):
    data['files'] = files


# the callback for the task that prints the filenames that were returned by the glob task.
def print_filenames(data, store, signal, context):
    print('\n'.join(data['files']))


# create a GlobTask to find all files with the '.file' extension and a PythonTask to
# print the result.
glob_task = GlobTask(name='glob_task',
                     paths=['/tmp/lightflow_test/'],
                     callback=store_files,
                     pattern='**/*.file',
                     recursive=True)

print_task = PythonTask(name='print_task', callback=print_filenames)

# create a DAG that runs the glob task first and then the print task.
list_dag = Dag('list_dag')
list_dag.define({glob_task: print_task})
Example #25
from lightflow.models.task_data import TaskData, MultiTaskData

# TODO : make callback something else callback
#
from databroker import Broker
import matplotlib.pyplot as plt
import numpy as np

from SciStreams.config import config

config['foo'] = 'bar'


def test_func(data, store, signal, context):
    print("printing config\n\n")
    print(config['foo'])
    print("done\n\n")
    config['foo'] = 'far'


# create the main DAG that spawns others
#img_dag = Dag('img_dag')
test_task = PythonTask(name="main", callback=test_func)
test_task2 = PythonTask(name="main2", callback=test_func)
test_dag_dict = {
    test_task: test_task2,
}

test_dag = Dag("test", autostart=True)
test_dag.define(test_dag_dict)
Example #26
worker consuming the 'main' queue, the DAG and the print_task on the second worker, and
the print_memory task on the third worker.

"""

from lightflow.models import Dag
from lightflow.tasks import PythonTask


# the callback function for the tasks that simply print the context
def print_text(data, store, signal, context):
    print('Task {task_name} being run in DAG {dag_name} '
          'for workflow {workflow_name} ({workflow_id})'.format(
              **context.to_dict()))


# create the main DAG and have it scheduled on the 'graph' queue
d = Dag('main_dag', queue='graph')

# create the two tasks, where the first task is executed on the default 'task' queue
# while the second task is processed on the 'high_memory' queue
print_task = PythonTask(name='print_task', callback=print_text)

print_memory = PythonTask(name='print_memory',
                          callback=print_text,
                          queue='high_memory')

# set up the graph of the DAG, in which the print_task has to be executed first,
# followed by the print_memory task.
d.define({print_task: print_memory})