from radical.entk import Pipeline, Stage, Task


def generate_pipeline(nid):

    p = Pipeline()
    s1 = Stage()
    s2 = Stage()
    t1 = Task()

    p.name = 'p%s' % nid
    s1.name = 's1'
    s2.name = 's2'
    t1.name = 't1'

    t1.executable = '/bin/echo'
    t1.arguments = ['hello']

    s1.add_tasks(t1)
    p.add_stages(s1)

    for cnt in range(10):

        tn = Task()
        tn.name = 't%s' % (cnt + 1)
        tn.executable = '/bin/echo'
        tn.arguments = ['world']

        # Copy data from the task in first stage to the current task's location
        tn.copy_input_data = [
            '$Pipeline_%s_Stage_%s_Task_%s/output.txt' %
            (p.name, s1.name, t1.name)
        ]
        s2.add_tasks(tn)

    p.add_stages(s2)

    return p
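
# A minimal driver sketch (not part of the snippet above), showing one way pipelines
# produced by generate_pipeline() can be handed to an EnTK AppManager. The RabbitMQ
# host/port and the resource description are assumptions for illustration only.
from radical.entk import AppManager

appman = AppManager(hostname='localhost', port=5672)
appman.resource_desc = {'resource': 'local.localhost', 'walltime': 10, 'cpus': 1}
appman.workflow = set([generate_pipeline(nid) for nid in range(2)])
appman.run()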
# Example 2
    def generate_agent_stage(self) -> Stage:
        stage = Stage()
        stage.name = self.AGENT_STAGE_NAME
        cfg = self.cfg.agent_stage
        stage_api = self.api.agent_stage

        task_idx = 0
        output_path = stage_api.task_dir(self.stage_idx, task_idx, mkdir=True)
        assert output_path is not None

        # Update base parameters
        cfg.task_config.experiment_directory = self.cfg.experiment_directory
        cfg.task_config.stage_idx = self.stage_idx
        cfg.task_config.task_idx = task_idx
        cfg.task_config.node_local_path = self.cfg.node_local_path
        cfg.task_config.output_path = output_path

        # Write yaml configuration
        cfg_path = stage_api.config_path(self.stage_idx, task_idx)
        cfg.task_config.dump_yaml(cfg_path)
        task = generate_task(cfg)
        task.arguments += ["-c", cfg_path.as_posix()]
        stage.add_tasks(task)

        return stage
def test_wfp_check_processor():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._mq_hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    wfp.start_processor()
    assert wfp.check_processor()

    wfp.terminate_processor()
    assert not wfp.check_processor()
def test_stage_post_exec():

    global p1
    
    p1.name = 'p1'

    s = Stage()
    s.name = 's1'

    for t in range(NUM_TASKS):
        s.add_tasks(create_single_task())

    s.post_exec = condition

    p1.add_stages(s)

    res_dict = {

            'resource': 'local.localhost',
            'walltime': 30,
            'cpus': 1,
    }

    os.environ['RADICAL_PILOT_DBURL'] = MLAB
    appman = AppManager(rts='radical.pilot', hostname=hostname, port=port)
    appman.resource_desc = res_dict
    appman.workflow = [p1]
    appman.run()
# Example 5
    def generate_machine_learning_stage(self) -> Stage:
        stage = Stage()
        stage.name = self.MACHINE_LEARNING_STAGE_NAME
        cfg = self.cfg.machine_learning_stage
        stage_api = self.api.machine_learning_stage

        task_idx = 0
        output_path = stage_api.task_dir(self.stage_idx, task_idx, mkdir=True)
        assert output_path is not None

        # Update base parameters
        cfg.task_config.experiment_directory = self.cfg.experiment_directory
        cfg.task_config.stage_idx = self.stage_idx
        cfg.task_config.task_idx = task_idx
        cfg.task_config.node_local_path = self.cfg.node_local_path
        cfg.task_config.output_path = output_path
        cfg.task_config.model_tag = stage_api.unique_name(output_path)
        if self.stage_idx > 0:
            # Machine learning should use model selection API
            cfg.task_config.init_weights_path = None

        # Write yaml configuration
        cfg_path = stage_api.config_path(self.stage_idx, task_idx)
        cfg.task_config.dump_yaml(cfg_path)
        task = generate_task(cfg)
        task.arguments += ["-c", cfg_path.as_posix()]
        stage.add_tasks(task)

        return stage
def test_stage_parent_pipeline_assignment(l, i, b):

    s = Stage()
    data_type = [l, i, b]
    for data in data_type:
        with pytest.raises(TypeError):
            s.parent_pipeline = data
# Example 7
def test_wfp_start_processor():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._mq_hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    assert wfp.start_processor()
    assert not wfp._enqueue_thread
    assert not wfp._dequeue_thread
    assert not wfp._enqueue_thread_terminate.is_set()
    assert not wfp._dequeue_thread_terminate.is_set()
    assert not wfp._wfp_terminate.is_set()
    assert wfp._wfp_process.is_alive()

    wfp._wfp_terminate.set()
    wfp._wfp_process.join()
def generate_pipeline(name, stages):

    # Create a Pipeline object
    p = Pipeline()
    p.name = name


    for s_cnt in range(stages):

        # Create a Stage object
        s = Stage()
        s.name = 'Stage %s'%s_cnt

        for t_cnt in range(5):

            # Create a Task object
            t = Task()
            t.name = 'my-task'        # Assign a name to the task (optional)
            t.executable = '/bin/echo'   # Assign executable to the task
            # Assign arguments for the task executable
            t.arguments = ['I am task %s in %s in %s'%(t_cnt, s_cnt, name)]

            # Add the Task to the Stage
            s.add_tasks(t)

        # Add Stage to the Pipeline
        p.add_stages(s)

    return p
# Example 9
    def generate_aggregating_stage():
        """ 
        Function to concatenate the MD trajectory (h5 contact map) 
        """
        s2 = Stage()
        s2.name = 'aggregating'

        # Aggregation task
        t2 = Task()
        # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/MD_to_CVAE/MD_to_CVAE.py
        t2.pre_exec = []

        t2.pre_exec += [
            '. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh'
        ]
        t2.pre_exec += ['conda activate rp.copy']
        t2.pre_exec += [
            'cd /gpfs/alpine/bip179/scratch/hrlee/hyperspace/microscope/experiments/MD_to_CVAE'
        ]
        t2.executable = ['/ccs/home/hrlee/.conda/envs/rp.copy/bin/python'
                         ]  # MD_to_CVAE.py
        t2.arguments = [
            '/gpfs/alpine/bip179/scratch/hrlee/hyperspace/microscope/experiments/MD_to_CVAE/MD_to_CVAE.py',
            '-f',
            '/gpfs/alpine/bip179/scratch/hrlee/hyperspace/microscope/experiments/MD_exps/fs-pep'
        ]

        # Add the aggregation task to the aggregating stage
        s2.add_tasks(t2)
        return s2
def test_pipeline_decrement_stage():

    p = Pipeline()
    s1 = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s1.tasks = t
    s2 = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s2.tasks = t
    p.add_stages([s1, s2])

    p._increment_stage()
    p._increment_stage()
    assert p._stage_count == 2
    assert p._cur_stage == 2
    assert p._completed_flag.is_set() == True

    p._decrement_stage()
    assert p._stage_count == 2
    assert p._cur_stage == 1
    assert p._completed_flag.is_set() == False

    p._decrement_stage()
    assert p._stage_count == 2
    assert p._cur_stage == 0
    assert p._completed_flag.is_set() == False
# Example 11
    def test_sync_with_master(self, mocked_init, mocked_Logger, mocked_Profiler):

        # --------------------------------------------------------------------------
        #
        def component_execution(packets, conn_params, queue):

            tmgr = BaseTmgr(None, None, None, None, None, None)
            tmgr._log = mocked_Logger
            tmgr._prof = mocked_Profiler
            mq_connection2 = pika.BlockingConnection(rmq_conn_params)
            mq_channel2 = mq_connection2.channel()
            for obj_type, obj in packets:
                tmgr._sync_with_master(obj, obj_type, mq_channel2, conn_params,
                                       queue)
            if mq_channel2.is_open:
                mq_channel2.close()

        task = Task()
        task.parent_stage = {'uid':'stage.0000', 'name': 'stage.0000'}
        packets = [('Task', task)]
        stage = Stage()
        stage.parent_pipeline = {'uid':'pipe.0000', 'name': 'pipe.0000'}
        packets.append(('Stage', stage))
        hostname = os.environ.get('RMQ_HOSTNAME', 'localhost')
        port = int(os.environ.get('RMQ_PORT', '5672'))
        username = os.environ.get('RMQ_USERNAME','guest')
        password = os.environ.get('RMQ_PASSWORD','guest')
        credentials = pika.PlainCredentials(username, password)
        rmq_conn_params = pika.ConnectionParameters(host=hostname, port=port,
                credentials=credentials)
        mq_connection = pika.BlockingConnection(rmq_conn_params)
        mq_channel = mq_connection.channel()
        mq_channel.queue_declare(queue='master')
        master_thread = mt.Thread(target=component_execution,
                                  name='tmgr_sync', 
                                  args=(packets, rmq_conn_params, 'master'))
        master_thread.start()

        time.sleep(1)
        try:
            while packets:
                packet = packets.pop(0)
                _, _, body = mq_channel.basic_get(queue='master')
                msg = json.loads(body)
                self.assertEqual(msg['object'], packet[1].to_dict())
                self.assertEqual(msg['type'], packet[0])
        except Exception as ex:
            print(ex)
            print(json.loads(body))
            master_thread.join()
            mq_channel.queue_delete(queue='master')
            mq_channel.close()
            mq_connection.close()
            raise ex
        else:
            master_thread.join()
            mq_channel.queue_delete(queue='master')
            mq_channel.close()
            mq_connection.close()
# Example 12
def test_amgr_synchronizer():

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    p = Pipeline()
    s = Stage()

    # Create and add 10 tasks to the stage
    for cnt in range(10):

        t = Task()
        t.executable = 'some-executable-%s' % cnt

        s.add_tasks(t)

    p.add_stages(s)
    p._assign_uid(amgr._sid)
    p._validate()

    amgr.workflow = [p]

    sid  = 'test.0016'
    rmgr = BaseRmgr({}, sid, None, {})
    tmgr = BaseTmgr(sid=sid,
                    pending_queue=['pending-1'],
                    completed_queue=['completed-1'],
                    rmgr=rmgr,
                    mq_hostname=hostname,
                    port=port,
                    rts=None)

    amgr._rmgr         = rmgr
    rmgr._task_manager = tmgr

    for t in p.stages[0].tasks:
        assert t.state == states.INITIAL

    assert p.stages[0].state == states.INITIAL
    assert p.state           == states.INITIAL

    # Start the synchronizer method in a thread
    amgr._terminate_sync = mt.Event()
    sync_thread = mt.Thread(target=amgr._synchronizer,
                            name='synchronizer-thread')
    sync_thread.start()

    # Run the test helper function in a separate process
    proc = mp.Process(target=func_for_synchronizer_test, name='temp-proc',
                      args=(amgr._sid, p, tmgr))

    proc.start()
    proc.join()

    amgr._terminate_sync.set()
    sync_thread.join()

    for t in p.stages[0].tasks:
        assert t.state == states.COMPLETED
# Example 13
def create_inversion_dict_stage(cmt_file_db, param_path, task_counter):
    """Creates stage for the creation of the inversion files. This stage is
    tiny, but required before the actual inversion.

    :param cmt_file_db:
    :param param_path:
    :param task_counter:
    :return:
    """

    # Get database parameter path
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    # Load Parameters
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Function
    inv_dict_func = os.path.join(bin_path, "write_inversion_dicts.py")

    # Create Process Paths Stage (CPP)
    # Create a Stage object
    inv_dict_stage = Stage()
    inv_dict_stage.name = "Creating"

    # Create Task
    inv_dict_task = Task()

    # This way the task gets the name of the path file
    inv_dict_task.name = "Inversion-Dictionaries"

    inv_dict_task.pre_exec = [  # Conda activate
        DB_params["conda-activate"]
    ]

    # Assign executable to the task
    inv_dict_task.executable = [DB_params["bin-python"]]

    inv_dict_task.arguments = [
        inv_dict_func, "-f", cmt_file_db, "-p", param_path
    ]

    # In the future maybe to database dir as a total log?
    inv_dict_task.stdout = os.path.join(
        "%s" % Cdir, "logs", "stdout.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), inv_dict_task.name))

    inv_dict_task.stderr = os.path.join(
        "%s" % Cdir, "logs", "stderr.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), inv_dict_task.name))

    inv_dict_stage.add_tasks(inv_dict_task)

    task_counter += 1

    return inv_dict_stage, task_counter
# Example 14
    def generate_stage(self):
        s = Stage()
        s.name = self.name
        s.add_tasks(
            {self.generate_task(**x)
             for x in self._ensemble_product()})

        return s
# Example 15
def main():

    cmd = "{0} 'ls {1}'".format(ssh, dir_)
    p = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE)
    out, _ = p.communicate()

    out = out.decode('utf-8').strip().split(linesep)

    fullpaths = [op.join(dir_, p) for p in out]
    print(fullpaths)

    # Start radical entk pipeline

    p = Pipeline()

    for i in range(iterations):

        s = Stage()

        for fp in fullpaths:

            t = Task()
            t.name = 'Incrementation {}'.format(i)
            t.pre_exec = [
                'source /home/vhayot/miniconda3/etc/profile.d/conda.sh',
                'conda activate radenv'
            ]
            t.executable = 'python /home/vhayot/inc.py'

            if i == 0:
                t.arguments = [fp, out_dir, i]
            else:
                # Note: assuming all data is accessible through shared dir
                # radical entk functions without sharedfs, however
                t.arguments = [
                    op.join(out_dir,
                            "it-{0}-{1}".format(i - 1, op.basename(fp))),
                    out_dir, i
                ]

            s.add_tasks(t)

        # Create a new stage every time there's a dependency
        p.add_stages(s)

    appman = AppManager(hostname=hostname, port=port)

    appman.resource_desc = {
        'resource': 'xsede.bridges',
        'walltime': 20,
        'cpus': 5,
        'project': 'mc3bggp',
        'schema': 'gsissh'
    }

    appman.workflow = set([p])

    appman.run()
def test_state_order():

    """
    **Purpose**: Test if the Pipeline, Stage and Task are assigned their states in the correct order
    """

    def create_single_task():

        t1 = Task()
        t1.name = 'simulation'
        t1.executable = ['/bin/date']
        t1.copy_input_data = []
        t1.copy_output_data = []

        return t1

    p1 = Pipeline()
    p1.name = 'p1'

    s = Stage()
    s.name = 's1'
    s.tasks = create_single_task()
    s.add_tasks(create_single_task())

    p1.add_stages(s)

    res_dict = {

            'resource': 'local.localhost',
            'walltime': 5,
            'cpus': 1,
            'project': ''

    }

    os.environ['RADICAL_PILOT_DBURL'] = MLAB
    os.environ['RP_ENABLE_OLD_DEFINES'] = 'True'
    
    appman = Amgr(hostname=hostname, port=port)
    appman.resource_desc = res_dict

    appman.workflow = [p1]
    appman.run()

    p_state_hist = p1.state_history
    assert p_state_hist == ['DESCRIBED', 'SCHEDULING', 'DONE']

    s_state_hist = p1.stages[0].state_history
    assert s_state_hist == ['DESCRIBED', 'SCHEDULING', 'SCHEDULED', 'DONE']

    tasks = p1.stages[0].tasks

    for t in tasks:

        t_state_hist = t.state_history
        assert t_state_hist == ['DESCRIBED', 'SCHEDULING', 'SCHEDULED', 'SUBMITTING', 'SUBMITTED',
                            'EXECUTED', 'DEQUEUEING', 'DEQUEUED', 'DONE']
# Example 17
def test_wfp_workflow_incomplete():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._mq_hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    wfp._initialize_workflow()

    assert wfp.workflow_incomplete()

    amgr.workflow = [p]
    profiler = ru.Profiler(name='radical.entk.temp')

    p.stages[0].state = states.SCHEDULING
    p.state = states.SCHEDULED
    for t in p.stages[0].tasks:
        t.state = states.COMPLETED

    import json
    import pika

    task_as_dict = json.dumps(t.to_dict())
    mq_connection = pika.BlockingConnection(
        pika.ConnectionParameters(host=amgr._mq_hostname, port=amgr._port))
    mq_channel = mq_connection.channel()
    mq_channel.basic_publish(exchange='',
                             routing_key='%s-completedq-1' % amgr._sid,
                             body=task_as_dict)

    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer, name='synchronizer-thread')
    sync_thread.start()

    proc = Process(target=func_for_dequeue_test,
                   name='temp-proc',
                   args=(wfp, ))
    proc.start()
    proc.join()

    amgr._terminate_sync.set()
    sync_thread.join()

    assert not wfp.workflow_incomplete()
# Example 18
def test_wfp_dequeue():

    p = Pipeline()
    s = Stage()
    t = Task()

    t.executable = '/bin/date'
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    wfp.initialize_workflow()

    assert p.state == states.INITIAL
    assert p.stages[0].state == states.INITIAL

    for t in p.stages[0].tasks:
        assert t.state == states.INITIAL

    p.state = states.SCHEDULED
    p.stages[0].state = states.SCHEDULING

    for t in p.stages[0].tasks:
        t.state = states.COMPLETED

    task_as_dict = json.dumps(t.to_dict())
    mq_connection = pika.BlockingConnection(
        pika.ConnectionParameters(host=amgr._hostname, port=amgr._port))
    mq_channel = mq_connection.channel()

    mq_channel.basic_publish(exchange='',
                             routing_key='%s' % amgr._completed_queue[0],
                             body=task_as_dict)

    wfp.start_processor()

    th = mt.Thread(target=func_for_dequeue_test, name='temp-proc', args=(p, ))
    th.start()
    th.join()

    wfp.terminate_processor()

    assert p.state == states.DONE
    assert p.stages[0].state == states.DONE

    for t in p.stages[0].tasks:
        assert t.state == states.DONE
# Example 19
    def generate_ML_stage(num_ML=1):
        """
        Function to generate the learning stage
        """
        s3 = Stage()
        s3.name = 'learning'

        # learn task
        for i in range(num_ML):
            t3 = Task()
            # https://github.com/radical-collaboration/hyperspace/blob/MD/microscope/experiments/CVAE_exps/train_cvae.py
            t3.pre_exec = []
            t3.pre_exec = ['module reset']
            t3.pre_exec += [
                '. /sw/summit/python/2.7/anaconda2/5.3.0/etc/profile.d/conda.sh'
            ]
            t3.pre_exec += ['module load cuda/9.1.85']
            t3.pre_exec += ['conda activate rp.copy']
            t3.pre_exec += ['export CUDA_VISIBLE_DEVICES=0']

            t3.pre_exec += [
                'export PYTHONPATH=/gpfs/alpine/scratch/hrlee/bip179/hyperspace/microscope/experiments/CVAE_exps:$PYTHONPATH'
            ]
            t3.pre_exec += [
                'cd /gpfs/alpine/scratch/hrlee/bip179/hyperspace/microscope/experiments/CVAE_exps'
            ]
            time_stamp = int(time.time())
            dim = i + 3
            cvae_dir = 'cvae_runs_%.2d_%d' % (dim, time_stamp)
            t3.pre_exec += ['mkdir -p {0} && cd {0}'.format(cvae_dir)]
            t3.executable = ['/ccs/home/hrlee/.conda/envs/rp.copy/bin/python'
                             ]  # train_cvae.py
            t3.arguments = [
                '/gpfs/alpine/bip179/scratch/hrlee/hyperspace/microscope/experiments/CVAE_exps/train_cvae.py',
                '-f',
                '/gpfs/alpine/bip179/scratch/hrlee/hyperspace/microscope/experiments/MD_to_CVAE/cvae_input.h5',
                '-d', dim
            ]

            t3.cpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 4,
                'thread_type': 'OpenMP'
            }
            t3.gpu_reqs = {
                'processes': 1,
                'process_type': None,
                'threads_per_process': 1,
                'thread_type': 'CUDA'
            }

            # Add the learn task to the learning stage
            s3.add_tasks(t3)
            time.sleep(1)
        return s3
def test_stage_to_dict():

    s = Stage()
    d = s.to_dict()

    assert d == {'uid': None,
                 'name': None,
                 'state': states.INITIAL,
                 'state_history': [states.INITIAL],
                 'parent_pipeline': {'uid': None, 'name': None}}
# Example 21
def test_stage_to_dict():

    s = Stage()
    d = s.to_dict()

    assert d == {'uid': 'stage.0000',
                 'name': None,
                 'state': states.INITIAL,
                 'state_history': [states.INITIAL],
                 'parent_pipeline': {'uid': None, 'name': None}}
# Example 22
    def test_stage_from_dict(self, mocked_init):

        d = {
            'uid': 're.Stage.0000',
            'name': 's1',
            'state': states.DONE,
            'state_history': [states.INITIAL, states.DONE],
            'parent_pipeline': {
                'uid': 'p1',
                'name': 'pipe1'
            }
        }

        s = Stage()
        s._uid = None
        s._name = None
        s._state = None
        s._state_history = None
        s._p_pipeline = None
        s.from_dict(d)

        self.assertEqual(s._uid, d['uid'])
        self.assertEqual(s._name, d['name'])
        self.assertEqual(s._state, d['state'])
        self.assertEqual(s._state_history, d['state_history'])
        self.assertEqual(s._p_pipeline, d['parent_pipeline'])
# Example 23
def generate_pipeline():
    def func_condition():

        global CUR_NEW_STAGE, MAX_NEW_STAGE

        if CUR_NEW_STAGE <= MAX_NEW_STAGE:
            func_on_true()
        else:
            func_on_false()

    def func_on_true():

        global CUR_NEW_STAGE

        CUR_NEW_STAGE += 1

        s = Stage()

        for i in range(10):
            t = Task()
            t.executable = '/bin/sleep'
            t.arguments = ['30']

            s.add_tasks(t)

        # Add post-exec to the Stage
        s.post_exec = func_condition

        p.add_stages(s)

    def func_on_false():
        print('Done')

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    for i in range(10):

        t1 = Task()
        t1.executable = 'sleep'
        t1.arguments = ['30']

        # Add the Task to the Stage
        s1.add_tasks(t1)

    # Add post-exec to the Stage
    s1.post_exec = func_condition

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
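
# A small sketch (an assumption, not part of the example above): the adaptive example
# relies on module-level counters that the snippet does not define. Something like the
# following would be needed before calling generate_pipeline(); the values are illustrative.
CUR_NEW_STAGE = 0
MAX_NEW_STAGE = 4

p = generate_pipeline()   # further stages are appended by func_on_true() via post_exec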
# Example 24
def get_pipeline(shared_fs=False, size=1):

    p = Pipeline()
    p.name = 'p'

    n = 4

    s1 = Stage()
    s1.name = 's1'
    for x in range(n):
        t = Task()
        t.name = 't%s'%x

        # dd if=/dev/random bs=<byte size of a chunk> count=<number of chunks> of=<output file name>

        t.executable = ['dd']

        if not shared_fs:
            t.arguments = ['if=/dev/urandom','bs=%sM'%size, 'count=1', 'of=$NODE_LFS_PATH/s1_t%s.txt'%x]
        else:
            t.arguments = ['if=/dev/urandom','bs=%sM'%size, 'count=1', 'of=/home/vivek91/s1_t%s.txt'%x]

        t.cpu_reqs['processes'] = 1        
        t.cpu_reqs['threads_per_process'] = 24
        t.cpu_reqs['thread_type'] = ''
        t.cpu_reqs['process_type'] = ''
        t.lfs_per_process = 1024

        s1.add_tasks(t)

    p.add_stages(s1)

    s2 = Stage()
    s2.name = 's2'
    for x in range(n):
        t = Task()
        t.executable = ['dd']

        if not shared_fs:
            t.arguments = ['if=$NODE_LFS_PATH/s1_t%s.txt'%x,'bs=%sM'%size, 'count=1', 'of=$NODE_LFS_PATH/s2_t%s.txt'%x]
        else:
            t.arguments = ['if=/home/vivek91/s1_t%s.txt'%x,'bs=%sM'%size, 'count=1', 'of=/home/vivek91/s2_t%s.txt'%x]

        t.cpu_reqs['processes'] = 1
        t.cpu_reqs['threads_per_process'] = 24
        t.cpu_reqs['thread_type'] = ''
        t.cpu_reqs['process_type'] = ''
        t.tag = 't%s'%x

        s2.add_tasks(t)


    p.add_stages(s2)    

    return p
def test_wfp_workflow_incomplete():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._mq_hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    wfp._initialize_workflow()

    assert wfp.workflow_incomplete()

    amgr.workflow = [p]
    profiler = ru.Profiler(name='radical.entk.temp')

    p.stages[0].state = states.SCHEDULING
    p.state = states.SCHEDULED
    for t in p.stages[0].tasks:
        t.state = states.COMPLETED

    import json
    import pika

    task_as_dict = json.dumps(t.to_dict())
    mq_connection = pika.BlockingConnection(pika.ConnectionParameters(host=amgr._mq_hostname, port=amgr._port))
    mq_channel = mq_connection.channel()
    mq_channel.basic_publish(exchange='',
                             routing_key='%s-completedq-1' % amgr._sid,
                             body=task_as_dict)

    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer, name='synchronizer-thread')
    sync_thread.start()

    proc = Process(target=func_for_dequeue_test, name='temp-proc', args=(wfp,))
    proc.start()
    proc.join()

    amgr._terminate_sync.set()
    sync_thread.join()

    assert not wfp.workflow_incomplete()
# Example 26
def write_sources(cmt_file_db, param_path, task_counter):
    """ This function creates a stage that modifies the CMTSOLUTION files
    before the simulations are run.

    :param cmt_file_db: cmtfile in the database
    :param param_path: path to parameter file directory
    :param task_counter: total task count up until now in pipeline
    :return: EnTK Stage

    """

    # Get Database parameters
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Path to function
    write_source_func = os.path.join(bin_path, "write_sources.py")

    # Create a Stage object
    w_sources = Stage()

    w_sources.name = "Write-Sources"

    # Create Task for stage
    w_sources_t = Task()
    w_sources_t.name = "Task-Sources"
    w_sources_t.pre_exec = [  # Conda activate
        DB_params["conda-activate"]
    ]
    # Assign executable to the task
    w_sources_t.executable = DB_params["bin-python"]
    w_sources_t.arguments = [write_source_func, cmt_file_db]

    # In the future maybe to database dir as a total log?
    w_sources_t.stdout = os.path.join(
        "%s" % Cdir, "logs", "stdout.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), w_sources_t.name))

    w_sources_t.stderr = os.path.join(
        "%s" % Cdir, "logs", "stderr.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), w_sources_t.name))

    # Add Task to the Stage
    w_sources.add_tasks(w_sources_t)

    task_counter += 1

    return w_sources, task_counter
# Example 27
def create_process_path_files(cmt_file_db, param_path, task_counter):
    """This function creates the path files used for processing both
    synthetic and observed data in ASDF format, as well as the following
    windowing procedure.

    :param cmt_file_db: cmtfile in the database
    :param param_path: path to parameter file directory
    :param pipelinedir: path to pipeline directory
    :return: EnTK Stage

    """

    # Get database parameter path
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    # Load Parameters
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Process path function
    create_process_path_bin = os.path.join(bin_path, "create_path_files.py")

    # Create Process Paths Stage (CPP)
    # Create a Stage object
    cpp = Stage()
    cpp.name = "CreateProcessPaths"

    # Create Task
    cpp_t = Task()
    cpp_t.name = "CPP-Task"
    cpp_t.pre_exec = [  # Conda activate
        DB_params["conda-activate"]
    ]
    # Assign executable to the task
    cpp_t.executable = DB_params["bin-python"]
    cpp_t.arguments = [create_process_path_bin, cmt_file_db]

    # In the future maybe to database dir as a total log?
    cpp_t.stdout = os.path.join(
        "%s" % Cdir, "logs", "stdout.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), cpp_t.name))

    cpp_t.stderr = os.path.join(
        "%s" % Cdir, "logs", "stderr.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), cpp_t.name))

    task_counter += 1

    cpp.add_tasks(cpp_t)

    return cpp, task_counter
# Example 28
def create_inversion_stage(cmt_file_db, param_path, task_counter):
    """Creates inversion stage.

    :param cmt_file_db:
    :param param_path:
    :return:
    """

    # Get database parameter path
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")

    # Load Parameters
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Function
    inversion_func = os.path.join(bin_path, "inversion.py")

    # Create a Stage object
    inversion_stage = Stage()
    inversion_stage.name = "CMT3D"

    # Create Task
    inversion_task = Task()

    # This way the task gets the name of the path file
    inversion_task.name = "Inversion"

    inversion_task.pre_exec = [  # Conda activate
        DB_params["conda-activate"]
    ]

    # Assign executable to the task
    inversion_task.executable = DB_params["bin-python"]

    inversion_task.arguments = [
        inversion_func, "-f", cmt_file_db, "-p", param_path
    ]

    # In the future maybe to database dir as a total log?
    inversion_task.stdout = os.path.join(
        "%s" % Cdir, "logs", "stdout.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), inversion_task.name))

    inversion_task.stderr = os.path.join(
        "%s" % Cdir, "logs", "stderr.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), inversion_task.name))

    inversion_stage.add_tasks(inversion_task)

    return inversion_stage
# Example 29
def test_amgr_synchronizer():

    logger = ru.get_logger('radical.entk.temp_logger')
    profiler = ru.Profiler(name='radical.entk.temp')
    amgr = Amgr(hostname=hostname, port=port)

    mq_connection = pika.BlockingConnection(pika.ConnectionParameters(host=hostname, port=port))
    mq_channel = mq_connection.channel()

    amgr._setup_mqs()

    p = Pipeline()
    s = Stage()

    # Create and add 100 tasks to the stage
    for cnt in range(100):

        t = Task()
        t.executable = ['some-executable-%s' % cnt]

        s.add_tasks(t)

    p.add_stages(s)
    p._assign_uid(amgr._sid)
    p._validate()

    amgr.workflow = [p]

    for t in p.stages[0].tasks:
        assert t.state == states.INITIAL

    assert p.stages[0].state == states.INITIAL
    assert p.state == states.INITIAL

    # Start the synchronizer method in a thread
    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer, name='synchronizer-thread')
    sync_thread.start()

    # Run the test helper function in a separate process
    proc = Process(target=func_for_synchronizer_test, name='temp-proc',
                   args=(amgr._sid, p, logger, profiler))

    proc.start()
    proc.join()

    for t in p.stages[0].tasks:
        assert t.state == states.SCHEDULING

    assert p.stages[0].state == states.SCHEDULING
    assert p.state == states.SCHEDULING

    amgr._terminate_sync.set()
    sync_thread.join()
def test_uid_assignment():

    p = Pipeline()
    s = Stage()
    t = Task()

    s.tasks = t
    p.stages = s

    assert t._parent_pipeline == p.uid
    assert t._parent_stage == s.uid
    assert s._parent_pipeline == p.uid
# Example 31
    def _generate_stage(self, stage_type):
        """
        Parameters
        ----------
        stage_type : str
            key into self.stages dictionary to retrieve stage name and taskmanagers.
        """
        stage = Stage()
        stage.name = self.stages[stage_type].name
        for taskman in self.stages[stage_type].taskmanagers:
            stage.add_tasks(set(taskman.tasks(self.current_iter)))
        return stage
# Example 32
def test_pipeline_stage_assignment():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = '/bin/date'
    s.tasks = t
    p.stages = s

    assert type(p.stages) == list
    assert p._stage_count == 1
    assert p._cur_stage == 1
    assert p.stages[0] == s
def test_stage_task_assignment():
    """
    ***Purpose***: Test if necessary attributes are automatically updated upon task assignment
    """

    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.tasks = t

    assert type(s.tasks) == set
    assert s._task_count == 1
    assert t in s.tasks
def test_pipeline_stage_assignment():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.tasks = t
    p.stages = s

    assert type(p.stages) == list
    assert p._stage_count == 1
    assert p._cur_stage == 1
    assert p.stages[0] == s
# Example 35
def test_stage_task_assignment():
    """
    ***Purpose***: Test if necessary attributes are automatically updated upon task assignment
    """

    s = Stage()
    t = Task()
    t.executable = '/bin/date'
    s.tasks = t

    assert type(s.tasks) == set
    assert s._task_count == 1
    assert t in s.tasks
def test_amgr_synchronizer():

    logger = ru.Logger('radical.entk.temp_logger')
    profiler = ru.Profiler(name='radical.entk.temp')
    amgr = Amgr(hostname=hostname, port=port)

    amgr._setup_mqs()

    p = Pipeline()
    s = Stage()

    # Create and add 100 tasks to the stage
    for cnt in range(100):

        t = Task()
        t.executable = ['some-executable-%s' % cnt]

        s.add_tasks(t)

    p.add_stages(s)
    p._assign_uid(amgr._sid)
    p._validate()

    amgr.workflow = [p]

    for t in p.stages[0].tasks:
        assert t.state == states.INITIAL

    assert p.stages[0].state == states.INITIAL
    assert p.state == states.INITIAL

    # Start the synchronizer method in a thread
    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer, name='synchronizer-thread')
    sync_thread.start()

    # Run the test helper function in a separate process
    proc = Process(target=func_for_synchronizer_test, name='temp-proc',
                   args=(amgr._sid, p, logger, profiler))

    proc.start()
    proc.join()

    for t in p.stages[0].tasks:
        assert t.state == states.SCHEDULING

    assert p.stages[0].state == states.SCHEDULING
    assert p.state == states.SCHEDULING

    amgr._terminate_sync.set()
    sync_thread.join()
# Example 37
def test_stage_validate():

    s = Stage()
    s._state = 'test'
    with pytest.raises(ValueError):
        s._validate()

    s = Stage()
    with pytest.raises(MissingError):
        s._validate()
# Example 38
def data_request(cmt_file_db, param_path, task_counter):
    """ This function creates the request for the observed data and returns
    it as an EnTK Stage

    :param cmt_file_db: cmt_file in the database
    :param param_path: path to parameter file directory
    :param task_counter: total task count up until now in pipeline
    :return: EnTK Stage

    """

    # Get Database parameters
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Path to function
    request_data_func = os.path.join(bin_path, "request_data.py")

    # Create a Stage object
    datarequest = Stage()

    datarequest_t = Task()
    datarequest_t.name = "data-request"
    datarequest_t.pre_exec = [  # Conda activate
        DB_params["conda-activate"]
    ]
    # Assign executable to the task
    datarequest_t.executable = DB_params["bin-python"]
    datarequest_t.arguments = [request_data_func, cmt_file_db]

    # In the future maybe to database dir as a total log?
    datarequest_t.stdout = os.path.join(
        "%s" % Cdir, "logs", "stdout.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), datarequest_t.name))

    datarequest_t.stderr = os.path.join(
        "%s" % Cdir, "logs", "stderr.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), datarequest_t.name))

    # Add Task to the Stage
    datarequest.add_tasks(datarequest_t)

    # Increase task-counter
    task_counter += 1

    return datarequest, task_counter
# Example 39
def test_stage_task_addition():

    s = Stage()
    t1 = Task()
    t1.executable = '/bin/date'
    t2 = Task()
    t2.executable = '/bin/date'
    s.add_tasks(set([t1, t2]))

    assert type(s.tasks) == set
    assert s._task_count == 2
    assert t1 in s.tasks
    assert t2 in s.tasks

    s = Stage()
    t1 = Task()
    t1.executable = '/bin/date'
    t2 = Task()
    t2.executable = '/bin/date'
    s.add_tasks([t1, t2])

    assert type(s.tasks) == set
    assert s._task_count == 2
    assert t1 in s.tasks
    assert t2 in s.tasks
# Example 40
def test_stage_post_exec_assignment(l, d):

    s = Stage()

    def func():
        return True

    with pytest.raises(TypeError):
        s.post_exec = l

    with pytest.raises(ValueError):
        s.post_exec = d

    pe_d = {'condition': 1, 'on_true': 2, 'on_false': 3}

    with pytest.raises(TypeError):
        s.post_exec = pe_d

    pe_d['condition'] = func
    with pytest.raises(TypeError):
        s.post_exec = pe_d

    pe_d['on_true'] = func
    with pytest.raises(TypeError):
        s.post_exec = pe_d

    pe_d['on_false'] = func
    s.post_exec = pe_d
# Example 41
def specfem_clean_up(cmt_file_db, param_path, task_counter):
    """ Cleaning up the simulation directories since we don"t need all the
    files for the future.

    :param cmt_file_db: cmtfile in the database
    :param param_path: path to parameter file directory
    :param pipelinedir: path to pipeline directory
    :return: EnTK Stage

    """

    # Get Database parameters
    databaseparam_path = os.path.join(param_path,
                                      "Database/DatabaseParameters.yml")
    # Database parameters.
    DB_params = read_yaml_file(databaseparam_path)

    # Earthquake specific database parameters: Dir and Cid
    Cdir, Cid = get_Centry_path(DB_params["databasedir"], cmt_file_db)

    # Path to function
    clean_up_func = os.path.join(bin_path, "clean_up_simdirs.py")

    # Create a Stage object
    clean_up = Stage()
    clean_up.name = "Clean-Up"

    # Create Task for stage
    clean_up_t = Task()
    clean_up_t.name = "Task-Clean-Up"
    clean_up_t.pre_exec = [  # Conda activate
        DB_params["conda-activate"]
    ]
    # Assign executable to the task
    clean_up_t.executable = DB_params["bin-python"]
    clean_up_t.arguments = [clean_up_func, cmt_file_db]

    # In the future maybe to database dir as a total log?
    clean_up_t.stdout = os.path.join(
        "%s" % Cdir, "logs", "stdout.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), clean_up_t.name))

    clean_up_t.stderr = os.path.join(
        "%s" % Cdir, "logs", "stderr.pipeline_%s.task_%s.%s" %
        (Cid, str(task_counter).zfill(4), clean_up_t.name))

    # Add Task to the Stage
    clean_up.add_tasks(clean_up_t)

    return clean_up, task_counter
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Create a Task object which creates a file named 'output.txt' of size 1 MB
    t1 = Task()
    t1.executable = '/bin/bash'
    t1.arguments = ['-l', '-c', 'base64 /dev/urandom | head -c 1000000 > output.txt']

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    # Create another Stage object to hold character count tasks
    s2 = Stage()

    # Create a Task object
    t2 = Task()
    t2.executable = '/bin/bash'
    t2.arguments = ['-l', '-c', 'grep -o . output.txt | sort | uniq -c > ccount.txt']
    # Copy data from the task in the first stage to the current task's location
    t2.copy_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/output.txt' % (p.uid, s1.uid, t1.uid)]

    # Add the Task to the Stage
    s2.add_tasks(t2)

    # Add Stage to the Pipeline
    p.add_stages(s2)

    # Create another Stage object to hold checksum tasks
    s3 = Stage()

    # Create a Task object
    t3 = Task()
    t3.executable = '/bin/bash'
    t3.arguments = ['-l', '-c', 'sha1sum ccount.txt > chksum.txt']
    # Copy data from the task in the first stage to the current task's location
    t3.copy_input_data = ['$Pipeline_%s_Stage_%s_Task_%s/ccount.txt' % (p.uid, s2.uid, t2.uid)]
    # Download the output of the current task to the current location
    t3.download_output_data = ['chksum.txt > chksum_%s.txt' % cnt]

    # Add the Task to the Stage
    s3.add_tasks(t3)

    # Add Stage to the Pipeline
    p.add_stages(s3)

    return p
def generate_pipeline():

    def func_condition():

        global CUR_NEW_STAGE, MAX_NEW_STAGE

        if CUR_NEW_STAGE <= MAX_NEW_STAGE:
            return True

        return False

    def func_on_true():

        global CUR_NEW_STAGE
        CUR_NEW_STAGE += 1

        shuffle(p.stages[CUR_NEW_STAGE:])

    def func_on_false():
        print('Done')

    # Create a Pipeline object
    p = Pipeline()

    for s in range(MAX_NEW_STAGE+1):

        # Create a Stage object
        s1 = Stage()

        for i in range(CUR_TASKS):

            t1 = Task()
            t1.executable = '/bin/sleep'
            t1.arguments = [ '30']

            # Add the Task to the Stage
            s1.add_tasks(t1)

        # Add post-exec to the Stage
        s1.post_exec = {
                        'condition': func_condition,
                        'on_true': func_on_true,
                        'on_false': func_on_false
                        }

        # Add Stage to the Pipeline
        p.add_stages(s1)

    return p
def on_true():

    global NUM_TASKS, CUR_STAGE

    NUM_TASKS *= 2

    s = Stage()
    s.name = 's%s'%CUR_STAGE

    for t in range(NUM_TASKS):
        s.add_tasks(create_single_task())

    s.post_exec = condition

    p1.add_stages(s)
def test_stage_assign_uid():

    s = Stage()
    try:
        import glob
        import shutil
        import os
        home = os.environ.get('HOME','/home')
        test_fold = glob.glob('%s/.radical/utils/test*'%home)
        for f in test_fold:
            shutil.rmtree(f)
    except:
        pass
    s._assign_uid('test')
    assert s.uid == 'stage.0000'
def test_stage_state_assignment(t, l, i, b):

    s = Stage()

    data_type = [l, i, b]

    for data in data_type:
        with pytest.raises(TypeError):
            s.state = data

    if isinstance(t, str):
        with pytest.raises(ValueError):
            s.state = t

    for val in states._stage_state_values.keys():
        s.state = val
    def create_pipeline():

        p = Pipeline()

        s = Stage()

        t1 = Task()
        t1.name = 'simulation'
        t1.executable = ['sleep']
        t1.arguments = ['10']

        s.add_tasks(t1)

        p.add_stages(s)

        return p
def test_stage_validate_entities(t, l, i, b, se):

    s = Stage()

    data_type = [t, l, i, b, se]

    for data in data_type:
        with pytest.raises(TypeError):
            s._validate_entities(data)

    t = Task()
    assert isinstance(s._validate_entities(t), set)

    t1 = Task()
    t2 = Task()
    assert set([t1, t2]) == s._validate_entities([t1, t2])
def test_wfp_enqueue():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    amgr = Amgr(hostname=hostname, port=port)
    amgr._setup_mqs()

    wfp = WFprocessor(sid=amgr._sid,
                      workflow=[p],
                      pending_queue=amgr._pending_queue,
                      completed_queue=amgr._completed_queue,
                      mq_hostname=amgr._mq_hostname,
                      port=amgr._port,
                      resubmit_failed=False)

    wfp._initialize_workflow()

    amgr.workflow = [p]
    profiler = ru.Profiler(name='radical.entk.temp')

    for t in p.stages[0].tasks:
        assert t.state == states.INITIAL

    assert p.stages[0].state == states.INITIAL
    assert p.state == states.INITIAL

    amgr._terminate_sync = Event()
    sync_thread = Thread(target=amgr._synchronizer, name='synchronizer-thread')
    sync_thread.start()

    proc = Process(target=func_for_enqueue_test, name='temp-proc', args=(wfp,))
    proc.start()
    proc.join()

    amgr._terminate_sync.set()
    sync_thread.join()

    for t in p.stages[0].tasks:
        assert t.state == states.SCHEDULED

    assert p.stages[0].state == states.SCHEDULED
    assert p.state == states.SCHEDULING
def get_pipeline(shared_fs=False, size=1):

    p = Pipeline()
    p.name = 'p'

    n = 4

    s1 = Stage()
    s1.name = 's1'
    for x in range(n):
        t = Task()
        t.name = 't%s'%x

        # dd if=/dev/random bs=<byte size of a chunk> count=<number of chunks> of=<output file name>

        t.executable = 'dd'

        if not shared_fs:
            t.arguments = ['if=/dev/urandom','bs=%sM'%size, 'count=1', 'of=$NODE_LFS_PATH/s1_t%s.txt'%x]
        else:
            t.arguments = ['if=/dev/urandom','bs=%sM'%size, 'count=1', 'of=/home/vivek91/s1_t%s.txt'%x]

        t.cpu_reqs['processes'] = 1
        t.cpu_reqs['threads_per_process'] = 24
        t.cpu_reqs['thread_type'] = ''
        t.cpu_reqs['process_type'] = ''
        t.lfs_per_process = 1024

        s1.add_tasks(t)

    p.add_stages(s1)

    s2 = Stage()
    s2.name = 's2'
    for x in range(n):
        t = Task()
        t.executable = ['dd']

        if not shared_fs:
            t.arguments = ['if=$NODE_LFS_PATH/s1_t%s.txt'%x,'bs=%sM'%size, 'count=1', 'of=$NODE_LFS_PATH/s2_t%s.txt'%x]
        else:
            t.arguments = ['if=/home/vivek91/s1_t%s.txt'%x,'bs=%sM'%size, 'count=1', 'of=/home/vivek91/s2_t%s.txt'%x]

        t.cpu_reqs['processes'] = 1
        t.cpu_reqs['threads_per_process'] = 24
        t.cpu_reqs['thread_type'] = ''
        t.cpu_reqs['process_type'] = ''
        t.tag = 't%s'%x

        s2.add_tasks(t)


    p.add_stages(s2)

    return p
def test_pipeline_stage_addition():

    p = Pipeline()
    s1 = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s1.tasks = t
    s2 = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s2.tasks = t
    p.add_stages([s1, s2])

    assert type(p.stages) == list
    assert p._stage_count == 2
    assert p._cur_stage == 1
    assert p.stages[0] == s1
    assert p.stages[1] == s2
    def create_pipeline():

        p = Pipeline()

        s = Stage()

        t1 = Task()
        t1.name = 'simulation'
        t1.executable = ['/bin/echo']
        t1.arguments = ['hello']
        t1.copy_input_data = []
        t1.copy_output_data = []

        s.add_tasks(t1)

        p.add_stages(s)

        return p
def test_stage_from_dict():

    d = {'uid': 're.Stage.0000',
         'name': 's1',
         'state': states.DONE,
         'state_history': [states.INITIAL, states.DONE],
         'parent_pipeline': {'uid': 'p1',
                             'name': 'pipe1'}
         }

    s = Stage()
    s.from_dict(d)

    assert s.uid == d['uid']
    assert s.name == d['name']
    assert s.state == d['state']
    assert s.state_history == d['state_history']
    assert s.parent_pipeline == d['parent_pipeline']
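
# A small round-trip sketch (not one of the original tests), based on the dictionary
# layout shown in test_stage_to_dict()/test_stage_from_dict() above: after from_dict(),
# to_dict() is expected to reproduce the same dictionary.
d = {'uid': 're.Stage.0000',
     'name': 's1',
     'state': states.DONE,
     'state_history': [states.INITIAL, states.DONE],
     'parent_pipeline': {'uid': 'p1', 'name': 'pipe1'}}

s = Stage()
s.from_dict(d)
assert s.to_dict() == d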
def generate_pipeline():

    def func_condition():

        p.suspend()
        print('Suspending pipeline %s for 10 seconds' % p.uid)
        sleep(10)
        return True

    def func_on_true():

        print('Resuming pipeline %s' % p.uid)
        p.resume()

    def func_on_false():
        pass

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    for i in range(10):

        t1 = Task()
        t1.executable = '/bin/sleep'
        t1.arguments = ['30']

        # Add the Task to the Stage
        s1.add_tasks(t1)

    # Add post-exec to the Stage
    s1.post_exec = {
        'condition': func_condition,
        'on_true': func_on_true,
        'on_false': func_on_false
    }

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
def test_integration_local():

    """
    **Purpose**: Run an EnTK application on localhost
    """

    def create_single_task():

        t1 = Task()
        t1.name = 'simulation'
        t1.executable = ['/bin/echo']
        t1.arguments = ['hello']
        t1.copy_input_data = []
        t1.copy_output_data = []

        return t1

    p1 = Pipeline()
    p1.name = 'p1'

    s = Stage()
    s.name = 's1'
    s.tasks = create_single_task()
    s.add_tasks(create_single_task())

    p1.add_stages(s)

    res_dict = {

            'resource': 'local.localhost',
            'walltime': 5,
            'cpus': 1,
            'project': ''

    }

    os.environ['RADICAL_PILOT_DBURL'] = MLAB

    appman = AppManager(hostname=hostname, port=port)
    appman.resource_desc = res_dict
    appman.workflow = [p1]
    appman.run()
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Create a Task object which sleeps for 300 seconds
    t1 = Task()
    t1.executable = ['/bin/sleep']
    t1.arguments = ['300']

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
def generate_pipeline():

    # Create a Pipeline object
    p = Pipeline()

    # Create a Stage object
    s1 = Stage()

    # Create a Task object which moves the uploaded 'temp' file to /tmp/
    t1 = Task()
    t1.executable = ['mv']
    t1.arguments = ['temp','/tmp/']
    t1.upload_input_data = ['%s/temp'%cur_dir]

    # Add the Task to the Stage
    s1.add_tasks(t1)

    # Add Stage to the Pipeline
    p.add_stages(s1)

    return p
def test_stage_task_addition():

    s = Stage()
    t1 = Task()
    t1.executable = ['/bin/date']
    t2 = Task()
    t2.executable = ['/bin/date']
    s.add_tasks(set([t1, t2]))

    assert type(s.tasks) == set
    assert s._task_count == 2
    assert t1 in s.tasks
    assert t2 in s.tasks

    s = Stage()
    t1 = Task()
    t1.executable = ['/bin/date']
    t2 = Task()
    t2.executable = ['/bin/date']
    s.add_tasks([t1, t2])

    assert type(s.tasks) == set
    assert s._task_count == 2
    assert t1 in s.tasks
    assert t2 in s.tasks
def test_wfp_initialize_workflow():

    p = Pipeline()
    s = Stage()
    t = Task()
    t.executable = ['/bin/date']
    s.add_tasks(t)
    p.add_stages(s)

    wfp = WFprocessor(sid='test',
                      workflow=[p],
                      pending_queue=list(),
                      completed_queue=list(),
                      mq_hostname=hostname,
                      port=port,
                      resubmit_failed=False)

    wfp._initialize_workflow()
    assert p.uid is not None
    assert p.stages[0].uid is not None
    for t in p.stages[0].tasks:
        assert t.uid is not None