Example 1
 def test_iterative_big_pipeline_workflow(self):
     self.big_pipeline.files_to_create = [["toto", "tutu"],
                                          ["tata", "titi", "tete"]]
     self.big_pipeline.dynamic_parameter = [[1, 2], [3, 4, 5]]
     self.big_pipeline.other_input = 5
     self.big_pipeline.output_image = [
         [os.path.join(self.directory, 'toto_out'),
          os.path.join(self.directory, 'tutu_out')],
         [os.path.join(self.directory, 'tata_out'),
          os.path.join(self.directory, 'titi_out'),
          os.path.join(self.directory, 'tete_out')]]
     self.big_pipeline.other_output = [[1.1, 2.1], [3.1, 4.1, 5.1]]
     workflow = pipeline_workflow.workflow_from_pipeline(self.big_pipeline)
     # expect 6 + 7 jobs
     self.assertEqual(len(workflow.jobs), 13)
     subjects = set()
     for job in workflow.jobs:
         if not job.name.startswith('DummyProcess'):
             continue
         kwargs = eval(re.match('^.*kwargs=({.*}); kwargs.update.*$',
                                job.command[2]).group(1))
         self.assertEqual(kwargs["other_input"], 5)
         # get argument of 'input_image' file parameter
         subject = job.command[4::2][job.command[3::2].index('input_image')]
         subjects.add(subject)
         if sys.version_info >= (2, 7):
             self.assertIn(subject,
                           ["toto", "tutu", "tata", "titi", "tete"])
         else:
             self.assertTrue(subject in
                             ["toto", "tutu", "tata", "titi", "tete"])
     self.assertEqual(subjects,
                      set(["toto", "tutu", "tata", "titi", "tete"]))
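The kwargs extraction above is terse; here is a minimal standalone sketch of the same parsing, using a hypothetical job command (not taken from a real workflow), to show what the regex and the slice indexing do:

import re

# hypothetical soma-workflow job command: interpreter, script, code string,
# then alternating parameter name / value pairs
command = ["python", "-c",
           "kwargs={'other_input': 5}; kwargs.update(param_dict)",
           "input_image", "toto", "dynamic_parameter", "1"]
# pull the literal kwargs dict out of the embedded code string
kwargs = eval(re.match(r'^.*kwargs=({.*}); kwargs.update.*$',
                       command[2]).group(1))
assert kwargs['other_input'] == 5
# parameter names sit at odd indices from 3, values at even indices from 4
subject = command[4::2][command[3::2].index('input_image')]
assert subject == 'toto'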
Example 4
 def test_full_wf(self):
     self.pipeline.enable_all_pipeline_steps()
     wf = pipeline_workflow.workflow_from_pipeline(
         self.pipeline, study_config=self.study_config)
     # 5 jobs including the output directories creation
     self.assertEqual(len(wf.jobs), 5)
     # 4 deps (1 additional, dirs->node1)
     self.assertEqual(len(wf.dependencies), 4)
Example 5
 def test_partial_wf2(self):
     self.pipeline.enable_all_pipeline_steps()
     self.pipeline.pipeline_steps.step2 = False
     wf = pipeline_workflow.workflow_from_pipeline(
         self.pipeline, study_config=self.study_config,
         create_directories=False)
     self.assertEqual(len(wf.jobs), 3)
     self.assertEqual(len(wf.dependencies), 0)
Example 6
 def test_requirements(self):
     engine = self.study_config.engine
     with engine.settings as session:
         session.remove_config('spm', 'global', 'spm12-standalone')
     self.pipeline.enable_all_pipeline_steps()
     with self.assertRaises(ValueError):
         wf = pipeline_workflow.workflow_from_pipeline(
             self.pipeline, study_config=self.study_config)
Example 8
 def _test_loo_pipeline(self, pipeline2):
     pipeline2.main_inputs = [
         os.path.join(self.temp_dir, 'file%d' % i) for i in range(4)
     ]
     pipeline2.subjects = ['subject%d' % i for i in range(4)]
     pipeline2.output_directory = os.path.join(self.temp_dir, 'out_dir')
     pipeline2.test_output = os.path.join(self.temp_dir, 'out_dir',
                                          'outputs')
     wf = pipeline_workflow.workflow_from_pipeline(pipeline2,
                                                   create_directories=False)
     import soma_workflow.client as swc
     swc.Helper.serialize(
         os.path.join(self.temp_dir, 'custom_nodes.workflow'), wf)
     import six
     #print('workflow:')
     #print('jobs:', wf.jobs)
     #print('dependencies:', sorted([(x[0].name, x[1].name) for x in wf.dependencies]))
     #print('dependencies:', wf.dependencies)
     #print('links:', {n.name: {p: (l[0].name, l[1]) for p, l in six.iteritems(links)} for n, links in six.iteritems(wf.param_links)})
     self.assertEqual(len(wf.jobs), 31)
     self.assertEqual(len(wf.dependencies), 16 * 4 + 1)
     deps = sorted([
         ['Pipeline1_map', 'LOO'], ['Pipeline1_map', 'intermediate_output'],
         ['Pipeline1_map', 'train2'], ['Pipeline1_map', 'output_file'],
         ['Pipeline1_map', 'test'], ['Pipeline1_map', 'test_output'],
         ['LOO', 'train1'], ['train1', 'train2'],
         ['train1', 'intermediate_output'], ['train2', 'test'],
         ['train2', 'output_file'], ['test', 'test_output'],
         ['intermediate_output', 'Pipeline1_reduce'],
         ['output_file', 'Pipeline1_reduce'],
         ['test_output', 'Pipeline1_reduce'], ['test', 'Pipeline1_reduce']
     ] * 4 + [['Pipeline1_reduce', 'global_output']])
     self.assertEqual(
         sorted([[x.name for x in d] for d in wf.dependencies]), deps)
     train1_jobs = [job for job in wf.jobs if job.name == 'train1']
     self.assertEqual(
         sorted([job.param_dict['out1'] for job in train1_jobs]), [
             os.path.join(pipeline2.output_directory,
                          'subject%d_interm' % i) for i in range(4)
         ])
     train2_jobs = [job for job in wf.jobs if job.name == 'train2']
     self.assertEqual(
         sorted([job.param_dict['out1'] for job in train2_jobs]), [
             os.path.join(pipeline2.output_directory, 'subject%d' % i)
             for i in range(4)
         ])
     test_jobs = [job for job in wf.jobs if job.name == 'test']
     self.assertEqual(len(test_jobs), 4)
     test_outputs = [job for job in wf.jobs if job.name == 'test_output']
     #print('test_output jobs:', test_outputs)
     #for j in test_outputs:
     #print('param_dict:', j.param_dict)
     out = sorted([job.param_dict['out_file'] for job in test_outputs])
     self.assertEqual(
         sorted([job.param_dict['out_file'] for job in test_outputs]), [
             os.path.join(pipeline2.output_directory,
                          'subject%d_test_output' % i) for i in range(4)
         ])
Example 9
 def test_atomic_dependencies(self):
     workflow = workflow_from_pipeline(self.atomic_pipeline)
     dependencies = [(x.name, y.name) for x, y in workflow.dependencies]
     self.assertTrue(len(dependencies) == 4)
     self.assertTrue(("node1", "node2") in dependencies)
     self.assertTrue(("node1", "node3") in dependencies)
     self.assertTrue(("node2", "node4") in dependencies)
     self.assertTrue(("node3", "node4") in dependencies)
     self.assertEqual(workflow.groups, [])
Example 11
 def test_composite_dependencies(self):
     workflow = workflow_from_pipeline(self.composite_pipeline)
     dependencies = [(x.name, y.name) for x, y in workflow.dependencies]
     self.assertTrue(len(dependencies) == 16)
     self.assertEqual(dependencies.count(("node1", "node2")), 1)
     self.assertEqual(dependencies.count(("node1", "node3")), 2)
     self.assertEqual(dependencies.count(("node2", "node4")), 1)
     self.assertEqual(dependencies.count(("node3", "node4")), 2)
     self.assertEqual(dependencies.count(("node1", "node2_input")), 1)
     self.assertEqual(dependencies.count(("node2_output", "node4")), 1)
     self.assertTrue(len(workflow.groups) == 1)
Example 13
 def test_custom_nodes_workflow(self):
     sc = StudyConfig()
     pipeline = sc.get_process_instance(Pipeline1)
     pipeline.main_input = '/dir/file'
     pipeline.output_directory = '/dir/out_dir'
     wf = pipeline_workflow.workflow_from_pipeline(pipeline,
                                                   create_directories=False)
     self.assertEqual(len(wf.jobs), 3)
     self.assertEqual(len(wf.dependencies), 2)
     self.assertEqual(
         sorted([[x.name for x in d] for d in wf.dependencies]),
         sorted([['train1', 'train2'], ['train2', 'test']]))
Example 14
 def test_partial_wf3_fail(self):
     self.pipeline.enable_all_pipeline_steps()
     self.pipeline.pipeline_steps.step1 = False
     try:
         wf = pipeline_workflow.workflow_from_pipeline(
             self.pipeline, study_config=self.study_config)
     except ValueError:
         pass # OK
     else:
         # no exception, this is a bug.
         raise ValueError('workflow should have failed due to a missing '
             'temporary file')
Example 16
 def test_custom_nodes_workflow(self):
     sc = StudyConfig()
     pipeline = sc.get_process_instance(Pipeline1)
     pipeline.main_input = os.path.join(self.temp_dir, 'file')
     pipeline.output_directory = os.path.join(self.temp_dir, 'out_dir')
     wf = pipeline_workflow.workflow_from_pipeline(pipeline,
                                                   create_directories=False)
     self.assertEqual(len(wf.jobs), 7)
     self.assertEqual(len(wf.dependencies), 6)
     self.assertEqual(
         sorted([[x.name for x in d] for d in wf.dependencies]),
         sorted([['LOO', 'train1'], ['train1', 'train2'],
                 ['train1', 'intermediate_output'], ['train2', 'test'],
                 ['train2', 'output_file'], ['test', 'test_output']]))
Example 17
    def test_iterative_pipeline_workflow_run(self):
        import soma_workflow.configuration as swconfig
        import soma_workflow.constants as swconstants
        import soma_workflow.client as swclient

        self.small_pipeline.output_image = [
            os.path.join(self.directory, 'toto_out'),
            os.path.join(self.directory, 'tutu_out')]
        self.small_pipeline.other_output = [1., 2.]
        workflow = pipeline_workflow.workflow_from_pipeline(
            self.small_pipeline)

        # use a temporary sqlite database in soma-workflow to avoid concurrent
        # access problems
        config = swconfig.Configuration.load_from_file()
        tmpdb = tempfile.mkstemp('.db', prefix='swf_')
        os.close(tmpdb[0])
        os.unlink(tmpdb[1])
        config._database_file = tmpdb[1]
        controller = swclient.WorkflowController(config=config)
        try:
            wf_id = controller.submit_workflow(workflow)
            print('* running pipeline...')
            swclient.Helper.wait_workflow(wf_id, controller)
            print('* finished.')
            workflow_status = controller.workflow_status(wf_id)
            elements_status = controller.workflow_elements_status(wf_id)
            failed_jobs = [element for element in elements_status[0] \
                if element[1] != swconstants.DONE \
                    or element[3][0] != swconstants.FINISHED_REGULARLY]
            if not debug:
                controller.delete_workflow(wf_id)
            self.assertTrue(workflow_status == swconstants.WORKFLOW_DONE,
                'Workflow did not finish regularly: %s' % workflow_status)
            self.assertTrue(len(failed_jobs) == 0, 'Jobs failed: %s'
                            % failed_jobs)
            # check output files contents
            for ifname, fname in zip(self.small_pipeline.files_to_create,
                                    self.small_pipeline.output_image):
                content = open(fname).read()
                self.assertEqual(content, "file: %s\n" % ifname)
        finally:
            # remove the temporary database
            del controller
            del config
            if not debug:
                os.unlink(tmpdb[1])
                if os.path.exists(tmpdb[1] + '-journal'):
                    os.unlink(tmpdb[1] + '-journal')
Example 18
 def test_iterative_pipeline_workflow(self):
     self.small_pipeline.output_image = [
         os.path.join(self.directory, 'toto_out'),
         os.path.join(self.directory, 'tutu_out')]
     self.small_pipeline.other_output = [1., 2.]
     workflow = pipeline_workflow.workflow_from_pipeline(
         self.small_pipeline)
     #expect 2 + 2 (iter) + 2 (barriers) jobs
     self.assertEqual(len(workflow.jobs), 6)
     # expect 6 dependencies:
     # init -> iterative input barrier
     # iterative output barrier -> end
     # iterative input barrier -> iterative jobs (2)
     # iterative jobs -> iterative output barrier (2)
     self.assertEqual(len(workflow.dependencies), 6)
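The comment block above lists the expected links; a quick way to inspect them, reusing the workflow object built in this test and the (x.name, y.name) pattern of Example 9, is:

dep_names = sorted((src.name, dst.name) for src, dst in workflow.dependencies)
# with 2 iterations this yields 6 name pairs: init -> input barrier,
# input barrier -> each iterative job, each iterative job -> output barrier,
# and output barrier -> end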
Example 20
    def test_iter_workflow(self):
        engine = self.study_config.engine
        pipeline = engine.get_process_instance(DummyPipelineIter)
        self.assertTrue(pipeline is not None)
        niter = 2
        pipeline.input = [osp.join(self.tmpdir, 'file_in%d' % i)
                          for i in range(niter)]
        pipeline.output1 = osp.join(self.tmpdir, 'file_out1')
        pipeline.output2 = osp.join(self.tmpdir, 'file_out2')
        pipeline.output3 = osp.join(self.tmpdir, 'file_out3')

        wf = pipeline_workflow.workflow_from_pipeline(
            pipeline, study_config=self.study_config,
            create_directories=False)
        njobs = 4*niter + 3 + 2  # 3 after + map / reduce
        self.assertEqual(len(wf.jobs), njobs)
        #for job in wf.jobs:
            #print(job.name)
            #print(job.command)
            #print('ref inputs:', job.referenced_input_files)
            #print('ref outputs:', job.referenced_output_files)
            #print()

        #import soma_workflow.client as swc
        #swc.Helper.serialize('/tmp/workflow.wf', wf)

        for i, filein in enumerate(pipeline.input):
            with open(filein, 'w') as f:
                print('MAIN INPUT %d' % i, file=f)

        exec_id = engine.start(pipeline, workflow=wf)
        self.exec_ids.append(exec_id)

        print('execution started')
        status = engine.wait(exec_id, pipeline=pipeline)
        print('finished:', status)

        self.assertEqual(status, 'workflow_done')
        self.assertTrue(osp.exists(pipeline.output1))
        self.assertTrue(osp.exists(pipeline.output2))
        self.assertTrue(osp.exists(pipeline.output3))
        lens = [16, 20, 20]
        for o in range(3):
            #print('** output%d: **' % (o+1))
            with open(getattr(pipeline, 'output%d' % (o+1))) as f:
                text = f.read()
                #print(text)
                self.assertEqual(len(text.split('\n')), lens[o])
Example 21
    def test_iterative_pipeline_workflow_run(self):
        import soma_workflow.configuration as swconfig
        import soma_workflow.constants as swconstants
        import soma_workflow.client as swclient

        self.small_pipeline.output_image = [
            os.path.join(self.directory, 'toto_out'),
            os.path.join(self.directory, 'tutu_out')]
        self.small_pipeline.other_output = [1., 2.]
        workflow = pipeline_workflow.workflow_from_pipeline(
            self.small_pipeline)

        # use a temporary sqlite database in soma-workflow to avoid concurrent
        # access problems
        config = swconfig.Configuration.load_from_file()
        tmpdb = tempfile.mkstemp('.db', prefix='swf_')
        os.close(tmpdb[0])
        os.unlink(tmpdb[1])
        config._database_file = tmpdb[1]
        controller = swclient.WorkflowController(config=config)
        wf_id = controller.submit_workflow(workflow)
        print('* running pipeline...')
        swclient.Helper.wait_workflow(wf_id, controller)
        print('* finished.')
        workflow_status = controller.workflow_status(wf_id)
        elements_status = controller.workflow_elements_status(wf_id)
        failed_jobs = [element for element in elements_status[0] \
            if element[1] != swconstants.DONE \
                or element[3][0] != swconstants.FINISHED_REGULARLY]
        if not debug:
            controller.delete_workflow(wf_id)
        # remove the temporary database
        del controller
        del config
        if not debug:
            os.unlink(tmpdb[1])
        self.assertTrue(workflow_status == swconstants.WORKFLOW_DONE,
            'Workflow did not finish regularly: %s' % workflow_status)
        self.assertTrue(len(failed_jobs) == 0, 'Jobs failed: %s'
                        % failed_jobs)
        # check output files contents
        for ifname, fname in zip(self.small_pipeline.files_to_create,
                                 self.small_pipeline.output_image):
            content = open(fname).read()
            self.assertEqual(content, "file: %s\n" % ifname)
Example 22
    def _create_workflow(self, subject_ids):
        study_config = self._study
        workflow = Workflow(
            name='Morphologist UI - %s' % study_config.study_name,
            jobs=[])
        workflow.root_group = []
        initial_vol_format = study_config.volumes_format

        priority = (len(subject_ids) - 1) * 100
        for subject_id in subject_ids:
            analysis = self._study.analyses[subject_id]
            subject = self._study.subjects[subject_id]

            analysis.set_parameters(subject)
            #analysis.propagate_parameters()
            pipeline = analysis.pipeline
            pipeline.enable_all_pipeline_steps()
            # force highest priority normalization method
            # FIXME: specific knowledge of Morphologist should not be used here.
            pipeline.Normalization_select_Normalization_pipeline \
                  = 'NormalizeSPM'
            pipeline_tools.disable_runtime_steps_with_existing_outputs(
                pipeline)

            missing = pipeline_tools.nodes_with_missing_inputs(pipeline)
            if missing:
                self.check_missing_models(pipeline, missing)
                print('MISSING INPUTS IN NODES:', missing)
                raise MissingInputFileError("subject: %s" % subject_id)

            wf = pipeline_workflow.workflow_from_pipeline(
                pipeline, study_config=study_config,
                jobs_priority=priority)
            njobs = len([j for j in wf.jobs if isinstance(j, Job)])
            if njobs != 0:
                priority -= 100
                workflow.jobs += wf.jobs
                workflow.dependencies += wf.dependencies
                group = Group(wf.root_group,
                            name='Morphologist %s' % str(subject))
                group.user_storage = subject_id
                workflow.root_group.append(group) # += wf.root_group
                workflow.groups += [group] + wf.groups

        return workflow
Example 23
 def test_mapreduce(self):
     sc = StudyConfig()
     pipeline = sc.get_process_instance(PipelineMapReduce)
     pipeline.main_inputs = [
         os.path.join(self.temp_dir, 'file%d' % i) for i in range(4)
     ]
     pipeline.subjects = ['Robert', 'Gustave']
     pipeline.output_directory = os.path.join(self.temp_dir, 'out_dir')
     self.assertEqual(pipeline.nodes['cat'].process.files, [
         os.path.join(pipeline.output_directory,
                      '%s_test_output' % pipeline.subjects[0]),
         os.path.join(pipeline.output_directory,
                      '%s_test_output' % pipeline.subjects[1])
     ])
     wf = pipeline_workflow.workflow_from_pipeline(pipeline,
                                                   create_directories=False)
     self.assertEqual(len(wf.jobs), 19)
     #print(sorted([(d[0].name, d[1].name) for d in wf.dependencies]))
     self.assertEqual(len(wf.dependencies), 28)
Example 24
    def test_iterative_pipeline_workflow_run(self):
        import soma_workflow.constants as swconstants
        import soma_workflow.client as swclient

        self.small_pipeline.output_image = [
            os.path.join(self.directory, 'toto_out'),
            os.path.join(self.directory, 'tutu_out')
        ]
        self.small_pipeline.other_output = [1., 2.]
        workflow = pipeline_workflow.workflow_from_pipeline(
            self.small_pipeline)
        swclient.Helper.serialize(
            os.path.join(self.directory, 'smallpipeline.workflow'), workflow)

        self.study_config.use_soma_workflow = True

        #controller = swclient.WorkflowController(config=config)
        #try:

        #wf_id = controller.submit_workflow(workflow)
        print('* running pipeline...')
        #swclient.Helper.wait_workflow(wf_id, controller)
        self.study_config.run(self.small_pipeline)
        print('* finished.')
        #workflow_status = controller.workflow_status(wf_id)
        #elements_status = controller.workflow_elements_status(wf_id)
        #failed_jobs = [element for element in elements_status[0] \
        #if element[1] != swconstants.DONE \
        #or element[3][0] != swconstants.FINISHED_REGULARLY]
        #if not debug:
        #controller.delete_workflow(wf_id)
        #self.assertTrue(workflow_status == swconstants.WORKFLOW_DONE,
        #'Workflow did not finish regularly: %s' % workflow_status)
        #self.assertTrue(len(failed_jobs) == 0, 'Jobs failed: %s'
        #% failed_jobs)
        # check output files contents
        for ifname, fname in zip(self.small_pipeline.files_to_create,
                                 self.small_pipeline.output_image):
            with open(fname) as f:
                content = f.read()
            self.assertEqual(content, "file: %s\n" % ifname)
Example 25
 def test_iterative_big_pipeline_workflow(self):
     self.big_pipeline.files_to_create = [["toto", "tutu"],
                                          ["tata", "titi", "tete"]]
     self.big_pipeline.dynamic_parameter = [[1, 2], [3, 4, 5]]
     self.big_pipeline.other_input = 5
     self.big_pipeline.output_image = [
         [
             os.path.join(self.directory, 'toto_out'),
             os.path.join(self.directory, 'tutu_out')
         ],
         [
             os.path.join(self.directory, 'tata_out'),
             os.path.join(self.directory, 'titi_out'),
             os.path.join(self.directory, 'tete_out')
         ]
     ]
     self.big_pipeline.other_output = [[1.1, 2.1], [3.1, 4.1, 5.1]]
     workflow = pipeline_workflow.workflow_from_pipeline(self.big_pipeline)
     # expect 6 + 7 + 2 jobs
     self.assertEqual(len(workflow.jobs), 15)
     subjects = set()
     for job in workflow.jobs:
         if not job.name.startswith('DummyProcess') or '_map' in job.name \
                 or '_reduce' in job.name:
             continue
         param_dict = job.param_dict
         self.assertEqual(param_dict["other_input"], 5)
         subject = param_dict['input_image']
         subjects.add(subject)
         if sys.version_info >= (2, 7):
             self.assertIn(subject,
                           ["toto", "tutu", "tata", "titi", "tete"])
         else:
             self.assertTrue(
                 subject in ["toto", "tutu", "tata", "titi", "tete"])
     self.assertEqual(subjects,
                      set(["toto", "tutu", "tata", "titi", "tete"]))
Example 26
 def _test_loo_pipeline(self, pipeline2):
     pipeline2.main_inputs = ['/dir/file%d' % i for i in range(4)]
     pipeline2.subjects = ['subject%d' % i for i in range(4)]
     pipeline2.output_directory = '/dir/out_dir'
     wf = pipeline_workflow.workflow_from_pipeline(pipeline2,
                                                   create_directories=False)
     self.assertEqual(len(wf.jobs), 12)
     self.assertEqual(len(wf.dependencies), 8)
     deps = sorted([['train1', 'train2'], ['train2', 'test']] * 4)
     self.assertEqual(
         sorted([[x.name for x in d] for d in wf.dependencies]), deps)
     train1_jobs = [job for job in wf.jobs if job.name == 'train1']
     self.assertEqual(
         sorted([
             job.command[job.command.index('out1') + 1]
             for job in train1_jobs
         ]), [
             os.path.join(pipeline2.output_directory,
                          'subject%d_interm' % i) for i in range(4)
         ])
     train2_jobs = [job for job in wf.jobs if job.name == 'train2']
     self.assertEqual(
         sorted([
             job.command[job.command.index('out1') + 1]
             for job in train2_jobs
         ]), [
             os.path.join(pipeline2.output_directory, 'subject%d' % i)
             for i in range(4)
         ])
     test_jobs = [job for job in wf.jobs if job.name == 'test']
     self.assertEqual(
         sorted([
             job.command[job.command.index('out1') + 1] for job in test_jobs
         ]), [
             os.path.join(pipeline2.output_directory,
                          'subject%d_test_output' % i) for i in range(4)
         ])
Example 27
def start(engine,
          process,
          workflow=None,
          history=True,
          get_pipeline=False,
          **kwargs):
    '''
    Asynchronously start the execution of a process or pipeline in the
    connected computing environment. Returns an identifier of the process
    execution, which can be used to get the status of the execution or to
    wait for its termination.

    TODO:
    if history is True, an entry of the process execution is stored in
    the database. The content of this entry is to be defined but it will
    contain the process parameters (to restart the process) and will be
    updated on process termination (for instance to store execution time
    if possible).

    Parameters
    ----------
    engine: CapsulEngine
    process: Process or Pipeline instance
    workflow: Workflow instance (optional - if already defined before call)
    history: bool (optional)
        TODO: not implemented yet.
    get_pipeline: bool (optional)
        if True, start() will return a tuple (execution_id, pipeline). The
        pipeline is normally the input pipeline (process) if it is actually
        a pipeline. But if the input process is a "single process", it will
        be inserted into a small pipeline for execution. This pipeline will
        be the one actually run, and may be passed to :meth:`wait` to set
        output parameters.

    Returns
    -------
    execution_id: int
        execution identifier (actually a soma-workflow id)
    pipeline: Pipeline instance (optional)
        only returned if get_pipeline is True.
    '''

    # set parameters values
    for k, v in six.iteritems(kwargs):
        setattr(process, k, v)

    missing = process.get_missing_mandatory_parameters()
    if len(missing) != 0:
        ptype = 'process'
        if isinstance(process, Pipeline):
            ptype = 'pipeline'
        raise ValueError('In %s %s: missing mandatory parameters: %s' %
                         (ptype, process.name, ', '.join(missing)))

    # Use soma workflow to execute the pipeline or process in parallel
    # on the local machine

    # Create soma workflow pipeline
    from capsul.pipeline.pipeline_workflow import workflow_from_pipeline
    import soma_workflow.client as swclient

    if workflow is None:
        workflow = workflow_from_pipeline(process)

    swm = engine.study_config.modules['SomaWorkflowConfig']
    swm.connect_resource(engine.connected_to())
    controller = swm.get_workflow_controller()
    resource_id = swm.get_resource_id()
    queue = None
    if hasattr(engine.study_config.somaworkflow_computing_resources_config,
               resource_id):
        res_conf = getattr(
            engine.study_config.somaworkflow_computing_resources_config,
            resource_id)
        queue = res_conf.queue
        if queue is Undefined:
            queue = None
    workflow_name = process.name
    wf_id = controller.submit_workflow(workflow=workflow,
                                       name=workflow_name,
                                       queue=queue)
    swclient.Helper.transfer_input_files(wf_id, controller)

    if get_pipeline:
        return wf_id, workflow.pipeline()
    # else forget the pipeline
    return wf_id
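A minimal usage sketch for start(), assuming an already configured CapsulEngine named engine and a pipeline whose mandatory parameters are set; the wait() call mirrors the pattern of Example 20:

exec_id, run_pipeline = start(engine, pipeline, get_pipeline=True)
# wait() blocks until the workflow terminates and sets output parameters
status = engine.wait(exec_id, pipeline=run_pipeline)
print('finished with status:', status)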
Example 28
    def run(self, process_or_pipeline, output_directory=None,
            executer_qc_nodes=True, verbose=0, **kwargs):
        """Method to execute a process or a pipeline in a study configuration
         environment.

         Only pipeline nodes can be filtered on the 'executer_qc_nodes'
         attribute.

         A valid output directory is expected to execute the process or the
         pipeline without soma-workflow.

        Parameters
        ----------
        process_or_pipeline: Process or Pipeline instance (mandatory)
            the process or pipeline we want to execute
        output_directory: Directory name (optional)
            the output directory to use for process execution. This replaces
            self.output_directory but leaves it unchanged.
        executer_qc_nodes: bool (optional, default True)
            if True, execute process nodes that are tagged as quality control
            process nodes.
        verbose: int
            if different from zero, print console messages.
        """
        
        if self.create_output_directories:
            for name, trait in process_or_pipeline.user_traits().items():
                if trait.output and isinstance(trait.handler, (File, Directory)):
                    value = getattr(process_or_pipeline, name)
                    if value is not Undefined and value:
                        base = os.path.dirname(value)
                        if not os.path.exists(base):
                            os.makedirs(base)
                            
        # Use soma-workflow to execute the pipeline or process in parallel
        # on the local machine
        if self.get_trait_value("use_soma_workflow"):

            # Create soma workflow pipeline
            workflow = workflow_from_pipeline(process_or_pipeline)
            controller, wf_id = local_workflow_run(process_or_pipeline.id,
                                                   workflow)
            workflow_status = controller.workflow_status(wf_id)
            elements_status = controller.workflow_elements_status(wf_id)
            # FIXME: it would be better if study_config does not require
            # soma_workflow modules.
            from soma_workflow import constants as swconstants
            self.failed_jobs = [
                element for element in elements_status[0]
                if element[1] != swconstants.DONE
                or element[3][0] != swconstants.FINISHED_REGULARLY]
            # if execution was OK, delete the workflow
            if workflow_status == swconstants.WORKFLOW_DONE \
                    and len(self.failed_jobs) == 0:
                controller.delete_workflow(wf_id)
            else:
                # something went wrong: return the controller and workflow id
                # so that one can handle them if needed
                # WARNING: return values not very consistent. We should find
                # a better way to return the status.
                return controller, wf_id

        # Use the local machine to execute the pipeline or process
        else:
            if output_directory is None or output_directory is Undefined:
                output_directory = self.output_directory
            # Not all processes need an output_directory defined on
            # StudyConfig
            if output_directory is not None and output_directory is not Undefined:
                # Check the output directory is valid
                if not isinstance(output_directory, basestring):
                    raise ValueError(
                        "'{0}' is not a valid directory. A valid output "
                        "directory is expected to run the process or "
                        "pipeline.".format(output_directory))
                try:
                    if not os.path.isdir(output_directory):
                        os.makedirs(output_directory)
                except:
                    raise ValueError(
                        "Can't create folder '{0}', please investigate.".format(
                            output_directory))

            # Temporary files can be generated for pipelines
            temporary_files = []
            result = None
            try:
                # Generate ordered execution list
                execution_list = []
                if isinstance(process_or_pipeline, Pipeline):
                    execution_list = \
                        process_or_pipeline.workflow_ordered_nodes()
                    # Filter process nodes if necessary
                    if not executer_qc_nodes:
                        execution_list = [node for node in execution_list
                                        if node.node_type != "view_node"]
                    for node in execution_list:
                        # check temporary outputs and allocate files
                        process_or_pipeline._check_temporary_files_for_node(
                            node, temporary_files)
                elif isinstance(process_or_pipeline, Process):
                    execution_list.append(process_or_pipeline)
                else:
                    raise Exception(
                        "Unknown instance type. Got {0} and expected Process "
                        "or Pipeline instances".format(
                            type(process_or_pipeline).__name__))

                # Execute each process node element
                for process_node in execution_list:
                    # Execute the process instance contained in the node
                    if isinstance(process_node, Node):
                        result = self._run(process_node.process, 
                                           output_directory, 
                                           verbose, **kwargs)

                    # Execute the process instance
                    else:
                        result = self._run(process_node, output_directory,
                                           verbose, **kwargs)
            finally:
                # Destroy temporary files
                if temporary_files:
                    # If temporary files have been created, we are sure that
                    # process_or_pipeline is a pipeline with a method
                    # _free_temporary_files.
                    process_or_pipeline._free_temporary_files(temporary_files)
            return result
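A usage sketch for this run() method, assuming study_config is a configured StudyConfig and the pipeline parameters are already set (the soma-workflow branch mirrors Example 24):

study_config.use_soma_workflow = True
result = study_config.run(pipeline, verbose=1)
# on the soma-workflow branch a (controller, wf_id) tuple is only returned
# when the workflow failed; on success the workflow is deleted and None
# is returned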
Example 29
    def run(self, process_or_pipeline, executer_qc_nodes=True, verbose=1,
            **kwargs):
        """ Method to execute a process or a pipeline in a study configuration
         environment.

         Only pipeline nodes can be filtered on the 'executer_qc_nodes'
         attribute.

         A valid output directory is expected to execute the process or the
         pipeline without soma-workflow.

        Parameters
        ----------
        process_or_pipeline: Process or Pipeline instance (mandatory)
            the process or pipeline we want to execute
        executer_qc_nodes: bool (optional, default True)
            if True, execute process nodes that are tagged as quality control
            process nodes.
        verbose: int
            if different from zero, print console messages.
        """
        # Use soma-workflow to execute the pipeline or process in parallel
        # on the local machine
        if self.get_trait_value("use_soma_workflow"):

            # Create soma workflow pipeline
            workflow = workflow_from_pipeline(process_or_pipeline)
            controller, wf_id = local_workflow_run(process_or_pipeline.id,
                                                   workflow)
            workflow_status = controller.workflow_status(wf_id)
            elements_status = controller.workflow_elements_status(wf_id)
            # FIXME: it would be better if study_config does not require
            # soma_workflow modules.
            from soma_workflow import constants as swconstants
            self.failed_jobs = [
                element for element in elements_status[0]
                if element[1] != swconstants.DONE
                or element[3][0] != swconstants.FINISHED_REGULARLY]
            # if execution was OK, delete the workflow
            if workflow_status == swconstants.WORKFLOW_DONE \
                    and len(self.failed_jobs) == 0:
                controller.delete_workflow(wf_id)
            else:
                # something went wrong: return the controller and workflow id
                # so that one can handle them if needed
                # WARNING: return values not very consistent. We should find
                # a better way to return the status.
                return controller, wf_id

        # Use the local machine to execute the pipeline or process
        else:

            # Check the output directory is valid
            if (self.output_directory is Undefined or
                    not isinstance(self.output_directory, basestring)):
                raise ValueError(
                    "'{0}' is not a valid directory. A valid output "
                    "directory is expected to run the process or "
                    "pipeline.".format(self.output_directory))
            try:
                if not os.path.isdir(self.output_directory):
                    os.makedirs(self.output_directory)
            except:
                raise ValueError(
                    "Can't create folder '{0}', please investigate.".format(
                        self.output_directory))

            # Generate ordered execution list
            execution_list = []
            if isinstance(process_or_pipeline, Pipeline):
                execution_list = process_or_pipeline.workflow_ordered_nodes()
                # Filter process nodes if necessary
                if not executer_qc_nodes:
                    execution_list = [node for node in execution_list
                                      if node.node_type != "view_node"]
            elif isinstance(process_or_pipeline, Process):
                execution_list.append(process_or_pipeline)
            else:
                raise Exception(
                    "Unknown instance type. Got {0} and expected Process "
                    "or Pipeline instances".format(
                        type(process_or_pipeline).__name__))

            # Execute each process node element
            for process_node in execution_list:
                # Execute the process instance contained in the node
                if isinstance(process_node, Node):
                    self._run(process_node.process, verbose, **kwargs)

                # Execute the process instance
                else:
                    self._run(process_node, verbose, **kwargs)
Example 30
def morphologist_all(t1file, sid, outdir, study="morphologist", waittime=10,
                     somaworkflow=False,
                     spmexec="/i2bm/local/spm8-standalone/run_spm8.sh",
                     spmdir="/i2bm/local/spm8-standalone"):
    """ Performs all the Morphologist steps.

    Steps:

    1- Ensure image orientation and reorient it if needed (Prepare Subject for
       Anatomical Pipeline).
    2- Computation of a brain mask (Brain Mask Segmentation).
    3- Computation of a mask for each hemisphere (Split Brain Mask).
    4- A grey/white classification of each hemisphere to perform "Voxel Based
       Morphometry" (Grey White Classification) and spherical triangulation of
       cortical hemispheres (Grey White Surface).
    5- Spherical triangulation of the external interface of the cortex of one
       or two hemispheres (Get Spherical Hemi Surface).
    6- Computation of a graph representing the cortical fold topography
       (Cortical Fold Graph).
    7- Automatic identification of the cortical sulci (Automatic Sulci
       Recognition), located in the "sulci" toolbox.

    The execution is performed with soma_workflow, which has to be installed
    in the bv_env environment.

    To check the workflow submission, use the 'soma_workflow_gui' command.

    If the input 't1file' does not have the expected extension, an Exception
    will be raised.
    If $outdir/$study/$sid has already been created, an Exception will
    be raised.

    Parameters
    ----------
    t1file: str (mandatory)
        the path to a ".nii.gz" anatomical T1 weighted file.
    sid: str (mandatory)
        a subject identifier.
    outdir: str (mandatory)
        the morphologist output files will be written in $outdir/$study/$sid.
    study: str (mandatory)
        the name of the study.
    waittime: float (optional, default 10)
        a delay (in seconds) used to check the workflow status.
    somaworkflow: bool (optional, default False)
        if True, use soma-workflow for the execution.
    spmexec: str (optional)
        the path to the standalone SPM execution file.
    spmdir: str (optional)
        the standalone SPM directory.

    Returns
    -------
    wffile: str
        a file containing the submitted workflow.
    wfid: int
        the submitted workflow identifier.
    wfstatus: str
        the submitted workflow status after 'waittime' seconds.
    """
    # Check roughly the input file extension
    if not t1file.endswith(".nii.gz"):
        raise Exception("'{0}' is not a COMPRESSED NIFTI file.".format(t1file))

    # Create a configuration for the morphologist study
    study_config = StudyConfig(
        modules=StudyConfig.default_modules + ["FomConfig", "BrainVISAConfig"])
    study_dict = {
        "name": "morphologist_fom",
        "input_directory": outdir,
        "output_directory": outdir,
        "input_fom": "morphologist-auto-nonoverlap-1.0",
        "output_fom": "morphologist-auto-nonoverlap-1.0",
        "shared_fom": "shared-brainvisa-1.0",
        "spm_directory": spmdir,
        "use_soma_workflow": True,
        "use_fom": True,
        "spm_standalone": True,
        "use_matlab": False,
        "volumes_format": "NIFTI gz",
        "meshes_format": "GIFTI",
        "use_spm": True,
        "spm_exec": spmexec,
        "study_config.somaworkflow_computing_resource": "localhost",
        "somaworkflow_computing_resources_config": {
            "localhost": {
            }
        }
    }
    study_config.set_study_configuration(study_dict)

    # Create the morphologist pipeline
    pipeline = get_process_instance(
        "morphologist.capsul.morphologist.Morphologist")
    morphologist_pipeline = process_with_fom.ProcessWithFom(
        pipeline, study_config)
    morphologist_pipeline.attributes = dict(
        (trait_name, getattr(morphologist_pipeline, trait_name))
        for trait_name in morphologist_pipeline.user_traits())
    morphologist_pipeline.attributes["center"] = "morphologist"
    morphologist_pipeline.attributes["subject"] = sid
    morphologist_pipeline.create_completion()

    # Create morphologist expected tree
    # ToDo: use ImportT1 from axon
    subjectdir = os.path.join(outdir, study, sid)
    if os.path.isdir(subjectdir):
        raise Exception("Folder '{0}' already created.".format(subjectdir))
    os.makedirs(os.path.join(
        subjectdir, "t1mri", "default_acquisition",
        "default_analysis", "folds", "3.1", "default_session_auto"))
    os.makedirs(os.path.join(
        subjectdir, "t1mri", "default_acquisition",
        "registration"))
    os.makedirs(os.path.join(
        subjectdir, "t1mri", "default_acquisition",
        "segmentation", "mesh"))
    os.makedirs(os.path.join(
        subjectdir, "t1mri", "default_acquisition",
        "tmp"))

    # Copy T1 file in the morphologist expected location
    destfile = os.path.join(subjectdir, "t1mri",
                            "default_acquisition", sid + ".nii.gz")
    shutil.copy(t1file, destfile)

    # Create source_referential morphologist expected file
    source_referential = {"uuid": str(soma.uuid.Uuid())}
    referential_file = os.path.join(
        subjectdir, "t1mri", "default_acquisition", "registration",
        "RawT1-{0}_default_acquisition.referential".format(sid))
    attributes = "attributes = {0}".format(json.dumps(source_referential))
    with open(referential_file, "w") as openfile:
        openfile.write(attributes)

    # Create a workflow from the morphologist pipeline
    workflow = Workflow(name="{0} {1}".format(study, sid),
                        jobs=[])
    workflow.root_group = []

    # Create the workflow
    wf = pipeline_workflow.workflow_from_pipeline(
        morphologist_pipeline.process, study_config=study_config)
    workflow.add_workflow(wf, as_group="{0}_{1}".format(study, sid))
    wffile = os.path.join(subjectdir, "{0}.wf".format(study))
    pickle.dump(workflow, open(wffile, "w"))

    # Execute the workflow with somaworkflow
    if somaworkflow:
        controller = WorkflowController()
        wfid = controller.submit_workflow(
            workflow=workflow, name="{0}_{1}".format(study, sid))

        # Return the workflow status after execution
        while True:
            time.sleep(waittime)
            wfstatus = controller.workflow_status(wfid)
            if wfstatus not in [
                    "workflow_not_started", "workflow_in_progress"]:
                break

    # Execute the workflow with subprocess
    else:
        # -> construct the ordered list of commands to be executed
        workflow_repr = workflow.to_dict()
        graph = Graph()
        for job in workflow_repr["jobs"]:
            graph.add_node(GraphNode(job, None))
        for link in workflow_repr["dependencies"]:
            graph.add_link(link[0], link[1])
        ordered_nodes = [str(node[0]) for node in graph.topological_sort()]
        commands = []
        jobs = workflow_repr["serialized_jobs"]
        temporaries = workflow_repr["serialized_temporary_paths"]
        barriers = workflow_repr["serialized_barriers"]
        for index in ordered_nodes:
            if index in jobs:
                commands.append(jobs[index]["command"])
            elif index in barriers:
                continue
            else:
                raise Exception("Unexpected node in workflow.")

        # -> Go through all commands
        tmpmap = {}
        for cmd in commands:
            # -> deal with temporary files
            for index, item in enumerate(cmd):
                if not isinstance(item, basestring):
                    if str(item) not in tmpmap:
                        if str(item) in temporaries:
                            struct = temporaries[str(item)]
                            name = cmd[2].split(";")[1].split()[-1]
                            tmppath = os.path.join(
                                subjectdir, "t1mri", "default_acquisition",
                                "tmp", str(item) + name + struct["suffix"])
                            tmpmap[str(item)] = tmppath
                        else:
                            raise MorphologistError(
                                "Can't complete command '{0}'.".format(
                                    cmd))
                    cmd[index] = tmpmap[str(item)]

            # -> execute the command
            worker = MorphologistWrapper(cmd)
            worker()
            if worker.exitcode != 0:
                raise MorphologistRuntimeError(
                    " ".join(worker.cmd), worker.stderr)

        wfstatus = "Done"
        wfid = "subprocess"

    return wffile, wfid, wfstatus
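A hedged call sketch for morphologist_all(); every path and the subject identifier below are placeholders:

wffile, wfid, wfstatus = morphologist_all(
    t1file="/data/subject01.nii.gz",
    sid="subject01",
    outdir="/data/morphologist_studies",
    study="morphologist",
    somaworkflow=True)
print(wfid, wfstatus)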
Example 31
def run_process_with_distribution(study_config,
                                  process,
                                  use_soma_workflow=False,
                                  resource_id=None,
                                  password=None,
                                  config=None,
                                  rsa_key_pass=None,
                                  queue=None,
                                  input_file_processing=None,
                                  output_file_processing=None,
                                  keep_workflow=False,
                                  keep_failed_workflow=False,
                                  write_workflow_only=None):
    ''' Run the given process, either sequentially or distributed through
    Soma-Workflow.

    Parameters
    ----------
    study_config: StudyConfig instance
    process: Process instance
        the process to execute (or pipeline, or iteration...)
    use_soma_workflow: bool or None (default=None)
        if False, run sequentially, otherwise use Soma-Workflow. Its
        configuration has to be setup and valid for non-local execution, and
        additional file transfer options may be used.
    resource_id: string (default=None)
        soma-workflow resource ID, defaults to localhost
    password: string
        password to access the remote computing resource. Do not specify it if
        using a ssh key.
    config: dict (optional)
        Soma-Workflow config: Not used for now...
    rsa_key_pass: string
        RSA key password, for ssh key access
    queue: string
        Queue to use on the computing resource. If not specified, use the
        default queue.
    input_file_processing: brainvisa.workflow.ProcessToSomaWorkflow processing code
        Input files processing: local_path (NO_FILE_PROCESSING),
        transfer (FILE_TRANSFER), translate (SHARED_RESOURCE_PATH),
        or translate_shared (BV_DB_SHARED_PATH).
    output_file_processing: same as for input_file_processing
        Output files processing: local_path (NO_FILE_PROCESSING),
        transfer (FILE_TRANSFER), or translate (SHARED_RESOURCE_PATH).
        The default is local_path.
    keep_workflow: bool
        keep the workflow in the computing resource database after execution.
        By default it is removed.
    keep_failed_workflow: bool
        keep the workflow in the computing resource database after execution,
        if it has failed. By default it is removed.
    write_workflow_only: str
        if specified, this is an output filename where the workflow file will
        be written. The workflow will not actually be run, because in this
        situation the user probably wants to use the workflow on their own.
    '''
    if write_workflow_only:
        use_soma_workflow = True

    if use_soma_workflow is not None:
        study_config.use_soma_workflow = use_soma_workflow

    if study_config.use_soma_workflow:

        if write_workflow_only:
            # Create soma workflow pipeline
            from capsul.pipeline.pipeline_workflow \
                import workflow_from_pipeline
            import soma_workflow.client as swclient

            workflow = workflow_from_pipeline(process)
            swclient.Helper.serialize(write_workflow_only, workflow)

            return

        swm = study_config.modules['SomaWorkflowConfig']
        resource_id = swm.get_resource_id(resource_id, set_it=True)
        if password is not None or rsa_key_pass is not None:
            swm.set_computing_resource_password(resource_id, password,
                                                rsa_key_pass)
        if queue is not None:
            if not hasattr(
                    study_config.somaworkflow_computing_resources_config,
                    resource_id):
                setattr(study_config.somaworkflow_computing_resources_config,
                        resource_id, {})
            getattr(study_config.somaworkflow_computing_resources_config,
                    resource_id).queue = queue

    res = study_config.run(process)
    return res
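A usage sketch, assuming study_config and process are already configured; the resource and queue values are placeholders:

result = run_process_with_distribution(
    study_config, process,
    use_soma_workflow=True,
    resource_id='localhost',
    queue=None,
    keep_failed_workflow=True)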
Example 32
        app = QtGui.QApplication.instance()
        if not app:
            app = QtGui.QApplication(sys.argv)
        #pipeline = Pipeline1()
        #pipeline.main_inputs = ['/dir/file%d' % i for i in range(4)]
        #pipeline.left_out = pipeline.main_inputs[2]
        #pipeline.subject = 'subject2'
        #pipeline.output_directory = '/dir/out_dir'
        #view1 = PipelineDevelopperView(pipeline, allow_open_controller=True,
        #show_sub_pipelines=True,
        #enable_edition=True)
        #view1.show()

        pipeline2 = PipelineLOO()
        pipeline2.main_inputs = ['/dir/file%d' % i for i in range(4)]
        pipeline2.left_out = pipeline2.main_inputs[2]
        pipeline2.subjects = ['subject%d' % i for i in range(4)]
        pipeline2.output_directory = '/dir/out_dir'
        wf = pipeline_workflow.workflow_from_pipeline(pipeline2,
                                                      create_directories=False)
        view2 = PipelineDevelopperView(pipeline2,
                                       allow_open_controller=True,
                                       show_sub_pipelines=True,
                                       enable_edition=True)
        view2.show()

        app.exec_()
        #del view1
        del view2
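As a follow-up sketch, the wf object built above could also be submitted to soma-workflow rather than only being displayed in the GUI. This assumes a local soma-workflow installation configured for the current machine; the workflow name is arbitrary.

from soma_workflow.client import WorkflowController, Helper

# connect to the local soma-workflow resource (assumed to be configured)
controller = WorkflowController()
wf_id = controller.submit_workflow(workflow=wf, name='PipelineLOO test')
Helper.wait_workflow(wf_id, controller)    # block until all jobs have finished
print(controller.workflow_status(wf_id))   # e.g. 'workflow_done'
controller.delete_workflow(wf_id)          # clean up the workflow database entry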
Ejemplo n.º 33
0
    def run(self,
            process_or_pipeline,
            output_directory=None,
            execute_qc_nodes=True,
            verbose=0,
            **kwargs):
        """Method to execute a process or a pipline in a study configuration
         environment.

         Depending on the studies_config settings, it may be a sequential run,
         or a parallel run, which can involve remote execution (through soma-
         workflow).

         Only pipeline nodes can be filtered on the 'execute_qc_nodes'
         attribute.

         A valid output directory is exepcted to execute the process or the
         pepeline without soma-workflow.

        Parameters
        ----------
        process_or_pipeline: Process or Pipeline instance (mandatory)
            the process or pipeline we want to execute
        output_directory: Directory name (optional)
            the output directory to use for process execution. This replaces
            self.output_directory but leaves it unchanged.
        execute_qc_nodes: bool (optional, default True)
            if True, execute process nodes that are tagged as quality control
            process nodes.
        verbose: int
            if different from zero, print console messages.
        """

        if self.create_output_directories:
            for name, trait in process_or_pipeline.user_traits().items():
                if trait.output and isinstance(trait.handler,
                                               (File, Directory)):
                    value = getattr(process_or_pipeline, name)
                    if value is not Undefined and value:
                        base = os.path.dirname(value)
                        if base and not os.path.exists(base):
                            os.makedirs(base)

        for k, v in six.iteritems(kwargs):
            setattr(process_or_pipeline, k, v)
        missing = process_or_pipeline.get_missing_mandatory_parameters()
        if len(missing) != 0:
            ptype = 'process'
            if isinstance(process_or_pipeline, Pipeline):
                ptype = 'pipeline'
            raise ValueError(
                'In %s %s: missing mandatory parameters: %s' %
                (ptype, process_or_pipeline.name, ', '.join(missing)))

        # Use soma-workflow to execute the pipeline or process in parallel
        # on the local machine
        if self.get_trait_value("use_soma_workflow"):

            # Create soma workflow pipeline
            from capsul.pipeline.pipeline_workflow import (
                workflow_from_pipeline, workflow_run)
            workflow = workflow_from_pipeline(process_or_pipeline)
            controller, wf_id = workflow_run(process_or_pipeline.id, workflow,
                                             self)
            workflow_status = controller.workflow_status(wf_id)
            elements_status = controller.workflow_elements_status(wf_id)
            # FIXME: it would be better if study_config does not require
            # soma_workflow modules.
            from soma_workflow import constants as swconstants
            from soma_workflow.utils import Helper
            self.failed_jobs = Helper.list_failed_jobs(
                wf_id,
                controller,
                include_aborted_jobs=True,
                include_user_killed_jobs=True)
            #self.failed_jobs = [
            #element for element in elements_status[0]
            #if element[1] != swconstants.DONE
            #or element[3][0] != swconstants.FINISHED_REGULARLY]
            # if execution was OK, delete the workflow
            if workflow_status == swconstants.WORKFLOW_DONE \
                    and len(self.failed_jobs) == 0:
                # success
                if self.somaworkflow_keep_succeeded_workflows:
                    print('Success. Workflow was kept in database.')
                else:
                    controller.delete_workflow(wf_id)
            else:
                # something went wrong: raise an exception containing
                # controller object and workflow id.
                if not self.somaworkflow_keep_failed_workflows:
                    controller.delete_workflow(wf_id)
                raise WorkflowExecutionError(
                    controller, wf_id, self.somaworkflow_keep_failed_workflows)

        # Use the local machine to execute the pipeline or process
        else:
            if output_directory is None or output_directory is Undefined:
                output_directory = self.output_directory
            # Not all processes need an output_directory defined on
            # StudyConfig
            if output_directory is not None and output_directory is not Undefined:
                # Check the output directory is valid
                if not isinstance(output_directory, six.string_types):
                    raise ValueError(
                        "'{0}' is not a valid directory. A valid output "
                        "directory is expected to run the process or "
                        "pipeline.".format(output_directory))
                try:
                    if not os.path.isdir(output_directory):
                        os.makedirs(output_directory)
                except OSError:
                    raise ValueError(
                        "Can't create folder '{0}', please investigate.".
                        format(output_directory))

            # Temporary files can be generated for pipelines
            temporary_files = []
            result = None
            try:
                # Generate ordered execution list
                execution_list = []
                if isinstance(process_or_pipeline, Pipeline):
                    execution_list = \
                        process_or_pipeline.workflow_ordered_nodes()
                    # Filter process nodes if necessary
                    if not execute_qc_nodes:
                        execution_list = [
                            node for node in execution_list
                            if node.node_type == "processing_node"
                        ]
                    for node in execution_list:
                        # check temporary outputs and allocate files
                        process_or_pipeline._check_temporary_files_for_node(
                            node, temporary_files)
                elif isinstance(process_or_pipeline, Process):
                    execution_list.append(process_or_pipeline)
                else:
                    raise Exception(
                        "Unknown instance type. Got {0} and expected Process "
                        "or Pipeline instances".format(
                            process_or_pipeline.__class__.__name__))

                # Execute each process node element
                for process_node in execution_list:
                    # Execute the process instance contained in the node
                    if isinstance(process_node, Node):
                        result = self._run(process_node.process,
                                           output_directory, verbose)

                    # Execute the process instance
                    else:
                        result = self._run(process_node, output_directory,
                                           verbose)
            finally:
                # Destroy temporary files
                if temporary_files:
                    # If temporary files have been created, we are sure that
                    # process_or_pipeline is a pipeline with a method
                    # _free_temporary_files.
                    process_or_pipeline._free_temporary_files(temporary_files)
            return result
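A short usage sketch for the run() method above, in the local (non soma-workflow) case. The pipeline identifier 'my_package.MyPipeline' and the input_image parameter are hypothetical placeholders; StudyConfig, get_process_instance and the traits set here (use_soma_workflow, output_directory) match the attributes the method relies on.

from capsul.api import StudyConfig, get_process_instance

study_config = StudyConfig()
study_config.use_soma_workflow = False         # run sequentially, in-process
study_config.output_directory = '/tmp/capsul_out'

# 'my_package.MyPipeline' and 'input_image' are hypothetical placeholders;
# extra keyword arguments are set on the pipeline before execution
pipeline = get_process_instance('my_package.MyPipeline')
result = study_config.run(pipeline,
                          execute_qc_nodes=False,
                          input_image='/data/subject1.nii')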