Example 1
 def test_cache_download(self):
     archive = XnatArchive(project_id=self.project_id,
                           server=SERVER,
                           cache_dir=tempfile.mkdtemp())
     study = self.create_study(TestStudy,
                               'cache_download',
                               inputs=[
                                   DatasetMatch('dataset1', mrtrix_format,
                                                'dataset1'),
                                   DatasetMatch('dataset2', mrtrix_format,
                                                'dataset2'),
                                   DatasetMatch('dataset3', mrtrix_format,
                                                'dataset3'),
                                   DatasetMatch('dataset5', mrtrix_format,
                                                'dataset5')
                               ],
                               archive=archive)
     study.cache_inputs()
     for subject_id in self.SUBJECTS:
         for inpt in study.inputs:
             self.assertTrue(
                 os.path.exists(
                     os.path.join(
                         archive.cache_dir, self.PROJECT,
                         '{}_{}'.format(self.PROJECT, subject_id),
                         '{}_{}_{}'.format(self.PROJECT, subject_id,
                                           self.VISITS[0]), inpt.fname())))
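
The assertions above encode the layout of the local XNAT cache. A minimal sketch of that layout, as read off the paths the test checks (not the archive's own helper), is:

import os

def expected_cache_path(cache_dir, project, subject_id, visit_id, fname):
    # <cache_dir>/<project>/<project>_<subject>/<project>_<subject>_<visit>/<fname>
    subject_dir = '{}_{}'.format(project, subject_id)
    session_dir = '{}_{}_{}'.format(project, subject_id, visit_id)
    return os.path.join(cache_dir, project, subject_dir, session_dir, fname)
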
Example 2
 def test_derivable(self):
     # Test vanilla study
     study = self.create_study(
         TestDerivableStudy,
         'study',
         inputs=[DatasetMatch('required', text_format, 'required')])
     self.assertTrue(study.bound_data_spec('derivable').derivable)
     self.assertTrue(study.bound_data_spec('another_derivable').derivable)
     self.assertFalse(study.bound_data_spec('missing_input').derivable)
     self.assertFalse(study.bound_data_spec('wrong_option').derivable)
     self.assertFalse(study.bound_data_spec('wrong_option2').derivable)
     # Test study with 'switch' enabled
     study_with_switch = self.create_study(
         TestDerivableStudy,
         'study_with_switch',
         inputs=[DatasetMatch('required', text_format, 'required')],
         options={'switch': 1})
     self.assertTrue(
         study_with_switch.bound_data_spec('wrong_option').derivable)
     self.assertTrue(
         study_with_switch.bound_data_spec('wrong_option2').derivable)
     # Test study with optional input
     study_with_input = self.create_study(
         TestDerivableStudy,
         'study_with_inputs',
         inputs=[
             DatasetMatch('required', text_format, 'required'),
             DatasetMatch('optional', text_format, 'required')
         ])
     self.assertTrue(
         study_with_input.bound_data_spec('missing_input').derivable)
Example 3
 def test_multi_multi_study(self):
     study = self.create_study(
         MultiMultiStudy, 'multi_multi',
         [DatasetMatch('ss1_x', mrtrix_format, 'ones'),
          DatasetMatch('ss1_y', mrtrix_format, 'ones'),
          DatasetMatch('full_a', mrtrix_format, 'ones'),
          DatasetMatch('full_b', mrtrix_format, 'ones'),
          DatasetMatch('full_c', mrtrix_format, 'ones'),
          DatasetMatch('partial_a', mrtrix_format, 'ones'),
          DatasetMatch('partial_b', mrtrix_format, 'ones'),
          DatasetMatch('partial_c', mrtrix_format, 'ones')],
         options=[Option('full_required_op', 'product'),
                  Option('partial_ss2_product_op', 'product')])
     g = study.data('g')[0]
     if self.mrtrix_req is not None:
         NiAnalysisNodeMixin.load_module(*self.mrtrix_req)
     try:
         g_mean = float(sp.check_output(
             'mrstats {} -output mean'.format(g.path),
             shell=True))
         self.assertEqual(g_mean, 11.0)
     finally:
         if self.mrtrix_req is not None:
             NiAnalysisNodeMixin.unload_module(*self.mrtrix_req)
     # Test option values of the 'full' sub-study, accessed via the MultiStudy
     self.assertEqual(study.data('full_p1'), 100)
     self.assertEqual(study.data('full_p2'), '200')
     self.assertEqual(study.data('full_p3'), 300.0)
     self.assertEqual(study.data('full_q1'), 150)
     self.assertEqual(study.data('full_q2'), '250')
     self.assertEqual(study.data('full_required_op'),
                      'product')
     # Test option values in SubStudy
     ss1 = study.sub_study('full').sub_study('ss1')
     self.assertEqual(ss1.data('o1'), 100)
     self.assertEqual(ss1.data('o2'), '200')
     self.assertEqual(ss1.data('o3'), 300.0)
     ss2 = study.sub_study('full').sub_study('ss2')
     self.assertEqual(ss2.data('o1'), 150)
     self.assertEqual(ss2.data('o2'), '250')
     self.assertEqual(ss2.data('o3'), 300.0)
     self.assertEqual(ss2.data('product_op'), 'product')
     # Test option values of the 'partial' sub-study, accessed via the MultiStudy
     self.assertEqual(study.data('partial_p1'), 1000)
     self.assertEqual(study.data('partial_ss1_o2'), '2')
     self.assertEqual(study.data('partial_ss1_o3'), 3.0)
     self.assertEqual(study.data('partial_ss2_o2'), '20')
     self.assertEqual(study.data('partial_ss2_o3'), 30.0)
     self.assertEqual(
         study.partial_ss2_product_op, 'product')
     # Test option values in SubStudy
     ss1 = study.sub_study('partial').sub_study('ss1')
     self.assertEqual(ss1.data('o1'), 1000)
     self.assertEqual(ss1.data('o2'), '2')
     self.assertEqual(ss1.data('o3'), 3.0)
     ss2 = study.sub_study('partial').sub_study('ss2')
     self.assertEqual(ss2.data('o1'), 1000)
     self.assertEqual(ss2.data('o2'), '20')
     self.assertEqual(ss2.data('o3'), 30.0)
     self.assertEqual(ss2.data('product_op'), 'product')
Example 4
 def test_archive_roundtrip(self):
     study = DummyStudy(
         self.STUDY_NAME, self.archive, runner=LinearRunner('a_dir'),
         inputs=[DatasetMatch('source1', nifti_gz_format, 'source1'),
                 DatasetMatch('source2', nifti_gz_format, 'source2'),
                 DatasetMatch('source3', nifti_gz_format, 'source3'),
                 DatasetMatch('source4', nifti_gz_format, 'source4')])
     # TODO: Should test out other file formats as well.
     source_files = [study.input(n)
                     for n in ('source1', 'source2', 'source3',
                               'source4')]
     sink_files = [study.bound_data_spec(n)
                   for n in ('sink1', 'sink3', 'sink4')]
     inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                         'inputnode')
     inputnode.inputs.subject_id = self.SUBJECT
     inputnode.inputs.visit_id = self.VISIT
     source = self.archive.source(source_files,
                                  study_name=self.STUDY_NAME)
     sink = self.archive.sink(sink_files, study_name=self.STUDY_NAME)
     sink.inputs.name = 'archive_sink'
     sink.inputs.desc = (
         "A test session created by archive roundtrip unittest")
     # Create workflow connecting them together
     workflow = pe.Workflow('source_sink_unit_test', base_dir=self.work_dir)
     workflow.add_nodes((source, sink))
     workflow.connect(inputnode, 'subject_id', source, 'subject_id')
     workflow.connect(inputnode, 'visit_id', source, 'visit_id')
     workflow.connect(inputnode, 'subject_id', sink, 'subject_id')
     workflow.connect(inputnode, 'visit_id', sink, 'visit_id')
     for source_file in source_files:
         if not source_file.name.endswith('2'):
             source_name = source_file.name
             sink_name = source_name.replace('source', 'sink')
             workflow.connect(
                 source, source_name + PATH_SUFFIX,
                 sink, sink_name + PATH_SUFFIX)
     workflow.run()
     # Check local directory was created properly
     outputs = [
         f for f in sorted(os.listdir(self.session_dir))
         if f != FIELDS_FNAME]
     self.assertEqual(outputs,
                      [self.STUDY_NAME + '_sink1.nii.gz',
                       self.STUDY_NAME + '_sink3.nii.gz',
                       self.STUDY_NAME + '_sink4.nii.gz',
                       'source1.nii.gz', 'source2.nii.gz',
                       'source3.nii.gz', 'source4.nii.gz'])
Example 5
 def test_dcm2niix(self):
     study = self.create_study(
         DummyStudy, 'concatenate', inputs=[
             DatasetMatch('input_dataset',
                          dicom_format, 't2_tse_tra_p2_448')])
     study.data('output_dataset')[0]
     self.assertDatasetCreated('output_dataset.nii.gz', study.name)
Example 6
 def test_special_char_in_scan_name(self):
     """
     Tests whether the XNAT source can download scans with special characters
     (such as spaces) in their names
     """
     cache_dir = tempfile.mkdtemp()
     archive = XnatArchive(server=SERVER,
                           cache_dir=cache_dir,
                           project_id=self.PROJECT)
     study = DummyStudy('study',
                        archive,
                        LinearRunner('ad'),
                        inputs=[
                            DatasetMatch('source{}'.format(i), dicom_format,
                                         d)
                            for i, d in enumerate(self.DATASETS, start=1)
                        ],
                        subject_ids=[self.SUBJECT],
                        visit_ids=[self.VISIT])
     source = archive.source([
         study.input('source{}'.format(i))
         for i in range(1,
                        len(self.DATASETS) + 1)
     ])
     source.inputs.subject_id = self.SUBJECT
     source.inputs.visit_id = self.VISIT
     workflow = pe.Workflow(self.TEST_NAME, base_dir=self.work_path)
     workflow.add_nodes([source])
     graph = workflow.run()
     result = next(n.result for n in graph.nodes() if n.name == source.name)
     for i, dname in enumerate(self.DATASETS, start=1):
         path = getattr(result.outputs, 'source{}{}'.format(i, PATH_SUFFIX))
         self.assertEqual(os.path.basename(path), dname)
         self.assertTrue(os.path.exists(path))
Example 7
class TestDicomTagMatch(BaseTestCase):

    IMAGE_TYPE_TAG = ('0008', '0008')
    GRE_PATTERN = 'gre_field_mapping_3mm.*'
    PHASE_IMAGE_TYPE = ['ORIGINAL', 'PRIMARY', 'P', 'ND']
    MAG_IMAGE_TYPE = ['ORIGINAL', 'PRIMARY', 'M', 'ND', 'NORM']
    DICOM_MATCH = [
        DatasetMatch('gre_phase',
                     dicom_format,
                     GRE_PATTERN,
                     dicom_tags={IMAGE_TYPE_TAG: PHASE_IMAGE_TYPE},
                     is_regex=True),
        DatasetMatch('gre_mag',
                     dicom_format,
                     GRE_PATTERN,
                     dicom_tags={IMAGE_TYPE_TAG: MAG_IMAGE_TYPE},
                     is_regex=True)
    ]

    def test_dicom_match(self):
        study = self.create_study(TestMatchStudy,
                                  'test_dicom',
                                  inputs=self.DICOM_MATCH)
        phase = study.data('gre_phase')[0]
        mag = study.data('gre_mag')[0]
        self.assertEqual(phase.name, 'gre_field_mapping_3mm_phase')
        self.assertEqual(mag.name, 'gre_field_mapping_3mm_mag')

    def test_order_match(self):
        study = self.create_study(TestMatchStudy,
                                  'test_dicom',
                                  inputs=[
                                      DatasetMatch('gre_phase',
                                                   dicom_format,
                                                   pattern=self.GRE_PATTERN,
                                                   order=1,
                                                   is_regex=True),
                                      DatasetMatch('gre_mag',
                                                   dicom_format,
                                                   pattern=self.GRE_PATTERN,
                                                   order=0,
                                                   is_regex=True)
                                  ])
        phase = study.data('gre_phase')[0]
        mag = study.data('gre_mag')[0]
        self.assertEqual(phase.name, 'gre_field_mapping_3mm_phase')
        self.assertEqual(mag.name, 'gre_field_mapping_3mm_mag')
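
Both matchers select scans by the DICOM ImageType tag (0008, 0008). Purely as an illustration of that criterion (pydicom is not used by the test itself), the same check could be written as:

import pydicom

def has_image_type(dicom_path, expected):
    # Compare the multi-valued (0008,0008) ImageType against the expected list
    ds = pydicom.dcmread(dicom_path, stop_before_pixels=True)
    return list(ds.ImageType) == list(expected)

# has_image_type(path, ['ORIGINAL', 'PRIMARY', 'P', 'ND']) would pick the phase image
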
Example 8
 def test_full_multi_study(self):
     study = self.create_study(
         FullMultiStudy, 'full',
         [DatasetMatch('a', mrtrix_format, 'ones'),
          DatasetMatch('b', mrtrix_format, 'ones'),
          DatasetMatch('c', mrtrix_format, 'ones')],
         options=[Option('required_op', 'product')])
     d = study.data('d', subject_id='SUBJECT', visit_id='VISIT')
     e = study.data('e')[0]
     f = study.data('f')[0]
     if self.mrtrix_req is not None:
         NiAnalysisNodeMixin.load_module(*self.mrtrix_req)
     try:
         d_mean = float(sp.check_output(
             'mrstats {} -output mean'.format(d.path),
             shell=True))
         self.assertEqual(d_mean, 2.0)
         e_mean = float(sp.check_output(
             'mrstats {} -output mean'.format(e.path),
             shell=True))
         self.assertEqual(e_mean, 3.0)
         f_mean = float(sp.check_output(
             'mrstats {} -output mean'.format(f.path),
             shell=True))
         self.assertEqual(f_mean, 6.0)
     finally:
         if self.mrtrix_req is not None:
             NiAnalysisNodeMixin.unload_module(*self.mrtrix_req)
     # Test option values in MultiStudy
     self.assertEqual(study.data('p1'), 100)
     self.assertEqual(study.data('p2'), '200')
     self.assertEqual(study.data('p3'), 300.0)
     self.assertEqual(study.data('q1'), 150)
     self.assertEqual(study.data('q2'), '250')
     self.assertEqual(study.data('required_op'), 'product')
     # Test option values in SubStudy
     ss1 = study.sub_study('ss1')
     self.assertEqual(ss1.data('o1'), 100)
     self.assertEqual(ss1.data('o2'), '200')
     self.assertEqual(ss1.data('o3'), 300.0)
     ss2 = study.sub_study('ss2')
     self.assertEqual(ss2.data('o1'), 150)
     self.assertEqual(ss2.data('o2'), '250')
     self.assertEqual(ss2.data('o3'), 300.0)
     self.assertEqual(ss2.data('product_op'), 'product')
Example 9
 def test_partial_multi_study(self):
     study = self.create_study(
         PartialMultiStudy, 'partial',
         [DatasetMatch('a', mrtrix_format, 'ones'),
          DatasetMatch('b', mrtrix_format, 'ones'),
          DatasetMatch('c', mrtrix_format, 'ones')],
         options=[Option('ss2_product_op', 'product')])
     ss1_z = study.data('ss1_z')[0]
     ss2_y = study.data('ss2_y')[0]
     ss2_z = study.data('ss2_z')[0]
     if self.mrtrix_req is not None:
         NiAnalysisNodeMixin.load_module(*self.mrtrix_req)
     try:
         ss1_z_mean = float(sp.check_output(
             'mrstats {} -output mean'.format(ss1_z.path),
             shell=True))
         self.assertEqual(ss1_z_mean, 2.0)
         ss2_y_mean = float(sp.check_output(
             'mrstats {} -output mean'.format(ss2_y.path),
             shell=True))
         self.assertEqual(ss2_y_mean, 3.0)
         ss2_z_mean = float(sp.check_output(
             'mrstats {} -output mean'.format(ss2_z.path),
             shell=True))
         self.assertEqual(ss2_z_mean, 6.0)
     finally:
         if self.mrtrix_req is not None:
             NiAnalysisNodeMixin.unload_module(*self.mrtrix_req)
     # Test option values in MultiStudy
     self.assertEqual(study.data('p1'), 1000)
     self.assertEqual(study.data('ss1_o2'), '2')
     self.assertEqual(study.data('ss1_o3'), 3.0)
     self.assertEqual(study.data('ss2_o2'), '20')
     self.assertEqual(study.data('ss2_o3'), 30.0)
     self.assertEqual(study.data('ss2_product_op'), 'product')
     # Test option values in SubStudy
     ss1 = study.sub_study('ss1')
     self.assertEqual(ss1.data('o1'), 1000)
     self.assertEqual(ss1.data('o2'), '2')
     self.assertEqual(ss1.data('o3'), 3.0)
     ss2 = study.sub_study('ss2')
     self.assertEqual(ss2.data('o1'), 1000)
     self.assertEqual(ss2.data('o2'), '20')
     self.assertEqual(ss2.data('o3'), 30.0)
     self.assertEqual(ss2.data('product_op'), 'product')
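
The prefixed names used above follow a naming convention: in the partially-mapped study, 'p1' is explicitly shared with both sub-studies, while the remaining options are reached through an automatic '<substudy>_<option>' prefix ('ss2_product_op' sets 'product_op' on sub-study 'ss2'). A rough sketch of that name mapping (an illustration, not the library's implementation):

def split_substudy_option(name, sub_study_names):
    # 'ss2_product_op' -> ('ss2', 'product_op'); unprefixed names stay on the MultiStudy
    for ss_name in sub_study_names:
        prefix = ss_name + '_'
        if name.startswith(prefix):
            return ss_name, name[len(prefix):]
    return None, name
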
Example 10
 def test_pipeline_prerequisites(self):
     study = self.create_study(
         ConversionStudy, 'conversion', [
             DatasetMatch('mrtrix', mrtrix_format, 'mrtrix'),
             DatasetMatch('nifti_gz', nifti_gz_format, 'nifti_gz'),
             DatasetMatch('dicom', dicom_format, 't1_mprage_sag_p2_iso_1_ADNI'),
             DatasetMatch('directory', directory_format, 't1_mprage_sag_p2_iso_1_ADNI'),
             DatasetMatch('zip', zip_format, 'zip')])
     study.data('nifti_gz_from_dicom')
     study.data('mrtrix_from_nifti_gz')
     study.data('nifti_from_mrtrix')
     study.data('directory_from_zip')
     study.data('zip_from_directory')
     self.assertDatasetCreated('nifti_gz_from_dicom.nii.gz', study.name)
     self.assertDatasetCreated('mrtrix_from_nifti_gz.mif', study.name)
     self.assertDatasetCreated('nifti_from_mrtrix.nii', study.name)
     self.assertDatasetCreated('directory_from_zip', study.name)
     self.assertDatasetCreated('zip_from_directory.zip', study.name)
Example 11
 def test_order_match(self):
     study = self.create_study(TestMatchStudy,
                               'test_dicom',
                               inputs=[
                                   DatasetMatch('gre_phase',
                                                dicom_format,
                                                pattern=self.GRE_PATTERN,
                                                order=1,
                                                is_regex=True),
                                   DatasetMatch('gre_mag',
                                                dicom_format,
                                                pattern=self.GRE_PATTERN,
                                                order=0,
                                                is_regex=True)
                               ])
     phase = study.data('gre_phase')[0]
     mag = study.data('gre_mag')[0]
     self.assertEqual(phase.name, 'gre_field_mapping_3mm_phase')
     self.assertEqual(mag.name, 'gre_field_mapping_3mm_mag')
Example 12
 def setUp(self):
     self.reset_dirs()
     for subject_id in self.SUBJECT_IDS:
         for visit_id in self.SESSION_IDS:
             self.add_session(self.project_dir, subject_id, visit_id)
     self.study = self.create_study(
         TestStudy,
         'dummy',
         inputs=[
             DatasetMatch('start', nifti_gz_format, 'start'),
             DatasetMatch('ones_slice', mrtrix_format, 'ones_slice')
         ],
         options={'pipeline_option': True})
     # Calculate MRtrix module required for 'mrstats' commands
     try:
         self.mrtrix_req = Requirement.best_requirement(
             [mrtrix3_req], NiAnalysisNodeMixin.available_modules(),
             NiAnalysisNodeMixin.preloaded_modules())
     except NiAnalysisModulesNotInstalledException:
         self.mrtrix_req = None
Example 13
 def test_id_match(self):
     study = test_dataset.TestMatchStudy(name='test_dicom',
                                         archive=XnatArchive(
                                             project_id='TEST001',
                                             server=SERVER,
                                             cache_dir=tempfile.mkdtemp()),
                                         runner=LinearRunner(self.work_dir),
                                         inputs=[
                                             DatasetMatch('gre_phase',
                                                          dicom_format,
                                                          id=8),
                                             DatasetMatch('gre_mag',
                                                          dicom_format,
                                                          id=7)
                                         ],
                                         subject_ids=['DATASET'],
                                         visit_ids=['DICOMTAGMATCH'])
     phase = study.data('gre_phase')[0]
     mag = study.data('gre_mag')[0]
     self.assertEqual(phase.name, 'gre_field_mapping_3mm_phase')
     self.assertEqual(mag.name, 'gre_field_mapping_3mm_mag')
Example 14
 def test_missing_option(self):
     # Misses the required 'full_required_op' option, which sets
     # the operation of the second node in StudyB's pipeline to
     # 'product'
     missing_option_study = self.create_study(
         MultiMultiStudy, 'multi_multi',
         [DatasetMatch('ss1_x', mrtrix_format, 'ones'),
          DatasetMatch('ss1_y', mrtrix_format, 'ones'),
          DatasetMatch('full_a', mrtrix_format, 'ones'),
          DatasetMatch('full_b', mrtrix_format, 'ones'),
          DatasetMatch('full_c', mrtrix_format, 'ones'),
          DatasetMatch('partial_a', mrtrix_format, 'ones'),
          DatasetMatch('partial_b', mrtrix_format, 'ones'),
          DatasetMatch('partial_c', mrtrix_format, 'ones')],
         options=[Option('partial_ss2_product_op', 'product')])
     self.assertRaises(
         RuntimeError,
         missing_option_study.data,
         'g')
Example 15
 def test_per_session_prereqs(self):
     study = self.create_study(
         ExistingPrereqStudy,
         self.study_name,
         inputs=[DatasetMatch('start', mrtrix_format, 'ones')])
     study.data('thousands')
     targets = {
         'subject1': {
             'visit1': 1100,
             'visit2': 1110,
             'visit3': 1000
         },
         'subject2': {
             'visit1': 1110,
             'visit2': 1110,
             'visit3': 1000
         },
         'subject3': {
             'visit1': 1111,
             'visit2': 1110,
             'visit3': 1000
         },
         'subject4': {
             'visit1': 1111,
             'visit2': 1110,
             'visit3': 1000
         }
     }
     for subj_id, visits in self.saved_structure.items():
         for visit_id in visits:
             self.assertStatEqual('mean',
                                  'thousands.mif',
                                  targets[subj_id][visit_id],
                                  self.study_name,
                                  subject=subj_id,
                                  visit=visit_id,
                                  frequency='per_session')
Example 16
    def test_summary(self):
        study = DummyStudy(
            self.SUMMARY_STUDY_NAME, self.archive, LinearRunner('ad'),
            inputs=[DatasetMatch('source1', nifti_gz_format, 'source1'),
                    DatasetMatch('source2', nifti_gz_format, 'source2'),
                    DatasetMatch('source3', nifti_gz_format, 'source3')])
        # TODO: Should test out other file formats as well.
        source_files = [study.input(n)
                        for n in ('source1', 'source2', 'source3')]
        inputnode = pe.Node(
            IdentityInterface(['subject_id', 'visit_id']), 'inputnode')
        inputnode.inputs.subject_id = self.SUBJECT
        inputnode.inputs.visit_id = self.VISIT
        source = self.archive.source(source_files)
        # Test subject sink
        subject_sink_files = [
            study.bound_data_spec('subject_sink')]
        subject_sink = self.archive.sink(subject_sink_files,
                                         frequency='per_subject',
                                         study_name=self.SUMMARY_STUDY_NAME)
        subject_sink.inputs.name = 'subject_summary'
        subject_sink.inputs.desc = (
            "Tests the sinking of subject-wide datasets")
        # Test visit sink
        visit_sink_files = [study.bound_data_spec('visit_sink')]
        visit_sink = self.archive.sink(visit_sink_files,
                                       frequency='per_visit',
                                       study_name=self.SUMMARY_STUDY_NAME)
        visit_sink.inputs.name = 'visit_summary'
        visit_sink.inputs.desc = (
            "Tests the sinking of visit-wide datasets")
        # Test project sink
        project_sink_files = [
            study.bound_data_spec('project_sink')]
        project_sink = self.archive.sink(project_sink_files,
                                         frequency='per_project',
                                         study_name=self.SUMMARY_STUDY_NAME)

        project_sink.inputs.name = 'project_summary'
        project_sink.inputs.desc = (
            "Tests the sinking of project-wide datasets")
        # Create workflow connecting them together
        workflow = pe.Workflow('summary_unittest', base_dir=self.work_dir)
        workflow.add_nodes((source, subject_sink, visit_sink,
                            project_sink))
        workflow.connect(inputnode, 'subject_id', source, 'subject_id')
        workflow.connect(inputnode, 'visit_id', source, 'visit_id')
        workflow.connect(inputnode, 'subject_id', subject_sink, 'subject_id')
        workflow.connect(inputnode, 'visit_id', visit_sink, 'visit_id')
        workflow.connect(
            source, 'source1' + PATH_SUFFIX,
            subject_sink, 'subject_sink' + PATH_SUFFIX)
        workflow.connect(
            source, 'source2' + PATH_SUFFIX,
            visit_sink, 'visit_sink' + PATH_SUFFIX)
        workflow.connect(
            source, 'source3' + PATH_SUFFIX,
            project_sink, 'project_sink' + PATH_SUFFIX)
        workflow.run()
        # Check local summary directories were created properly
        subject_dir = self.get_session_dir(frequency='per_subject')
        self.assertEqual(sorted(os.listdir(subject_dir)),
                         [self.SUMMARY_STUDY_NAME + '_subject_sink.nii.gz'])
        visit_dir = self.get_session_dir(frequency='per_visit')
        self.assertEqual(sorted(os.listdir(visit_dir)),
                         [self.SUMMARY_STUDY_NAME + '_visit_sink.nii.gz'])
        project_dir = self.get_session_dir(frequency='per_project')
        self.assertEqual(sorted(os.listdir(project_dir)),
                         [self.SUMMARY_STUDY_NAME + '_project_sink.nii.gz'])
        # Reload the data from the summary directories
        reloadinputnode = pe.Node(IdentityInterface(['subject_id',
                                                     'visit_id']),
                                  'reload_inputnode')
        reloadinputnode.inputs.subject_id = self.SUBJECT
        reloadinputnode.inputs.visit_id = self.VISIT
        reloadsource = self.archive.source(
            (source_files + subject_sink_files + visit_sink_files +
             project_sink_files),
            name='reload_source',
            study_name=self.SUMMARY_STUDY_NAME)
        reloadsink = self.archive.sink(
            [study.bound_data_spec(n)
             for n in ('resink1', 'resink2', 'resink3')],
            study_name=self.SUMMARY_STUDY_NAME)
        reloadsink.inputs.name = 'reload_summary'
        reloadsink.inputs.desc = (
            "Tests the reloading of subject and project summary datasets")
        reloadworkflow = pe.Workflow('reload_summary_unittest',
                                     base_dir=self.work_dir)
        reloadworkflow.connect(reloadinputnode, 'subject_id',
                               reloadsource, 'subject_id')
        reloadworkflow.connect(reloadinputnode, 'visit_id',
                               reloadsource, 'visit_id')
        reloadworkflow.connect(reloadinputnode, 'subject_id',
                               reloadsink, 'subject_id')
        reloadworkflow.connect(reloadinputnode, 'visit_id',
                               reloadsink, 'visit_id')
        reloadworkflow.connect(reloadsource,
                               'subject_sink' + PATH_SUFFIX,
                               reloadsink,
                               'resink1' + PATH_SUFFIX)
        reloadworkflow.connect(reloadsource,
                               'visit_sink' + PATH_SUFFIX,
                               reloadsink,
                               'resink2' + PATH_SUFFIX)
        reloadworkflow.connect(reloadsource,
                               'project_sink' + PATH_SUFFIX,
                               reloadsink,
                               'resink3' + PATH_SUFFIX)
        reloadworkflow.run()
        outputs = [
            f for f in sorted(os.listdir(self.session_dir))
            if f != FIELDS_FNAME]
        self.assertEqual(outputs,
                         [self.SUMMARY_STUDY_NAME + '_resink1.nii.gz',
                          self.SUMMARY_STUDY_NAME + '_resink2.nii.gz',
                          self.SUMMARY_STUDY_NAME + '_resink3.nii.gz',
                          'source1.nii.gz', 'source2.nii.gz',
                          'source3.nii.gz', 'source4.nii.gz'])
Example 17
 def test_pipeline_prerequisites(self):
     study = self.create_study(
         RequirementsStudy, 'requirements',
         [DatasetMatch('ones', nifti_gz_format, 'ones')])
     study.data('twos')
     self.assertDatasetCreated('twos.nii.gz', study.name)
Example 18
    def test_archive_roundtrip(self):

        # Create working dirs
        # Create XnatSource node
        archive = XnatArchive(project_id=self.PROJECT,
                              server=SERVER,
                              cache_dir=self.archive_cache_dir)
        study = DummyStudy(self.STUDY_NAME,
                           archive,
                           runner=LinearRunner('a_dir'),
                           inputs=[
                               DatasetMatch('source1', nifti_gz_format,
                                            'source1'),
                               DatasetMatch('source2', nifti_gz_format,
                                            'source2'),
                               DatasetMatch('source3', nifti_gz_format,
                                            'source3'),
                               DatasetMatch('source4', nifti_gz_format,
                                            'source4')
                           ])
        # TODO: Should test out other file formats as well.
        source_files = [
            study.input(n)
            for n in ('source1', 'source2', 'source3', 'source4')
        ]
        sink_files = [
            study.bound_data_spec(n) for n in ('sink1', 'sink3', 'sink4')
        ]
        inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                            'inputnode')
        inputnode.inputs.subject_id = str(self.SUBJECT)
        inputnode.inputs.visit_id = str(self.VISIT)
        source = archive.source(source_files, study_name=self.STUDY_NAME)
        sink = archive.sink(sink_files, study_name=self.STUDY_NAME)
        sink.inputs.name = 'archive-roundtrip-unittest'
        sink.inputs.desc = (
            "A test session created by archive roundtrip unittest")
        # Create workflow connecting them together
        workflow = pe.Workflow('source-sink-unit-test', base_dir=self.work_dir)
        workflow.add_nodes((source, sink))
        workflow.connect(inputnode, 'subject_id', source, 'subject_id')
        workflow.connect(inputnode, 'visit_id', source, 'visit_id')
        workflow.connect(inputnode, 'subject_id', sink, 'subject_id')
        workflow.connect(inputnode, 'visit_id', sink, 'visit_id')
        for source_file in source_files:
            if source_file.name != 'source2':
                sink_name = source_file.name.replace('source', 'sink')
                workflow.connect(source, source_file.name + PATH_SUFFIX, sink,
                                 sink_name + PATH_SUFFIX)
        workflow.run()
        # Check cache was created properly
        self.assertEqual(filter_md5_fnames(os.listdir(self.session_cache())), [
            'source1.nii.gz', 'source2.nii.gz', 'source3.nii.gz',
            'source4.nii.gz'
        ])
        expected_sink_datasets = [
            self.STUDY_NAME + '_sink1', self.STUDY_NAME + '_sink3',
            self.STUDY_NAME + '_sink4'
        ]
        self.assertEqual(
            filter_md5_fnames(os.listdir(self.proc_session_cache())),
            [d + nifti_gz_format.extension for d in expected_sink_datasets])
        with self._connect() as mbi_xnat:
            dataset_names = mbi_xnat.experiments[
                self.session_label() +
                XnatArchive.PROCESSED_SUFFIX].scans.keys()
        self.assertEqual(sorted(dataset_names), expected_sink_datasets)
Example 19
    def test_summary(self):
        # Create working dirs
        # Create XnatSource node
        archive = XnatArchive(server=SERVER,
                              cache_dir=self.archive_cache_dir,
                              project_id=self.PROJECT)
        study = DummyStudy(self.SUMMARY_STUDY_NAME,
                           archive,
                           LinearRunner('ad'),
                           inputs=[
                               DatasetMatch('source1', nifti_gz_format,
                                            'source1'),
                               DatasetMatch('source2', nifti_gz_format,
                                            'source2'),
                               DatasetMatch('source3', nifti_gz_format,
                                            'source3')
                           ])
        # TODO: Should test out other file formats as well.
        source_files = [
            study.input(n) for n in ('source1', 'source2', 'source3')
        ]
        inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                            'inputnode')
        inputnode.inputs.subject_id = self.SUBJECT
        inputnode.inputs.visit_id = self.VISIT
        source = archive.source(source_files)
        subject_sink_files = [study.bound_data_spec('subject_sink')]
        subject_sink = archive.sink(subject_sink_files,
                                    frequency='per_subject',
                                    study_name=self.SUMMARY_STUDY_NAME)
        subject_sink.inputs.name = 'subject_summary'
        subject_sink.inputs.desc = (
            "Tests the sinking of subject-wide datasets")
        # Test visit sink
        visit_sink_files = [study.bound_data_spec('visit_sink')]
        visit_sink = archive.sink(visit_sink_files,
                                  frequency='per_visit',
                                  study_name=self.SUMMARY_STUDY_NAME)
        visit_sink.inputs.name = 'visit_summary'
        visit_sink.inputs.desc = ("Tests the sinking of visit-wide datasets")
        # Test project sink
        project_sink_files = [study.bound_data_spec('project_sink')]
        project_sink = archive.sink(project_sink_files,
                                    frequency='per_project',
                                    study_name=self.SUMMARY_STUDY_NAME)

        project_sink.inputs.name = 'project_summary'
        project_sink.inputs.desc = (
            "Tests the sinking of project-wide datasets")
        # Create workflow connecting them together
        workflow = pe.Workflow('summary_unittest', base_dir=self.work_dir)
        workflow.add_nodes((source, subject_sink, visit_sink, project_sink))
        workflow.connect(inputnode, 'subject_id', source, 'subject_id')
        workflow.connect(inputnode, 'visit_id', source, 'visit_id')
        workflow.connect(inputnode, 'subject_id', subject_sink, 'subject_id')
        workflow.connect(inputnode, 'visit_id', visit_sink, 'visit_id')
        workflow.connect(source, 'source1' + PATH_SUFFIX, subject_sink,
                         'subject_sink' + PATH_SUFFIX)
        workflow.connect(source, 'source2' + PATH_SUFFIX, visit_sink,
                         'visit_sink' + PATH_SUFFIX)
        workflow.connect(source, 'source3' + PATH_SUFFIX, project_sink,
                         'project_sink' + PATH_SUFFIX)
        workflow.run()
        with self._connect() as mbi_xnat:
            # Check subject summary directories were created properly in cache
            expected_subj_datasets = [
                self.SUMMARY_STUDY_NAME + '_subject_sink'
            ]
            subject_dir = os.path.join(
                self.archive_cache_dir, self.PROJECT,
                '_'.join((self.PROJECT, self.SUBJECT)), '_'.join(
                    (self.PROJECT, self.SUBJECT, XnatArchive.SUMMARY_NAME)))
            self.assertEqual(filter_md5_fnames(os.listdir(subject_dir)), [
                d + nifti_gz_format.extension for d in expected_subj_datasets
            ])
            # and on XNAT
            subject_dataset_names = mbi_xnat.projects[
                self.PROJECT].experiments['_'.join(
                    (self.PROJECT, self.SUBJECT,
                     XnatArchive.SUMMARY_NAME))].scans.keys()
            self.assertEqual(expected_subj_datasets, subject_dataset_names)
            # Check visit summary directories were created properly in
            # cache
            expected_visit_datasets = [self.SUMMARY_STUDY_NAME + '_visit_sink']
            visit_dir = os.path.join(
                self.archive_cache_dir, self.PROJECT,
                self.PROJECT + '_' + XnatArchive.SUMMARY_NAME,
                (self.PROJECT + '_' + XnatArchive.SUMMARY_NAME + '_' +
                 self.VISIT))
            self.assertEqual(filter_md5_fnames(os.listdir(visit_dir)), [
                d + nifti_gz_format.extension for d in expected_visit_datasets
            ])
            # and on XNAT
            visit_dataset_names = mbi_xnat.projects[self.PROJECT].experiments[
                '{}_{}_{}'.format(self.PROJECT, XnatArchive.SUMMARY_NAME,
                                  self.VISIT)].scans.keys()
            self.assertEqual(expected_visit_datasets, visit_dataset_names)
            # Check project summary directories were created properly in cache
            expected_proj_datasets = [
                self.SUMMARY_STUDY_NAME + '_project_sink'
            ]
            project_dir = os.path.join(
                self.archive_cache_dir, self.PROJECT,
                self.PROJECT + '_' + XnatArchive.SUMMARY_NAME,
                self.PROJECT + '_' + XnatArchive.SUMMARY_NAME + '_' +
                XnatArchive.SUMMARY_NAME)
            self.assertEqual(filter_md5_fnames(os.listdir(project_dir)), [
                d + nifti_gz_format.extension for d in expected_proj_datasets
            ])
            # and on XNAT
            project_dataset_names = mbi_xnat.projects[
                self.PROJECT].experiments['{}_{sum}_{sum}'.format(
                    self.PROJECT, sum=XnatArchive.SUMMARY_NAME)].scans.keys()
            self.assertEqual(expected_proj_datasets, project_dataset_names)
        # Reload the data from the summary directories
        reloadinputnode = pe.Node(
            IdentityInterface(['subject_id', 'visit_id']), 'reload_inputnode')
        reloadinputnode.inputs.subject_id = self.SUBJECT
        reloadinputnode.inputs.visit_id = self.VISIT
        reloadsource = archive.source((source_files + subject_sink_files +
                                       visit_sink_files + project_sink_files),
                                      name='reload_source',
                                      study_name=self.SUMMARY_STUDY_NAME)
        reloadsink = archive.sink([
            study.bound_data_spec(n) for n in ('resink1', 'resink2', 'resink3')
        ],
                                  study_name=self.SUMMARY_STUDY_NAME)
        reloadsink.inputs.name = 'reload_summary'
        reloadsink.inputs.desc = (
            "Tests the reloading of subject and project summary datasets")
        reloadworkflow = pe.Workflow('reload_summary_unittest',
                                     base_dir=self.work_dir)
        reloadworkflow.connect(reloadinputnode, 'subject_id', reloadsource,
                               'subject_id')
        reloadworkflow.connect(reloadinputnode, 'visit_id', reloadsource,
                               'visit_id')
        reloadworkflow.connect(reloadinputnode, 'subject_id', reloadsink,
                               'subject_id')
        reloadworkflow.connect(reloadinputnode, 'visit_id', reloadsink,
                               'visit_id')
        reloadworkflow.connect(reloadsource, 'subject_sink' + PATH_SUFFIX,
                               reloadsink, 'resink1' + PATH_SUFFIX)
        reloadworkflow.connect(reloadsource, 'visit_sink' + PATH_SUFFIX,
                               reloadsink, 'resink2' + PATH_SUFFIX)
        reloadworkflow.connect(reloadsource, 'project_sink' + PATH_SUFFIX,
                               reloadsink, 'resink3' + PATH_SUFFIX)
        reloadworkflow.run()
        # Check that the resinked datasets were created in the cache
        self.assertEqual(
            filter_md5_fnames(os.listdir(self.proc_session_cache())), [
                self.SUMMARY_STUDY_NAME + '_resink1.nii.gz',
                self.SUMMARY_STUDY_NAME + '_resink2.nii.gz',
                self.SUMMARY_STUDY_NAME + '_resink3.nii.gz'
            ])
        # and on XNAT
        with self._connect() as mbi_xnat:
            resinked_dataset_names = mbi_xnat.projects[
                self.PROJECT].experiments[
                    self.session_label() +
                    XnatArchive.PROCESSED_SUFFIX].scans.keys()
            self.assertEqual(sorted(resinked_dataset_names), [
                self.SUMMARY_STUDY_NAME + '_resink1', self.SUMMARY_STUDY_NAME +
                '_resink2', self.SUMMARY_STUDY_NAME + '_resink3'
            ])
Example 20
    def test_delayed_download(self):
        """
        Tests handling of race conditions where separate processes attempt to
        cache the same dataset
        """
        cache_dir = os.path.join(self.base_cache_path,
                                 'delayed-download-cache')
        DATASET_NAME = 'source1'
        target_path = os.path.join(self.session_cache(cache_dir),
                                   DATASET_NAME + nifti_gz_format.extension)
        tmp_dir = target_path + '.download'
        shutil.rmtree(cache_dir, ignore_errors=True)
        os.makedirs(cache_dir)
        archive = XnatArchive(server=SERVER,
                              cache_dir=cache_dir,
                              project_id=self.PROJECT)
        study = DummyStudy(
            self.STUDY_NAME,
            archive,
            LinearRunner('ad'),
            inputs=[DatasetMatch(DATASET_NAME, nifti_gz_format, DATASET_NAME)])
        source = archive.source([study.input(DATASET_NAME)],
                                name='delayed_source',
                                study_name='delayed_study')
        source.inputs.subject_id = self.SUBJECT
        source.inputs.visit_id = self.VISIT
        result1 = source.run()
        source1_path = result1.outputs.source1_path
        self.assertTrue(os.path.exists(source1_path))
        self.assertEqual(
            source1_path, target_path,
            "Output file path '{}' not equal to target path '{}'".format(
                source1_path, target_path))
        # Clear cache to start again
        shutil.rmtree(cache_dir, ignore_errors=True)
        # Create tmp_dir before running interface, this time should wait for 1
        # second, check to see that the session hasn't been created and then
        # clear it and redownload the dataset.
        os.makedirs(tmp_dir)
        source.inputs.race_cond_delay = 1
        result2 = source.run()
        source1_path = result2.outputs.source1_path
        # Clear cache to start again
        shutil.rmtree(cache_dir, ignore_errors=True)
        # This time a separate process simulates a concurrent download that
        # keeps updating the temp dir; the source should wait for it to finish
        # (up to race_cond_delay seconds of inactivity) and then use the file
        # it produces instead of redownloading.
        internal_dir = os.path.join(tmp_dir, 'internal')
        deleted_tmp_dir = tmp_dir + '.deleted'

        def simulate_download():
            "Simulates a download in a separate process"
            os.makedirs(internal_dir)
            time.sleep(5)
            # Modify a file in the temp dir to make the source download keep
            # waiting
            logger.info('Updating simulated download directory')
            with open(os.path.join(internal_dir, 'download'), 'a') as f:
                f.write('downloading')
            time.sleep(10)
            # Simulate the finalising of the download by copying the previously
            # downloaded file into place and deleting the temp dir.
            logger.info('Finalising simulated download')
            with open(target_path, 'a') as f:
                f.write('simulated')
            shutil.move(tmp_dir, deleted_tmp_dir)

        source.inputs.race_cond_delay = 10
        p = Process(target=simulate_download)
        p.start()  # Start the simulated download in separate process
        time.sleep(1)
        source.run()  # Run the local download
        p.join()
        with open(os.path.join(deleted_tmp_dir, 'internal', 'download')) as f:
            d = f.read()
        self.assertEqual(d, 'downloading')
        with open(target_path) as f:
            d = f.read()
        self.assertEqual(d, 'simulated')
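
The behaviour exercised here is a wait-then-take-over strategy around the '<target>.download' temporary directory: while another process keeps that directory fresh, the source waits, and only after race_cond_delay seconds of inactivity does it clear the directory and download the file itself. A rough sketch of that strategy (assumed behaviour, not the XnatArchive source code):

import os
import shutil
import time

def wait_then_download(target_path, tmp_dir, race_cond_delay, download):
    # Wait while another process appears to be downloading into tmp_dir
    while os.path.exists(tmp_dir) and not os.path.exists(target_path):
        mtimes = [os.path.getmtime(tmp_dir)]
        for root, _, files in os.walk(tmp_dir):
            mtimes.extend(os.path.getmtime(os.path.join(root, f)) for f in files)
        if time.time() - max(mtimes) > race_cond_delay:
            # The concurrent download looks stalled: clear it and download ourselves
            shutil.rmtree(tmp_dir, ignore_errors=True)
            download(target_path)
        else:
            time.sleep(1)
    return target_path
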
Example 21
 def test_digest_check(self):
     """
     Tests check of downloaded digests to see if file needs to be
     redownloaded
     """
     cache_dir = os.path.join(self.base_cache_path, 'digest-check-cache')
     DATASET_NAME = 'source1'
     STUDY_NAME = 'digest_check_study'
     dataset_fpath = DATASET_NAME + nifti_gz_format.extension
     source_target_path = os.path.join(self.session_cache(cache_dir),
                                       dataset_fpath)
     md5_path = source_target_path + XnatArchive.MD5_SUFFIX
     shutil.rmtree(cache_dir, ignore_errors=True)
     os.makedirs(cache_dir)
     archive = XnatArchive(project_id=self.PROJECT,
                           server=SERVER,
                           cache_dir=cache_dir)
     study = DummyStudy(
         STUDY_NAME,
         archive,
         LinearRunner('ad'),
         inputs=[DatasetMatch(DATASET_NAME, nifti_gz_format, DATASET_NAME)])
     source = archive.source([study.input(DATASET_NAME)],
                             name='digest_check_source',
                             study_name=STUDY_NAME)
     source.inputs.subject_id = self.SUBJECT
     source.inputs.visit_id = self.VISIT
     source.run()
     self.assertTrue(os.path.exists(md5_path))
     self.assertTrue(os.path.exists(source_target_path))
     with open(md5_path) as f:
         digests = json.load(f)
     # Stash the downloaded file in a new location and create a dummy
     # file instead
     stash_path = source_target_path + '.stash'
     shutil.move(source_target_path, stash_path)
     with open(source_target_path, 'w') as f:
         f.write('dummy')
     # Run the download, which shouldn't download as the digests are the
     # same
     source.run()
     with open(source_target_path) as f:
         d = f.read()
     self.assertEqual(d, 'dummy')
     # Replace the digest with a dummy
     os.remove(md5_path)
     digests[dataset_fpath] = 'dummy_digest'
     with open(md5_path, 'w') as f:
         json.dump(digests, f)
     # Retry the download, which should now download since the digests
     # differ
     source.run()
     with open(source_target_path) as f:
         d = f.read()
     with open(stash_path) as f:
         e = f.read()
     self.assertEqual(d, e)
     # Resink the source file and check that the generated MD5 digest is
     # stored in identical format
     sink_archive = XnatArchive(project_id=self.DIGEST_SINK_PROJECT,
                                server=SERVER,
                                cache_dir=cache_dir)
     DATASET_NAME = 'sink1'
     sink = sink_archive.sink([study.bound_data_spec(DATASET_NAME)],
                              name='digest_check_sink',
                              study_name=STUDY_NAME)
     sink.inputs.name = 'digest_check_sink'
     sink.inputs.desc = "Tests the generation of MD5 digests"
     sink.inputs.subject_id = self.DIGEST_SINK_SUBJECT
     sink.inputs.visit_id = self.VISIT
     sink.inputs.sink1_path = source_target_path
     sink_fpath = (STUDY_NAME + '_' + DATASET_NAME +
                   nifti_gz_format.extension)
     sink_target_path = os.path.join(
         (self.session_cache(cache_dir,
                             project=self.DIGEST_SINK_PROJECT,
                             subject=(self.DIGEST_SINK_SUBJECT)) +
          XnatArchive.PROCESSED_SUFFIX), sink_fpath)
     sink_md5_path = sink_target_path + XnatArchive.MD5_SUFFIX
     sink.run()
     with open(md5_path) as f:
         source_digests = json.load(f)
     with open(sink_md5_path) as f:
         sink_digests = json.load(f)
     self.assertEqual(
         source_digests[dataset_fpath], sink_digests[sink_fpath],
         ("Source digest ({}) did not equal sink digest ({})".format(
             source_digests[dataset_fpath], sink_digests[sink_fpath])))
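
The behaviour being tested is that a cached file is re-downloaded only when the digests stored alongside it differ from the ones reported by the server; when they match, the existing (here deliberately dummied) file is kept. A minimal sketch of that decision (assumed, not the archive's actual code):

import json
import os

def needs_redownload(cached_path, md5_path, server_digests):
    # A missing file or digest record always forces a (re)download
    if not (os.path.exists(cached_path) and os.path.exists(md5_path)):
        return True
    with open(md5_path) as f:
        cached_digests = json.load(f)
    # Matching digests mean the cached copy is trusted and the download is skipped
    return cached_digests != server_digests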