Example #1
 def test_special_char_in_scan_name(self):
     """
     Tests whether the XNAT source can download files with spaces in their names
     """
     cache_dir = tempfile.mkdtemp()
     archive = XnatArchive(server=SERVER,
                           cache_dir=cache_dir,
                           project_id=self.PROJECT)
     study = DummyStudy('study',
                        archive,
                        LinearRunner('ad'),
                        inputs=[
                            DatasetMatch('source{}'.format(i), dicom_format,
                                         d)
                            for i, d in enumerate(self.DATASETS, start=1)
                        ],
                        subject_ids=[self.SUBJECT],
                        visit_ids=[self.VISIT])
     source = archive.source([
         study.input('source{}'.format(i))
         for i in range(1,
                        len(self.DATASETS) + 1)
     ])
     source.inputs.subject_id = self.SUBJECT
     source.inputs.visit_id = self.VISIT
     workflow = pe.Workflow(self.TEST_NAME, base_dir=self.work_path)
     workflow.add_nodes([source])
     graph = workflow.run()
     result = next(n.result for n in graph.nodes() if n.name == source.name)
     for i, dname in enumerate(self.DATASETS, start=1):
         path = getattr(result.outputs, 'source{}{}'.format(i, PATH_SUFFIX))
         self.assertEqual(os.path.basename(path), dname)
         self.assertTrue(os.path.exists(path))
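A recurring pattern in these examples is pulling a node's result out of the executed graph and reading its '<name>' + PATH_SUFFIX outputs. A minimal sketch of that lookup as a standalone helper (the helper name collect_source_paths is ours, and '_path' is only an assumed value for PATH_SUFFIX):

    def collect_source_paths(graph, node_name, names, path_suffix='_path'):
        """Map each input name to the cached file path produced by an archive
        source node, given the graph returned by Workflow.run().
        `path_suffix` mirrors PATH_SUFFIX in the examples (assumed '_path').
        """
        result = next(n.result for n in graph.nodes() if n.name == node_name)
        return {name: getattr(result.outputs, name + path_suffix)
                for name in names}

With such a helper, the loop at the end of Example #1 reduces to asserting os.path.basename(path) == dname for each returned path.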
Example #2
 def test_dicom_match(self):
     study = test_dataset.TestMatchStudy(
         name='test_dicom',
         archive=XnatArchive(project_id='TEST001',
                             server=SERVER,
                             cache_dir=tempfile.mkdtemp()),
         runner=LinearRunner(self.work_dir),
         inputs=test_dataset.TestDicomTagMatch.DICOM_MATCH,
         subject_ids=['DATASET'],
         visit_ids=['DICOMTAGMATCH'])
     phase = study.data('gre_phase')[0]
     mag = study.data('gre_mag')[0]
     self.assertEqual(phase.name, 'gre_field_mapping_3mm_phase')
     self.assertEqual(mag.name, 'gre_field_mapping_3mm_mag')
Example #3
 def test_archive_roundtrip(self):
     study = DummyStudy(
         self.STUDY_NAME, self.archive, runner=LinearRunner('a_dir'),
         inputs=[DatasetMatch('source1', nifti_gz_format, 'source1'),
                 DatasetMatch('source2', nifti_gz_format, 'source2'),
                 DatasetMatch('source3', nifti_gz_format, 'source3'),
                 DatasetMatch('source4', nifti_gz_format, 'source4')])
     # TODO: Should test out other file formats as well.
     source_files = [study.input(n)
                     for n in ('source1', 'source2', 'source3',
                               'source4')]
     sink_files = [study.bound_data_spec(n)
                   for n in ('sink1', 'sink3', 'sink4')]
     inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                         'inputnode')
     inputnode.inputs.subject_id = self.SUBJECT
     inputnode.inputs.visit_id = self.VISIT
     source = self.archive.source(source_files,
                                  study_name=self.STUDY_NAME)
     sink = self.archive.sink(sink_files, study_name=self.STUDY_NAME)
     sink.inputs.name = 'archive_sink'
     sink.inputs.desc = (
         "A test session created by archive roundtrip unittest")
     # Create workflow connecting them together
     workflow = pe.Workflow('source_sink_unit_test', base_dir=self.work_dir)
     workflow.add_nodes((source, sink))
     workflow.connect(inputnode, 'subject_id', source, 'subject_id')
     workflow.connect(inputnode, 'visit_id', source, 'visit_id')
     workflow.connect(inputnode, 'subject_id', sink, 'subject_id')
     workflow.connect(inputnode, 'visit_id', sink, 'visit_id')
     for source_file in source_files:
         if not source_file.name.endswith('2'):
             source_name = source_file.name
             sink_name = source_name.replace('source', 'sink')
             workflow.connect(
                 source, source_name + PATH_SUFFIX,
                 sink, sink_name + PATH_SUFFIX)
     workflow.run()
     # Check local directory was created properly
     outputs = [
         f for f in sorted(os.listdir(self.session_dir))
         if f != FIELDS_FNAME]
     self.assertEqual(outputs,
                      [self.STUDY_NAME + '_sink1.nii.gz',
                       self.STUDY_NAME + '_sink3.nii.gz',
                       self.STUDY_NAME + '_sink4.nii.gz',
                       'source1.nii.gz', 'source2.nii.gz',
                       'source3.nii.gz', 'source4.nii.gz'])
Example #4
    def test_summary(self):
        study = DummyStudy(
            self.SUMMARY_STUDY_NAME, self.archive, LinearRunner('ad'),
            inputs=[DatasetMatch('source1', nifti_gz_format, 'source1'),
                    DatasetMatch('source2', nifti_gz_format, 'source2'),
                    DatasetMatch('source3', nifti_gz_format, 'source3')])
        # TODO: Should test out other file formats as well.
        source_files = [study.input(n)
                        for n in ('source1', 'source2', 'source3')]
        inputnode = pe.Node(
            IdentityInterface(['subject_id', 'visit_id']), 'inputnode')
        inputnode.inputs.subject_id = self.SUBJECT
        inputnode.inputs.visit_id = self.VISIT
        source = self.archive.source(source_files)
        # Test subject sink
        subject_sink_files = [
            study.bound_data_spec('subject_sink')]
        subject_sink = self.archive.sink(subject_sink_files,
                                         frequency='per_subject',
                                         study_name=self.SUMMARY_STUDY_NAME)
        subject_sink.inputs.name = 'subject_summary'
        subject_sink.inputs.desc = (
            "Tests the sinking of subject-wide datasets")
        # Test visit sink
        visit_sink_files = [study.bound_data_spec('visit_sink')]
        visit_sink = self.archive.sink(visit_sink_files,
                                       frequency='per_visit',
                                       study_name=self.SUMMARY_STUDY_NAME)
        visit_sink.inputs.name = 'visit_summary'
        visit_sink.inputs.desc = (
            "Tests the sinking of visit-wide datasets")
        # Test project sink
        project_sink_files = [
            study.bound_data_spec('project_sink')]
        project_sink = self.archive.sink(project_sink_files,
                                         frequency='per_project',
                                         study_name=self.SUMMARY_STUDY_NAME)

        project_sink.inputs.name = 'project_summary'
        project_sink.inputs.desc = (
            "Tests the sinking of project-wide datasets")
        # Create workflow connecting them together
        workflow = pe.Workflow('summary_unittest', base_dir=self.work_dir)
        workflow.add_nodes((source, subject_sink, visit_sink,
                            project_sink))
        workflow.connect(inputnode, 'subject_id', source, 'subject_id')
        workflow.connect(inputnode, 'visit_id', source, 'visit_id')
        workflow.connect(inputnode, 'subject_id', subject_sink, 'subject_id')
        workflow.connect(inputnode, 'visit_id', visit_sink, 'visit_id')
        workflow.connect(
            source, 'source1' + PATH_SUFFIX,
            subject_sink, 'subject_sink' + PATH_SUFFIX)
        workflow.connect(
            source, 'source2' + PATH_SUFFIX,
            visit_sink, 'visit_sink' + PATH_SUFFIX)
        workflow.connect(
            source, 'source3' + PATH_SUFFIX,
            project_sink, 'project_sink' + PATH_SUFFIX)
        workflow.run()
        # Check local summary directories were created properly
        subject_dir = self.get_session_dir(frequency='per_subject')
        self.assertEqual(sorted(os.listdir(subject_dir)),
                         [self.SUMMARY_STUDY_NAME + '_subject_sink.nii.gz'])
        visit_dir = self.get_session_dir(frequency='per_visit')
        self.assertEqual(sorted(os.listdir(visit_dir)),
                         [self.SUMMARY_STUDY_NAME + '_visit_sink.nii.gz'])
        project_dir = self.get_session_dir(frequency='per_project')
        self.assertEqual(sorted(os.listdir(project_dir)),
                         [self.SUMMARY_STUDY_NAME + '_project_sink.nii.gz'])
        # Reload the data from the summary directories
        reloadinputnode = pe.Node(IdentityInterface(['subject_id',
                                                     'visit_id']),
                                  'reload_inputnode')
        reloadinputnode.inputs.subject_id = self.SUBJECT
        reloadinputnode.inputs.visit_id = self.VISIT
        reloadsource = self.archive.source(
            (source_files + subject_sink_files + visit_sink_files +
             project_sink_files),
            name='reload_source',
            study_name=self.SUMMARY_STUDY_NAME)
        reloadsink = self.archive.sink(
            [study.bound_data_spec(n)
             for n in ('resink1', 'resink2', 'resink3')],
            study_name=self.SUMMARY_STUDY_NAME)
        reloadsink.inputs.name = 'reload_summary'
        reloadsink.inputs.desc = (
            "Tests the reloading of subject and project summary datasets")
        reloadworkflow = pe.Workflow('reload_summary_unittest',
                                     base_dir=self.work_dir)
        reloadworkflow.connect(reloadinputnode, 'subject_id',
                               reloadsource, 'subject_id')
        reloadworkflow.connect(reloadinputnode, 'visit_id',
                               reloadsource, 'visit_id')
        reloadworkflow.connect(reloadinputnode, 'subject_id',
                               reloadsink, 'subject_id')
        reloadworkflow.connect(reloadinputnode, 'visit_id',
                               reloadsink, 'visit_id')
        reloadworkflow.connect(reloadsource,
                               'subject_sink' + PATH_SUFFIX,
                               reloadsink,
                               'resink1' + PATH_SUFFIX)
        reloadworkflow.connect(reloadsource,
                               'visit_sink' + PATH_SUFFIX,
                               reloadsink,
                               'resink2' + PATH_SUFFIX)
        reloadworkflow.connect(reloadsource,
                               'project_sink' + PATH_SUFFIX,
                               reloadsink,
                               'resink3' + PATH_SUFFIX)
        reloadworkflow.run()
        outputs = [
            f for f in sorted(os.listdir(self.session_dir))
            if f != FIELDS_FNAME]
        self.assertEqual(outputs,
                         [self.SUMMARY_STUDY_NAME + '_resink1.nii.gz',
                          self.SUMMARY_STUDY_NAME + '_resink2.nii.gz',
                          self.SUMMARY_STUDY_NAME + '_resink3.nii.gz',
                          'source1.nii.gz', 'source2.nii.gz',
                          'source3.nii.gz', 'source4.nii.gz'])
Example #5
 def runner(self):
     return LinearRunner(self.work_dir)
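This fixture only constructs the runner that the other examples pass to DummyStudy. A sketch of how it would typically be consumed, reusing only names that appear in the surrounding examples (the test method itself is hypothetical):

    def test_with_runner_fixture(self):
        # Hypothetical test method: build a study with the fixture's runner
        # instead of instantiating LinearRunner inline as the other examples do.
        study = DummyStudy('study', self.archive, self.runner(),
                           inputs=[DatasetMatch('source1', nifti_gz_format,
                                                'source1')])
        # Assumes the match resolves to a scan literally named 'source1'.
        dataset = study.data('source1')[0]
        self.assertEqual(dataset.name, 'source1')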
Example #6
 def test_digest_check(self):
     """
     Tests the check of downloaded digests to see whether a file needs to
     be re-downloaded
     """
     cache_dir = os.path.join(self.base_cache_path, 'digest-check-cache')
     DATASET_NAME = 'source1'
     STUDY_NAME = 'digest_check_study'
     dataset_fpath = DATASET_NAME + nifti_gz_format.extension
     source_target_path = os.path.join(self.session_cache(cache_dir),
                                       dataset_fpath)
     md5_path = source_target_path + XnatArchive.MD5_SUFFIX
     shutil.rmtree(cache_dir, ignore_errors=True)
     os.makedirs(cache_dir)
     archive = XnatArchive(project_id=self.PROJECT,
                           server=SERVER,
                           cache_dir=cache_dir)
     study = DummyStudy(
         STUDY_NAME,
         archive,
         LinearRunner('ad'),
         inputs=[DatasetMatch(DATASET_NAME, nifti_gz_format, DATASET_NAME)])
     source = archive.source([study.input(DATASET_NAME)],
                             name='digest_check_source',
                             study_name=STUDY_NAME)
     source.inputs.subject_id = self.SUBJECT
     source.inputs.visit_id = self.VISIT
     source.run()
     self.assertTrue(os.path.exists(md5_path))
     self.assertTrue(os.path.exists(source_target_path))
     with open(md5_path) as f:
         digests = json.load(f)
     # Stash the downloaded file in a new location and create a dummy
     # file instead
     stash_path = source_target_path + '.stash'
     shutil.move(source_target_path, stash_path)
     with open(source_target_path, 'w') as f:
         f.write('dummy')
     # Run the download, which shouldn't download as the digests are the
     # same
     source.run()
     with open(source_target_path) as f:
         d = f.read()
     self.assertEqual(d, 'dummy')
     # Replace the digest with a dummy
     os.remove(md5_path)
     digests[dataset_fpath] = 'dummy_digest'
     with open(md5_path, 'w') as f:
         json.dump(digests, f)
     # Retry the download, which should now download since the digests
     # differ
     source.run()
     with open(source_target_path) as f:
         d = f.read()
     with open(stash_path) as f:
         e = f.read()
     self.assertEqual(d, e)
     # Resink the source file and check that the generated MD5 digest is
     # stored in identical format
     sink_archive = XnatArchive(project_id=self.DIGEST_SINK_PROJECT,
                                server=SERVER,
                                cache_dir=cache_dir)
     DATASET_NAME = 'sink1'
     sink = sink_archive.sink([study.bound_data_spec(DATASET_NAME)],
                              name='digest_check_sink',
                              study_name=STUDY_NAME)
     sink.inputs.name = 'digest_check_sink'
     sink.inputs.desc = "Tests the generation of MD5 digests"
     sink.inputs.subject_id = self.DIGEST_SINK_SUBJECT
     sink.inputs.visit_id = self.VISIT
     sink.inputs.sink1_path = source_target_path
     sink_fpath = (STUDY_NAME + '_' + DATASET_NAME +
                   nifti_gz_format.extension)
     sink_target_path = os.path.join(
         (self.session_cache(cache_dir,
                             project=self.DIGEST_SINK_PROJECT,
                             subject=(self.DIGEST_SINK_SUBJECT)) +
          XnatArchive.PROCESSED_SUFFIX), sink_fpath)
     sink_md5_path = sink_target_path + XnatArchive.MD5_SUFFIX
     sink.run()
     with open(md5_path) as f:
         source_digests = json.load(f)
     with open(sink_md5_path) as f:
         sink_digests = json.load(f)
     self.assertEqual(
         source_digests[dataset_fpath], sink_digests[sink_fpath],
         ("Source digest ({}) did not equal sink digest ({})".format(
             source_digests[dataset_fpath], sink_digests[sink_fpath])))
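The behaviour being exercised is that a cached file is only fetched again when its stored digests no longer match the ones reported for the remote files. A minimal sketch of that decision, where needs_redownload is a hypothetical helper and the '.md5.json' sidecar suffix is an assumed stand-in for XnatArchive.MD5_SUFFIX:

    import json
    import os

    def needs_redownload(cached_path, remote_digests, md5_suffix='.md5.json'):
        """Return True if `cached_path` should be downloaded again.

        `remote_digests` maps file names to the digests the server reports;
        the locally stored digests are read from the sidecar JSON written
        when the file was first cached (as checked in the test above).
        """
        md5_path = cached_path + md5_suffix
        if not (os.path.exists(cached_path) and os.path.exists(md5_path)):
            return True
        with open(md5_path) as f:
            local_digests = json.load(f)
        fname = os.path.basename(cached_path)
        return local_digests.get(fname) != remote_digests.get(fname)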
Example #7
    def test_delayed_download(self):
        """
        Tests handling of race conditions where separate processes attempt to
        cache the same dataset
        """
        cache_dir = os.path.join(self.base_cache_path,
                                 'delayed-download-cache')
        DATASET_NAME = 'source1'
        target_path = os.path.join(self.session_cache(cache_dir),
                                   DATASET_NAME + nifti_gz_format.extension)
        tmp_dir = target_path + '.download'
        shutil.rmtree(cache_dir, ignore_errors=True)
        os.makedirs(cache_dir)
        archive = XnatArchive(server=SERVER,
                              cache_dir=cache_dir,
                              project_id=self.PROJECT)
        study = DummyStudy(
            self.STUDY_NAME,
            archive,
            LinearRunner('ad'),
            inputs=[DatasetMatch(DATASET_NAME, nifti_gz_format, DATASET_NAME)])
        source = archive.source([study.input(DATASET_NAME)],
                                name='delayed_source',
                                study_name='delayed_study')
        source.inputs.subject_id = self.SUBJECT
        source.inputs.visit_id = self.VISIT
        result1 = source.run()
        source1_path = result1.outputs.source1_path
        self.assertTrue(os.path.exists(source1_path))
        self.assertEqual(
            source1_path, target_path,
            "Output file path '{}' not equal to target path '{}'".format(
                source1_path, target_path))
        # Clear cache to start again
        shutil.rmtree(cache_dir, ignore_errors=True)
        # Create tmp_dir before running the interface; the source should wait
        # for 1 second, see that the download hasn't progressed, then clear
        # the stale temp dir and redownload the dataset.
        os.makedirs(tmp_dir)
        source.inputs.race_cond_delay = 1
        result2 = source.run()
        source1_path = result2.outputs.source1_path
        # Clear cache to start again
        shutil.rmtree(cache_dir, ignore_errors=True)
        # This time simulate a download in a separate process that keeps
        # updating the temp dir; the source should keep waiting and, once the
        # other "download" finishes, use the file it put in place.
        internal_dir = os.path.join(tmp_dir, 'internal')
        deleted_tmp_dir = tmp_dir + '.deleted'

        def simulate_download():
            "Simulates a download in a separate process"
            os.makedirs(internal_dir)
            time.sleep(5)
            # Modify a file in the temp dir to make the source download keep
            # waiting
            logger.info('Updating simulated download directory')
            with open(os.path.join(internal_dir, 'download'), 'a') as f:
                f.write('downloading')
            time.sleep(10)
            # Simulate the finalising of the download by writing the target
            # file into place and moving the temp dir out of the way.
            logger.info('Finalising simulated download')
            with open(target_path, 'a') as f:
                f.write('simulated')
            shutil.move(tmp_dir, deleted_tmp_dir)

        source.inputs.race_cond_delay = 10
        p = Process(target=simulate_download)
        p.start()  # Start the simulated download in separate process
        time.sleep(1)
        source.run()  # Run the local download
        p.join()
        with open(os.path.join(deleted_tmp_dir, 'internal', 'download')) as f:
            d = f.read()
        self.assertEqual(d, 'downloading')
        with open(target_path) as f:
            d = f.read()
        self.assertEqual(d, 'simulated')
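The race_cond_delay input drives a polling loop: while another process holds the '.download' temp directory and keeps modifying it, the source waits; if the directory sits unchanged for the delay period, the source assumes the other download died, clears it and downloads itself. A rough sketch of that logic (all names here are ours, not the library's):

    import os
    import shutil
    import time

    def _latest_mtime(path):
        """Newest modification time of anything under `path`."""
        times = [os.path.getmtime(path)]
        for root, _, files in os.walk(path):
            times.extend(os.path.getmtime(os.path.join(root, f)) for f in files)
        return max(times)

    def wait_for_concurrent_download(tmp_dir, target_path, delay):
        """Return True if the caller should perform the download itself."""
        while os.path.exists(tmp_dir):
            before = _latest_mtime(tmp_dir)
            time.sleep(delay)
            if os.path.exists(tmp_dir) and _latest_mtime(tmp_dir) == before:
                # No progress within `delay` seconds: treat the other download
                # as dead, remove its temp dir and download ourselves.
                shutil.rmtree(tmp_dir)
                return True
        # The other process finished; download only if it left nothing behind.
        return not os.path.exists(target_path)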
Example #8
    def test_summary(self):
        # Create the XNAT archive and study used to build the source node
        archive = XnatArchive(server=SERVER,
                              cache_dir=self.archive_cache_dir,
                              project_id=self.PROJECT)
        study = DummyStudy(self.SUMMARY_STUDY_NAME,
                           archive,
                           LinearRunner('ad'),
                           inputs=[
                               DatasetMatch('source1', nifti_gz_format,
                                            'source1'),
                               DatasetMatch('source2', nifti_gz_format,
                                            'source2'),
                               DatasetMatch('source3', nifti_gz_format,
                                            'source3')
                           ])
        # TODO: Should test out other file formats as well.
        source_files = [
            study.input(n) for n in ('source1', 'source2', 'source3')
        ]
        inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                            'inputnode')
        inputnode.inputs.subject_id = self.SUBJECT
        inputnode.inputs.visit_id = self.VISIT
        source = archive.source(source_files)
        subject_sink_files = [study.bound_data_spec('subject_sink')]
        subject_sink = archive.sink(subject_sink_files,
                                    frequency='per_subject',
                                    study_name=self.SUMMARY_STUDY_NAME)
        subject_sink.inputs.name = 'subject_summary'
        subject_sink.inputs.desc = (
            "Tests the sinking of subject-wide datasets")
        # Test visit sink
        visit_sink_files = [study.bound_data_spec('visit_sink')]
        visit_sink = archive.sink(visit_sink_files,
                                  frequency='per_visit',
                                  study_name=self.SUMMARY_STUDY_NAME)
        visit_sink.inputs.name = 'visit_summary'
        visit_sink.inputs.desc = ("Tests the sinking of visit-wide datasets")
        # Test project sink
        project_sink_files = [study.bound_data_spec('project_sink')]
        project_sink = archive.sink(project_sink_files,
                                    frequency='per_project',
                                    study_name=self.SUMMARY_STUDY_NAME)

        project_sink.inputs.name = 'project_summary'
        project_sink.inputs.desc = (
            "Tests the sinking of project-wide datasets")
        # Create workflow connecting them together
        workflow = pe.Workflow('summary_unittest', base_dir=self.work_dir)
        workflow.add_nodes((source, subject_sink, visit_sink, project_sink))
        workflow.connect(inputnode, 'subject_id', source, 'subject_id')
        workflow.connect(inputnode, 'visit_id', source, 'visit_id')
        workflow.connect(inputnode, 'subject_id', subject_sink, 'subject_id')
        workflow.connect(inputnode, 'visit_id', visit_sink, 'visit_id')
        workflow.connect(source, 'source1' + PATH_SUFFIX, subject_sink,
                         'subject_sink' + PATH_SUFFIX)
        workflow.connect(source, 'source2' + PATH_SUFFIX, visit_sink,
                         'visit_sink' + PATH_SUFFIX)
        workflow.connect(source, 'source3' + PATH_SUFFIX, project_sink,
                         'project_sink' + PATH_SUFFIX)
        workflow.run()
        with self._connect() as mbi_xnat:
            # Check subject summary directories were created properly in cache
            expected_subj_datasets = [
                self.SUMMARY_STUDY_NAME + '_subject_sink'
            ]
            subject_dir = os.path.join(
                self.archive_cache_dir, self.PROJECT,
                '_'.join((self.PROJECT, self.SUBJECT)), '_'.join(
                    (self.PROJECT, self.SUBJECT, XnatArchive.SUMMARY_NAME)))
            self.assertEqual(filter_md5_fnames(os.listdir(subject_dir)), [
                d + nifti_gz_format.extension for d in expected_subj_datasets
            ])
            # and on XNAT
            subject_dataset_names = mbi_xnat.projects[
                self.PROJECT].experiments['_'.join(
                    (self.PROJECT, self.SUBJECT,
                     XnatArchive.SUMMARY_NAME))].scans.keys()
            self.assertEqual(expected_subj_datasets, subject_dataset_names)
            # Check visit summary directories were created properly in
            # cache
            expected_visit_datasets = [self.SUMMARY_STUDY_NAME + '_visit_sink']
            visit_dir = os.path.join(
                self.archive_cache_dir, self.PROJECT,
                self.PROJECT + '_' + XnatArchive.SUMMARY_NAME,
                (self.PROJECT + '_' + XnatArchive.SUMMARY_NAME + '_' +
                 self.VISIT))
            self.assertEqual(filter_md5_fnames(os.listdir(visit_dir)), [
                d + nifti_gz_format.extension for d in expected_visit_datasets
            ])
            # and on XNAT
            visit_dataset_names = mbi_xnat.projects[self.PROJECT].experiments[
                '{}_{}_{}'.format(self.PROJECT, XnatArchive.SUMMARY_NAME,
                                  self.VISIT)].scans.keys()
            self.assertEqual(expected_visit_datasets, visit_dataset_names)
            # Check project summary directories were created properly in cache
            expected_proj_datasets = [
                self.SUMMARY_STUDY_NAME + '_project_sink'
            ]
            project_dir = os.path.join(
                self.archive_cache_dir, self.PROJECT,
                self.PROJECT + '_' + XnatArchive.SUMMARY_NAME,
                self.PROJECT + '_' + XnatArchive.SUMMARY_NAME + '_' +
                XnatArchive.SUMMARY_NAME)
            self.assertEqual(filter_md5_fnames(os.listdir(project_dir)), [
                d + nifti_gz_format.extension for d in expected_proj_datasets
            ])
            # and on XNAT
            project_dataset_names = mbi_xnat.projects[
                self.PROJECT].experiments['{}_{sum}_{sum}'.format(
                    self.PROJECT, sum=XnatArchive.SUMMARY_NAME)].scans.keys()
            self.assertEqual(expected_proj_datasets, project_dataset_names)
        # Reload the data from the summary directories
        reloadinputnode = pe.Node(
            IdentityInterface(['subject_id', 'visit_id']), 'reload_inputnode')
        reloadinputnode.inputs.subject_id = self.SUBJECT
        reloadinputnode.inputs.visit_id = self.VISIT
        reloadsource = archive.source((source_files + subject_sink_files +
                                       visit_sink_files + project_sink_files),
                                      name='reload_source',
                                      study_name=self.SUMMARY_STUDY_NAME)
        reloadsink = archive.sink([
            study.bound_data_spec(n) for n in ('resink1', 'resink2', 'resink3')
        ],
                                  study_name=self.SUMMARY_STUDY_NAME)
        reloadsink.inputs.name = 'reload_summary'
        reloadsink.inputs.desc = (
            "Tests the reloading of subject and project summary datasets")
        reloadworkflow = pe.Workflow('reload_summary_unittest',
                                     base_dir=self.work_dir)
        reloadworkflow.connect(reloadinputnode, 'subject_id', reloadsource,
                               'subject_id')
        reloadworkflow.connect(reloadinputnode, 'visit_id', reloadsource,
                               'visit_id')
        reloadworkflow.connect(reloadinputnode, 'subject_id', reloadsink,
                               'subject_id')
        reloadworkflow.connect(reloadinputnode, 'visit_id', reloadsink,
                               'visit_id')
        reloadworkflow.connect(reloadsource, 'subject_sink' + PATH_SUFFIX,
                               reloadsink, 'resink1' + PATH_SUFFIX)
        reloadworkflow.connect(reloadsource, 'visit_sink' + PATH_SUFFIX,
                               reloadsink, 'resink2' + PATH_SUFFIX)
        reloadworkflow.connect(reloadsource, 'project_sink' + PATH_SUFFIX,
                               reloadsink, 'resink3' + PATH_SUFFIX)
        reloadworkflow.run()
        # Check that the resinked datasets appear in the processed session cache
        self.assertEqual(
            filter_md5_fnames(os.listdir(self.proc_session_cache())), [
                self.SUMMARY_STUDY_NAME + '_resink1.nii.gz',
                self.SUMMARY_STUDY_NAME + '_resink2.nii.gz',
                self.SUMMARY_STUDY_NAME + '_resink3.nii.gz'
            ])
        # and on XNAT
        with self._connect() as mbi_xnat:
            resinked_dataset_names = mbi_xnat.projects[
                self.PROJECT].experiments[
                    self.session_label() +
                    XnatArchive.PROCESSED_SUFFIX].scans.keys()
            self.assertEqual(sorted(resinked_dataset_names), [
                self.SUMMARY_STUDY_NAME + '_resink1', self.SUMMARY_STUDY_NAME +
                '_resink2', self.SUMMARY_STUDY_NAME + '_resink3'
            ])
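filter_md5_fnames is used in this example and the next but isn't shown; presumably it drops the MD5 digest sidecar files from a cache directory listing before comparison. A guessed, minimal implementation (the sidecar suffix is an assumption standing in for XnatArchive.MD5_SUFFIX):

    def filter_md5_fnames(fnames, md5_suffix='.md5.json'):
        """Sorted listing with MD5 digest sidecar files removed (a sketch of
        what the helper used in these tests presumably does)."""
        return sorted(f for f in fnames if not f.endswith(md5_suffix))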
Example #9
    def test_archive_roundtrip(self):

        # Create the XNAT archive and study used to build the source node
        archive = XnatArchive(project_id=self.PROJECT,
                              server=SERVER,
                              cache_dir=self.archive_cache_dir)
        study = DummyStudy(self.STUDY_NAME,
                           archive,
                           runner=LinearRunner('a_dir'),
                           inputs=[
                               DatasetMatch('source1', nifti_gz_format,
                                            'source1'),
                               DatasetMatch('source2', nifti_gz_format,
                                            'source2'),
                               DatasetMatch('source3', nifti_gz_format,
                                            'source3'),
                               DatasetMatch('source4', nifti_gz_format,
                                            'source4')
                           ])
        # TODO: Should test out other file formats as well.
        source_files = [
            study.input(n)
            for n in ('source1', 'source2', 'source3', 'source4')
        ]
        sink_files = [
            study.bound_data_spec(n) for n in ('sink1', 'sink3', 'sink4')
        ]
        inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                            'inputnode')
        inputnode.inputs.subject_id = str(self.SUBJECT)
        inputnode.inputs.visit_id = str(self.VISIT)
        source = archive.source(source_files, study_name=self.STUDY_NAME)
        sink = archive.sink(sink_files, study_name=self.STUDY_NAME)
        sink.inputs.name = 'archive-roundtrip-unittest'
        sink.inputs.desc = (
            "A test session created by archive roundtrip unittest")
        # Create workflow connecting them together
        workflow = pe.Workflow('source-sink-unit-test', base_dir=self.work_dir)
        workflow.add_nodes((source, sink))
        workflow.connect(inputnode, 'subject_id', source, 'subject_id')
        workflow.connect(inputnode, 'visit_id', source, 'visit_id')
        workflow.connect(inputnode, 'subject_id', sink, 'subject_id')
        workflow.connect(inputnode, 'visit_id', sink, 'visit_id')
        for source_file in source_files:
            if source_file.name != 'source2':
                sink_name = source_file.name.replace('source', 'sink')
                workflow.connect(source, source_file.name + PATH_SUFFIX, sink,
                                 sink_name + PATH_SUFFIX)
        workflow.run()
        # Check cache was created properly
        self.assertEqual(filter_md5_fnames(os.listdir(self.session_cache())), [
            'source1.nii.gz', 'source2.nii.gz', 'source3.nii.gz',
            'source4.nii.gz'
        ])
        expected_sink_datasets = [
            self.STUDY_NAME + '_sink1', self.STUDY_NAME + '_sink3',
            self.STUDY_NAME + '_sink4'
        ]
        self.assertEqual(
            filter_md5_fnames(os.listdir(self.proc_session_cache())),
            [d + nifti_gz_format.extension for d in expected_sink_datasets])
        with self._connect() as mbi_xnat:
            dataset_names = mbi_xnat.experiments[
                self.session_label() +
                XnatArchive.PROCESSED_SUFFIX].scans.keys()
        self.assertEqual(sorted(dataset_names), expected_sink_datasets)
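session_label() and proc_session_cache() are test-class helpers that aren't part of the snippet. Judging by the '_'.join(...) labels built in Example #8, the session label is presumably '<project>_<subject>_<visit>', with XnatArchive.PROCESSED_SUFFIX appended for the derived-data experiment. A hedged sketch of the first helper:

    def session_label(self):
        # Assumed implementation, inferred from the experiment labels used in
        # Example #8; the real helper may differ.
        return '_'.join((self.PROJECT, self.SUBJECT, self.VISIT))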