コード例 #1
0
ファイル: test_source_sink.py プロジェクト: MonashBI/arcana
 def test_fields_roundtrip(self):
     """
     Sinks three field values (an int, a float and a string) into the
     repository and then sources them back again, checking that each value
     read back equals the value that was stored.
     """
     # (field spec name, value pushed through the sink) pairs
     expected_values = [('field1', 1), ('field2', 2.0), ('field3', str('3'))]
     field_names = [name for name, _ in expected_values]
     repo = XnatRepo(server=SERVER, cache_dir=self.cache_dir)
     analysis = DummyAnalysis(
         self.STUDY_NAME,
         dataset=repo.dataset(self.project),
         processor=SingleProc('a_dir'),
         inputs=[FilesetFilter('source1', 'source1', text_format)])
     pipeline = analysis.dummy_pipeline()
     pipeline.cap()
     # Store the field values against the test session
     sink_interface = RepositorySink(
         (analysis.bound_spec(name).slice for name in field_names),
         pipeline)
     sink = pe.Node(sink_interface, name='fields_sink')
     for name, value in expected_values:
         setattr(sink.inputs, name + '_field', value)
     sink.inputs.subject_id = self.SUBJECT
     sink.inputs.visit_id = self.VISIT
     sink.inputs.desc = "Test sink of fields"
     sink.inputs.name = 'test_sink'
     sink.run()
     # Read the same fields back from the repository
     source_interface = RepositorySource(
         analysis.bound_spec(name).slice for name in field_names)
     source = pe.Node(source_interface, name='fields_source')
     source.inputs.visit_id = self.VISIT
     source.inputs.subject_id = self.SUBJECT
     source.inputs.desc = "Test source of fields"
     source.inputs.name = 'test_source'
     outputs = source.run().outputs
     for name, value in expected_values:
         self.assertEqual(getattr(outputs, name + '_field'), value)
コード例 #2
0
ファイル: test_multi_subj.py プロジェクト: MonashBI/arcana
 def test_cache_download(self):
     """
     Caches the inputs of an analysis up front and then checks that, for
     every subject/visit pair in ``self.STRUCTURE``, a path named
     '<input>-<input>' exists for each analysis input inside the
     corresponding session directory of the repository cache.
     """
     repository = XnatRepo(server=SERVER, cache_dir=tempfile.mkdtemp())
     input_filters = [
         FilesetFilter(name, name, text_format)
         for name in ('fileset1', 'fileset3')]
     analysis = self.create_analysis(
         TestAnalysis,
         'cache_download',
         inputs=input_filters,
         dataset=repository.dataset(self.project))
     analysis.cache_inputs()
     project_dir = op.join(repository.cache_dir, self.project)
     for subject_id, visits in self.STRUCTURE.items():
         # Subject directories are labelled '<project>_<subject>' and the
         # session directories within them '<project>_<subject>_<visit>'
         subject_label = '{}_{}'.format(self.project, subject_id)
         for visit_id in visits:
             session_dir = op.join(
                 project_dir, subject_label,
                 '{}_{}'.format(subject_label, visit_id))
             for inpt in analysis.inputs:
                 cached_path = op.join(session_dir,
                                       inpt.name + '-' + inpt.name)
                 self.assertTrue(op.exists(cached_path))
コード例 #3
0
ファイル: test_source_sink.py プロジェクト: MonashBI/arcana
    def test_repository_roundtrip(self):
        """
        Runs a workflow that sources four filesets from the repository and
        sinks three of them back under the analysis ('source2' is left
        unconnected), then checks the names found in the local session
        caches and the scans listed on the server for the derived session.
        """
        source_names = ['source1', 'source2', 'source3', 'source4']
        sink_names = ['sink1', 'sink3', 'sink4']
        repository = XnatRepo(server=SERVER, cache_dir=self.cache_dir)
        analysis = DummyAnalysis(
            self.STUDY_NAME,
            dataset=repository.dataset(self.project),
            processor=SingleProc('a_dir'),
            inputs=[FilesetFilter(name, name, text_format)
                    for name in source_names])
        # TODO: Should test out other file formats as well.
        # Node that feeds the session IDs to both the source and the sink
        inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                            'inputnode')
        inputnode.inputs.subject_id = str(self.SUBJECT)
        inputnode.inputs.visit_id = str(self.VISIT)
        source_interface = RepositorySource(
            analysis.bound_spec(name).slice for name in source_names)
        source = pe.Node(source_interface, name='source')
        pipeline = analysis.dummy_pipeline()
        pipeline.cap()
        sink_interface = RepositorySink(
            (analysis.bound_spec(name).slice for name in sink_names),
            pipeline)
        sink = pe.Node(sink_interface, name='sink')
        sink.inputs.name = 'repository-roundtrip-unittest'
        sink.inputs.desc = (
            "A test session created by repository roundtrip unittest")
        # Wire the nodes together into a single workflow
        workflow = pe.Workflow('source-sink-unit-test', base_dir=self.work_dir)
        workflow.add_nodes((source, sink))
        for node in (source, sink):
            for id_field in ('subject_id', 'visit_id'):
                workflow.connect(inputnode, id_field, node, id_field)
        for source_name in source_names:
            if source_name == 'source2':
                continue  # deliberately not passed on to the sink
            workflow.connect(
                source, source_name + PATH_SUFFIX, sink,
                source_name.replace('source', 'sink') + PATH_SUFFIX)
        workflow.run()
        # The cache of the acquired session should hold every source
        acquired_cache = self.session_cache()
        self.assertEqual(filter_scans(os.listdir(acquired_cache)),
                         [name + '-' + name for name in source_names])
        # The cache of the derived session should hold every sink
        derived_cache = self.session_cache(from_analysis=self.STUDY_NAME)
        self.assertEqual(filter_scans(os.listdir(derived_cache)),
                         [name + '-' + name for name in sink_names])
        # ... and the same sinks should be listed as scans on the server
        with self._connect() as login:
            derived_session = login.experiments[
                self.session_label(from_analysis=self.STUDY_NAME)]
            fileset_names = filter_scans(derived_session.scans.keys())
        self.assertEqual(fileset_names, sink_names)
コード例 #4
0
ファイル: test_source_sink.py プロジェクト: MonashBI/arcana
 def test_summary(self):
     """
     Sinks filesets through three separate sink nodes ('subject_sink',
     'visit_sink' and 'analysis_sink'), checks the resulting cache
     directories and the scans listed on the server for the sessions that
     use ``XnatRepo.SUMMARY_NAME`` as the visit, the subject, or both, and
     then sources those filesets again and re-sinks them as 'resink1',
     'resink2' and 'resink3'.

     NOTE(review): the spec names and descriptions suggest these are
     per-subject, per-visit and per-dataset summaries; the frequency of
     each spec is defined by DummyAnalysis, outside this method.
     """
     # Set up the repository and an analysis with three text-format inputs
     repository = XnatRepo(server=SERVER, cache_dir=self.cache_dir)
     analysis = DummyAnalysis(self.SUMMARY_STUDY_NAME,
                              repository.dataset(self.project),
                              SingleProc('ad'),
                              inputs=[
                                  FilesetFilter('source1', 'source1',
                                                text_format),
                                  FilesetFilter('source2', 'source2',
                                                text_format),
                                  FilesetFilter('source3', 'source3',
                                                text_format)
                              ])
     # TODO: Should test out other file formats as well.
     source_files = ['source1', 'source2', 'source3']
     # Node that supplies the subject and visit IDs to the workflow
     inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                         'inputnode')
     inputnode.inputs.subject_id = self.SUBJECT
     inputnode.inputs.visit_id = self.VISIT
     source = pe.Node(RepositorySource(
         [analysis.bound_spec(f).slice for f in source_files]),
                      name='source')
     # Test subject sink
     subject_sink_files = ['subject_sink']
     # The same capped dummy pipeline is passed to every sink in this test
     dummy_pipeline = analysis.dummy_pipeline()
     dummy_pipeline.cap()
     subject_sink = pe.Node(RepositorySink(
         [analysis.bound_spec(f).slice for f in subject_sink_files],
         dummy_pipeline),
                            name='subject_sink')
     subject_sink.inputs.name = 'subject_summary'
     subject_sink.inputs.desc = (
         "Tests the sinking of subject-wide filesets")
     # Test visit sink
     visit_sink_files = ['visit_sink']
     visit_sink = pe.Node(RepositorySink(
         [analysis.bound_spec(f).slice for f in visit_sink_files],
         dummy_pipeline),
                          name='visit_sink')
     visit_sink.inputs.name = 'visit_summary'
     visit_sink.inputs.desc = ("Tests the sinking of visit-wide filesets")
     # Test project sink
     analysis_sink_files = ['analysis_sink']
     analysis_sink = pe.Node(RepositorySink(
         [analysis.bound_spec(f).slice for f in analysis_sink_files],
         dummy_pipeline),
                             name='analysis_sink')
     analysis_sink.inputs.name = 'project_summary'
     analysis_sink.inputs.desc = (
         "Tests the sinking of project-wide filesets")
     # Create workflow connecting them together
     workflow = pe.Workflow('summary_unittest', base_dir=self.work_dir)
     workflow.add_nodes((source, subject_sink, visit_sink, analysis_sink))
     workflow.connect(inputnode, 'subject_id', source, 'subject_id')
     workflow.connect(inputnode, 'visit_id', source, 'visit_id')
     # The subject sink is only given a subject ID, the visit sink only a
     # visit ID, and the analysis sink is given neither
     workflow.connect(inputnode, 'subject_id', subject_sink, 'subject_id')
     workflow.connect(inputnode, 'visit_id', visit_sink, 'visit_id')
     # Feed one sourced fileset into each of the three sinks
     workflow.connect(source, 'source1' + PATH_SUFFIX, subject_sink,
                      'subject_sink' + PATH_SUFFIX)
     workflow.connect(source, 'source2' + PATH_SUFFIX, visit_sink,
                      'visit_sink' + PATH_SUFFIX)
     workflow.connect(source, 'source3' + PATH_SUFFIX, analysis_sink,
                      'analysis_sink' + PATH_SUFFIX)
     workflow.run()
     analysis.clear_caches()  # Refresh cached repository tree object
     with self._connect() as login:
         # Check subject summary directories were created properly in cache
         expected_subj_filesets = ['subject_sink']
         subject_dir = self.session_cache(
             visit=XnatRepo.SUMMARY_NAME,
             from_analysis=self.SUMMARY_STUDY_NAME)
         self.assertEqual(filter_scans(os.listdir(subject_dir)),
                          [(e + '-' + e) for e in expected_subj_filesets])
         # and on XNAT
         subject_fileset_names = filter_scans(
             login.projects[self.project].experiments[self.session_label(
                 visit=XnatRepo.SUMMARY_NAME,
                 from_analysis=self.SUMMARY_STUDY_NAME)].scans.keys())
         self.assertEqual(expected_subj_filesets, subject_fileset_names)
         # Check visit summary directories were created properly in
         # cache
         expected_visit_filesets = ['visit_sink']
         visit_dir = self.session_cache(
             subject=XnatRepo.SUMMARY_NAME,
             from_analysis=self.SUMMARY_STUDY_NAME)
         self.assertEqual(filter_scans(os.listdir(visit_dir)),
                          [(e + '-' + e) for e in expected_visit_filesets])
         # and on XNAT
         visit_fileset_names = filter_scans(
             login.projects[self.project].experiments[self.session_label(
                 subject=XnatRepo.SUMMARY_NAME,
                 from_analysis=self.SUMMARY_STUDY_NAME)].scans.keys())
         self.assertEqual(expected_visit_filesets, visit_fileset_names)
         # Check project summary directories were created properly in cache
         expected_proj_filesets = ['analysis_sink']
         project_dir = self.session_cache(
             subject=XnatRepo.SUMMARY_NAME,
             visit=XnatRepo.SUMMARY_NAME,
             from_analysis=self.SUMMARY_STUDY_NAME)
         self.assertEqual(filter_scans(os.listdir(project_dir)),
                          [(e + '-' + e) for e in expected_proj_filesets])
         # and on XNAT
         project_fileset_names = filter_scans(
             login.projects[self.project].experiments[self.session_label(
                 subject=XnatRepo.SUMMARY_NAME,
                 visit=XnatRepo.SUMMARY_NAME,
                 from_analysis=self.SUMMARY_STUDY_NAME)].scans.keys())
         self.assertEqual(expected_proj_filesets, project_fileset_names)
     # Reload the data from the summary directories
     reloadinputnode = pe.Node(
         IdentityInterface(['subject_id', 'visit_id']), 'reload_inputnode')
     reloadinputnode.inputs.subject_id = self.SUBJECT
     reloadinputnode.inputs.visit_id = self.VISIT
     # One source node for each of the filesets sunk above
     reloadsource_per_subject = pe.Node(RepositorySource(
         analysis.bound_spec(f).slice for f in subject_sink_files),
                                        name='reload_source_per_subject')
     reloadsource_per_visit = pe.Node(RepositorySource(
         analysis.bound_spec(f).slice for f in visit_sink_files),
                                      name='reload_source_per_visit')
     reloadsource_per_dataset = pe.Node(RepositorySource(
         analysis.bound_spec(f).slice for f in analysis_sink_files),
                                        name='reload_source_per_dataset')
     # A single sink that receives all three reloaded filesets
     reloadsink = pe.Node(RepositorySink(
         (analysis.bound_spec(f).slice
          for f in ['resink1', 'resink2', 'resink3']), dummy_pipeline),
                          name='reload_sink')
     reloadsink.inputs.name = 'reload_summary'
     reloadsink.inputs.desc = (
         "Tests the reloading of subject and project summary filesets")
     reloadworkflow = pe.Workflow('reload_summary_unittest',
                                  base_dir=self.work_dir)
     # Unlike the first workflow, every node here receives both IDs
     for node in (reloadsource_per_subject, reloadsource_per_visit,
                  reloadsource_per_dataset, reloadsink):
         for iterator in ('subject_id', 'visit_id'):
             reloadworkflow.connect(reloadinputnode, iterator, node,
                                    iterator)
     reloadworkflow.connect(reloadsource_per_subject,
                            'subject_sink' + PATH_SUFFIX, reloadsink,
                            'resink1' + PATH_SUFFIX)
     reloadworkflow.connect(reloadsource_per_visit,
                            'visit_sink' + PATH_SUFFIX, reloadsink,
                            'resink2' + PATH_SUFFIX)
     reloadworkflow.connect(reloadsource_per_dataset,
                            'analysis_sink' + PATH_SUFFIX, reloadsink,
                            'resink3' + PATH_SUFFIX)
     reloadworkflow.run()
     # Check that the re-sunk filesets were created in the session cache
     self.assertEqual(
         filter_scans(
             os.listdir(
                 self.session_cache(
                     from_analysis=self.SUMMARY_STUDY_NAME))),
         ['resink1-resink1', 'resink2-resink2', 'resink3-resink3'])
     # and on XNAT
     with self._connect() as login:
         resinked_fileset_names = filter_scans(
             login.projects[self.project].experiments[self.session_label(
                 from_analysis=self.SUMMARY_STUDY_NAME)].scans.keys())
         self.assertEqual(sorted(resinked_fileset_names),
                          ['resink1', 'resink2', 'resink3'])
コード例 #5
0
ファイル: test_source_sink.py プロジェクト: MonashBI/arcana
 def test_checksums(self):
     """
     Checks that a cached fileset is only downloaded again when the
     checksums saved next to it have changed, and that the checksums
     written by a sink equal those held in the source's checksum file.
     """

     def read_text(path):
         # Return the whole contents of the text file at 'path'
         with open(path) as f:
             return f.read()

     def read_json(path):
         # Parse the JSON document stored at 'path'
         with open(path) as f:
             return json.load(f)

     cache_dir = op.join(self.work_dir, 'cache-checksum-check')
     source_name = 'source1'
     sink_name = 'sink1'
     analysis_name = 'checksum_check_analysis'
     fileset_fname = source_name + text_format.extension
     source_cache_path = op.join(self.session_cache(cache_dir),
                                 '-'.join((source_name, source_name)))
     md5_path = source_cache_path + XnatRepo.MD5_SUFFIX
     source_cache_fpath = op.join(source_cache_path, fileset_fname)
     # Start from an empty cache directory
     shutil.rmtree(cache_dir, ignore_errors=True)
     os.makedirs(cache_dir)
     # The input is read from one project and derivatives are written to
     # another, both sharing the same cache directory
     src_repo = XnatRepo(server=SERVER, cache_dir=cache_dir)
     src_dataset = src_repo.dataset(self.project)
     snk_repo = XnatRepo(server=SERVER, cache_dir=cache_dir)
     snk_dataset = snk_repo.dataset(
         self.checksum_sink_project,
         subject_ids=['SUBJECT'],
         visit_ids=['VISIT'],
         fill_tree=True)
     analysis = DummyAnalysis(
         analysis_name,
         dataset=snk_dataset,
         processor=SingleProc('ad'),
         inputs=[FilesetFilter(source_name, source_name, text_format,
                               dataset=src_dataset)])
     source = pe.Node(
         RepositorySource([analysis.bound_spec(source_name).slice]),
         name='checksum_check_source')
     source.inputs.subject_id = self.SUBJECT
     source.inputs.visit_id = self.VISIT
     source.run()
     self.assertTrue(op.exists(md5_path))
     self.assertTrue(op.exists(source_cache_fpath))
     checksums = read_json(md5_path)
     # Move the downloaded fileset aside and put a dummy file in its place
     stash_path = source_cache_path + '.stash'
     shutil.move(source_cache_path, stash_path)
     os.mkdir(source_cache_path)
     with open(source_cache_fpath, 'w') as f:
         f.write('dummy')
     # The checksums still match, so the dummy file should be left alone
     source.run()
     self.assertEqual(read_text(source_cache_fpath), 'dummy')
     # Overwrite the saved checksums with one that cannot match
     os.remove(md5_path)
     checksums['.'] = 'dummy_checksum'
     with open(md5_path, 'w', **JSON_ENCODING) as f:
         json.dump(checksums, f, indent=2)
     # The checksums now differ, so the fileset should be downloaded again
     # and be identical to the stashed copy
     source.run()
     redownloaded = read_text(source_cache_fpath)
     stashed = read_text(op.join(stash_path, fileset_fname))
     self.assertEqual(redownloaded, stashed)
     # Sink the source file and check that the MD5 checksums generated for
     # it are stored in an identical format
     pipeline = analysis.dummy_pipeline()
     pipeline.cap()
     sink = pe.Node(
         RepositorySink([analysis.bound_spec(sink_name).slice], pipeline),
         name='checksum_check_sink')
     sink.inputs.name = 'checksum_check_sink'
     sink.inputs.desc = "Tests the generation of MD5 checksums"
     sink.inputs.subject_id = self.SUBJECT
     sink.inputs.visit_id = self.VISIT
     sink.inputs.sink1_path = source_cache_fpath
     sink_session_cache = self.session_cache(
         cache_dir,
         project=self.checksum_sink_project,
         subject=(self.SUBJECT),
         from_analysis=analysis_name)
     sink_cache_path = op.join(sink_session_cache,
                               '-'.join((sink_name, sink_name)))
     sink_md5_path = sink_cache_path + XnatRepo.MD5_SUFFIX
     sink.run()
     source_checksums = read_json(md5_path)
     sink_checksums = read_json(sink_md5_path)
     mismatch_msg = (
         "Source checksum ({}) did not equal sink checksum ({})".format(
             source_checksums, sink_checksums))
     self.assertEqual(source_checksums, sink_checksums, mismatch_msg)
コード例 #6
0
ファイル: test_source_sink.py プロジェクト: MonashBI/arcana
    def test_delayed_download(self):
        """
        Tests handling of race conditions where separate processes attempt to
        cache the same fileset

        Three scenarios are run against the same source node:

        1. a plain download into an empty cache,
        2. a download where a '.download' temporary directory already
           exists but is never updated, and
        3. a download that runs while a separate process creates and
           updates the temporary directory and then writes the target
           file itself.
        """
        cache_dir = op.join(self.work_dir, 'cache-delayed-download')
        DATASET_NAME = 'source1'
        target_path = op.join(self.session_cache(cache_dir), DATASET_NAME,
                              DATASET_NAME + text_format.extension)
        # Temporary directory whose presence marks a download in progress
        tmp_dir = target_path + '.download'
        # Start from an empty cache directory
        shutil.rmtree(cache_dir, ignore_errors=True)
        os.makedirs(cache_dir)
        repository = XnatRepo(server=SERVER, cache_dir=cache_dir)
        dataset = repository.dataset(self.project)
        analysis = DummyAnalysis(
            self.STUDY_NAME,
            dataset,
            SingleProc('ad'),
            inputs=[FilesetFilter(DATASET_NAME, DATASET_NAME, text_format)])
        source = pe.Node(RepositorySource(
            [analysis.bound_spec(DATASET_NAME).slice]),
                         name='delayed_source')
        source.inputs.subject_id = self.SUBJECT
        source.inputs.visit_id = self.VISIT
        # Scenario 1: plain download, the output should be at target_path
        result1 = source.run()
        source1_path = result1.outputs.source1_path
        self.assertTrue(op.exists(source1_path))
        self.assertEqual(
            source1_path, target_path,
            "Output file path '{}' not equal to target path '{}'".format(
                source1_path, target_path))
        # Clear cache to start again
        shutil.rmtree(cache_dir, ignore_errors=True)
        # Scenario 2: create tmp_dir before running the interface. The
        # source is expected to wait for 1 second, see that nothing has been
        # downloaded in the meantime, then clear tmp_dir and redownload the
        # fileset.
        os.makedirs(tmp_dir)
        source.inputs.race_cond_delay = 1
        result2 = source.run()
        # NOTE(review): the path returned by this second run is assigned
        # but never asserted on
        source1_path = result2.outputs.source1_path
        # Clear cache to start again
        shutil.rmtree(cache_dir, ignore_errors=True)
        # Scenario 3: simulate another process that is part-way through
        # downloading the same fileset. That process keeps updating tmp_dir
        # and finally writes the target file itself, so the file read back
        # at the end should hold the simulated contents.
        internal_dir = op.join(tmp_dir, 'internal')
        deleted_tmp_dir = tmp_dir + '.deleted'

        def simulate_download():
            "Simulates a download in a separate process"
            os.makedirs(internal_dir)
            time.sleep(5)
            # Modify a file in the temp dir to make the source download keep
            # waiting
            logger.info('Updating simulated download directory')
            with open(op.join(internal_dir, 'download'), 'a') as f:
                f.write('downloading')
            time.sleep(10)
            # Simulate the finalising of the download by writing the target
            # file into place and moving the temp dir out of the way.
            logger.info('Finalising simulated download')
            with open(target_path, 'a') as f:
                f.write('simulated')
            shutil.move(tmp_dir, deleted_tmp_dir)

        # The simulated download touches tmp_dir after 5 seconds, which is
        # inside the 10 second delay set here
        source.inputs.race_cond_delay = 10
        # NOTE(review): the target is a nested function, which cannot be
        # pickled, so this presumably relies on the 'fork' start method --
        # confirm on platforms that default to 'spawn'
        p = Process(target=simulate_download)
        p.start()  # Start the simulated download in separate process
        time.sleep(1)
        source.run()  # Run the local download
        p.join()
        # The simulated process should have written its progress file ...
        with open(op.join(deleted_tmp_dir, 'internal', 'download')) as f:
            d = f.read()
        self.assertEqual(d, 'downloading')
        # ... and the target should be the file it wrote, not a fresh
        # download
        with open(target_path) as f:
            d = f.read()
        self.assertEqual(d, 'simulated')