def test_cache_download(self):
    archive = XnatArchive(project_id=self.project_id,
                          server=SERVER,
                          cache_dir=tempfile.mkdtemp())
    study = self.create_study(
        TestStudy, 'cache_download',
        inputs=[
            DatasetMatch('dataset1', mrtrix_format, 'dataset1'),
            DatasetMatch('dataset2', mrtrix_format, 'dataset2'),
            DatasetMatch('dataset3', mrtrix_format, 'dataset3'),
            DatasetMatch('dataset5', mrtrix_format, 'dataset5')],
        archive=archive)
    study.cache_inputs()
    for subject_id in self.SUBJECTS:
        for inpt in study.inputs:
            self.assertTrue(os.path.exists(os.path.join(
                archive.cache_dir, self.PROJECT,
                '{}_{}'.format(self.PROJECT, subject_id),
                '{}_{}_{}'.format(self.PROJECT, subject_id,
                                  self.VISITS[0]),
                inpt.fname())))
def test_derivable(self):
    # Test vanilla study
    study = self.create_study(
        TestDerivableStudy, 'study',
        inputs=[DatasetMatch('required', text_format, 'required')])
    self.assertTrue(study.bound_data_spec('derivable').derivable)
    self.assertTrue(study.bound_data_spec('another_derivable').derivable)
    self.assertFalse(study.bound_data_spec('missing_input').derivable)
    self.assertFalse(study.bound_data_spec('wrong_option').derivable)
    self.assertFalse(study.bound_data_spec('wrong_option2').derivable)
    # Test study with 'switch' enabled
    study_with_switch = self.create_study(
        TestDerivableStudy, 'study_with_switch',
        inputs=[DatasetMatch('required', text_format, 'required')],
        options={'switch': 1})
    self.assertTrue(
        study_with_switch.bound_data_spec('wrong_option').derivable)
    self.assertTrue(
        study_with_switch.bound_data_spec('wrong_option2').derivable)
    # Test study with optional input
    study_with_input = self.create_study(
        TestDerivableStudy, 'study_with_inputs',
        inputs=[
            DatasetMatch('required', text_format, 'required'),
            DatasetMatch('optional', text_format, 'required')])
    self.assertTrue(
        study_with_input.bound_data_spec('missing_input').derivable)
def test_multi_multi_study(self):
    study = self.create_study(
        MultiMultiStudy, 'multi_multi',
        [DatasetMatch('ss1_x', mrtrix_format, 'ones'),
         DatasetMatch('ss1_y', mrtrix_format, 'ones'),
         DatasetMatch('full_a', mrtrix_format, 'ones'),
         DatasetMatch('full_b', mrtrix_format, 'ones'),
         DatasetMatch('full_c', mrtrix_format, 'ones'),
         DatasetMatch('partial_a', mrtrix_format, 'ones'),
         DatasetMatch('partial_b', mrtrix_format, 'ones'),
         DatasetMatch('partial_c', mrtrix_format, 'ones')],
        options=[Option('full_required_op', 'product'),
                 Option('partial_ss2_product_op', 'product')])
    g = study.data('g')[0]
    if self.mrtrix_req is not None:
        NiAnalysisNodeMixin.load_module(*self.mrtrix_req)
    try:
        g_mean = float(sp.check_output(
            'mrstats {} -output mean'.format(g.path), shell=True))
        self.assertEqual(g_mean, 11.0)
    finally:
        if self.mrtrix_req is not None:
            NiAnalysisNodeMixin.unload_module(*self.mrtrix_req)
    # Test option values in MultiStudy
    self.assertEqual(study.data('full_p1'), 100)
    self.assertEqual(study.data('full_p2'), '200')
    self.assertEqual(study.data('full_p3'), 300.0)
    self.assertEqual(study.data('full_q1'), 150)
    self.assertEqual(study.data('full_q2'), '250')
    self.assertEqual(study.data('full_required_op'), 'product')
    # Test option values in SubStudy
    ss1 = study.sub_study('full').sub_study('ss1')
    self.assertEqual(ss1.data('o1'), 100)
    self.assertEqual(ss1.data('o2'), '200')
    self.assertEqual(ss1.data('o3'), 300.0)
    ss2 = study.sub_study('full').sub_study('ss2')
    self.assertEqual(ss2.data('o1'), 150)
    self.assertEqual(ss2.data('o2'), '250')
    self.assertEqual(ss2.data('o3'), 300.0)
    self.assertEqual(ss2.data('product_op'), 'product')
    # Test option values in MultiStudy
    self.assertEqual(study.data('partial_p1'), 1000)
    self.assertEqual(study.data('partial_ss1_o2'), '2')
    self.assertEqual(study.data('partial_ss1_o3'), 3.0)
    self.assertEqual(study.data('partial_ss2_o2'), '20')
    self.assertEqual(study.data('partial_ss2_o3'), 30.0)
    self.assertEqual(study.partial_ss2_product_op, 'product')
    # Test option values in SubStudy
    ss1 = study.sub_study('partial').sub_study('ss1')
    self.assertEqual(ss1.data('o1'), 1000)
    self.assertEqual(ss1.data('o2'), '2')
    self.assertEqual(ss1.data('o3'), 3.0)
    ss2 = study.sub_study('partial').sub_study('ss2')
    self.assertEqual(ss2.data('o1'), 1000)
    self.assertEqual(ss2.data('o2'), '20')
    self.assertEqual(ss2.data('o3'), 30.0)
    self.assertEqual(ss2.data('product_op'), 'product')
def test_archive_roundtrip(self):
    study = DummyStudy(
        self.STUDY_NAME, self.archive, runner=LinearRunner('a_dir'),
        inputs=[DatasetMatch('source1', nifti_gz_format, 'source1'),
                DatasetMatch('source2', nifti_gz_format, 'source2'),
                DatasetMatch('source3', nifti_gz_format, 'source3'),
                DatasetMatch('source4', nifti_gz_format, 'source4')])
    # TODO: Should test out other file formats as well.
    source_files = [study.input(n)
                    for n in ('source1', 'source2', 'source3',
                              'source4')]
    sink_files = [study.bound_data_spec(n)
                  for n in ('sink1', 'sink3', 'sink4')]
    inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                        'inputnode')
    inputnode.inputs.subject_id = self.SUBJECT
    inputnode.inputs.visit_id = self.VISIT
    source = self.archive.source(source_files,
                                 study_name=self.STUDY_NAME)
    sink = self.archive.sink(sink_files, study_name=self.STUDY_NAME)
    sink.inputs.name = 'archive_sink'
    sink.inputs.desc = (
        "A test session created by archive roundtrip unittest")
    # Create workflow connecting them together
    workflow = pe.Workflow('source_sink_unit_test',
                           base_dir=self.work_dir)
    workflow.add_nodes((source, sink))
    workflow.connect(inputnode, 'subject_id', source, 'subject_id')
    workflow.connect(inputnode, 'visit_id', source, 'visit_id')
    workflow.connect(inputnode, 'subject_id', sink, 'subject_id')
    workflow.connect(inputnode, 'visit_id', sink, 'visit_id')
    for source_file in source_files:
        if not source_file.name.endswith('2'):
            source_name = source_file.name
            sink_name = source_name.replace('source', 'sink')
            workflow.connect(source, source_name + PATH_SUFFIX,
                             sink, sink_name + PATH_SUFFIX)
    workflow.run()
    # Check local directory was created properly
    outputs = [f for f in sorted(os.listdir(self.session_dir))
               if f != FIELDS_FNAME]
    self.assertEqual(outputs,
                     [self.STUDY_NAME + '_sink1.nii.gz',
                      self.STUDY_NAME + '_sink3.nii.gz',
                      self.STUDY_NAME + '_sink4.nii.gz',
                      'source1.nii.gz', 'source2.nii.gz',
                      'source3.nii.gz', 'source4.nii.gz'])
def test_dcm2niix(self):
    study = self.create_study(
        DummyStudy, 'concatenate', inputs=[
            DatasetMatch('input_dataset', dicom_format,
                         't2_tse_tra_p2_448')])
    study.data('output_dataset')[0]
    self.assertDatasetCreated('output_dataset.nii.gz', study.name)
def test_special_char_in_scan_name(self):
    """
    Tests whether XNAT source can download files with spaces in their
    names
    """
    cache_dir = tempfile.mkdtemp()
    archive = XnatArchive(server=SERVER, cache_dir=cache_dir,
                          project_id=self.PROJECT)
    study = DummyStudy(
        'study', archive, LinearRunner('ad'),
        inputs=[
            DatasetMatch('source{}'.format(i), dicom_format, d)
            for i, d in enumerate(self.DATASETS, start=1)],
        subject_ids=[self.SUBJECT],
        visit_ids=[self.VISIT])
    source = archive.source(
        [study.input('source{}'.format(i))
         for i in range(1, len(self.DATASETS) + 1)])
    source.inputs.subject_id = self.SUBJECT
    source.inputs.visit_id = self.VISIT
    workflow = pe.Workflow(self.TEST_NAME, base_dir=self.work_path)
    workflow.add_nodes([source])
    graph = workflow.run()
    result = next(n.result for n in graph.nodes()
                  if n.name == source.name)
    for i, dname in enumerate(self.DATASETS, start=1):
        path = getattr(result.outputs,
                       'source{}{}'.format(i, PATH_SUFFIX))
        self.assertEqual(os.path.basename(path), dname)
        self.assertTrue(os.path.exists(path))
class TestDicomTagMatch(BaseTestCase):

    IMAGE_TYPE_TAG = ('0008', '0008')
    GRE_PATTERN = 'gre_field_mapping_3mm.*'
    PHASE_IMAGE_TYPE = ['ORIGINAL', 'PRIMARY', 'P', 'ND']
    MAG_IMAGE_TYPE = ['ORIGINAL', 'PRIMARY', 'M', 'ND', 'NORM']
    DICOM_MATCH = [
        DatasetMatch('gre_phase', dicom_format, GRE_PATTERN,
                     dicom_tags={IMAGE_TYPE_TAG: PHASE_IMAGE_TYPE},
                     is_regex=True),
        DatasetMatch('gre_mag', dicom_format, GRE_PATTERN,
                     dicom_tags={IMAGE_TYPE_TAG: MAG_IMAGE_TYPE},
                     is_regex=True)]

    def test_dicom_match(self):
        study = self.create_study(
            TestMatchStudy, 'test_dicom',
            inputs=self.DICOM_MATCH)
        phase = study.data('gre_phase')[0]
        mag = study.data('gre_mag')[0]
        self.assertEqual(phase.name, 'gre_field_mapping_3mm_phase')
        self.assertEqual(mag.name, 'gre_field_mapping_3mm_mag')

    def test_order_match(self):
        study = self.create_study(
            TestMatchStudy, 'test_dicom',
            inputs=[
                DatasetMatch('gre_phase', dicom_format,
                             pattern=self.GRE_PATTERN, order=1,
                             is_regex=True),
                DatasetMatch('gre_mag', dicom_format,
                             pattern=self.GRE_PATTERN, order=0,
                             is_regex=True)])
        phase = study.data('gre_phase')[0]
        mag = study.data('gre_mag')[0]
        self.assertEqual(phase.name, 'gre_field_mapping_3mm_phase')
        self.assertEqual(mag.name, 'gre_field_mapping_3mm_mag')
def test_full_multi_study(self):
    study = self.create_study(
        FullMultiStudy, 'full',
        [DatasetMatch('a', mrtrix_format, 'ones'),
         DatasetMatch('b', mrtrix_format, 'ones'),
         DatasetMatch('c', mrtrix_format, 'ones')],
        options=[Option('required_op', 'product')])
    d = study.data('d', subject_id='SUBJECT', visit_id='VISIT')
    e = study.data('e')[0]
    f = study.data('f')[0]
    if self.mrtrix_req is not None:
        NiAnalysisNodeMixin.load_module(*self.mrtrix_req)
    try:
        d_mean = float(sp.check_output(
            'mrstats {} -output mean'.format(d.path), shell=True))
        self.assertEqual(d_mean, 2.0)
        e_mean = float(sp.check_output(
            'mrstats {} -output mean'.format(e.path), shell=True))
        self.assertEqual(e_mean, 3.0)
        f_mean = float(sp.check_output(
            'mrstats {} -output mean'.format(f.path), shell=True))
        self.assertEqual(f_mean, 6.0)
    finally:
        if self.mrtrix_req is not None:
            NiAnalysisNodeMixin.unload_module(*self.mrtrix_req)
    # Test option values in MultiStudy
    self.assertEqual(study.data('p1'), 100)
    self.assertEqual(study.data('p2'), '200')
    self.assertEqual(study.data('p3'), 300.0)
    self.assertEqual(study.data('q1'), 150)
    self.assertEqual(study.data('q2'), '250')
    self.assertEqual(study.data('required_op'), 'product')
    # Test option values in SubStudy
    ss1 = study.sub_study('ss1')
    self.assertEqual(ss1.data('o1'), 100)
    self.assertEqual(ss1.data('o2'), '200')
    self.assertEqual(ss1.data('o3'), 300.0)
    ss2 = study.sub_study('ss2')
    self.assertEqual(ss2.data('o1'), 150)
    self.assertEqual(ss2.data('o2'), '250')
    self.assertEqual(ss2.data('o3'), 300.0)
    self.assertEqual(ss2.data('product_op'), 'product')
def test_partial_multi_study(self):
    study = self.create_study(
        PartialMultiStudy, 'partial',
        [DatasetMatch('a', mrtrix_format, 'ones'),
         DatasetMatch('b', mrtrix_format, 'ones'),
         DatasetMatch('c', mrtrix_format, 'ones')],
        options=[Option('ss2_product_op', 'product')])
    ss1_z = study.data('ss1_z')[0]
    ss2_y = study.data('ss2_y')[0]
    ss2_z = study.data('ss2_z')[0]
    if self.mrtrix_req is not None:
        NiAnalysisNodeMixin.load_module(*self.mrtrix_req)
    try:
        ss1_z_mean = float(sp.check_output(
            'mrstats {} -output mean'.format(ss1_z.path), shell=True))
        self.assertEqual(ss1_z_mean, 2.0)
        ss2_y_mean = float(sp.check_output(
            'mrstats {} -output mean'.format(ss2_y.path), shell=True))
        self.assertEqual(ss2_y_mean, 3.0)
        ss2_z_mean = float(sp.check_output(
            'mrstats {} -output mean'.format(ss2_z.path), shell=True))
        self.assertEqual(ss2_z_mean, 6.0)
    finally:
        if self.mrtrix_req is not None:
            NiAnalysisNodeMixin.unload_module(*self.mrtrix_req)
    # Test option values in MultiStudy
    self.assertEqual(study.data('p1'), 1000)
    self.assertEqual(study.data('ss1_o2'), '2')
    self.assertEqual(study.data('ss1_o3'), 3.0)
    self.assertEqual(study.data('ss2_o2'), '20')
    self.assertEqual(study.data('ss2_o3'), 30.0)
    self.assertEqual(study.data('ss2_product_op'), 'product')
    # Test option values in SubStudy
    ss1 = study.sub_study('ss1')
    self.assertEqual(ss1.data('o1'), 1000)
    self.assertEqual(ss1.data('o2'), '2')
    self.assertEqual(ss1.data('o3'), 3.0)
    ss2 = study.sub_study('ss2')
    self.assertEqual(ss2.data('o1'), 1000)
    self.assertEqual(ss2.data('o2'), '20')
    self.assertEqual(ss2.data('o3'), 30.0)
    self.assertEqual(ss2.data('product_op'), 'product')
def test_pipeline_prerequisites(self):
    study = self.create_study(
        ConversionStudy, 'conversion',
        [DatasetMatch('mrtrix', mrtrix_format, 'mrtrix'),
         DatasetMatch('nifti_gz', nifti_gz_format, 'nifti_gz'),
         DatasetMatch('dicom', dicom_format,
                      't1_mprage_sag_p2_iso_1_ADNI'),
         DatasetMatch('directory', directory_format,
                      't1_mprage_sag_p2_iso_1_ADNI'),
         DatasetMatch('zip', zip_format, 'zip')])
    study.data('nifti_gz_from_dicom')
    study.data('mrtrix_from_nifti_gz')
    study.data('nifti_from_mrtrix')
    study.data('directory_from_zip')
    study.data('zip_from_directory')
    self.assertDatasetCreated('nifti_gz_from_dicom.nii.gz', study.name)
    self.assertDatasetCreated('mrtrix_from_nifti_gz.mif', study.name)
    self.assertDatasetCreated('nifti_from_mrtrix.nii', study.name)
    self.assertDatasetCreated('directory_from_zip', study.name)
    self.assertDatasetCreated('zip_from_directory.zip', study.name)
def test_order_match(self):
    study = self.create_study(
        TestMatchStudy, 'test_dicom',
        inputs=[
            DatasetMatch('gre_phase', dicom_format,
                         pattern=self.GRE_PATTERN, order=1,
                         is_regex=True),
            DatasetMatch('gre_mag', dicom_format,
                         pattern=self.GRE_PATTERN, order=0,
                         is_regex=True)])
    phase = study.data('gre_phase')[0]
    mag = study.data('gre_mag')[0]
    self.assertEqual(phase.name, 'gre_field_mapping_3mm_phase')
    self.assertEqual(mag.name, 'gre_field_mapping_3mm_mag')
def setUp(self):
    self.reset_dirs()
    for subject_id in self.SUBJECT_IDS:
        for visit_id in self.SESSION_IDS:
            self.add_session(self.project_dir, subject_id, visit_id)
    self.study = self.create_study(
        TestStudy, 'dummy',
        inputs=[
            DatasetMatch('start', nifti_gz_format, 'start'),
            DatasetMatch('ones_slice', mrtrix_format, 'ones_slice')],
        options={'pipeline_option': True})
    # Calculate MRtrix module required for 'mrstats' commands
    try:
        self.mrtrix_req = Requirement.best_requirement(
            [mrtrix3_req], NiAnalysisNodeMixin.available_modules(),
            NiAnalysisNodeMixin.preloaded_modules())
    except NiAnalysisModulesNotInstalledException:
        self.mrtrix_req = None
def test_id_match(self):
    study = test_dataset.TestMatchStudy(
        name='test_dicom',
        archive=XnatArchive(
            project_id='TEST001', server=SERVER,
            cache_dir=tempfile.mkdtemp()),
        runner=LinearRunner(self.work_dir),
        inputs=[
            DatasetMatch('gre_phase', dicom_format, id=8),
            DatasetMatch('gre_mag', dicom_format, id=7)],
        subject_ids=['DATASET'],
        visit_ids=['DICOMTAGMATCH'])
    phase = study.data('gre_phase')[0]
    mag = study.data('gre_mag')[0]
    self.assertEqual(phase.name, 'gre_field_mapping_3mm_phase')
    self.assertEqual(mag.name, 'gre_field_mapping_3mm_mag')
def test_missing_option(self):
    # Misses the required 'full_required_op' option, which sets
    # the operation of the second node in StudyB's pipeline to
    # 'product'
    missing_option_study = self.create_study(
        MultiMultiStudy, 'multi_multi',
        [DatasetMatch('ss1_x', mrtrix_format, 'ones'),
         DatasetMatch('ss1_y', mrtrix_format, 'ones'),
         DatasetMatch('full_a', mrtrix_format, 'ones'),
         DatasetMatch('full_b', mrtrix_format, 'ones'),
         DatasetMatch('full_c', mrtrix_format, 'ones'),
         DatasetMatch('partial_a', mrtrix_format, 'ones'),
         DatasetMatch('partial_b', mrtrix_format, 'ones'),
         DatasetMatch('partial_c', mrtrix_format, 'ones')],
        options=[Option('partial_ss2_product_op', 'product')])
    self.assertRaises(
        RuntimeError,
        missing_option_study.data,
        'g')
def test_per_session_prereqs(self):
    study = self.create_study(
        ExistingPrereqStudy, self.study_name,
        inputs=[DatasetMatch('start', mrtrix_format, 'ones')])
    study.data('thousands')
    targets = {
        'subject1': {'visit1': 1100, 'visit2': 1110, 'visit3': 1000},
        'subject2': {'visit1': 1110, 'visit2': 1110, 'visit3': 1000},
        'subject3': {'visit1': 1111, 'visit2': 1110, 'visit3': 1000},
        'subject4': {'visit1': 1111, 'visit2': 1110, 'visit3': 1000}}
    for subj_id, visits in self.saved_structure.iteritems():
        for visit_id in visits:
            self.assertStatEqual('mean', 'thousands.mif',
                                 targets[subj_id][visit_id],
                                 self.study_name,
                                 subject=subj_id, visit=visit_id,
                                 frequency='per_session')
def test_summary(self):
    study = DummyStudy(
        self.SUMMARY_STUDY_NAME, self.archive, LinearRunner('ad'),
        inputs=[DatasetMatch('source1', nifti_gz_format, 'source1'),
                DatasetMatch('source2', nifti_gz_format, 'source2'),
                DatasetMatch('source3', nifti_gz_format, 'source3')])
    # TODO: Should test out other file formats as well.
    source_files = [study.input(n)
                    for n in ('source1', 'source2', 'source3')]
    inputnode = pe.Node(
        IdentityInterface(['subject_id', 'visit_id']), 'inputnode')
    inputnode.inputs.subject_id = self.SUBJECT
    inputnode.inputs.visit_id = self.VISIT
    source = self.archive.source(source_files)
    # Test subject sink
    subject_sink_files = [study.bound_data_spec('subject_sink')]
    subject_sink = self.archive.sink(subject_sink_files,
                                     frequency='per_subject',
                                     study_name=self.SUMMARY_STUDY_NAME)
    subject_sink.inputs.name = 'subject_summary'
    subject_sink.inputs.desc = (
        "Tests the sinking of subject-wide datasets")
    # Test visit sink
    visit_sink_files = [study.bound_data_spec('visit_sink')]
    visit_sink = self.archive.sink(visit_sink_files,
                                   frequency='per_visit',
                                   study_name=self.SUMMARY_STUDY_NAME)
    visit_sink.inputs.name = 'visit_summary'
    visit_sink.inputs.desc = (
        "Tests the sinking of visit-wide datasets")
    # Test project sink
    project_sink_files = [study.bound_data_spec('project_sink')]
    project_sink = self.archive.sink(project_sink_files,
                                     frequency='per_project',
                                     study_name=self.SUMMARY_STUDY_NAME)
    project_sink.inputs.name = 'project_summary'
    project_sink.inputs.desc = (
        "Tests the sinking of project-wide datasets")
    # Create workflow connecting them together
    workflow = pe.Workflow('summary_unittest', base_dir=self.work_dir)
    workflow.add_nodes((source, subject_sink, visit_sink, project_sink))
    workflow.connect(inputnode, 'subject_id', source, 'subject_id')
    workflow.connect(inputnode, 'visit_id', source, 'visit_id')
    workflow.connect(inputnode, 'subject_id', subject_sink, 'subject_id')
    workflow.connect(inputnode, 'visit_id', visit_sink, 'visit_id')
    workflow.connect(source, 'source1' + PATH_SUFFIX,
                     subject_sink, 'subject_sink' + PATH_SUFFIX)
    workflow.connect(source, 'source2' + PATH_SUFFIX,
                     visit_sink, 'visit_sink' + PATH_SUFFIX)
    workflow.connect(source, 'source3' + PATH_SUFFIX,
                     project_sink, 'project_sink' + PATH_SUFFIX)
    workflow.run()
    # Check local summary directories were created properly
    subject_dir = self.get_session_dir(frequency='per_subject')
    self.assertEqual(
        sorted(os.listdir(subject_dir)),
        [self.SUMMARY_STUDY_NAME + '_subject_sink.nii.gz'])
    visit_dir = self.get_session_dir(frequency='per_visit')
    self.assertEqual(
        sorted(os.listdir(visit_dir)),
        [self.SUMMARY_STUDY_NAME + '_visit_sink.nii.gz'])
    project_dir = self.get_session_dir(frequency='per_project')
    self.assertEqual(
        sorted(os.listdir(project_dir)),
        [self.SUMMARY_STUDY_NAME + '_project_sink.nii.gz'])
    # Reload the data from the summary directories
    reloadinputnode = pe.Node(
        IdentityInterface(['subject_id', 'visit_id']),
        'reload_inputnode')
    reloadinputnode.inputs.subject_id = self.SUBJECT
    reloadinputnode.inputs.visit_id = self.VISIT
    reloadsource = self.archive.source(
        (source_files + subject_sink_files + visit_sink_files +
         project_sink_files),
        name='reload_source',
        study_name=self.SUMMARY_STUDY_NAME)
    reloadsink = self.archive.sink(
        [study.bound_data_spec(n)
         for n in ('resink1', 'resink2', 'resink3')],
        study_name=self.SUMMARY_STUDY_NAME)
    reloadsink.inputs.name = 'reload_summary'
    reloadsink.inputs.desc = (
        "Tests the reloading of subject and project summary datasets")
    reloadworkflow = pe.Workflow('reload_summary_unittest',
                                 base_dir=self.work_dir)
    reloadworkflow.connect(reloadinputnode, 'subject_id',
                           reloadsource, 'subject_id')
    reloadworkflow.connect(reloadinputnode, 'visit_id',
                           reloadsource, 'visit_id')
    reloadworkflow.connect(reloadinputnode, 'subject_id',
                           reloadsink, 'subject_id')
    reloadworkflow.connect(reloadinputnode, 'visit_id',
                           reloadsink, 'visit_id')
    reloadworkflow.connect(reloadsource, 'subject_sink' + PATH_SUFFIX,
                           reloadsink, 'resink1' + PATH_SUFFIX)
    reloadworkflow.connect(reloadsource, 'visit_sink' + PATH_SUFFIX,
                           reloadsink, 'resink2' + PATH_SUFFIX)
    reloadworkflow.connect(reloadsource, 'project_sink' + PATH_SUFFIX,
                           reloadsink, 'resink3' + PATH_SUFFIX)
    reloadworkflow.run()
    outputs = [f for f in sorted(os.listdir(self.session_dir))
               if f != FIELDS_FNAME]
    self.assertEqual(outputs,
                     [self.SUMMARY_STUDY_NAME + '_resink1.nii.gz',
                      self.SUMMARY_STUDY_NAME + '_resink2.nii.gz',
                      self.SUMMARY_STUDY_NAME + '_resink3.nii.gz',
                      'source1.nii.gz', 'source2.nii.gz',
                      'source3.nii.gz', 'source4.nii.gz'])
def test_pipeline_prerequisites(self):
    study = self.create_study(
        RequirementsStudy, 'requirements',
        [DatasetMatch('ones', nifti_gz_format, 'ones')])
    study.data('twos')
    self.assertDatasetCreated('twos.nii.gz', study.name)
def test_archive_roundtrip(self):
    # Create working dirs and XnatSource node
    archive = XnatArchive(project_id=self.PROJECT, server=SERVER,
                          cache_dir=self.archive_cache_dir)
    study = DummyStudy(
        self.STUDY_NAME, archive, runner=LinearRunner('a_dir'),
        inputs=[
            DatasetMatch('source1', nifti_gz_format, 'source1'),
            DatasetMatch('source2', nifti_gz_format, 'source2'),
            DatasetMatch('source3', nifti_gz_format, 'source3'),
            DatasetMatch('source4', nifti_gz_format, 'source4')])
    # TODO: Should test out other file formats as well.
    source_files = [study.input(n)
                    for n in ('source1', 'source2', 'source3',
                              'source4')]
    sink_files = [study.bound_data_spec(n)
                  for n in ('sink1', 'sink3', 'sink4')]
    inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                        'inputnode')
    inputnode.inputs.subject_id = str(self.SUBJECT)
    inputnode.inputs.visit_id = str(self.VISIT)
    source = archive.source(source_files, study_name=self.STUDY_NAME)
    sink = archive.sink(sink_files, study_name=self.STUDY_NAME)
    sink.inputs.name = 'archive-roundtrip-unittest'
    sink.inputs.desc = (
        "A test session created by archive roundtrip unittest")
    # Create workflow connecting them together
    workflow = pe.Workflow('source-sink-unit-test',
                           base_dir=self.work_dir)
    workflow.add_nodes((source, sink))
    workflow.connect(inputnode, 'subject_id', source, 'subject_id')
    workflow.connect(inputnode, 'visit_id', source, 'visit_id')
    workflow.connect(inputnode, 'subject_id', sink, 'subject_id')
    workflow.connect(inputnode, 'visit_id', sink, 'visit_id')
    for source_file in source_files:
        if source_file.name != 'source2':
            sink_name = source_file.name.replace('source', 'sink')
            workflow.connect(source, source_file.name + PATH_SUFFIX,
                             sink, sink_name + PATH_SUFFIX)
    workflow.run()
    # Check cache was created properly
    self.assertEqual(
        filter_md5_fnames(os.listdir(self.session_cache())),
        ['source1.nii.gz', 'source2.nii.gz', 'source3.nii.gz',
         'source4.nii.gz'])
    expected_sink_datasets = [self.STUDY_NAME + '_sink1',
                              self.STUDY_NAME + '_sink3',
                              self.STUDY_NAME + '_sink4']
    self.assertEqual(
        filter_md5_fnames(os.listdir(self.proc_session_cache())),
        [d + nifti_gz_format.extension
         for d in expected_sink_datasets])
    with self._connect() as mbi_xnat:
        dataset_names = mbi_xnat.experiments[
            self.session_label() +
            XnatArchive.PROCESSED_SUFFIX].scans.keys()
    self.assertEqual(sorted(dataset_names), expected_sink_datasets)
def test_summary(self):
    # Create working dirs and XnatSource node
    archive = XnatArchive(server=SERVER,
                          cache_dir=self.archive_cache_dir,
                          project_id=self.PROJECT)
    study = DummyStudy(
        self.SUMMARY_STUDY_NAME, archive, LinearRunner('ad'),
        inputs=[
            DatasetMatch('source1', nifti_gz_format, 'source1'),
            DatasetMatch('source2', nifti_gz_format, 'source2'),
            DatasetMatch('source3', nifti_gz_format, 'source3')])
    # TODO: Should test out other file formats as well.
    source_files = [study.input(n)
                    for n in ('source1', 'source2', 'source3')]
    inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                        'inputnode')
    inputnode.inputs.subject_id = self.SUBJECT
    inputnode.inputs.visit_id = self.VISIT
    source = archive.source(source_files)
    # Test subject sink
    subject_sink_files = [study.bound_data_spec('subject_sink')]
    subject_sink = archive.sink(subject_sink_files,
                                frequency='per_subject',
                                study_name=self.SUMMARY_STUDY_NAME)
    subject_sink.inputs.name = 'subject_summary'
    subject_sink.inputs.desc = (
        "Tests the sinking of subject-wide datasets")
    # Test visit sink
    visit_sink_files = [study.bound_data_spec('visit_sink')]
    visit_sink = archive.sink(visit_sink_files,
                              frequency='per_visit',
                              study_name=self.SUMMARY_STUDY_NAME)
    visit_sink.inputs.name = 'visit_summary'
    visit_sink.inputs.desc = (
        "Tests the sinking of visit-wide datasets")
    # Test project sink
    project_sink_files = [study.bound_data_spec('project_sink')]
    project_sink = archive.sink(project_sink_files,
                                frequency='per_project',
                                study_name=self.SUMMARY_STUDY_NAME)
    project_sink.inputs.name = 'project_summary'
    project_sink.inputs.desc = (
        "Tests the sinking of project-wide datasets")
    # Create workflow connecting them together
    workflow = pe.Workflow('summary_unittest', base_dir=self.work_dir)
    workflow.add_nodes((source, subject_sink, visit_sink, project_sink))
    workflow.connect(inputnode, 'subject_id', source, 'subject_id')
    workflow.connect(inputnode, 'visit_id', source, 'visit_id')
    workflow.connect(inputnode, 'subject_id', subject_sink, 'subject_id')
    workflow.connect(inputnode, 'visit_id', visit_sink, 'visit_id')
    workflow.connect(source, 'source1' + PATH_SUFFIX,
                     subject_sink, 'subject_sink' + PATH_SUFFIX)
    workflow.connect(source, 'source2' + PATH_SUFFIX,
                     visit_sink, 'visit_sink' + PATH_SUFFIX)
    workflow.connect(source, 'source3' + PATH_SUFFIX,
                     project_sink, 'project_sink' + PATH_SUFFIX)
    workflow.run()
    with self._connect() as mbi_xnat:
        # Check subject summary directories were created properly in
        # cache
        expected_subj_datasets = [self.SUMMARY_STUDY_NAME +
                                  '_subject_sink']
        subject_dir = os.path.join(
            self.archive_cache_dir, self.PROJECT,
            '_'.join((self.PROJECT, self.SUBJECT)),
            '_'.join((self.PROJECT, self.SUBJECT,
                      XnatArchive.SUMMARY_NAME)))
        self.assertEqual(
            filter_md5_fnames(os.listdir(subject_dir)),
            [d + nifti_gz_format.extension
             for d in expected_subj_datasets])
        # and on XNAT
        subject_dataset_names = mbi_xnat.projects[
            self.PROJECT].experiments['_'.join(
                (self.PROJECT, self.SUBJECT,
                 XnatArchive.SUMMARY_NAME))].scans.keys()
        self.assertEqual(expected_subj_datasets, subject_dataset_names)
        # Check visit summary directories were created properly in
        # cache
        expected_visit_datasets = [self.SUMMARY_STUDY_NAME +
                                   '_visit_sink']
        visit_dir = os.path.join(
            self.archive_cache_dir, self.PROJECT,
            self.PROJECT + '_' + XnatArchive.SUMMARY_NAME,
            (self.PROJECT + '_' + XnatArchive.SUMMARY_NAME + '_' +
             self.VISIT))
        self.assertEqual(
            filter_md5_fnames(os.listdir(visit_dir)),
            [d + nifti_gz_format.extension
             for d in expected_visit_datasets])
        # and on XNAT
        visit_dataset_names = mbi_xnat.projects[
            self.PROJECT].experiments['{}_{}_{}'.format(
                self.PROJECT, XnatArchive.SUMMARY_NAME,
                self.VISIT)].scans.keys()
        self.assertEqual(expected_visit_datasets, visit_dataset_names)
        # Check project summary directories were created properly in
        # cache
        expected_proj_datasets = [self.SUMMARY_STUDY_NAME +
                                  '_project_sink']
        project_dir = os.path.join(
            self.archive_cache_dir, self.PROJECT,
            self.PROJECT + '_' + XnatArchive.SUMMARY_NAME,
            self.PROJECT + '_' + XnatArchive.SUMMARY_NAME + '_' +
            XnatArchive.SUMMARY_NAME)
        self.assertEqual(
            filter_md5_fnames(os.listdir(project_dir)),
            [d + nifti_gz_format.extension
             for d in expected_proj_datasets])
        # and on XNAT
        project_dataset_names = mbi_xnat.projects[
            self.PROJECT].experiments['{}_{sum}_{sum}'.format(
                self.PROJECT,
                sum=XnatArchive.SUMMARY_NAME)].scans.keys()
        self.assertEqual(expected_proj_datasets, project_dataset_names)
    # Reload the data from the summary directories
    reloadinputnode = pe.Node(
        IdentityInterface(['subject_id', 'visit_id']),
        'reload_inputnode')
    reloadinputnode.inputs.subject_id = self.SUBJECT
    reloadinputnode.inputs.visit_id = self.VISIT
    reloadsource = archive.source(
        (source_files + subject_sink_files + visit_sink_files +
         project_sink_files),
        name='reload_source',
        study_name=self.SUMMARY_STUDY_NAME)
    reloadsink = archive.sink(
        [study.bound_data_spec(n)
         for n in ('resink1', 'resink2', 'resink3')],
        study_name=self.SUMMARY_STUDY_NAME)
    reloadsink.inputs.name = 'reload_summary'
    reloadsink.inputs.desc = (
        "Tests the reloading of subject and project summary datasets")
    reloadworkflow = pe.Workflow('reload_summary_unittest',
                                 base_dir=self.work_dir)
    reloadworkflow.connect(reloadinputnode, 'subject_id',
                           reloadsource, 'subject_id')
    reloadworkflow.connect(reloadinputnode, 'visit_id',
                           reloadsource, 'visit_id')
    reloadworkflow.connect(reloadinputnode, 'subject_id',
                           reloadsink, 'subject_id')
    reloadworkflow.connect(reloadinputnode, 'visit_id',
                           reloadsink, 'visit_id')
    reloadworkflow.connect(reloadsource, 'subject_sink' + PATH_SUFFIX,
                           reloadsink, 'resink1' + PATH_SUFFIX)
    reloadworkflow.connect(reloadsource, 'visit_sink' + PATH_SUFFIX,
                           reloadsink, 'resink2' + PATH_SUFFIX)
    reloadworkflow.connect(reloadsource, 'project_sink' + PATH_SUFFIX,
                           reloadsink, 'resink3' + PATH_SUFFIX)
    reloadworkflow.run()
    # Check that the resinked datasets are present in the local cache
    self.assertEqual(
        filter_md5_fnames(os.listdir(self.proc_session_cache())),
        [self.SUMMARY_STUDY_NAME + '_resink1.nii.gz',
         self.SUMMARY_STUDY_NAME + '_resink2.nii.gz',
         self.SUMMARY_STUDY_NAME + '_resink3.nii.gz'])
    # and on XNAT
    with self._connect() as mbi_xnat:
        resinked_dataset_names = mbi_xnat.projects[
            self.PROJECT].experiments[
                self.session_label() +
                XnatArchive.PROCESSED_SUFFIX].scans.keys()
        self.assertEqual(
            sorted(resinked_dataset_names),
            [self.SUMMARY_STUDY_NAME + '_resink1',
             self.SUMMARY_STUDY_NAME + '_resink2',
             self.SUMMARY_STUDY_NAME + '_resink3'])
def test_delayed_download(self):
    """
    Tests handling of race conditions where separate processes attempt
    to cache the same dataset
    """
    cache_dir = os.path.join(self.base_cache_path,
                             'delayed-download-cache')
    DATASET_NAME = 'source1'
    target_path = os.path.join(self.session_cache(cache_dir),
                               DATASET_NAME + nifti_gz_format.extension)
    tmp_dir = target_path + '.download'
    shutil.rmtree(cache_dir, ignore_errors=True)
    os.makedirs(cache_dir)
    archive = XnatArchive(server=SERVER, cache_dir=cache_dir,
                          project_id=self.PROJECT)
    study = DummyStudy(
        self.STUDY_NAME, archive, LinearRunner('ad'),
        inputs=[DatasetMatch(DATASET_NAME, nifti_gz_format,
                             DATASET_NAME)])
    source = archive.source([study.input(DATASET_NAME)],
                            name='delayed_source',
                            study_name='delayed_study')
    source.inputs.subject_id = self.SUBJECT
    source.inputs.visit_id = self.VISIT
    result1 = source.run()
    source1_path = result1.outputs.source1_path
    self.assertTrue(os.path.exists(source1_path))
    self.assertEqual(
        source1_path, target_path,
        "Output file path '{}' not equal to target path '{}'".format(
            source1_path, target_path))
    # Clear cache to start again
    shutil.rmtree(cache_dir, ignore_errors=True)
    # Create tmp_dir before running interface, this time should wait
    # for 1 second, check to see that the session hasn't been created
    # and then clear it and redownload the dataset.
    os.makedirs(tmp_dir)
    source.inputs.race_cond_delay = 1
    result2 = source.run()
    source1_path = result2.outputs.source1_path
    # Clear cache to start again
    shutil.rmtree(cache_dir, ignore_errors=True)
    # This time simulate a concurrent download in a separate process
    # that keeps updating tmp_dir, so the source should wait for it to
    # finish instead of clearing it and redownloading the dataset.
    internal_dir = os.path.join(tmp_dir, 'internal')
    deleted_tmp_dir = tmp_dir + '.deleted'

    def simulate_download():
        "Simulates a download in a separate process"
        os.makedirs(internal_dir)
        time.sleep(5)
        # Modify a file in the temp dir to make the source download
        # keep waiting
        logger.info('Updating simulated download directory')
        with open(os.path.join(internal_dir, 'download'), 'a') as f:
            f.write('downloading')
        time.sleep(10)
        # Simulate the finalising of the download by copying the
        # previously downloaded file into place and deleting the temp
        # dir.
        logger.info('Finalising simulated download')
        with open(target_path, 'a') as f:
            f.write('simulated')
        shutil.move(tmp_dir, deleted_tmp_dir)

    source.inputs.race_cond_delay = 10
    p = Process(target=simulate_download)
    p.start()  # Start the simulated download in separate process
    time.sleep(1)
    source.run()  # Run the local download
    p.join()
    with open(os.path.join(deleted_tmp_dir, 'internal',
                           'download')) as f:
        d = f.read()
    self.assertEqual(d, 'downloading')
    with open(target_path) as f:
        d = f.read()
    self.assertEqual(d, 'simulated')
def test_digest_check(self):
    """
    Tests check of downloaded digests to see if file needs to be
    redownloaded
    """
    cache_dir = os.path.join(self.base_cache_path, 'digest-check-cache')
    DATASET_NAME = 'source1'
    STUDY_NAME = 'digest_check_study'
    dataset_fpath = DATASET_NAME + nifti_gz_format.extension
    source_target_path = os.path.join(self.session_cache(cache_dir),
                                      dataset_fpath)
    md5_path = source_target_path + XnatArchive.MD5_SUFFIX
    shutil.rmtree(cache_dir, ignore_errors=True)
    os.makedirs(cache_dir)
    archive = XnatArchive(project_id=self.PROJECT, server=SERVER,
                          cache_dir=cache_dir)
    study = DummyStudy(
        STUDY_NAME, archive, LinearRunner('ad'),
        inputs=[DatasetMatch(DATASET_NAME, nifti_gz_format,
                             DATASET_NAME)])
    source = archive.source([study.input(DATASET_NAME)],
                            name='digest_check_source',
                            study_name=STUDY_NAME)
    source.inputs.subject_id = self.SUBJECT
    source.inputs.visit_id = self.VISIT
    source.run()
    self.assertTrue(os.path.exists(md5_path))
    self.assertTrue(os.path.exists(source_target_path))
    with open(md5_path) as f:
        digests = json.load(f)
    # Stash the downloaded file in a new location and create a dummy
    # file instead
    stash_path = source_target_path + '.stash'
    shutil.move(source_target_path, stash_path)
    with open(source_target_path, 'w') as f:
        f.write('dummy')
    # Run the download, which shouldn't download as the digests are
    # the same
    source.run()
    with open(source_target_path) as f:
        d = f.read()
    self.assertEqual(d, 'dummy')
    # Replace the digest with a dummy
    os.remove(md5_path)
    digests[dataset_fpath] = 'dummy_digest'
    with open(md5_path, 'w') as f:
        json.dump(digests, f)
    # Retry the download, which should now download since the digests
    # differ
    source.run()
    with open(source_target_path) as f:
        d = f.read()
    with open(stash_path) as f:
        e = f.read()
    self.assertEqual(d, e)
    # Resink the source file and check that the generated MD5 digest is
    # stored in identical format
    sink_archive = XnatArchive(project_id=self.DIGEST_SINK_PROJECT,
                               server=SERVER, cache_dir=cache_dir)
    DATASET_NAME = 'sink1'
    sink = sink_archive.sink([study.bound_data_spec(DATASET_NAME)],
                             name='digest_check_sink',
                             study_name=STUDY_NAME)
    sink.inputs.name = 'digest_check_sink'
    sink.inputs.desc = "Tests the generation of MD5 digests"
    sink.inputs.subject_id = self.DIGEST_SINK_SUBJECT
    sink.inputs.visit_id = self.VISIT
    sink.inputs.sink1_path = source_target_path
    sink_fpath = (STUDY_NAME + '_' + DATASET_NAME +
                  nifti_gz_format.extension)
    sink_target_path = os.path.join(
        (self.session_cache(cache_dir,
                            project=self.DIGEST_SINK_PROJECT,
                            subject=self.DIGEST_SINK_SUBJECT) +
         XnatArchive.PROCESSED_SUFFIX),
        sink_fpath)
    sink_md5_path = sink_target_path + XnatArchive.MD5_SUFFIX
    sink.run()
    with open(md5_path) as f:
        source_digests = json.load(f)
    with open(sink_md5_path) as f:
        sink_digests = json.load(f)
    self.assertEqual(
        source_digests[dataset_fpath], sink_digests[sink_fpath],
        "Source digest ({}) did not equal sink digest ({})".format(
            source_digests[dataset_fpath], sink_digests[sink_fpath]))