def test_full_multi_analysis(self):
    analysis = self.create_analysis(
        FullMultiAnalysis, 'full', [
            FilesetFilter('a', 'ones', text_format),
            FilesetFilter('b', 'ones', text_format),
            FilesetFilter('c', 'ones', text_format)],
        parameters=[Parameter('required_op', 'mul')])
    d, e, f = analysis.data(('d', 'e', 'f'), derive=True,
                            subject_id='SUBJECT', visit_id='VISIT')
    self.assertContentsEqual(d, 2.0)
    self.assertContentsEqual(e, 3.0)
    self.assertContentsEqual(f, 6.0)
    # Test parameter values in MultiAnalysis
    self.assertEqual(analysis._get_parameter('p1').value, 100)
    self.assertEqual(analysis._get_parameter('p2').value, '200')
    self.assertEqual(analysis._get_parameter('p3').value, 300.0)
    self.assertEqual(analysis._get_parameter('q1').value, 150)
    self.assertEqual(analysis._get_parameter('q2').value, '250')
    self.assertEqual(analysis._get_parameter('required_op').value, 'mul')
    # Test parameter values in SubComp
    ss1 = analysis.subcomp('ss1')
    self.assertEqual(ss1._get_parameter('o1').value, 100)
    self.assertEqual(ss1._get_parameter('o2').value, '200')
    self.assertEqual(ss1._get_parameter('o3').value, 300.0)
    ss2 = analysis.subcomp('ss2')
    self.assertEqual(ss2._get_parameter('o1').value, 150)
    self.assertEqual(ss2._get_parameter('o2').value, '250')
    self.assertEqual(ss2._get_parameter('o3').value, 300.0)
    self.assertEqual(ss2._get_parameter('product_op').value, 'mul')
def test_partial_multi_analysis(self):
    analysis = self.create_analysis(
        PartialMultiAnalysis, 'partial', [
            FilesetFilter('a', 'ones', text_format),
            FilesetFilter('b', 'ones', text_format),
            FilesetFilter('c', 'ones', text_format)],
        parameters=[Parameter('ss2_product_op', 'mul')])
    ss1_z = analysis.data('ss1_z', subject_id='SUBJECT', visit_id='VISIT',
                          derive=True)
    ss2_z = list(analysis.data('ss2_z', derive=True))[0]
    self.assertContentsEqual(ss1_z, 2.0)
    self.assertContentsEqual(analysis.data('ss2_y', derive=True), 3.0)
    self.assertContentsEqual(ss2_z, 6.0)
    # Test parameter values in MultiAnalysis
    self.assertEqual(analysis._get_parameter('p1').value, 1000)
    self.assertEqual(analysis._get_parameter('ss1_o2').value, '2')
    self.assertEqual(analysis._get_parameter('ss1_o3').value, 3.0)
    self.assertEqual(analysis._get_parameter('ss2_o2').value, '20')
    self.assertEqual(analysis._get_parameter('ss2_o3').value, 30.0)
    self.assertEqual(
        analysis._get_parameter('ss2_product_op').value, 'mul')
    # Test parameter values in SubComp
    ss1 = analysis.subcomp('ss1')
    self.assertEqual(ss1._get_parameter('o1').value, 1000)
    self.assertEqual(ss1._get_parameter('o2').value, '2')
    self.assertEqual(ss1._get_parameter('o3').value, 3.0)
    ss2 = analysis.subcomp('ss2')
    self.assertEqual(ss2._get_parameter('o1').value, 1000)
    self.assertEqual(ss2._get_parameter('o2').value, '20')
    self.assertEqual(ss2._get_parameter('o3').value, 30.0)
    self.assertEqual(ss2._get_parameter('product_op').value, 'mul')
def test_multi_analysis_generated_cls_pickle(self):
    cls_dct = {
        'add_subcomp_specs': [
            SubCompSpec('ss1', BasicTestAnalysis),
            SubCompSpec('ss2', BasicTestAnalysis)]}
    MultiGeneratedClass = MultiAnalysisMetaClass(
        'MultiGeneratedClass', (MultiAnalysis,), cls_dct)
    analysis = self.create_analysis(
        MultiGeneratedClass, 'multi_gen_cls',
        inputs=[
            FilesetFilter('ss1_fileset', 'fileset', text_format),
            FilesetFilter('ss2_fileset', 'fileset', text_format)])
    pkl_path = os.path.join(self.work_dir, 'multi_gen_cls.pkl')
    with open(pkl_path, 'wb') as f:
        pkl.dump(analysis, f)
    del MultiGeneratedClass
    with open(pkl_path, 'rb') as f:
        regen = pkl.load(f)
    self.assertContentsEqual(regen.data('ss2_out_fileset', derive=True),
                             'foo')
def test_input_validation_fail(self):
    self.assertRaises(
        ArcanaUsageError,
        self.create_analysis,
        TestInputValidationAnalysis,
        'test_validation_fail',
        inputs=[
            FilesetFilter('a', 'a', test3_format),
            FilesetFilter('b', 'b', test3_format)])
def test_input_validation(self):
    self.create_analysis(
        TestInputValidationAnalysis,
        'test_input_validation',
        inputs=[
            FilesetFilter('a', 'a', test1_format),
            FilesetFilter('b', 'b', test3_format),
            FilesetFilter('c', 'a', test1_format),
            FilesetFilter('d', 'd', test3_format)])
def make_analysis(self):
    return self.create_analysis(
        ExampleAnalysis, 'dummy',
        inputs=[
            FilesetFilter('one', 'one_input', text_format),
            FilesetFilter('ten', 'ten_input', text_format)],
        parameters={'pipeline_parameter': True})
def test_repository_roundtrip(self):
    analysis = DummyAnalysis(
        self.STUDY_NAME, self.dataset, processor=SingleProc('a_dir'),
        inputs=[
            FilesetFilter('source1', 'source1', text_format),
            FilesetFilter('source2', 'source2', text_format),
            FilesetFilter('source3', 'source3', text_format),
            FilesetFilter('source4', 'source4', text_format)])
    # TODO: Should test out other file formats as well.
    source_files = ('source1', 'source2', 'source3', 'source4')
    sink_files = ('sink1', 'sink3', 'sink4')
    inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                        'inputnode')
    inputnode.inputs.subject_id = self.SUBJECT
    inputnode.inputs.visit_id = self.VISIT
    source = pe.Node(RepositorySource(
        analysis.bound_spec(f).slice for f in source_files),
        name='source')
    dummy_pipeline = analysis.dummy_pipeline()
    dummy_pipeline.cap()
    sink = pe.Node(RepositorySink(
        (analysis.bound_spec(f).slice for f in sink_files),
        dummy_pipeline),
        name='sink')
    sink.inputs.name = 'repository_sink'
    sink.inputs.desc = (
        "A test session created by repository roundtrip unittest")
    # Create workflow connecting them together
    workflow = pe.Workflow('source_sink_unit_test', base_dir=self.work_dir)
    workflow.add_nodes((source, sink))
    workflow.connect(inputnode, 'subject_id', source, 'subject_id')
    workflow.connect(inputnode, 'visit_id', source, 'visit_id')
    workflow.connect(inputnode, 'subject_id', sink, 'subject_id')
    workflow.connect(inputnode, 'visit_id', sink, 'visit_id')
    for source_name in source_files:
        if not source_name.endswith('2'):
            sink_name = source_name.replace('source', 'sink')
            workflow.connect(source, source_name + PATH_SUFFIX,
                             sink, sink_name + PATH_SUFFIX)
    workflow.run()
    # Check local directory was created properly
    outputs = [
        f for f in sorted(os.listdir(
            self.get_session_dir(from_analysis=self.STUDY_NAME)))
        if f not in (LocalFileSystemRepo.FIELDS_FNAME,
                     LocalFileSystemRepo.PROV_DIR)]
    self.assertEqual(outputs, ['sink1.txt', 'sink3.txt', 'sink4.txt'])
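# Illustrative sketch (not part of the test suite): the same source -> sink
# wiring pattern used in test_repository_roundtrip, reduced to plain nipype
# with a Function interface standing in for RepositorySource/RepositorySink.
# Only nipype itself is assumed; node and workflow names are made up.
import tempfile

import nipype.pipeline.engine as pe
from nipype.interfaces.utility import Function, IdentityInterface


def _passthrough(value):
    return value


inputnode = pe.Node(IdentityInterface(['value']), 'inputnode')
inputnode.inputs.value = 'SUBJECT01'
passthrough = pe.Node(Function(input_names=['value'], output_names=['value'],
                               function=_passthrough),
                      name='passthrough')
wf = pe.Workflow('minimal_roundtrip_example', base_dir=tempfile.mkdtemp())
wf.connect(inputnode, 'value', passthrough, 'value')
# wf.run()  # uncomment to execute with the default (linear) plugin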
def test_format_conversions(self):
    analysis = self.create_analysis(
        ConversionAnalysis, 'conversion', [
            FilesetFilter('text', 'text', text_format),
            FilesetFilter('directory', 'directory', directory_format),
            FilesetFilter('zip', 'zip', zip_format)])
    self.assertCreated(
        list(analysis.data('text_from_text', derive=True))[0])
    self.assertCreated(
        list(analysis.data('directory_from_zip_on_input', derive=True))[0])
    self.assertCreated(
        list(analysis.data('zip_from_directory_on_input', derive=True))[0])
    self.assertCreated(
        list(analysis.data('directory_from_zip_on_output', derive=True))[0])
    self.assertCreated(
        list(analysis.data('zip_from_directory_on_output', derive=True))[0])
def test_derivable(self):
    # Test vanilla analysis
    analysis = self.create_analysis(
        TestDerivableAnalysis, 'analysis',
        inputs={'required': 'required'})
    self.assertTrue(analysis.spec('derivable').derivable)
    self.assertTrue(analysis.spec('another_derivable').derivable)
    self.assertFalse(analysis.spec('missing_input').derivable)
    self.assertFalse(analysis.spec('requires_switch').derivable)
    self.assertFalse(analysis.spec('requires_switch2').derivable)
    self.assertTrue(analysis.spec('requires_foo').derivable)
    self.assertFalse(analysis.spec('requires_bar').derivable)
    # Test analysis with 'switch' enabled
    analysis_with_switch = self.create_analysis(
        TestDerivableAnalysis, 'analysis_with_switch',
        inputs=[FilesetFilter('required', 'required', text_format)],
        parameters={'switch': True})
    self.assertTrue(analysis_with_switch.spec('requires_switch').derivable)
    self.assertTrue(analysis_with_switch.spec('requires_switch2').derivable)
    # Test analysis with branch == 'bar'
    analysis_bar_branch = self.create_analysis(
        TestDerivableAnalysis, 'analysis_bar_branch',
        inputs=[FilesetFilter('required', 'required', text_format)],
        parameters={'branch': 'bar'})
    self.assertFalse(analysis_bar_branch.spec('requires_foo').derivable)
    self.assertTrue(analysis_bar_branch.spec('requires_bar').derivable)
    # Test analysis with optional input
    analysis_with_input = self.create_analysis(
        TestDerivableAnalysis, 'analysis_with_inputs',
        inputs=[FilesetFilter('required', 'required', text_format),
                FilesetFilter('optional', 'required', text_format)])
    self.assertTrue(analysis_with_input.spec('missing_input').derivable)
    # Test analysis with an unhandled branch value
    analysis_unhandled = self.create_analysis(
        TestDerivableAnalysis, 'analysis_unhandled',
        inputs=[FilesetFilter('required', 'required', text_format)],
        parameters={'branch': 'wee'})
    self.assertRaises(
        ArcanaDesignError,
        getattr, analysis_unhandled.spec('requires_foo'), 'derivable')
def test_order_match(self):
    analysis = self.create_analysis(
        TestMatchAnalysis, 'test_dicom',
        inputs=[
            FilesetFilter('gre_phase', pattern=self.GRE_PATTERN,
                          valid_formats=dicom_format, order=1,
                          is_regex=True),
            FilesetFilter('gre_mag', pattern=self.GRE_PATTERN,
                          valid_formats=dicom_format, order=0,
                          is_regex=True)])
    phase = list(analysis.data('gre_phase', derive=True))[0]
    mag = list(analysis.data('gre_mag', derive=True))[0]
    self.assertEqual(phase.name, 'gre_field_mapping_3mm_phase')
    self.assertEqual(mag.name, 'gre_field_mapping_3mm_mag')
def test_id_match(self):
    analysis = test_data.TestMatchAnalysis(
        name='test_dicom',
        dataset=XnatRepo(
            server=SERVER,
            cache_dir=tempfile.mkdtemp()).dataset(self.project),
        processor=SingleProc(self.work_dir),
        inputs=[
            FilesetFilter('gre_phase', valid_formats=dicom_format, id=7),
            FilesetFilter('gre_mag', valid_formats=dicom_format, id=6)])
    phase = list(analysis.data('gre_phase', derive=True))[0]
    mag = list(analysis.data('gre_mag', derive=True))[0]
    self.assertEqual(phase.name, 'gre_field_mapping_3mm_phase')
    self.assertEqual(mag.name, 'gre_field_mapping_3mm_mag')
def test_per_session_prereqs(self):
    # Generate all data for the 'thousand' spec
    analysis = self.create_analysis(
        ExistingPrereqAnalysis, self.STUDY_NAME,
        inputs=[FilesetFilter('one', 'one', text_format)])
    analysis.derive('thousand')
    targets = {
        'subject1': {
            'visit1': 1100.0,
            'visit2': 1110.0,
            'visit3': 1000.0},
        'subject2': {
            'visit1': 1111.0,
            'visit2': 1110.0,
            'visit3': 1000.0}}
    tree = self.dataset.tree
    for subj_id, visits in self.PROJECT_STRUCTURE.items():
        for visit_id in visits:
            session = tree.subject(subj_id).session(visit_id)
            fileset = session.fileset('thousand',
                                      from_analysis=self.STUDY_NAME)
            fileset.format = text_format
            self.assertContentsEqual(fileset, targets[subj_id][visit_id],
                                     "{}:{}".format(subj_id, visit_id))
def test_fields_roundtrip(self):
    repository = XnatRepo(server=SERVER, cache_dir=self.cache_dir)
    dataset = repository.dataset(self.project)
    analysis = DummyAnalysis(
        self.STUDY_NAME,
        dataset=dataset,
        processor=SingleProc('a_dir'),
        inputs=[FilesetFilter('source1', 'source1', text_format)])
    fields = ['field{}'.format(i) for i in range(1, 4)]
    dummy_pipeline = analysis.dummy_pipeline()
    dummy_pipeline.cap()
    sink = pe.Node(RepositorySink(
        (analysis.bound_spec(f).slice for f in fields),
        dummy_pipeline),
        name='fields_sink')
    sink.inputs.field1_field = field1 = 1
    sink.inputs.field2_field = field2 = 2.0
    sink.inputs.field3_field = field3 = '3'
    sink.inputs.subject_id = self.SUBJECT
    sink.inputs.visit_id = self.VISIT
    sink.inputs.desc = "Test sink of fields"
    sink.inputs.name = 'test_sink'
    sink.run()
    source = pe.Node(RepositorySource(
        analysis.bound_spec(f).slice for f in fields),
        name='fields_source')
    source.inputs.visit_id = self.VISIT
    source.inputs.subject_id = self.SUBJECT
    source.inputs.desc = "Test source of fields"
    source.inputs.name = 'test_source'
    results = source.run()
    self.assertEqual(results.outputs.field1_field, field1)
    self.assertEqual(results.outputs.field2_field, field2)
    self.assertEqual(results.outputs.field3_field, field3)
def test_multi_multi_analysis(self):
    analysis = self.create_analysis(
        MultiMultiAnalysis, 'multi_multi', [
            FilesetFilter('ss1_x', 'ones', text_format),
            FilesetFilter('ss1_y', 'ones', text_format),
            FilesetFilter('full_a', 'ones', text_format),
            FilesetFilter('full_b', 'ones', text_format),
            FilesetFilter('full_c', 'ones', text_format),
            FilesetFilter('partial_a', 'ones', text_format),
            FilesetFilter('partial_b', 'ones', text_format),
            FilesetFilter('partial_c', 'ones', text_format)],
        parameters=[
            Parameter('full_required_op', 'mul'),
            Parameter('partial_ss2_product_op', 'mul')])
    self.assertContentsEqual(analysis.data('g', derive=True), 11.0)
    # Test parameter values in the 'full' MultiAnalysis
    self.assertEqual(analysis._get_parameter('full_p1').value, 100)
    self.assertEqual(analysis._get_parameter('full_p2').value, '200')
    self.assertEqual(analysis._get_parameter('full_p3').value, 300.0)
    self.assertEqual(analysis._get_parameter('full_q1').value, 150)
    self.assertEqual(analysis._get_parameter('full_q2').value, '250')
    self.assertEqual(
        analysis._get_parameter('full_required_op').value, 'mul')
    # Test parameter values in its SubComps
    ss1 = analysis.subcomp('full').subcomp('ss1')
    self.assertEqual(ss1._get_parameter('o1').value, 100)
    self.assertEqual(ss1._get_parameter('o2').value, '200')
    self.assertEqual(ss1._get_parameter('o3').value, 300.0)
    ss2 = analysis.subcomp('full').subcomp('ss2')
    self.assertEqual(ss2._get_parameter('o1').value, 150)
    self.assertEqual(ss2._get_parameter('o2').value, '250')
    self.assertEqual(ss2._get_parameter('o3').value, 300.0)
    self.assertEqual(ss2._get_parameter('product_op').value, 'mul')
    # Test parameter values in the 'partial' MultiAnalysis
    self.assertEqual(analysis._get_parameter('partial_p1').value, 1000)
    self.assertEqual(analysis._get_parameter('partial_ss1_o2').value, '2')
    self.assertEqual(analysis._get_parameter('partial_ss1_o3').value, 3.0)
    self.assertEqual(analysis._get_parameter('partial_ss2_o2').value, '20')
    self.assertEqual(analysis._get_parameter('partial_ss2_o3').value, 30.0)
    self.assertEqual(
        analysis._get_parameter('partial_ss2_product_op').value, 'mul')
    # Test parameter values in its SubComps
    ss1 = analysis.subcomp('partial').subcomp('ss1')
    self.assertEqual(ss1._get_parameter('o1').value, 1000)
    self.assertEqual(ss1._get_parameter('o2').value, '2')
    self.assertEqual(ss1._get_parameter('o3').value, 3.0)
    ss2 = analysis.subcomp('partial').subcomp('ss2')
    self.assertEqual(ss2._get_parameter('o1').value, 1000)
    self.assertEqual(ss2._get_parameter('o2').value, '20')
    self.assertEqual(ss2._get_parameter('o3').value, 30.0)
    self.assertEqual(ss2._get_parameter('product_op').value, 'mul')
def test_module_load_in_map(self):
    analysis = self.create_analysis(
        RequirementsAnalysis, 'requirements',
        [FilesetFilter('ones', 'ones', text_format)],
        environment=ModulesEnv())
    threes = analysis.data('threes', derive=True)
    fours = analysis.data('fours', derive=True)
    self.assertEqual(next(iter(threes)).value, 3)
    self.assertEqual(next(iter(fours)).value, 4)
    self.assertEqual(ModulesEnv.loaded(), {})
def test_dcm2niix(self):
    analysis = self.create_analysis(
        DummyAnalysis, 'concatenate',
        environment=TEST_ENV,
        inputs=[
            FilesetFilter('input_fileset', 't2_tse_tra_p2_448',
                          dicom_format)])
    self.assertFilesetCreated(
        next(iter(analysis.data('output_fileset', derive=True))))
def test_pipeline_prerequisites(self):
    analysis = self.create_analysis(
        ConversionAnalysis, 'conversion', [
            FilesetFilter('mrtrix', 'mrtrix', text_format),
            FilesetFilter('nifti_gz', 'nifti_gz', text_format),
            FilesetFilter('dicom', 't1_mprage_sag_p2_iso_1_ADNI',
                          dicom_format),
            FilesetFilter('directory', 't1_mprage_sag_p2_iso_1_ADNI',
                          directory_format),
            FilesetFilter('zip', 'zip', zip_format)])
    self.assertFilesetCreated(
        next(iter(analysis.data('nifti_gz_from_dicom', derive=True))))
    self.assertFilesetCreated(
        next(iter(analysis.data('mrtrix_from_nifti_gz', derive=True))))
    self.assertFilesetCreated(
        next(iter(analysis.data('nifti_from_mrtrix', derive=True))))
    self.assertFilesetCreated(
        next(iter(analysis.data('directory_from_zip', derive=True))))
    self.assertFilesetCreated(
        next(iter(analysis.data('zip_from_directory', derive=True))))
def test_raised_error(self):
    analysis = self.create_analysis(
        BasicTestAnalysis, 'base',
        inputs=[FilesetFilter('fileset', 'fileset', text_format)])
    # Disable error logs as the pipeline should always throw an error
    logger = logging.getLogger('nipype.workflow')
    orig_level = logger.level
    logger.setLevel(50)
    self.assertRaises(RuntimeError, analysis.derive, 'raise_error')
    logger.setLevel(orig_level)
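# Optional refactor sketch: the save/raise/restore pattern in
# test_raised_error could be wrapped in a small context manager so the
# logger level is restored even if the assertion fails. The helper name is
# hypothetical and only the standard library is used.
import logging
from contextlib import contextmanager


@contextmanager
def quiet_logger(name, level=logging.CRITICAL):
    """Temporarily raise the threshold of the named logger."""
    logger = logging.getLogger(name)
    orig_level = logger.level
    logger.setLevel(level)
    try:
        yield logger
    finally:
        logger.setLevel(orig_level)

# Usage inside the test:
#     with quiet_logger('nipype.workflow'):
#         self.assertRaises(RuntimeError, analysis.derive, 'raise_error')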
class TestDicomTagMatch(BaseTestCase):

    IMAGE_TYPE_TAG = ('0008', '0008')
    GRE_PATTERN = 'gre_field_mapping_3mm.*'
    PHASE_IMAGE_TYPE = ['ORIGINAL', 'PRIMARY', 'P', 'ND']
    MAG_IMAGE_TYPE = ['ORIGINAL', 'PRIMARY', 'M', 'ND', 'NORM']
    DICOM_MATCH = [
        FilesetFilter('gre_phase', GRE_PATTERN, dicom_format,
                      dicom_tags={IMAGE_TYPE_TAG: PHASE_IMAGE_TYPE},
                      is_regex=True),
        FilesetFilter('gre_mag', GRE_PATTERN, dicom_format,
                      dicom_tags={IMAGE_TYPE_TAG: MAG_IMAGE_TYPE},
                      is_regex=True)]
    INPUTS_FROM_REF_DIR = True
    REF_FORMATS = [dicom_format]

    def test_dicom_match(self):
        analysis = self.create_analysis(
            TestMatchAnalysis, 'test_dicom',
            inputs=self.DICOM_MATCH)
        phase = list(analysis.data('gre_phase', derive=True))[0]
        mag = list(analysis.data('gre_mag', derive=True))[0]
        self.assertEqual(phase.name, 'gre_field_mapping_3mm_phase')
        self.assertEqual(mag.name, 'gre_field_mapping_3mm_mag')

    def test_order_match(self):
        analysis = self.create_analysis(
            TestMatchAnalysis, 'test_dicom',
            inputs=[
                FilesetFilter('gre_phase', pattern=self.GRE_PATTERN,
                              valid_formats=dicom_format, order=1,
                              is_regex=True),
                FilesetFilter('gre_mag', pattern=self.GRE_PATTERN,
                              valid_formats=dicom_format, order=0,
                              is_regex=True)])
        phase = list(analysis.data('gre_phase', derive=True))[0]
        mag = list(analysis.data('gre_mag', derive=True))[0]
        self.assertEqual(phase.name, 'gre_field_mapping_3mm_phase')
        self.assertEqual(mag.name, 'gre_field_mapping_3mm_mag')
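# Illustrative sketch (not part of the test suite): how the (0008, 0008)
# ImageType values matched by DICOM_MATCH above can be inspected directly.
# Assumes pydicom is installed; the file path below is hypothetical.
import pydicom


def print_image_type(dcm_path):
    """Print the ImageType tag of a single DICOM file."""
    ds = pydicom.dcmread(dcm_path, stop_before_pixels=True)
    # (0008, 0008), e.g. ['ORIGINAL', 'PRIMARY', 'M', 'ND', 'NORM'] for the
    # magnitude series matched by MAG_IMAGE_TYPE
    print(list(ds[0x0008, 0x0008].value))

# print_image_type('/path/to/gre_field_mapping_3mm_mag/00001.dcm')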
def test_generated_cls_pickle(self):
    GeneratedClass = AnalysisMetaClass('GeneratedClass',
                                       (BasicTestAnalysis,), {})
    analysis = self.create_analysis(
        GeneratedClass, 'gen_cls',
        inputs=[FilesetFilter('fileset', 'fileset', text_format)])
    pkl_path = os.path.join(self.work_dir, 'gen_cls.pkl')
    with open(pkl_path, 'wb') as f:
        pkl.dump(analysis, f)
    del GeneratedClass
    with open(pkl_path, 'rb') as f:
        regen = pkl.load(f)
    self.assertContentsEqual(regen.data('out_fileset', derive=True), 'foo')
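# Why the pickle round-trip above is worth testing (illustrative, standard
# library only): pickle stores a reference to an instance's class by module
# attribute name, so instances of classes created on the fly are not
# normally picklable. The tests here check that classes generated via
# AnalysisMetaClass/MultiAnalysisMetaClass survive that round trip anyway.
import pickle


def _ephemeral_instance():
    # The class only exists inside this function, so pickle cannot look it
    # up by name in any module
    return type('Ephemeral', (object,), {})()


try:
    pickle.dumps(_ephemeral_instance())
except (pickle.PicklingError, AttributeError) as err:
    print('plain dynamically created classes cannot be pickled:', err)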
def test_generated_method_pickle_fail(self):
    cls_dct = {
        'add_subcomp_specs': [
            SubCompSpec('ss1', BasicTestAnalysis),
            SubCompSpec('ss2', BasicTestAnalysis)],
        'default_fileset_pipeline': MultiAnalysis.translate('ss1',
                                                            'pipeline')}
    MultiGeneratedClass = MultiAnalysisMetaClass(
        'MultiGeneratedClass', (MultiAnalysis,), cls_dct)
    analysis = self.create_analysis(
        MultiGeneratedClass, 'multi_gen_cls',
        inputs=[
            FilesetFilter('ss1_fileset', 'fileset', text_format),
            FilesetFilter('ss2_fileset', 'fileset', text_format)])
    pkl_path = os.path.join(self.work_dir, 'multi_gen_cls.pkl')
    with open(pkl_path, 'wb') as f:
        self.assertRaises(ArcanaCantPickleAnalysisError,
                          pkl.dump, analysis, f)
def test_cache_download(self):
    repository = XnatRepo(server=SERVER, cache_dir=tempfile.mkdtemp())
    dataset = repository.dataset(self.project)
    analysis = self.create_analysis(
        TestAnalysis, 'cache_download',
        inputs=[
            FilesetFilter('fileset1', 'fileset1', text_format),
            FilesetFilter('fileset3', 'fileset3', text_format)],
        dataset=dataset)
    analysis.cache_inputs()
    for subject_id, visits in list(self.STRUCTURE.items()):
        subj_dir = op.join(
            repository.cache_dir, self.project,
            '{}_{}'.format(self.project, subject_id))
        for visit_id in visits:
            sess_dir = op.join(
                subj_dir,
                '{}_{}_{}'.format(self.project, subject_id, visit_id))
            for inpt in analysis.inputs:
                self.assertTrue(
                    op.exists(op.join(sess_dir,
                                      inpt.name + '-' + inpt.name)))
def test_scan_label_quality(self):
    tmp_dir = tempfile.mkdtemp()
    repository = XnatRepo(server=SERVER, cache_dir=tmp_dir)
    dataset = repository.dataset(self.project,
                                 subject_ids=[self.SUBJECT],
                                 visit_ids=[self.VISIT])
    tree = dataset.tree
    for accepted, expected in (
            (None, '1unusable'),
            ((None, 'questionable', 'usable'), '2unlabelled'),
            (('questionable', 'usable'), '3questionable'),
            ('usable', '4usable')):
        inpt = FilesetFilter('dummy', order=0, valid_formats=text_format,
                             acceptable_quality=accepted)
        matched = inpt.match(tree).item(subject_id=self.SUBJECT,
                                        visit_id=self.VISIT)
        self.assertEqual(matched.name, expected)
def test_missing_parameter(self):
    # Misses the required 'full_required_op' parameter, which sets
    # the operation of the second node in AnalysisB's pipeline to
    # 'product'
    inputs = [
        FilesetFilter('ss1_x', 'ones', text_format),
        FilesetFilter('ss1_y', 'ones', text_format),
        FilesetFilter('full_a', 'ones', text_format),
        FilesetFilter('full_b', 'ones', text_format),
        FilesetFilter('full_c', 'ones', text_format),
        FilesetFilter('partial_a', 'ones', text_format),
        FilesetFilter('partial_b', 'ones', text_format),
        FilesetFilter('partial_c', 'ones', text_format)]
    missing_parameter_analysis = self.create_analysis(
        MultiMultiAnalysis, 'multi_multi', inputs,
        parameters=[Parameter('partial_ss2_product_op', 'mul')])
    self.assertRaises(NotSpecifiedRequiredParameter,
                      missing_parameter_analysis.derive, 'g')
    missing_parameter_analysis2 = self.create_analysis(
        MultiMultiAnalysis, 'multi_multi', inputs,
        parameters=[Parameter('full_required_op', 'mul')])
    self.assertRaises(NotSpecifiedRequiredParameter,
                      missing_parameter_analysis2.derive, 'g')
    provided_parameters_analysis = self.create_analysis(
        MultiMultiAnalysis, 'multi_multi', inputs,
        parameters=[
            Parameter('partial_ss2_product_op', 'mul'),
            Parameter('full_required_op', 'mul')])
    g = list(provided_parameters_analysis.data('g', derive=True))[0]
    self.assertContentsEqual(g, 11.0)
def add_sessions(self):
    BaseMultiSubjectTestCase.add_sessions(self)
    # Create an analysis object in order to generate appropriate
    # provenance for the existing "derived" data
    derived_filesets = [f for f in self.DATASET_CONTENTS if f != 'one']
    analysis = self.create_analysis(
        ExistingPrereqAnalysis, self.STUDY_NAME,
        dataset=self.local_dataset,
        inputs=[FilesetFilter('one', 'one', text_format)])
    # Get all pipelines in the analysis
    pipelines = {
        n: getattr(analysis, '{}_pipeline'.format(n))()
        for n in derived_filesets}
    for node in analysis.dataset.tree:
        for fileset in node.filesets:
            if fileset.basename != 'one' and fileset.exists:
                # Generate the expected provenance record for each
                # pipeline and save it in the local dataset
                pipelines[fileset.name].cap()
                record = pipelines[fileset.name].expected_record(node)
                self.local_dataset.put_record(record)
    analysis.clear_caches()  # Reset dataset trees
def test_repository_roundtrip(self):
    # Create the repository and the XnatSource/XnatSink nodes
    repository = XnatRepo(server=SERVER, cache_dir=self.cache_dir)
    dataset = repository.dataset(self.project)
    analysis = DummyAnalysis(
        self.STUDY_NAME,
        dataset=dataset,
        processor=SingleProc('a_dir'),
        inputs=[
            FilesetFilter('source1', 'source1', text_format),
            FilesetFilter('source2', 'source2', text_format),
            FilesetFilter('source3', 'source3', text_format),
            FilesetFilter('source4', 'source4', text_format)])
    # TODO: Should test out other file formats as well.
    source_files = ['source1', 'source2', 'source3', 'source4']
    sink_files = ['sink1', 'sink3', 'sink4']
    inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                        'inputnode')
    inputnode.inputs.subject_id = str(self.SUBJECT)
    inputnode.inputs.visit_id = str(self.VISIT)
    source = pe.Node(RepositorySource(
        analysis.bound_spec(f).slice for f in source_files),
        name='source')
    dummy_pipeline = analysis.dummy_pipeline()
    dummy_pipeline.cap()
    sink = pe.Node(RepositorySink(
        (analysis.bound_spec(f).slice for f in sink_files),
        dummy_pipeline),
        name='sink')
    sink.inputs.name = 'repository-roundtrip-unittest'
    sink.inputs.desc = (
        "A test session created by repository roundtrip unittest")
    # Create workflow connecting them together
    workflow = pe.Workflow('source-sink-unit-test', base_dir=self.work_dir)
    workflow.add_nodes((source, sink))
    workflow.connect(inputnode, 'subject_id', source, 'subject_id')
    workflow.connect(inputnode, 'visit_id', source, 'visit_id')
    workflow.connect(inputnode, 'subject_id', sink, 'subject_id')
    workflow.connect(inputnode, 'visit_id', sink, 'visit_id')
    for source_name in source_files:
        if source_name != 'source2':
            sink_name = source_name.replace('source', 'sink')
            workflow.connect(source, source_name + PATH_SUFFIX,
                             sink, sink_name + PATH_SUFFIX)
    workflow.run()
    # Check that the cache was created properly
    self.assertEqual(
        filter_scans(os.listdir(self.session_cache())),
        ['source1-source1', 'source2-source2',
         'source3-source3', 'source4-source4'])
    expected_sink_filesets = ['sink1', 'sink3', 'sink4']
    self.assertEqual(
        filter_scans(os.listdir(
            self.session_cache(from_analysis=self.STUDY_NAME))),
        [(e + '-' + e) for e in expected_sink_filesets])
    # and check that the filesets were uploaded to XNAT
    with self._connect() as login:
        fileset_names = filter_scans(
            login.experiments[self.session_label(
                from_analysis=self.STUDY_NAME)].scans.keys())
        self.assertEqual(fileset_names, expected_sink_filesets)
def test_summary(self):
    # Create the repository and the XnatSource node
    repository = XnatRepo(server=SERVER, cache_dir=self.cache_dir)
    analysis = DummyAnalysis(
        self.SUMMARY_STUDY_NAME,
        repository.dataset(self.project),
        SingleProc('ad'),
        inputs=[
            FilesetFilter('source1', 'source1', text_format),
            FilesetFilter('source2', 'source2', text_format),
            FilesetFilter('source3', 'source3', text_format)])
    # TODO: Should test out other file formats as well.
    source_files = ['source1', 'source2', 'source3']
    inputnode = pe.Node(IdentityInterface(['subject_id', 'visit_id']),
                        'inputnode')
    inputnode.inputs.subject_id = self.SUBJECT
    inputnode.inputs.visit_id = self.VISIT
    source = pe.Node(RepositorySource(
        [analysis.bound_spec(f).slice for f in source_files]),
        name='source')
    # Test subject sink
    subject_sink_files = ['subject_sink']
    dummy_pipeline = analysis.dummy_pipeline()
    dummy_pipeline.cap()
    subject_sink = pe.Node(RepositorySink(
        [analysis.bound_spec(f).slice for f in subject_sink_files],
        dummy_pipeline),
        name='subject_sink')
    subject_sink.inputs.name = 'subject_summary'
    subject_sink.inputs.desc = (
        "Tests the sinking of subject-wide filesets")
    # Test visit sink
    visit_sink_files = ['visit_sink']
    visit_sink = pe.Node(RepositorySink(
        [analysis.bound_spec(f).slice for f in visit_sink_files],
        dummy_pipeline),
        name='visit_sink')
    visit_sink.inputs.name = 'visit_summary'
    visit_sink.inputs.desc = (
        "Tests the sinking of visit-wide filesets")
    # Test project sink
    analysis_sink_files = ['analysis_sink']
    analysis_sink = pe.Node(RepositorySink(
        [analysis.bound_spec(f).slice for f in analysis_sink_files],
        dummy_pipeline),
        name='analysis_sink')
    analysis_sink.inputs.name = 'project_summary'
    analysis_sink.inputs.desc = (
        "Tests the sinking of project-wide filesets")
    # Create workflow connecting them together
    workflow = pe.Workflow('summary_unittest', base_dir=self.work_dir)
    workflow.add_nodes((source, subject_sink, visit_sink, analysis_sink))
    workflow.connect(inputnode, 'subject_id', source, 'subject_id')
    workflow.connect(inputnode, 'visit_id', source, 'visit_id')
    workflow.connect(inputnode, 'subject_id', subject_sink, 'subject_id')
    workflow.connect(inputnode, 'visit_id', visit_sink, 'visit_id')
    workflow.connect(source, 'source1' + PATH_SUFFIX,
                     subject_sink, 'subject_sink' + PATH_SUFFIX)
    workflow.connect(source, 'source2' + PATH_SUFFIX,
                     visit_sink, 'visit_sink' + PATH_SUFFIX)
    workflow.connect(source, 'source3' + PATH_SUFFIX,
                     analysis_sink, 'analysis_sink' + PATH_SUFFIX)
    workflow.run()
    analysis.clear_caches()  # Refresh the cached repository tree object
    with self._connect() as login:
        # Check subject summary directories were created properly in the
        # cache
        expected_subj_filesets = ['subject_sink']
        subject_dir = self.session_cache(
            visit=XnatRepo.SUMMARY_NAME,
            from_analysis=self.SUMMARY_STUDY_NAME)
        self.assertEqual(filter_scans(os.listdir(subject_dir)),
                         [(e + '-' + e) for e in expected_subj_filesets])
        # and on XNAT
        subject_fileset_names = filter_scans(
            login.projects[self.project].experiments[self.session_label(
                visit=XnatRepo.SUMMARY_NAME,
                from_analysis=self.SUMMARY_STUDY_NAME)].scans.keys())
        self.assertEqual(expected_subj_filesets, subject_fileset_names)
        # Check visit summary directories were created properly in the
        # cache
        expected_visit_filesets = ['visit_sink']
        visit_dir = self.session_cache(
            subject=XnatRepo.SUMMARY_NAME,
            from_analysis=self.SUMMARY_STUDY_NAME)
        self.assertEqual(filter_scans(os.listdir(visit_dir)),
                         [(e + '-' + e) for e in expected_visit_filesets])
        # and on XNAT
        visit_fileset_names = filter_scans(
            login.projects[self.project].experiments[self.session_label(
                subject=XnatRepo.SUMMARY_NAME,
                from_analysis=self.SUMMARY_STUDY_NAME)].scans.keys())
        self.assertEqual(expected_visit_filesets, visit_fileset_names)
        # Check project summary directories were created properly in the
        # cache
        expected_proj_filesets = ['analysis_sink']
        project_dir = self.session_cache(
            subject=XnatRepo.SUMMARY_NAME,
            visit=XnatRepo.SUMMARY_NAME,
            from_analysis=self.SUMMARY_STUDY_NAME)
        self.assertEqual(filter_scans(os.listdir(project_dir)),
                         [(e + '-' + e) for e in expected_proj_filesets])
        # and on XNAT
        project_fileset_names = filter_scans(
            login.projects[self.project].experiments[self.session_label(
                subject=XnatRepo.SUMMARY_NAME,
                visit=XnatRepo.SUMMARY_NAME,
                from_analysis=self.SUMMARY_STUDY_NAME)].scans.keys())
        self.assertEqual(expected_proj_filesets, project_fileset_names)
    # Reload the data from the summary directories
    reloadinputnode = pe.Node(
        IdentityInterface(['subject_id', 'visit_id']),
        'reload_inputnode')
    reloadinputnode.inputs.subject_id = self.SUBJECT
    reloadinputnode.inputs.visit_id = self.VISIT
    reloadsource_per_subject = pe.Node(RepositorySource(
        analysis.bound_spec(f).slice for f in subject_sink_files),
        name='reload_source_per_subject')
    reloadsource_per_visit = pe.Node(RepositorySource(
        analysis.bound_spec(f).slice for f in visit_sink_files),
        name='reload_source_per_visit')
    reloadsource_per_dataset = pe.Node(RepositorySource(
        analysis.bound_spec(f).slice for f in analysis_sink_files),
        name='reload_source_per_dataset')
    reloadsink = pe.Node(RepositorySink(
        (analysis.bound_spec(f).slice
         for f in ['resink1', 'resink2', 'resink3']),
        dummy_pipeline),
        name='reload_sink')
    reloadsink.inputs.name = 'reload_summary'
    reloadsink.inputs.desc = (
        "Tests the reloading of subject and project summary filesets")
    reloadworkflow = pe.Workflow('reload_summary_unittest',
                                 base_dir=self.work_dir)
    for node in (reloadsource_per_subject, reloadsource_per_visit,
                 reloadsource_per_dataset, reloadsink):
        for iterator in ('subject_id', 'visit_id'):
            reloadworkflow.connect(reloadinputnode, iterator,
                                   node, iterator)
    reloadworkflow.connect(reloadsource_per_subject,
                           'subject_sink' + PATH_SUFFIX,
                           reloadsink,
                           'resink1' + PATH_SUFFIX)
    reloadworkflow.connect(reloadsource_per_visit,
                           'visit_sink' + PATH_SUFFIX,
                           reloadsink,
                           'resink2' + PATH_SUFFIX)
    reloadworkflow.connect(reloadsource_per_dataset,
                           'analysis_sink' + PATH_SUFFIX,
                           reloadsink,
                           'resink3' + PATH_SUFFIX)
    reloadworkflow.run()
    # Check that the filesets were resunk into the cache
    self.assertEqual(
        filter_scans(os.listdir(self.session_cache(
            from_analysis=self.SUMMARY_STUDY_NAME))),
        ['resink1-resink1', 'resink2-resink2', 'resink3-resink3'])
    # and on XNAT
    with self._connect() as login:
        resinked_fileset_names = filter_scans(
            login.projects[self.project].experiments[self.session_label(
                from_analysis=self.SUMMARY_STUDY_NAME)].scans.keys())
        self.assertEqual(sorted(resinked_fileset_names),
                         ['resink1', 'resink2', 'resink3'])
def test_checksums(self):
    """
    Tests the check of downloaded checksums to see whether a file needs
    to be redownloaded
    """
    cache_dir = op.join(self.work_dir, 'cache-checksum-check')
    DATASET_NAME = 'source1'
    STUDY_NAME = 'checksum_check_analysis'
    fileset_fname = DATASET_NAME + text_format.extension
    source_target_path = op.join(self.session_cache(cache_dir),
                                 DATASET_NAME + '-' + DATASET_NAME)
    md5_path = source_target_path + XnatRepo.MD5_SUFFIX
    source_target_fpath = op.join(source_target_path, fileset_fname)
    shutil.rmtree(cache_dir, ignore_errors=True)
    os.makedirs(cache_dir)
    source_repository = XnatRepo(server=SERVER, cache_dir=cache_dir)
    source_dataset = source_repository.dataset(self.project)
    sink_repository = XnatRepo(server=SERVER, cache_dir=cache_dir)
    sink_dataset = sink_repository.dataset(self.checksum_sink_project,
                                           subject_ids=['SUBJECT'],
                                           visit_ids=['VISIT'],
                                           fill_tree=True)
    analysis = DummyAnalysis(
        STUDY_NAME,
        dataset=sink_dataset,
        processor=SingleProc('ad'),
        inputs=[FilesetFilter(DATASET_NAME, DATASET_NAME, text_format,
                              dataset=source_dataset)])
    source = pe.Node(RepositorySource(
        [analysis.bound_spec(DATASET_NAME).slice]),
        name='checksum_check_source')
    source.inputs.subject_id = self.SUBJECT
    source.inputs.visit_id = self.VISIT
    source.run()
    self.assertTrue(op.exists(md5_path))
    self.assertTrue(op.exists(source_target_fpath))
    with open(md5_path) as f:
        checksums = json.load(f)
    # Stash the downloaded file in a new location and create a dummy
    # file instead
    stash_path = source_target_path + '.stash'
    shutil.move(source_target_path, stash_path)
    os.mkdir(source_target_path)
    with open(source_target_fpath, 'w') as f:
        f.write('dummy')
    # Run the download, which shouldn't re-download the file as the
    # checksums are the same
    source.run()
    with open(source_target_fpath) as f:
        d = f.read()
    self.assertEqual(d, 'dummy')
    # Replace the checksum with a dummy
    os.remove(md5_path)
    checksums['.'] = 'dummy_checksum'
    with open(md5_path, 'w', **JSON_ENCODING) as f:
        json.dump(checksums, f, indent=2)
    # Retry the download, which should now re-download the file since
    # the checksums differ
    source.run()
    with open(source_target_fpath) as f:
        d = f.read()
    with open(op.join(stash_path, fileset_fname)) as f:
        e = f.read()
    self.assertEqual(d, e)
    # Resink the source file and check that the generated MD5 checksum
    # is stored in an identical format
    DATASET_NAME = 'sink1'
    dummy_pipeline = analysis.dummy_pipeline()
    dummy_pipeline.cap()
    sink = pe.Node(RepositorySink(
        [analysis.bound_spec(DATASET_NAME).slice],
        dummy_pipeline),
        name='checksum_check_sink')
    sink.inputs.name = 'checksum_check_sink'
    sink.inputs.desc = "Tests the generation of MD5 checksums"
    sink.inputs.subject_id = self.SUBJECT
    sink.inputs.visit_id = self.VISIT
    sink.inputs.sink1_path = source_target_fpath
    sink_target_path = op.join(
        self.session_cache(cache_dir,
                           project=self.checksum_sink_project,
                           subject=self.SUBJECT,
                           from_analysis=STUDY_NAME),
        DATASET_NAME + '-' + DATASET_NAME)
    sink_md5_path = sink_target_path + XnatRepo.MD5_SUFFIX
    sink.run()
    with open(md5_path) as f:
        source_checksums = json.load(f)
    with open(sink_md5_path) as f:
        sink_checksums = json.load(f)
    self.assertEqual(
        source_checksums, sink_checksums,
        "Source checksums ({}) did not equal sink checksums ({})".format(
            source_checksums, sink_checksums))
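# Illustrative sketch (not Arcana's actual implementation): per-file MD5
# checksums, like those compared against the cached .md5 files above, can be
# computed locally with the standard library. The key layout ('.' for a
# single-file fileset, relative paths otherwise) is an assumption here.
import hashlib
import os


def local_checksums(fileset_dir):
    """Return {relative_path: md5_hexdigest} for every file in a directory."""
    checksums = {}
    for root, _, fnames in os.walk(fileset_dir):
        for fname in sorted(fnames):
            fpath = os.path.join(root, fname)
            md5 = hashlib.md5()
            with open(fpath, 'rb') as f:
                for chunk in iter(lambda: f.read(2 ** 20), b''):
                    md5.update(chunk)
            checksums[os.path.relpath(fpath, fileset_dir)] = md5.hexdigest()
    return checksums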
def test_delayed_download(self):
    """
    Tests the handling of race conditions where separate processes
    attempt to cache the same fileset
    """
    cache_dir = op.join(self.work_dir, 'cache-delayed-download')
    DATASET_NAME = 'source1'
    target_path = op.join(self.session_cache(cache_dir), DATASET_NAME,
                          DATASET_NAME + text_format.extension)
    tmp_dir = target_path + '.download'
    shutil.rmtree(cache_dir, ignore_errors=True)
    os.makedirs(cache_dir)
    repository = XnatRepo(server=SERVER, cache_dir=cache_dir)
    dataset = repository.dataset(self.project)
    analysis = DummyAnalysis(
        self.STUDY_NAME, dataset, SingleProc('ad'),
        inputs=[FilesetFilter(DATASET_NAME, DATASET_NAME, text_format)])
    source = pe.Node(RepositorySource(
        [analysis.bound_spec(DATASET_NAME).slice]),
        name='delayed_source')
    source.inputs.subject_id = self.SUBJECT
    source.inputs.visit_id = self.VISIT
    result1 = source.run()
    source1_path = result1.outputs.source1_path
    self.assertTrue(op.exists(source1_path))
    self.assertEqual(
        source1_path, target_path,
        "Output file path '{}' not equal to target path '{}'".format(
            source1_path, target_path))
    # Clear the cache to start again
    shutil.rmtree(cache_dir, ignore_errors=True)
    # Create tmp_dir before running the interface. This time the source
    # should wait for 1 second, check that the session hasn't been
    # created, then clear the tmp_dir and redownload the fileset.
    os.makedirs(tmp_dir)
    source.inputs.race_cond_delay = 1
    result2 = source.run()
    source1_path = result2.outputs.source1_path
    # Clear the cache to start again
    shutil.rmtree(cache_dir, ignore_errors=True)
    # This time another process appears to be actively downloading the
    # fileset, so the source should keep waiting until that download is
    # finalised and then use the cached file.
    internal_dir = op.join(tmp_dir, 'internal')
    deleted_tmp_dir = tmp_dir + '.deleted'

    def simulate_download():
        "Simulates a download in a separate process"
        os.makedirs(internal_dir)
        time.sleep(5)
        # Modify a file in the temp dir to make the source download
        # keep waiting
        logger.info('Updating simulated download directory')
        with open(op.join(internal_dir, 'download'), 'a') as f:
            f.write('downloading')
        time.sleep(10)
        # Simulate the finalising of the download by copying the
        # previously downloaded file into place and deleting the temp
        # dir.
        logger.info('Finalising simulated download')
        with open(target_path, 'a') as f:
            f.write('simulated')
        shutil.move(tmp_dir, deleted_tmp_dir)

    source.inputs.race_cond_delay = 10
    p = Process(target=simulate_download)
    p.start()  # Start the simulated download in a separate process
    time.sleep(1)
    source.run()  # Run the local download
    p.join()
    with open(op.join(deleted_tmp_dir, 'internal', 'download')) as f:
        d = f.read()
    self.assertEqual(d, 'downloading')
    with open(target_path) as f:
        d = f.read()
    self.assertEqual(d, 'simulated')
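# Illustrative sketch (not Arcana's actual implementation): the general
# "wait for a concurrent download" behaviour exercised above. A process that
# finds a '<target>.download' directory keeps waiting while that directory
# is being updated, and only takes over the download itself once it has been
# idle for longer than `delay` seconds. Standard library only.
import os
import shutil
import time


def wait_for_concurrent_download(target_path, delay=30, poll=1):
    """Return True if another process finished the download for us."""
    tmp_dir = target_path + '.download'
    while os.path.exists(tmp_dir):
        try:
            age = time.time() - max(
                (os.path.getmtime(os.path.join(root, name))
                 for root, dirs, files in os.walk(tmp_dir)
                 for name in dirs + files),
                default=os.path.getmtime(tmp_dir))
        except OSError:
            continue  # the directory changed under us; check again
        if age > delay:
            # The other download appears to have stalled; clear it and let
            # the caller download the fileset itself
            shutil.rmtree(tmp_dir, ignore_errors=True)
            return False
        time.sleep(poll)
    return os.path.exists(target_path)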
WORK_PATH = os.path.join('/scratch', 'dq13', 'aspree', 'qsm')
CACHE_PROJECT_PATH = os.path.join(WORK_PATH, 'project.pkl')
try:
    os.makedirs(WORK_PATH)
except OSError as e:
    if e.errno != errno.EEXIST:
        raise
session_ids_path = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    '..', 'resources', 'old_swi_coils_remaining.txt')
print(session_ids_path)
with open(session_ids_path) as f:
    ids = f.read().split()
PROJECT_ID = 'MRH017'
filesets = {FilesetFilter('coils', 'swi_coils', zip_format)}
visit_ids = ['MR01']
repository = XnatRepo(cache_dir='/scratch/dq13/xnat_cache3')
if args.cache_project:
    project = repository.project(PROJECT_ID, subject_ids=ids,
                                 visit_ids=visit_ids)
    with open(CACHE_PROJECT_PATH, 'wb') as f:
        pkl.dump(project, f)
else:
    with open(CACHE_PROJECT_PATH, 'rb') as f:
        project = pkl.load(f)
repository.cache(PROJECT_ID, filesets, subject_ids=ids,