def test_difference_when_many_diffs_h_vs_ss(self):
     """Header vs Seqscape: the header's sample name and library id are the expected diff."""
     from_header = SAMFileHeaderMetadata(
         '/seq/123.bam',
         samples={'name': {'S9'}, 'accession_number': {'Acc9'}},
         libraries={'internal_id': {'123'}},
         studies={},
     )
     from_seqscape = SeqscapeMetadata(
         samples={'name': {'S1'}},
         libraries={'internal_id': {'444'}},
         studies={},
     )
     # Only identifier types present on both sides are expected in the diff.
     expected_diff = {
         'samples': {'name': {'S9'}},
         'libraries': {'internal_id': {'123'}},
     }
     result = from_header.difference(from_seqscape)
     self.assertEqual(result, expected_diff)
 def test_difference_when_diffs_h_vs_i(self):
     """Header vs iRODS: only the header's sample name 'S100' is expected in the diff."""
     irods_samples = {
         'name': {'S1'},
         'accession_number': {'EGA1'},
         'internal_id': {'1'},
     }
     irods_libraries = {'name': {'123'}, 'internal_id': {'123'}}
     irods_studies = {
         'name': {"Crohns disease"},
         'accession_number': {'EGAS4'},
         'internal_id': {'4'},
     }
     from_irods = IrodsSeqFileMetadata('/seq/123.bam',
                                       samples=irods_samples,
                                       libraries=irods_libraries,
                                       studies=irods_studies)

     header_samples = {
         'name': {'S100'},
         'accession_number': set(),
         'internal_id': set(),
     }
     from_header = SAMFileHeaderMetadata('/seq/123.bam',
                                         samples=header_samples,
                                         libraries={'internal_id': {'123'}},
                                         studies={})

     result = from_header.difference(from_irods)
     self.assertDictEqual(result, {'samples': {'name': {'S100'}}})
 def test_difference_when_there_are_diffs_h_vs_ss2(self):
     """Header vs Seqscape with different sample names: the header's name is the expected diff."""
     from_header = SAMFileHeaderMetadata(
         '/seq/123.bam', samples={'name': {'S9'}}, libraries={}, studies={})
     from_seqscape = SeqscapeMetadata(
         samples={'name': {'S1'}}, libraries={}, studies={})
     expected_diff = {'samples': {'name': {'S9'}}}
     result = from_header.difference(from_seqscape)
     self.assertEqual(result, expected_diff)
 def test_difference_when_diffs_h_vs_i(self):
     """Compare header metadata against iRODS metadata; expect only the sample name to differ."""
     path = '/seq/123.bam'
     from_irods = IrodsSeqFileMetadata(
         path,
         samples=dict(name={'S1'}, accession_number={'EGA1'}, internal_id={'1'}),
         libraries=dict(name={'123'}, internal_id={'123'}),
         studies=dict(name={"Crohns disease"}, accession_number={'EGAS4'}, internal_id={'4'}),
     )
     from_header = SAMFileHeaderMetadata(
         path,
         samples=dict(name={'S100'}, accession_number=set(), internal_id=set()),
         libraries=dict(internal_id={'123'}),
         studies={},
     )
     result = from_header.difference(from_irods)
     self.assertDictEqual(result, {'samples': {'name': {'S100'}}})
 def test_fix_metadata_2(self):
     """fix_metadata is expected to keep the sample names and reduce library internal ids to {123}."""
     under_test = SAMFileHeaderMetadata(fpath='some_path')
     under_test.samples = {'name': {'sample1', 'sample2', 'sample3'}}
     under_test.libraries = {'internal_id': {-1, '', 123}, 'name': set()}

     wanted = SAMFileHeaderMetadata(fpath='some_path')
     wanted.samples = {'name': {'sample1', 'sample2', 'sample3'}}
     wanted.libraries = {'internal_id': {123}, 'name': set()}

     under_test.fix_metadata()

     for attr in ('samples', 'libraries'):
         self.assertDictEqual(getattr(under_test, attr), getattr(wanted, attr))
 def test_difference_when_there_are_diff_type_ids_h_vs_ss(self):
     """Header carries an extra accession number; with equal names the expected diff is empty."""
     header_samples = {'name': {'S1'}, 'accession_number': {'Acc1'}}
     from_header = SAMFileHeaderMetadata(
         '/seq/123.bam', samples=header_samples, libraries={}, studies={})
     from_seqscape = SeqscapeMetadata(
         samples={'name': {'S1'}}, libraries={}, studies={})
     result = from_header.difference(from_seqscape)
     self.assertEqual(result, {})
 def test_fetch(self):
     """Fetch header metadata for test_data/A.bam and compare it to the expected object.

     The expected library names are written as a set literal with each name
     listed once; the original listed two of them twice, which a set collapses,
     so the compared value is unchanged.
     """
     fpath = 'test_data/A.bam'
     result = SAMFileHeaderMetadataProvider.fetch_metadata(fpath)
     expected = SAMFileHeaderMetadata(
         fpath=fpath,
         fname='A.bam',
         samples={
             'name': {'A_J'},
             'internal_id': set(),
             'accession_number': set(),
         },
         platforms={'SLX'},
         libraries={
             'name': {
                 'A_J_SLX_200_NOPCR_2',
                 'A_J_SLX_200_NOPCR_3',
                 'A_J_SLX_300_DSS_1',
                 'A_J_SLX_300_DSS_2',
                 'A_J_SLX_300_DSS_3',
                 'A_J_SLX_500_DSS_1',
                 'A_J_SLX_500_DSS_2',
                 'A_J_SLX_500_DSS_3',
                 'A_J_SLX_500_HC_2',
             },
             'internal_id': set(),
             'accession_number': set(),
         })
     self.assertEqual(result, expected)
 def test_fix_metadata_1(self):
     """fix_metadata is expected to drop None, '' and -1 from the sample and library id sets."""
     under_test = SAMFileHeaderMetadata(fpath='some_path')
     under_test.samples = {'name': {'sample1', 'sample2', 'sample3', None}}
     under_test.libraries = {'internal_id': {-1, '', 123}, 'name': {'', None}}

     wanted = SAMFileHeaderMetadata(fpath='some_path')
     wanted.samples = {'name': {'sample1', 'sample2', 'sample3'}}
     wanted.libraries = {'internal_id': {123}, 'name': set()}

     under_test.fix_metadata()

     self.assertDictEqual(under_test.samples, wanted.samples)
     self.assertDictEqual(under_test.libraries, wanted.libraries)
 def test_check_for_invalid_ids_4(self):
     """An empty name and a None accession number are expected to produce a FAILURE result."""
     sample_ids = {'name': {'Ana', ''}, 'id': set(), 'accession_number': {'EGA1', None}}
     outcome = SAMFileHeaderMetadata._check_for_invalid_ids(sample_ids, 'sample')
     self.assertEqual(outcome.result, RESULT.FAILURE)
 def test_difference_when_many_diffs_h_vs_ss(self):
     """Header vs Seqscape with differing sample name and library id: both are expected in the diff."""
     header_samples = {'name': {'S9'}, 'accession_number': {'Acc9'}}
     header_libraries = {'internal_id': {'123'}}
     header_metadata = SAMFileHeaderMetadata(
         '/seq/123.bam', samples=header_samples, libraries=header_libraries, studies={})

     seqscape_metadata = SeqscapeMetadata(
         samples={'name': {'S1'}}, libraries={'internal_id': {'444'}}, studies={})

     result = header_metadata.difference(seqscape_metadata)

     self.assertEqual(
         result,
         {'samples': {'name': {'S9'}}, 'libraries': {'internal_id': {'123'}}})
Ejemplo n.º 11
0
 def fetch_metadata(cls, fpath, irods=False):
     """Build a SAMFileHeaderMetadata from the header of the file at `fpath`.

     The header text comes from IrodsSamFileHeaderExtractor when `irods` is
     truthy and from LustreSamFileHeaderExtractor otherwise. It is parsed,
     its RG tags are parsed in turn, and the resulting sample and library
     identifiers are separated by type before being passed to the result.
     """
     extractor = IrodsSamFileHeaderExtractor if irods else LustreSamFileHeaderExtractor
     parsed_header = SAMFileHeaderParser.parse(extractor.extract(fpath))
     read_groups = SAMFileRGTagParser.parse(parsed_header.rg_tags)
     by_type = EntityIdentifier.separate_identifiers_by_type
     return SAMFileHeaderMetadata(
         fpath=fpath,
         samples=by_type(read_groups.samples),
         libraries=by_type(read_groups.libraries),
         platforms=read_groups.platforms,
     )
Ejemplo n.º 12
0
    def test_mdata_from_diff_srcs_when_different_id_types(self):
        """Cross-source check with matching sample names: four results, all SUCCESS, are expected."""
        # NOTE(review): the metadata objects are created with '/seq/123.bam'
        # while the lookup dicts are keyed by '/seq/213.bam' — confirm that
        # this mismatch is intentional.
        lookup_key = '/seq/213.bam'
        from_irods = IrodsSeqFileMetadata(
            '/seq/123.bam',
            samples={'name': {'S1'}, 'accession_number': {'EGA1'}, 'internal_id': set()},
            libraries={},
            studies={},
        )
        from_header = SAMFileHeaderMetadata(
            '/seq/123.bam', samples={'name': {'S1'}}, libraries={}, studies={})
        from_seqscape = SeqscapeMetadata(
            samples={'name': {'S1'}}, libraries={}, studies={})

        issues_dict = defaultdict(list)
        FileMetadataComparison.check_metadata_across_different_sources(
            {lookup_key: from_irods},
            {lookup_key: from_header},
            {lookup_key: from_seqscape},
            issues_dict,
        )

        check_results = issues_dict[lookup_key]
        self.assertEqual(4, len(check_results))
        self.assertSetEqual({check.result for check in check_results}, {RESULT.SUCCESS})
 def test_difference_when_no_diffs_i_vs_h(self):
     """iRODS vs header with identical sample identifiers: the expected diff is empty."""
     from_irods = IrodsSeqFileMetadata(
         '/seq/123.bam',
         samples={'name': {'S1'}, 'accession_number': set(), 'internal_id': set()},
         libraries={},
         studies={},
     )
     from_header = SAMFileHeaderMetadata(
         '/seq/123.bam',
         samples={'name': {'S1'}, 'accession_number': set(), 'internal_id': set()},
         libraries={},
         studies={},
     )
     result = from_irods.difference(from_header)
     self.assertDictEqual(result, {})
 def test_check_for_invalid_ids_3(self):
     """An id dict holding only the name 'Ana' is expected to pass the check."""
     outcome = SAMFileHeaderMetadata._check_for_invalid_ids({'name': {'Ana'}}, 'sample')
     self.assertEqual(outcome.result, RESULT.SUCCESS)
 def test_check_for_invalid_ids_2(self):
     """An empty name together with ids -1 and 0 is expected to produce a FAILURE result."""
     sample_ids = {
         'name': {'Ana', ''},
         'ids': {-1, 0},
     }
     outcome = SAMFileHeaderMetadata._check_for_invalid_ids(sample_ids, 'sample')
     self.assertEqual(outcome.result, RESULT.FAILURE)
 def test_filter_out_invalid_ids_4(self):
     """Filtering {None, 123} is expected to leave only 123."""
     filtered = SAMFileHeaderMetadata._filter_out_invalid_ids({None, 123})
     self.assertSetEqual({123}, filtered)
Ejemplo n.º 17
0
 def test_is_id_valid_6(self):
     """The numeric string "123" is expected to be accepted as a valid id."""
     # Named `candidate` rather than `id` so the builtin is not shadowed.
     candidate = "123"
     self.assertTrue(SAMFileHeaderMetadata._is_id_valid(candidate))
 def test_difference_when_there_are_diffs_h_vs_ss2(self):
     """Different sample names in header and Seqscape: expect the header's name in the diff."""
     header_metadata = SAMFileHeaderMetadata(
         '/seq/123.bam',
         samples={'name': {'S9'}},
         libraries={},
         studies={},
     )
     seqscape_metadata = SeqscapeMetadata(
         samples={'name': {'S1'}},
         libraries={},
         studies={},
     )
     result = header_metadata.difference(seqscape_metadata)
     self.assertEqual(result, {'samples': {'name': {'S9'}}})
Ejemplo n.º 19
0
 def test_is_id_valid_5(self):
     """None is expected to be rejected as an id."""
     # Named `candidate` rather than `id` so the builtin is not shadowed.
     candidate = None
     self.assertFalse(SAMFileHeaderMetadata._is_id_valid(candidate))
Ejemplo n.º 20
0
 def test_check_for_invalid_ids_2(self):
     """Check a sample id dict containing '' and the ids -1 and 0; FAILURE is expected."""
     outcome = SAMFileHeaderMetadata._check_for_invalid_ids(
         {'name': {'Ana', ''}, 'ids': {-1, 0}},
         'sample',
     )
     self.assertEqual(outcome.result, RESULT.FAILURE)
Ejemplo n.º 21
0
 def test_check_for_invalid_ids_3(self):
     """Check a sample id dict with the single name 'Ana'; SUCCESS is expected."""
     sample_ids = {'name': {'Ana'}}
     outcome = SAMFileHeaderMetadata._check_for_invalid_ids(
         sample_ids,
         'sample',
     )
     self.assertEqual(outcome.result, RESULT.SUCCESS)
Ejemplo n.º 22
0
 def test_check_for_invalid_ids_4(self):
     """Check a sample id dict containing '' and None entries; FAILURE is expected."""
     sample_ids = {
         'name': {'Ana', ''},
         'id': set(),
         'accession_number': {'EGA1', None},
     }
     outcome = SAMFileHeaderMetadata._check_for_invalid_ids(
         sample_ids,
         'sample',
     )
     self.assertEqual(outcome.result, RESULT.FAILURE)
Ejemplo n.º 23
0
 def test_filter_out_invalid_ids_4(self):
     """Filter a set holding None and 123; only 123 is expected to remain."""
     raw_ids = {None, 123}
     wanted = {123}
     self.assertSetEqual(
         wanted,
         SAMFileHeaderMetadata._filter_out_invalid_ids(raw_ids),
     )
 def test_is_id_valid_2(self):
     """The string 'undefined' is expected to be rejected as an id."""
     # Named `candidate` rather than `id` so the builtin is not shadowed.
     candidate = 'undefined'
     self.assertFalse(SAMFileHeaderMetadata._is_id_valid(candidate))
 def test_difference_when_there_are_diff_type_ids_h_vs_ss(self):
     """Same sample name on both sides, accession number only in the header: expect an empty diff."""
     header_metadata = SAMFileHeaderMetadata(
         '/seq/123.bam',
         samples={'name': {'S1'}, 'accession_number': {'Acc1'}},
         libraries={},
         studies={},
     )
     seqscape_metadata = SeqscapeMetadata(
         samples={'name': {'S1'}},
         libraries={},
         studies={},
     )
     result = header_metadata.difference(seqscape_metadata)
     self.assertEqual(result, {})
 def test_is_id_valid_5(self):
     """_is_id_valid is expected to return a falsy value for None."""
     # `identifier` replaces the original local `id`, which shadowed the builtin.
     identifier = None
     self.assertFalse(SAMFileHeaderMetadata._is_id_valid(identifier))
Ejemplo n.º 27
0
 def test_is_id_valid_2(self):
     """_is_id_valid is expected to return a falsy value for 'undefined'."""
     # `identifier` replaces the original local `id`, which shadowed the builtin.
     identifier = 'undefined'
     self.assertFalse(SAMFileHeaderMetadata._is_id_valid(identifier))
 def test_is_id_valid_6(self):
     """_is_id_valid is expected to return a truthy value for "123"."""
     # `identifier` replaces the original local `id`, which shadowed the builtin.
     identifier = "123"
     self.assertTrue(SAMFileHeaderMetadata._is_id_valid(identifier))