def test_msa_to_stockholm_extensive(self):
    """Write an alignment carrying every metadata kind and compare to file.

    The alignment has alignment-level metadata and positional metadata,
    plus per-sequence metadata and positional metadata; the written text
    must equal the ``stockholm_all_data_types`` data file.
    """
    expected_path = get_data_path('stockholm_all_data_types')

    first = DNA('GAGGCCATGCCCAGGTGAAG',
                metadata=OrderedDict([('DT', 'February 1, 2016'),
                                      ('NM', 'Unknown')]))
    second = DNA('ACCTGAGCCACAGTAGAAGT')
    third_positional = OrderedDict([
        ('AS', list('CCGAAAGT' 'CGTTCGA' 'AAATG')),
        ('SS', list('GGCGAGTC' 'GTTCGAGC' 'TGG' 'C')),
    ])
    third = DNA('CCCTTCGCTGGAAATGTATG',
                metadata={'DT': 'Unknown'},
                positional_metadata=third_positional)

    msa_metadata = OrderedDict([('NM', 'Kestrel Gorlick'),
                                ('DT', 'February 11, 2016'),
                                ('FN', 'Writer test file')])
    msa_positional = OrderedDict([
        ('AS_cons', list('CGTTCGTTCTAAC' 'AATTCCA')),
        ('SS_cons', list('GGCGCTACGACCT' 'ACGACCG')),
    ])
    alignment = TabularMSA([first, second, third],
                           metadata=msa_metadata,
                           positional_metadata=msa_positional,
                           index=['seq1', 'seq2', 'seq3'])

    buffer = io.StringIO()
    _tabular_msa_to_stockholm(alignment, buffer)
    observed = buffer.getvalue()
    buffer.close()

    with io.open(expected_path) as expected_file:
        expected = expected_file.read()
    self.assertEqual(observed, expected)
def test_rn_not_list_of_refs_error(self):
    """An ``RN`` metadata value of ``'1'`` (not a list) must be rejected."""
    alignment = TabularMSA([], metadata={'RN': '1'})
    expected_message = r"Expected 'RN'.*list of reference" ".*got '1'"
    buffer = io.StringIO()
    with self.assertRaisesRegex(StockholmFormatError, expected_message):
        _tabular_msa_to_stockholm(alignment, buffer)
def test_msa_to_stockholm_multiple_references(self):
    """Write three ``RN`` reference entries and compare to the data file.

    Each reference is an ``OrderedDict`` with RM, RT, RA, RL and RC keys;
    the written text must equal ``stockholm_multiple_references``.
    """
    expected_path = get_data_path('stockholm_multiple_references')

    # The three references differ only in their RM value and the trailing
    # number of the other fields, so generate them from the RM values.
    rm_values = ['123456789', '987654321', '132465879']
    references = [
        OrderedDict([('RM', rm_value),
                     ('RT', 'Title %d' % number),
                     ('RA', 'Author %d' % number),
                     ('RL', 'Location %d' % number),
                     ('RC', 'Comment %d' % number)])
        for number, rm_value in enumerate(rm_values, start=1)
    ]
    alignment = TabularMSA([], metadata={'RN': references})

    buffer = io.StringIO()
    _tabular_msa_to_stockholm(alignment, buffer)
    observed = buffer.getvalue()
    buffer.close()

    with io.open(expected_path) as expected_file:
        expected = expected_file.read()
    self.assertEqual(observed, expected)
def test_rn_not_list_of_refs_error(self):
    """Writing fails when ``RN`` metadata is the string ``'1'``.

    The raised ``StockholmFormatError`` message must match the pattern
    below.
    """
    bad_metadata = {'RN': '1'}
    alignment = TabularMSA([], metadata=bad_metadata)
    pattern = r"Expected 'RN'.*list of reference" ".*got '1'"
    with self.assertRaisesRegex(StockholmFormatError, pattern):
        out = io.StringIO()
        _tabular_msa_to_stockholm(alignment, out)
def test_unoriginal_index_error(self):
    """Duplicate index labels (``'seq1'`` twice) must be rejected."""
    sequences = [DNA('ATCGCCAGCT'), DNA('TTGTGCTGGC')]
    alignment = TabularMSA(sequences, index=['seq1', 'seq1'])
    expected_message = 'index labels must be unique.'
    buffer = io.StringIO()
    with self.assertRaisesRegex(StockholmFormatError, expected_message):
        _tabular_msa_to_stockholm(alignment, buffer)
def test_unoriginal_index_error(self):
    """Writing an alignment whose index repeats ``'seq1'`` must fail.

    A ``StockholmFormatError`` matching the uniqueness message is expected.
    """
    repeated_labels = ['seq1', 'seq1']
    alignment = TabularMSA([DNA('ATCGCCAGCT'), DNA('TTGTGCTGGC')],
                           index=repeated_labels)
    pattern = r'index labels must be unique.'
    with self.assertRaisesRegex(StockholmFormatError, pattern):
        out = io.StringIO()
        _tabular_msa_to_stockholm(alignment, out)
def test_msa_to_stockholm_extensive(self):
    """Check writer output for an alignment with all metadata kinds.

    Builds three DNA sequences (two with metadata, one of those also with
    positional metadata) under alignment-level metadata and positional
    metadata, writes them, and compares the text with the
    ``stockholm_all_data_types`` data file.
    """
    reference_file = get_data_path('stockholm_all_data_types')

    records = [
        DNA('GAGGCCATGCCCAGGTGAAG',
            metadata=OrderedDict([('DT', 'February 1, 2016'),
                                  ('NM', 'Unknown')])),
        DNA('ACCTGAGCCACAGTAGAAGT'),
        DNA('CCCTTCGCTGGAAATGTATG',
            metadata={'DT': 'Unknown'},
            positional_metadata=OrderedDict([
                ('AS', list('CCGAAAGT' 'CGTTCGA' 'AAATG')),
                ('SS', list('GGCGAGTC' 'GTTCGAGC' 'TGG' 'C')),
            ])),
    ]
    msa_metadata = OrderedDict([
        ('NM', 'Kestrel Gorlick'),
        ('DT', 'February 11, 2016'),
        ('FN', 'Writer test file'),
    ])
    msa_positional = OrderedDict([
        ('AS_cons', list('CGTTCGTTCTAAC' 'AATTCCA')),
        ('SS_cons', list('GGCGCTACGACCT' 'ACGACCG')),
    ])
    alignment = TabularMSA(records,
                           metadata=msa_metadata,
                           positional_metadata=msa_positional,
                           index=['seq1', 'seq2', 'seq3'])

    out = io.StringIO()
    _tabular_msa_to_stockholm(alignment, out)
    written = out.getvalue()
    out.close()

    with io.open(reference_file) as handle:
        wanted = handle.read()
    self.assertEqual(written, wanted)
def test_msa_to_stockholm_multiple_references(self):
    """Compare writer output for three ``RN`` references to the data file.

    The expected text is read from ``stockholm_multiple_references``.
    """
    reference_file = get_data_path('stockholm_multiple_references')

    first_ref = OrderedDict([('RM', '123456789'),
                             ('RT', 'Title 1'),
                             ('RA', 'Author 1'),
                             ('RL', 'Location 1'),
                             ('RC', 'Comment 1')])
    second_ref = OrderedDict([('RM', '987654321'),
                              ('RT', 'Title 2'),
                              ('RA', 'Author 2'),
                              ('RL', 'Location 2'),
                              ('RC', 'Comment 2')])
    third_ref = OrderedDict([('RM', '132465879'),
                             ('RT', 'Title 3'),
                             ('RA', 'Author 3'),
                             ('RL', 'Location 3'),
                             ('RC', 'Comment 3')])
    alignment = TabularMSA(
        [], metadata={'RN': [first_ref, second_ref, third_ref]})

    out = io.StringIO()
    _tabular_msa_to_stockholm(alignment, out)
    written = out.getvalue()
    out.close()

    with io.open(reference_file) as handle:
        wanted = handle.read()
    self.assertEqual(written, wanted)
def test_handles_missing_metadata_efficiently(self):
    """Writing must leave the private metadata attributes as ``None``.

    After writing an alignment built without any metadata, ``_metadata``
    and ``_positional_metadata`` are asserted to be ``None`` on the
    alignment and on its first sequence.
    """
    # NOTE(review): presumably these attributes start out as None and are
    # only created on demand -- confirm against TabularMSA / DNA.
    alignment = TabularMSA([DNA('ACTG'), DNA('GTCA')],
                           index=['seq1', 'seq2'])
    buffer = io.StringIO()
    _tabular_msa_to_stockholm(alignment, buffer)

    private_attributes = ('_metadata', '_positional_metadata')
    # Check the alignment itself before indexing into it.
    for attribute in private_attributes:
        self.assertIsNone(getattr(alignment, attribute))
    for attribute in private_attributes:
        self.assertIsNone(getattr(alignment[0], attribute))
def test_invalid_reference_tag_error(self):
    """An unknown key (``'foo'``) inside an ``RN`` reference is rejected."""
    reference = OrderedDict([('RL', 'Flagstaff'), ('foo', 'bar')])
    alignment = TabularMSA([], metadata={'RN': [reference]})
    expected_message = ("Invalid reference.*foo' found in.*1.*"
                        "Valid reference tags are:")
    buffer = io.StringIO()
    with self.assertRaisesRegex(StockholmFormatError, expected_message):
        _tabular_msa_to_stockholm(alignment, buffer)
def test_invalid_reference_tag_error(self):
    """Writing fails when a reference dict contains the key ``'foo'``.

    The ``StockholmFormatError`` message must match the pattern below.
    """
    bad_reference = OrderedDict([('RL', 'Flagstaff'), ('foo', 'bar')])
    metadata = {'RN': [bad_reference]}
    alignment = TabularMSA([], metadata=metadata)
    pattern = (r"Invalid reference.*foo' found "
               "in.*1.*Valid reference tags are:")
    with self.assertRaisesRegex(StockholmFormatError, pattern):
        out = io.StringIO()
        _tabular_msa_to_stockholm(alignment, out)
def test_msa_to_stockholm_single_tree(self):
    """Write an alignment whose ``NH`` metadata is a plain string.

    Output must equal the ``stockholm_single_tree_without_id`` data file.
    """
    expected_path = get_data_path('stockholm_single_tree_without_id')
    tree_metadata = OrderedDict([('NH', 'ABCD')])
    alignment = TabularMSA([], metadata=tree_metadata)

    buffer = io.StringIO()
    _tabular_msa_to_stockholm(alignment, buffer)
    observed = buffer.getvalue()
    buffer.close()

    with io.open(expected_path) as expected_file:
        expected = expected_file.read()
    self.assertEqual(observed, expected)
def test_round_trip_empty(self):
    """Read ``stockholm_no_data`` and write it back unchanged.

    The file is read with ``constructor=Protein``; the re-written text
    must equal the file's contents.
    """
    source_path = get_data_path('stockholm_no_data')
    alignment = _stockholm_to_tabular_msa(source_path, constructor=Protein)

    buffer = io.StringIO()
    _tabular_msa_to_stockholm(alignment, buffer)
    observed = buffer.getvalue()
    buffer.close()

    with io.open(source_path) as source_file:
        expected = source_file.read()
    self.assertEqual(observed, expected)
def test_round_trip_empty(self):
    """Reader followed by writer must reproduce ``stockholm_no_data``."""
    reference_file = get_data_path('stockholm_no_data')
    parsed = _stockholm_to_tabular_msa(reference_file, constructor=Protein)

    out = io.StringIO()
    _tabular_msa_to_stockholm(parsed, out)
    written = out.getvalue()
    out.close()

    with io.open(reference_file) as handle:
        wanted = handle.read()
    self.assertEqual(written, wanted)
def test_msa_to_stockholm_empty(self):
    """Writing ``TabularMSA([])`` must equal the ``stockholm_no_data`` file."""
    expected_path = get_data_path('stockholm_no_data')
    alignment = TabularMSA([])

    buffer = io.StringIO()
    _tabular_msa_to_stockholm(alignment, buffer)
    observed = buffer.getvalue()
    buffer.close()

    with io.open(expected_path) as expected_file:
        expected = expected_file.read()
    self.assertEqual(observed, expected)
def test_round_trip_nonstring_index_values(self):
    """Read ``stockholm_nonstring_labels`` and write it back unchanged.

    The file is read with ``constructor=DNA``; the re-written text must
    equal the file's contents.
    """
    source_path = get_data_path('stockholm_nonstring_labels')
    alignment = _stockholm_to_tabular_msa(source_path, constructor=DNA)

    buffer = io.StringIO()
    _tabular_msa_to_stockholm(alignment, buffer)
    observed = buffer.getvalue()
    buffer.close()

    with io.open(source_path) as source_file:
        expected = source_file.read()
    self.assertEqual(observed, expected)
def test_round_trip_nonstring_index_values(self):
    """Reader then writer must reproduce ``stockholm_nonstring_labels``."""
    reference_file = get_data_path('stockholm_nonstring_labels')
    parsed = _stockholm_to_tabular_msa(reference_file, constructor=DNA)

    out = io.StringIO()
    _tabular_msa_to_stockholm(parsed, out)
    written = out.getvalue()
    out.close()

    with io.open(reference_file) as handle:
        wanted = handle.read()
    self.assertEqual(written, wanted)
def test_msa_to_stockholm_minimal(self):
    """Write a one-sequence alignment and compare to ``stockholm_minimal``."""
    expected_path = get_data_path('stockholm_minimal')
    only_sequence = DNA('TGTGTCGCAGTTGTCGTTTG')
    alignment = TabularMSA([only_sequence], index=['0235244'])

    buffer = io.StringIO()
    _tabular_msa_to_stockholm(alignment, buffer)
    observed = buffer.getvalue()
    buffer.close()

    with io.open(expected_path) as expected_file:
        expected = expected_file.read()
    self.assertEqual(observed, expected)
def test_rn_data_not_in_dict_error(self):
    """A non-dict entry in the ``RN`` list must be rejected.

    The second ``RN`` item is a plain string; the error message is expected
    to contain ``2 stored as 'str'``.
    """
    references = [OrderedDict([('RL', 'Flagstaff')]), 'Incorrect Item']
    alignment = TabularMSA([], metadata={'RN': references})
    expected_message = ("Expected reference information.*stored"
                        " as a dictionary, found.*2 stored as "
                        "'str'")
    buffer = io.StringIO()
    with self.assertRaisesRegex(StockholmFormatError, expected_message):
        _tabular_msa_to_stockholm(alignment, buffer)
def test_msa_to_stockholm_single_tree_as_dict(self):
    """Write an alignment whose ``NH`` metadata is a one-entry dict.

    Output must equal the ``stockholm_single_tree_with_id`` data file.
    """
    expected_path = get_data_path('stockholm_single_tree_with_id')
    trees = {'tree1': 'ABCD'}
    alignment = TabularMSA([], metadata={'NH': trees})

    buffer = io.StringIO()
    _tabular_msa_to_stockholm(alignment, buffer)
    observed = buffer.getvalue()
    buffer.close()

    with io.open(expected_path) as expected_file:
        expected = expected_file.read()
    self.assertEqual(observed, expected)
def test_msa_to_stockholm_empty(self):
    """An alignment with no sequences is written as ``stockholm_no_data``.

    The written text is compared with that data file's contents.
    """
    reference_file = get_data_path('stockholm_no_data')
    empty_alignment = TabularMSA([])

    out = io.StringIO()
    _tabular_msa_to_stockholm(empty_alignment, out)
    written = out.getvalue()
    out.close()

    with io.open(reference_file) as handle:
        wanted = handle.read()
    self.assertEqual(written, wanted)
def test_msa_to_stockholm_minimal(self):
    """Compare output for a single labelled DNA sequence to the data file.

    The expected text is read from ``stockholm_minimal``.
    """
    reference_file = get_data_path('stockholm_minimal')
    alignment = TabularMSA([DNA('TGTGTCGCAGTTGTCGTTTG')],
                           index=['0235244'])

    out = io.StringIO()
    _tabular_msa_to_stockholm(alignment, out)
    written = out.getvalue()
    out.close()

    with io.open(reference_file) as handle:
        wanted = handle.read()
    self.assertEqual(written, wanted)
def test_rn_data_not_in_dict_error(self):
    """Writing fails when an ``RN`` list item is a string, not a dict.

    The ``StockholmFormatError`` message must match the pattern below.
    """
    valid_reference = OrderedDict([('RL', 'Flagstaff')])
    metadata = {'RN': [valid_reference, 'Incorrect Item']}
    alignment = TabularMSA([], metadata=metadata)
    pattern = (r"Expected reference information.*stored"
               " as a dictionary, found.*2 stored as "
               "'str'")
    with self.assertRaisesRegex(StockholmFormatError, pattern):
        out = io.StringIO()
        _tabular_msa_to_stockholm(alignment, out)
def test_msa_to_stockholm_single_tree(self):
    """Compare output for ``NH`` set to ``'ABCD'`` with the data file.

    The expected text is read from ``stockholm_single_tree_without_id``.
    """
    reference_file = get_data_path('stockholm_single_tree_without_id')
    alignment = TabularMSA([], metadata=OrderedDict([('NH', 'ABCD')]))

    out = io.StringIO()
    _tabular_msa_to_stockholm(alignment, out)
    written = out.getvalue()
    out.close()

    with io.open(reference_file) as handle:
        wanted = handle.read()
    self.assertEqual(written, wanted)
def test_msa_to_stockholm_single_tree_as_dict(self):
    """Compare output for ``NH`` given as ``{'tree1': 'ABCD'}``.

    The expected text is read from ``stockholm_single_tree_with_id``.
    """
    reference_file = get_data_path('stockholm_single_tree_with_id')
    metadata = {'NH': {'tree1': 'ABCD'}}
    alignment = TabularMSA([], metadata=metadata)

    out = io.StringIO()
    _tabular_msa_to_stockholm(alignment, out)
    written = out.getvalue()
    out.close()

    with io.open(reference_file) as handle:
        wanted = handle.read()
    self.assertEqual(written, wanted)
def test_msa_to_stockholm_data_only(self):
    """Write three RNA sequences without metadata and compare to file.

    Output must equal the ``stockholm_data_only`` data file.
    """
    expected_path = get_data_path('stockholm_data_only')
    sequences = [
        RNA('ACUCCGACAUGCUCC'),
        RNA('UAGUGCCGAACGCUG'),
        RNA('GUGUGGGCGUGAUUC'),
    ]
    alignment = TabularMSA(sequences, index=['seq1', 'seq2', 'seq3'])

    buffer = io.StringIO()
    _tabular_msa_to_stockholm(alignment, buffer)
    observed = buffer.getvalue()
    buffer.close()

    with io.open(expected_path) as expected_file:
        expected = expected_file.read()
    self.assertEqual(observed, expected)
def test_msa_to_stockholm_data_only(self):
    """Compare output for a metadata-free RNA alignment to the data file.

    The expected text is read from ``stockholm_data_only``.
    """
    reference_file = get_data_path('stockholm_data_only')
    labels = ['seq1', 'seq2', 'seq3']
    alignment = TabularMSA([RNA('ACUCCGACAUGCUCC'),
                            RNA('UAGUGCCGAACGCUG'),
                            RNA('GUGUGGGCGUGAUUC')],
                           index=labels)

    out = io.StringIO()
    _tabular_msa_to_stockholm(alignment, out)
    written = out.getvalue()
    out.close()

    with io.open(reference_file) as handle:
        wanted = handle.read()
    self.assertEqual(written, wanted)
def test_gc_wrong_dataframe_item_length_error(self):
    """Reject alignment positional metadata holding a multi-character item.

    Column ``AC`` ends with the two-character value ``'GG'``; writing must
    raise ``StockholmFormatError`` with a message naming that column.
    """
    seq1 = list('GAGCAAGCCACTAGA')
    seq1.append('GG')
    pos_metadata_dataframe = pd.DataFrame({'AC': seq1,
                                           'SS': list('TCCTTGAACTACCCGA'),
                                           'AS': list('TCAGCTCTGCAGCGTT')})
    msa = TabularMSA([DNA('TCCTTGAACTACCCGA')],
                     positional_metadata=pos_metadata_dataframe)
    # Raw literals: the pattern contains the regex escapes \( and \), which
    # are invalid escape sequences in an ordinary string literal and make
    # Python emit a warning when the module is compiled.
    expected_message = (r'Multiple sequence alignment positional '
                        r'metadata.*must contain a single character'
                        r'.*Found value\(s\) in column AC')
    with self.assertRaisesRegex(StockholmFormatError, expected_message):
        fh = io.StringIO()
        _tabular_msa_to_stockholm(msa, fh)
def test_gc_wrong_dataframe_item_length_error(self):
    """Writing fails when an alignment positional-metadata item is too long.

    The ``AC`` column's last value is ``'GG'`` (two characters); the
    resulting ``StockholmFormatError`` message must mention column AC.
    """
    seq1 = list('GAGCAAGCCACTAGA')
    seq1.append('GG')
    pos_metadata_dataframe = pd.DataFrame({
        'AC': seq1,
        'SS': list('TCCTTGAACTACCCGA'),
        'AS': list('TCAGCTCTGCAGCGTT'),
    })
    msa = TabularMSA([DNA('TCCTTGAACTACCCGA')],
                     positional_metadata=pos_metadata_dataframe)
    # The pattern must be a raw string: "\(" and "\)" are regex escapes,
    # not valid Python string escapes, so a plain literal triggers an
    # invalid-escape-sequence warning.
    pattern = (r'Multiple sequence alignment positional '
               r'metadata.*must contain a single character'
               r'.*Found value\(s\) in column AC')
    with self.assertRaisesRegex(StockholmFormatError, pattern):
        fh = io.StringIO()
        _tabular_msa_to_stockholm(msa, fh)
def test_msa_to_stockholm_multiple_trees(self):
    """Write an alignment whose ``NH`` metadata maps three tree ids.

    Output must equal the ``stockholm_multiple_trees`` data file.
    """
    expected_path = get_data_path('stockholm_multiple_trees')
    trees = OrderedDict([('tree1', 'ABCD'),
                         ('tree2', 'EFGH'),
                         ('tree3', 'IJKL')])
    alignment = TabularMSA([], metadata=OrderedDict([('NH', trees)]))

    buffer = io.StringIO()
    _tabular_msa_to_stockholm(alignment, buffer)
    observed = buffer.getvalue()
    buffer.close()

    with io.open(expected_path) as expected_file:
        expected = expected_file.read()
    self.assertEqual(observed, expected)
def test_msa_to_stockholm_single_reference(self):
    """Write one ``RN`` reference entry and compare to the data file.

    Output must equal the ``stockholm_single_reference`` data file.
    """
    expected_path = get_data_path('stockholm_single_reference')
    reference = OrderedDict([('RM', '123456789'),
                             ('RT', 'A Title'),
                             ('RA', 'The Author'),
                             ('RL', 'A Location'),
                             ('RC', 'Comment')])
    alignment = TabularMSA([], metadata={'RN': [reference]})

    buffer = io.StringIO()
    _tabular_msa_to_stockholm(alignment, buffer)
    observed = buffer.getvalue()
    buffer.close()

    with io.open(expected_path) as expected_file:
        expected = expected_file.read()
    self.assertEqual(observed, expected)
def test_msa_to_stockholm_nonstring_values(self):
    """Write an alignment whose labels, keys and values are numbers.

    Metadata keys/values, positional metadata keys/items and the index
    label are ints or floats; output must equal the
    ``stockholm_nonstring_labels`` data file.
    """
    expected_path = get_data_path('stockholm_nonstring_labels')

    sequence = DNA('ACTG',
                   metadata=OrderedDict([(8, 123)]),
                   positional_metadata=OrderedDict([(1.0, [1, 2, 3, 4])]))
    msa_metadata = OrderedDict([(1.3, 2857)])
    msa_positional = OrderedDict([(25, [4, 3, 2, 1])])
    alignment = TabularMSA([sequence],
                           metadata=msa_metadata,
                           positional_metadata=msa_positional,
                           index=[11214])

    buffer = io.StringIO()
    _tabular_msa_to_stockholm(alignment, buffer)
    observed = buffer.getvalue()
    buffer.close()

    with io.open(expected_path) as expected_file:
        expected = expected_file.read()
    self.assertEqual(observed, expected)
def test_msa_to_stockholm_nonstring_values(self):
    """Compare output for numeric keys, values and labels to the data file.

    The expected text is read from ``stockholm_nonstring_labels``.
    """
    reference_file = get_data_path('stockholm_nonstring_labels')

    records = [
        DNA('ACTG',
            metadata=OrderedDict([(8, 123)]),
            positional_metadata=OrderedDict([(1.0, [1, 2, 3, 4])])),
    ]
    alignment = TabularMSA(
        records,
        metadata=OrderedDict([(1.3, 2857)]),
        positional_metadata=OrderedDict([(25, [4, 3, 2, 1])]),
        index=[11214])

    out = io.StringIO()
    _tabular_msa_to_stockholm(alignment, out)
    written = out.getvalue()
    out.close()

    with io.open(reference_file) as handle:
        wanted = handle.read()
    self.assertEqual(written, wanted)
def test_msa_to_stockholm_multiple_trees(self):
    """Compare output for three ``NH`` trees with the data file.

    The expected text is read from ``stockholm_multiple_trees``.
    """
    reference_file = get_data_path('stockholm_multiple_trees')
    tree_entries = [('tree1', 'ABCD'), ('tree2', 'EFGH'), ('tree3', 'IJKL')]
    metadata = OrderedDict([('NH', OrderedDict(tree_entries))])
    alignment = TabularMSA([], metadata=metadata)

    out = io.StringIO()
    _tabular_msa_to_stockholm(alignment, out)
    written = out.getvalue()
    out.close()

    with io.open(reference_file) as handle:
        wanted = handle.read()
    self.assertEqual(written, wanted)
def test_msa_to_stockholm_single_reference(self):
    """Compare output for a single ``RN`` reference with the data file.

    The expected text is read from ``stockholm_single_reference``.
    """
    reference_file = get_data_path('stockholm_single_reference')
    fields = [
        ('RM', '123456789'),
        ('RT', 'A Title'),
        ('RA', 'The Author'),
        ('RL', 'A Location'),
        ('RC', 'Comment'),
    ]
    alignment = TabularMSA([], metadata={'RN': [OrderedDict(fields)]})

    out = io.StringIO()
    _tabular_msa_to_stockholm(alignment, out)
    written = out.getvalue()
    out.close()

    with io.open(reference_file) as handle:
        wanted = handle.read()
    self.assertEqual(written, wanted)
def test_unoriginal_gc_feature_names_error(self):
    """Reject duplicate column names in alignment positional metadata.

    The frame's columns are AC, SS, SS, AC; writing must raise
    ``StockholmFormatError`` with a message reporting 2 duplicates.
    """
    # ``pd.DataFrame.from_items`` was deprecated in pandas 0.23 and removed
    # in 1.0. Build the frame row-wise and transpose it so the (duplicated)
    # labels end up as column names.
    pos_metadata_dataframe = pd.DataFrame(
        [
            list('GAGCAAGCCACTA' 'GA'),
            list('TCCTTGAACTACC' 'CG'),
            list('TCAGCTCTGCAGC' 'GT'),
            list('GTCAGGCGCTCGG' 'TG'),
        ],
        index=['AC', 'SS', 'SS', 'AC'],
    ).T
    msa = TabularMSA([DNA('CCCCTGCTTTCGTAG')],
                     positional_metadata=pos_metadata_dataframe)
    expected_message = (r'Multiple sequence alignment positional '
                        r'metadata.*must be unique. Found 2 '
                        r'duplicate')
    with self.assertRaisesRegex(StockholmFormatError, expected_message):
        fh = io.StringIO()
        _tabular_msa_to_stockholm(msa, fh)
def test_gr_wrong_dataframe_item_length_error(self):
    """Reject sequence positional metadata holding a multi-character item.

    Column ``AC`` of the sequence's positional metadata ends with the
    two-character value ``'GG'``; writing must raise
    ``StockholmFormatError`` with a message naming that column.
    """
    seq1 = list('GAGCAAGCCACTAGA')
    seq1.append('GG')
    pos_metadata_dataframe = pd.DataFrame({
        'AC': seq1,
        'SS': list('TCCTTGAACTACCCGA'),
        'AS': list('TCAGCTCTGCAGCGTT'),
    })
    msa = TabularMSA([
        DNA('TCCTTGAACTACCCGA', positional_metadata=pos_metadata_dataframe)
    ])
    # Raw literals: the pattern contains the regex escapes \( and \), which
    # are invalid escape sequences in an ordinary string literal.
    expected_message = (r'Sequence-specific positional metadata.*'
                        r'must contain a single character.*Found '
                        r'value\(s\) in column AC')
    # Call the TestCase method directly, as the other tests in this file
    # do, rather than going through the ``six`` compatibility wrapper.
    with self.assertRaisesRegex(StockholmFormatError, expected_message):
        fh = io.StringIO()
        _tabular_msa_to_stockholm(msa, fh)
def test_unoriginal_gr_feature_names_error(self):
    """Reject duplicate column names in sequence positional metadata.

    The frame's columns are AC, SS, AS, SS; writing must raise
    ``StockholmFormatError`` with a message reporting 1 duplicate.
    """
    # ``pd.DataFrame.from_items`` was deprecated in pandas 0.23 and removed
    # in 1.0. Build the frame row-wise and transpose it so the (duplicated)
    # labels end up as column names.
    pos_metadata_dataframe = pd.DataFrame(
        [
            list('GAGCAAGCCACTA' 'GA'),
            list('TCCTTGAACTACC' 'CG'),
            list('TCAGCTCTGCAGC' 'GT'),
            list('GTCAGGCGCTCGG' 'TG'),
        ],
        index=['AC', 'SS', 'AS', 'SS'],
    ).T
    msa = TabularMSA([
        DNA('CGTCAATCTCGAACT', positional_metadata=pos_metadata_dataframe)
    ], index=['seq1'])
    expected_message = (r'Sequence-specific positional metadata.*'
                        r'must be unique. Found 1 duplicate')
    with self.assertRaisesRegex(StockholmFormatError, expected_message):
        fh = io.StringIO()
        _tabular_msa_to_stockholm(msa, fh)
def test_unoriginal_gc_feature_names_error(self):
    """Duplicate alignment positional-metadata column names are rejected.

    The frame is built row-wise and transposed so that its columns are
    labelled AC, SS, SS, AC; the error message must report 2 duplicates.
    """
    column_labels = ['AC', 'SS', 'SS', 'AC']
    column_values = [
        list('GAGCAAGCCACTAGA'),
        list('TCCTTGAACTACCCG'),
        list('TCAGCTCTGCAGCGT'),
        list('GTCAGGCGCTCGGTG'),
    ]
    frame = pd.DataFrame(column_values, index=column_labels).T
    alignment = TabularMSA([DNA('CCCCTGCTTTCGTAG')],
                           positional_metadata=frame)
    expected_message = (r'Multiple sequence alignment positional '
                        'metadata.*must be unique. Found 2 '
                        'duplicate')
    with self.assertRaisesRegex(StockholmFormatError, expected_message):
        buffer = io.StringIO()
        _tabular_msa_to_stockholm(alignment, buffer)
def test_unoriginal_gr_feature_names_error(self):
    """Writing fails when a sequence's positional metadata repeats a column.

    Columns are AC, SS, AS, SS (``SS`` twice); the resulting
    ``StockholmFormatError`` message must report 1 duplicate.
    """
    # ``pd.DataFrame.from_items`` no longer exists (deprecated in pandas
    # 0.23, removed in 1.0). A row-wise frame transposed with ``.T`` yields
    # the same columns, including the duplicated label.
    column_labels = ['AC', 'SS', 'AS', 'SS']
    pos_metadata_dataframe = pd.DataFrame(
        [
            list('GAGCAAGCCACTA' 'GA'),
            list('TCCTTGAACTACC' 'CG'),
            list('TCAGCTCTGCAGC' 'GT'),
            list('GTCAGGCGCTCGG' 'TG'),
        ],
        index=column_labels,
    ).T
    msa = TabularMSA(
        [DNA('CGTCAATCTCGAACT',
             positional_metadata=pos_metadata_dataframe)],
        index=['seq1'])
    pattern = (r'Sequence-specific positional metadata.*'
               r'must be unique. Found 1 duplicate')
    with self.assertRaisesRegex(StockholmFormatError, pattern):
        fh = io.StringIO()
        _tabular_msa_to_stockholm(msa, fh)