def test_duplicate_sequence_names_error(self):
    # Reading this fixture as RNA must raise a StockholmFormatError whose
    # message matches the pattern below.
    path = get_data_path('stockholm_duplicate_sequence_names')
    pattern = 'duplicate sequence name.*ASR132.*supported by the reader.'
    with self.assertRaisesRegex(StockholmFormatError, pattern):
        _stockholm_to_tabular_msa(path, constructor=RNA)
def test_stockholm_duplicate_tree_id_error(self):
    # The reader must fail with a StockholmFormatError that mentions the
    # tree id 'tree1' when given this fixture.
    path = get_data_path('stockholm_duplicate_tree_ids')
    pattern = 'Tree.*tree1.*in file.'
    with self.assertRaisesRegex(StockholmFormatError, pattern):
        _stockholm_to_tabular_msa(path, constructor=DNA)
def test_nonexistent_gs_error(self):
    # Expect a StockholmFormatError whose message names 'AC14' as a
    # nonexistent sequence.
    path = get_data_path('stockholm_invalid_nonexistent_gs')
    pattern = 'GS or GR.*nonexistent sequence.*AC14.'
    with self.assertRaisesRegex(StockholmFormatError, pattern):
        _stockholm_to_tabular_msa(path, constructor=RNA)
def test_stockholm_multi_line_tree_no_id(self):
    # The parsed result must equal an empty alignment whose 'NH' metadata
    # is the single string 'ABCDEFGH'.
    path = get_data_path('stockholm_multi_line_tree_no_id')
    observed = _stockholm_to_tabular_msa(path, constructor=DNA)

    expected_metadata = {'NH': 'ABCDEFGH'}
    expected = TabularMSA([], metadata=expected_metadata)
    self.assertEqual(observed, expected)
def test_stockholm_single_tree_with_id(self):
    # The parsed result must equal an empty alignment whose 'NH' metadata
    # maps the tree id 'tree1' to 'ABCD'.
    path = get_data_path('stockholm_single_tree_with_id')
    observed = _stockholm_to_tabular_msa(path, constructor=DNA)

    expected_metadata = {'NH': {'tree1': 'ABCD'}}
    expected = TabularMSA([], metadata=expected_metadata)
    self.assertEqual(observed, expected)
def test_missing_header_error(self):
    # Reading this fixture must raise a StockholmFormatError complaining
    # about a missing header.
    path = get_data_path('stockholm_missing_header')
    pattern = 'File missing.*header'
    with self.assertRaisesRegex(StockholmFormatError, pattern):
        _stockholm_to_tabular_msa(path, constructor=DNA)
def test_data_type_error(self):
    # Expect a StockholmFormatError whose message quotes the '#=GZ' tag
    # as unrecognized.
    path = get_data_path('stockholm_invalid_data_type')
    pattern = "Unrecognized.*'#=GZ"
    with self.assertRaisesRegex(StockholmFormatError, pattern):
        _stockholm_to_tabular_msa(path, constructor=DNA)
def test_unsupported_constructor_error(self):
    # Passing TabularMSA itself as `constructor` must be rejected with a
    # TypeError that mentions `GrammaredSequence`.
    path = get_data_path('empty')
    pattern = r'`constructor`.*`GrammaredSequence`.'
    with self.assertRaisesRegex(TypeError, pattern):
        _stockholm_to_tabular_msa(path, constructor=TabularMSA)
def test_stockholm_extensive_mixed(self):
    # Parse the fixture first, then assemble the expected alignment row by
    # row (three Protein sequences plus alignment-level metadata) and
    # compare the two.
    path = get_data_path('stockholm_extensive_mixed')
    observed = _stockholm_to_tabular_msa(path, constructor=Protein)

    # Row 1: carries only the per-position 'SA' annotation.
    first_row = Protein(
        'MTCRAQLIAVPRASSLAE..AIACAQKM....'
        'RVSRVPVYERS',
        positional_metadata={
            'SA': list('9998877564'
                       '53524252..'
                       '55152525..'
                       '..36463774'
                       '777'),
        })

    # Row 2: carries sequence-level 'OS' metadata and a per-position 'SS'
    # annotation.
    second_row = Protein(
        'EVMLTDIPRLHINDPIMK..GFGMVINN....'
        '..GFVCVENDE',
        metadata={'OS': 'Bacillus subtilis'},
        positional_metadata={
            'SS': list('CCCCCCCHHHH'
                       'HHHHHHH..HE'
                       'EEEEEE....E'
                       'EEEEEE'
                       'EEEH'),
        })

    # Row 3: carries two per-position annotations, 'AS' then 'IN'.
    third_row = Protein(
        'EVMLTDIPRLHINDPIMK..GFGMVINN...'
        '...GFVCVENDE',
        positional_metadata={
            'AS': list('___________'
                       '_____*_____'
                       '___________'
                       '________'
                       '__'),
            'IN': list('___________'
                       '_1_________'
                       '_____2_____'
                       '_____0_'
                       '___'),
        })

    # Alignment-level metadata, per-column annotation and row labels.
    msa_metadata = {
        'ID': 'CBS',
        'AC': 'PF00571',
        'AU': 'Bateman A',
        'SQ': '67',
    }
    msa_positional_metadata = {
        'SS_cons': list('CCCCCHHHHHHHH'
                        'HHHHH..EEEEEE'
                        'EE....EEEEEEE'
                        'EEEH'),
    }
    row_labels = ['O83071/192-246', 'O31698/88-139', 'O31699/88-139']

    expected = TabularMSA(
        [first_row, second_row, third_row],
        metadata=msa_metadata,
        positional_metadata=msa_positional_metadata,
        index=row_labels)
    self.assertEqual(observed, expected)
def test_differing_data_lengths_gc_error(self):
    # Reading this fixture must raise a ValueError whose message matches
    # the pattern below.
    # NOTE(review): the unescaped parentheses in the pattern form a regex
    # group (matching '10'), not literal '(' and ')' -- confirm intent.
    path = get_data_path('stockholm_differing_gc_data_length')
    pattern = r'Number.*12.*(10).'
    with self.assertRaisesRegex(ValueError, pattern):
        _stockholm_to_tabular_msa(path, constructor=RNA)
def test_no_constructor_error(self):
    # Calling the reader without a `constructor` argument must raise a
    # ValueError.
    path = get_data_path('empty')
    pattern = r'Must provide.*parameter.'
    with self.assertRaisesRegex(ValueError, pattern):
        _stockholm_to_tabular_msa(path)
def test_differing_sequence_lengths_error(self):
    # Expect a ValueError whose message reports the '11 != 10' length
    # mismatch.
    path = get_data_path('stockholm_differing_seq_lengths')
    pattern = r'Each sequence.*11 != 10'
    with self.assertRaisesRegex(ValueError, pattern):
        _stockholm_to_tabular_msa(path, constructor=RNA)
def test_stockholm_missing_reference_number_error(self):
    # Expect a StockholmFormatError whose message mentions both the 'RN'
    # and 'RL' tags.
    path = get_data_path('stockholm_missing_rn_tag')
    pattern = r"Expected 'RN'.*'RL' tag."
    with self.assertRaisesRegex(StockholmFormatError, pattern):
        _stockholm_to_tabular_msa(path, constructor=DNA)
def test_stockholm_multiple_multi_line_trees(self):
    # The parsed result must equal an empty alignment whose 'NH' metadata
    # maps each of the two tree ids to its tree string.
    path = get_data_path('stockholm_multiple_multi_line_trees')
    observed = _stockholm_to_tabular_msa(path, constructor=DNA)

    expected_trees = {'tree1': 'ABCDEFGH', 'tree2': 'IJKLMNOP'}
    expected = TabularMSA([], metadata={'NH': expected_trees})
    self.assertEqual(observed, expected)
def test_duplicate_gc_error(self):
    # Expect a StockholmFormatError whose message names 'SS_cons' as a
    # duplicate GC entry.
    path = get_data_path('stockholm_duplicate_gc')
    pattern = ('Found duplicate GC.*SS_cons.*supported '
               'by the reader.')
    with self.assertRaisesRegex(StockholmFormatError, pattern):
        _stockholm_to_tabular_msa(path, constructor=DNA)
def test_stockholm_minimal(self):
    # The parsed result must equal a one-row DNA alignment labelled
    # '0235244'.
    path = get_data_path('stockholm_minimal')
    observed = _stockholm_to_tabular_msa(path, constructor=DNA)

    rows = [DNA('TGTGTCGCAGTTGTCGTTTG')]
    labels = ['0235244']
    expected = TabularMSA(rows, index=labels)
    self.assertEqual(observed, expected)
def test_empty_file_error(self):
    # Reading the 'empty' fixture with a valid constructor must raise a
    # StockholmFormatError stating that the file is empty.
    path = get_data_path('empty')
    pattern = 'File is empty.'
    with self.assertRaisesRegex(StockholmFormatError, pattern):
        _stockholm_to_tabular_msa(path, constructor=RNA)
def test_stockholm_no_data(self):
    # The parsed result must equal an alignment built from an empty
    # sequence list with no metadata.
    path = get_data_path('stockholm_no_data')
    observed = _stockholm_to_tabular_msa(path, constructor=DNA)

    expected = TabularMSA([])
    self.assertEqual(observed, expected)
def test_missing_footer_error(self):
    # Expect a StockholmFormatError whose message refers to the final
    # line and the "//" terminator.
    path = get_data_path('stockholm_missing_footer')
    pattern = 'Final line.*only "//".'
    with self.assertRaisesRegex(StockholmFormatError, pattern):
        _stockholm_to_tabular_msa(path, constructor=DNA)
def test_stockholm_with_whitespace_only_lines(self):
    # The parsed result must equal an empty alignment carrying only the
    # 'AL' and 'NM' metadata entries.
    path = get_data_path('stockholm_whitespace_only_lines')
    observed = _stockholm_to_tabular_msa(path, constructor=DNA)

    expected_metadata = {'AL': 'ABCD', 'NM': '1234'}
    expected = TabularMSA([], metadata=expected_metadata)
    self.assertEqual(observed, expected)
def test_malformed_gc_line_error(self):
    # Expect a StockholmFormatError whose message contrasts the 2 found
    # with the 3 required.
    path = get_data_path('stockholm_malformed_gc_line')
    pattern = 'Line contains 2.*must contain.*3.'
    with self.assertRaisesRegex(StockholmFormatError, pattern):
        _stockholm_to_tabular_msa(path, constructor=DNA)
def test_stockholm_metadata_only(self):
    # The parsed result must equal an empty alignment carrying only the
    # 'NM' and 'DT' metadata entries.
    path = get_data_path('stockholm_metadata_only')
    observed = _stockholm_to_tabular_msa(path, constructor=DNA)

    expected_metadata = {
        'NM': 'Kestrel Gorlick',
        'DT': 'February 5th, 2016',
    }
    expected = TabularMSA([], metadata=expected_metadata)
    self.assertEqual(observed, expected)