def test_2_no_seqid(self):
    """Expect MetadataCSV construction to fail for a csv without 'seqid'.

    Builds ``msa.MetadataCSV`` from the ``test_2_no_seqid.csv`` fixture and
    checks the text of the raised exception.
    """
    csv_path = self.csv_files_dir.joinpath("test_2_no_seqid.csv")
    csv_content = pathtools.get_file_content_stringio(csv_path)

    with self.assertRaises(Exception) as err:
        _ = msa.MetadataCSV(csv_content, csv_path)

    # Plain literal: the message has no placeholders, so neither an f-string
    # prefix nor quote escaping is needed. The compared value is unchanged.
    self.assertEqual("No 'seqid' column in metadata csv.",
                     str(err.exception))
def setUp(self):
    """Load ``../seq_metadata.csv`` and resolve the ``po_files`` directory.

    Both locations are computed relative to the directory of this file.
    """
    here = Path(__file__).parent

    metadata_path = here.joinpath("../seq_metadata.csv").resolve()
    metadata_stream = pathtools.get_file_content_stringio(metadata_path)
    self.metadatacsv = msa.MetadataCSV(metadata_stream, metadata_path)

    self.po_files_dir = here.joinpath("po_files").resolve()
def test_3_empty_file(self):
    """Expect MetadataCSV construction to fail for an empty csv file.

    Builds ``msa.MetadataCSV`` from the ``test_3_empty_file.csv`` fixture and
    checks the text of the raised exception.
    """
    csv_path = self.csv_files_dir.joinpath("test_3_empty_file.csv")
    csv_content = pathtools.get_file_content_stringio(csv_path)

    with self.assertRaises(Exception) as err:
        _ = msa.MetadataCSV(csv_content, csv_path)

    # Plain literal: the message has no placeholders, so the f-string prefix
    # was redundant. The compared value is unchanged.
    self.assertEqual("Empty csv file.", str(err.exception))
def setUp(self):
    """Prepare metadata, the gapped-maf fixture directory and a missing base.

    Paths are resolved relative to the directory containing this file.
    """
    base_dir = Path(__file__).parent
    metadata_path = base_dir.joinpath("../seq_metadata.csv").resolve()

    self.metadatacsv = msa.MetadataCSV(
        pathtools.get_file_content_stringio(metadata_path),
        metadata_path,
    )
    self.maf_files_dir = base_dir.joinpath("maf_files_with_gaps").resolve()
    self.missing_n = missings.MissingBase()
def test_7_not_unique_seqids(self):
    """Expect MetadataCSV construction to fail when seqid values repeat."""
    expected_message = ("Repeated values in seqid column in metadata file. "
                        "Make them unique.")
    fixture = self.csv_files_dir.joinpath("test_7_not_unique_seqids.csv")
    stream = pathtools.get_file_content_stringio(fixture)

    with self.assertRaises(Exception) as raised:
        msa.MetadataCSV(stream, fixture)

    self.assertEqual(expected_message, str(raised.exception))
def setUp(self):
    """Prepare metadata, the gapped-maf fixture directory and a fake provider.

    Paths are resolved relative to the directory containing this file.
    """
    test_dir = Path(__file__).parent

    metadata_path = test_dir.joinpath("../seq_metadata.csv").resolve()
    metadata_content = pathtools.get_file_content_stringio(metadata_path)
    self.metadatacsv = msa.MetadataCSV(metadata_content, metadata_path)

    self.maf_files_dir = test_dir.joinpath("maf_files_with_gaps").resolve()

    provider_cls = DAGMaf2PoagraphFakeFastaProviderTests.FakeFastaProvider
    self.fasta_provider = provider_cls()
def test_6_incorrect_commas_number(self):
    """Expect MetadataCSV construction to fail on a column-count mismatch."""
    expected_message = ("CSV metadata error. Different number of columns "
                        "in line 0 than in header line.")
    fixture = self.csv_files_dir.joinpath(
        "test_6_incorrect_commas_number.csv")
    stream = pathtools.get_file_content_stringio(fixture)

    with self.assertRaises(Exception) as raised:
        msa.MetadataCSV(stream, fixture)

    self.assertEqual(expected_message, str(raised.exception))
def test_9_get_seqids(self):
    """Check that get_all_sequences_ids returns s1, s2, s3 for the csv."""
    fixture = self.csv_files_dir.joinpath("test_1_correct.csv")
    stream = pathtools.get_file_content_stringio(fixture)
    wanted = [msa.SequenceID(name) for name in ('s1', 's2', 's3')]

    metadata = msa.MetadataCSV(stream, fixture)

    self.assertEqual(wanted, metadata.get_all_sequences_ids())
def test_10_metadata_feed_to_alignment_from_csv(self, test_name, maf_name,
                                                csv_name, po_name,
                                                expected_metadata):
    """Check that csv metadata ends up on the sequences of each poagraph.

    The same csv file is passed to ``builder.build_from_dagmaf``,
    ``builder.build_from_maf`` and ``builder.build_from_po``. For every
    resulting poagraph, the mapping of sequence id to ``seqmetadata`` must
    equal ``expected_metadata``.

    Args:
        test_name: Not used in the body (presumably a label supplied by a
            parametrizing decorator -- TODO confirm).
        maf_name: File name of the maf input inside ``alignment_files_dir``.
        csv_name: File name of the metadata csv inside ``csv_files_dir``.
        po_name: File name of the po input inside ``alignment_files_dir``.
        expected_metadata: Expected mapping of sequence id to metadata.
    """
    maf_path = self.alignment_files_dir.joinpath(maf_name)
    csv_path = self.csv_files_dir.joinpath(csv_name)
    po_path = self.alignment_files_dir.joinpath(po_name)

    def read(path):
        # Each builder call gets its own freshly read content object.
        return pathtools.get_file_content_stringio(path)

    def load_metadata():
        return msa.MetadataCSV(read(csv_path), csv_path)

    def assert_metadata(poagraph):
        actual_metadata = {
            seq_id: seq.seqmetadata
            for seq_id, seq in poagraph.sequences.items()
        }
        self.assertEqual(expected_metadata, actual_metadata)

    poagraph, _ = builder.build_from_dagmaf(
        msa.Maf(read(maf_path), maf_path),
        self.fasta_provider,
        load_metadata())
    assert_metadata(poagraph)

    poagraph = builder.build_from_maf(
        msa.Maf(read(maf_path), maf_path),
        load_metadata())
    assert_metadata(poagraph)

    # The po input is labelled with its own path; the original passed
    # maf_path here, which mislabels the file (copy-paste slip).
    poagraph = builder.build_from_po(
        msa.Po(read(po_path), po_path),
        load_metadata())
    assert_metadata(poagraph)
def test_4_seqid_is_last(self):
    """Check the parsed metadata for the ``test_4_seqid_is_last.csv`` fixture.

    The ``metadata`` attribute must map each sequence id to its 'name' and
    'group' values.
    """
    fixture = self.csv_files_dir.joinpath("test_4_seqid_is_last.csv")
    stream = pathtools.get_file_content_stringio(fixture)

    rows = (
        ('s1', 'sequence1', 'A'),
        ('s2', 'sequence2', 'B'),
        ('s3', 'sequence3', 'B'),
    )
    wanted = {
        msa.SequenceID(seqid): {'name': name, 'group': group}
        for seqid, name, group in rows
    }

    parsed = msa.MetadataCSV(stream, fixture)

    self.assertEqual(wanted, parsed.metadata)
def setUp(self):
    """Prepare metadata, the cycles/reversion maf directory and a provider.

    Paths are resolved relative to the directory containing this file; the
    fasta provider is a ``ConstBaseProvider`` built from a ``MissingBase``.
    """
    root = Path(__file__).parent

    metadata_path = root.joinpath("../seq_metadata.csv").resolve()
    metadata_content = pathtools.get_file_content_stringio(metadata_path)
    self.metadatacsv = msa.MetadataCSV(metadata_content, metadata_path)

    maf_dir = root.joinpath("maf_files_with_cycles_or_reversion")
    self.maf_files_dir = maf_dir.resolve()

    missing_base = missings.MissingBase()
    self.fasta_provider = missings.ConstBaseProvider(missing_base)