def test_2_no_seqid(self):
     """Building ``msa.MetadataCSV`` from the no-seqid fixture must raise.

     The fixture ``test_2_no_seqid.csv`` is read from ``self.csv_files_dir``
     and the text of the raised exception is compared with the expected
     "missing seqid column" message.
     """
     csv_path = self.csv_files_dir.joinpath("test_2_no_seqid.csv")
     csv_content = pathtools.get_file_content_stringio(csv_path)
     with self.assertRaises(Exception) as err:
         msa.MetadataCSV(csv_content, csv_path)
     # Plain literal: there is nothing to interpolate, so no f-prefix and
     # no backslash-escaped quotes are needed inside a double-quoted string.
     self.assertEqual("No 'seqid' column in metadata csv.",
                      str(err.exception))
 def setUp(self):
     """Load the shared metadata CSV and locate the ``po_files`` directory."""
     here = Path(__file__).parent
     # seq_metadata.csv is addressed one directory above this test module.
     metadata_csv = (here / "../seq_metadata.csv").resolve()
     metadata_stream = pathtools.get_file_content_stringio(metadata_csv)
     self.metadatacsv = msa.MetadataCSV(metadata_stream, metadata_csv)
     self.po_files_dir = (here / "po_files").resolve()
    def test_3_empty_file(self):
        """Building ``msa.MetadataCSV`` from the empty-file fixture must raise.

        The fixture ``test_3_empty_file.csv`` is read from
        ``self.csv_files_dir`` and the text of the raised exception is
        compared with the expected "empty csv" message.
        """
        csv_path = self.csv_files_dir.joinpath("test_3_empty_file.csv")

        csv_content = pathtools.get_file_content_stringio(csv_path)
        with self.assertRaises(Exception) as err:
            msa.MetadataCSV(csv_content, csv_path)
        # Plain literal: the message has no placeholders, so no f-prefix.
        self.assertEqual("Empty csv file.", str(err.exception))
 def setUp(self):
     """Load the shared metadata CSV, locate the gapped MAF fixtures and
     create the missing-base marker used by the tests."""
     module_dir = Path(__file__).parent
     # seq_metadata.csv is addressed one directory above this test module.
     csv_location = (module_dir / "../seq_metadata.csv").resolve()
     self.metadatacsv = msa.MetadataCSV(
         pathtools.get_file_content_stringio(csv_location),
         csv_location,
     )
     self.maf_files_dir = (module_dir / "maf_files_with_gaps").resolve()
     self.missing_n = missings.MissingBase()
    def test_7_not_unique_seqids(self):
        """The repeated-seqid fixture must make ``msa.MetadataCSV`` raise
        with the "make them unique" message."""
        fixture = self.csv_files_dir.joinpath("test_7_not_unique_seqids.csv")
        stream = pathtools.get_file_content_stringio(fixture)
        expected_message = (
            "Repeated values in seqid column in metadata file. Make them unique."
        )

        with self.assertRaises(Exception) as caught:
            msa.MetadataCSV(stream, fixture)

        self.assertEqual(expected_message, str(caught.exception))
Example #6
0
 def setUp(self):
     """Load the shared metadata CSV, locate the gapped MAF fixtures and
     install the fake FASTA provider."""
     base_dir = Path(__file__).parent
     # seq_metadata.csv is addressed one directory above this test module.
     csv_file = (base_dir / "../seq_metadata.csv").resolve()
     csv_stream = pathtools.get_file_content_stringio(csv_file)
     self.metadatacsv = msa.MetadataCSV(csv_stream, csv_file)
     self.maf_files_dir = (base_dir / "maf_files_with_gaps").resolve()
     provider_cls = DAGMaf2PoagraphFakeFastaProviderTests.FakeFastaProvider
     self.fasta_provider = provider_cls()
    def test_6_incorrect_commas_number(self):
        """The incorrect-commas fixture must make ``msa.MetadataCSV`` raise
        with the column-count mismatch message."""
        fixture_name = "test_6_incorrect_commas_number.csv"
        fixture = self.csv_files_dir.joinpath(fixture_name)
        stream = pathtools.get_file_content_stringio(fixture)
        expected_message = (
            "CSV metadata error. Different number of columns in line 0 than in header line."
        )

        with self.assertRaises(Exception) as caught:
            msa.MetadataCSV(stream, fixture)

        self.assertEqual(expected_message, str(caught.exception))
    def test_9_get_seqids(self):
        """``get_all_sequences_ids`` on the ``test_1_correct.csv`` fixture
        must return the IDs s1, s2, s3 in that order."""
        fixture = self.csv_files_dir.joinpath("test_1_correct.csv")
        stream = pathtools.get_file_content_stringio(fixture)

        wanted = [msa.SequenceID(label) for label in ('s1', 's2', 's3')]

        metadata = msa.MetadataCSV(stream, fixture)

        self.assertEqual(wanted, metadata.get_all_sequences_ids())
    def test_10_metadata_feed_to_alignment_from_csv(self, test_name, maf_name,
                                                    csv_name, po_name,
                                                    expected_metadata):
        """Check that CSV metadata ends up on the sequences of each poagraph.

        The same CSV is fed, together with the alignment input, to
        ``builder.build_from_dagmaf``, ``builder.build_from_maf`` and
        ``builder.build_from_po``; for each resulting poagraph the mapping
        ``sequence id -> seqmetadata`` is compared with ``expected_metadata``.

        Args:
            test_name: Not used in the body; presumably a label supplied by
                a parametrising decorator outside this view.
            maf_name: MAF file name inside ``self.alignment_files_dir``.
            csv_name: Metadata CSV file name inside ``self.csv_files_dir``.
            po_name: PO file name inside ``self.alignment_files_dir``.
            expected_metadata: Expected ``sequence id -> seqmetadata`` dict.
        """
        maf_path = self.alignment_files_dir.joinpath(maf_name)
        csv_path = self.csv_files_dir.joinpath(csv_name)
        po_path = self.alignment_files_dir.joinpath(po_name)

        def fresh_maf():
            # Each builder receives its own freshly read Maf object.
            return msa.Maf(pathtools.get_file_content_stringio(maf_path),
                           maf_path)

        def fresh_metadata():
            # Each builder receives its own freshly read MetadataCSV object.
            return msa.MetadataCSV(
                pathtools.get_file_content_stringio(csv_path), csv_path)

        def check_metadata(poagraph):
            actual_metadata = {
                seq_id: seq.seqmetadata
                for seq_id, seq in poagraph.sequences.items()
            }
            self.assertEqual(expected_metadata, actual_metadata)

        poagraph, _ = builder.build_from_dagmaf(fresh_maf(),
                                                self.fasta_provider,
                                                fresh_metadata())
        check_metadata(poagraph)

        poagraph = builder.build_from_maf(fresh_maf(), fresh_metadata())
        check_metadata(poagraph)

        # The Po object is labelled with the PO file it was read from
        # (po_path), not with the MAF path.
        poagraph = builder.build_from_po(
            msa.Po(pathtools.get_file_content_stringio(po_path), po_path),
            fresh_metadata())
        check_metadata(poagraph)
    def test_4_seqid_is_last(self):
        """``MetadataCSV.metadata`` for the ``test_4_seqid_is_last.csv``
        fixture must map each sequence ID to its name and group."""
        fixture = self.csv_files_dir.joinpath("test_4_seqid_is_last.csv")
        stream = pathtools.get_file_content_stringio(fixture)

        # (seqid, name, group) rows expected from the fixture.
        rows = (
            ('s1', 'sequence1', 'A'),
            ('s2', 'sequence2', 'B'),
            ('s3', 'sequence3', 'B'),
        )
        wanted = {
            msa.SequenceID(seqid): {'name': name, 'group': group}
            for seqid, name, group in rows
        }

        parsed = msa.MetadataCSV(stream, fixture)

        self.assertEqual(wanted, parsed.metadata)
 def setUp(self):
     """Load the shared metadata CSV, locate the cycles/reversion MAF
     fixtures and build a constant-base FASTA provider."""
     test_dir = Path(__file__).parent
     # seq_metadata.csv is addressed one directory above this test module.
     csv_file = (test_dir / "../seq_metadata.csv").resolve()
     self.metadatacsv = msa.MetadataCSV(
         pathtools.get_file_content_stringio(csv_file),
         csv_file,
     )
     self.maf_files_dir = (
         test_dir / "maf_files_with_cycles_or_reversion").resolve()
     filler_base = missings.MissingBase()
     self.fasta_provider = missings.ConstBaseProvider(filler_base)