Esempio n. 1
0
 def test_nexus_header(self):
     """Check the taxon count exposed by a NEXUS dataset partitioned by gene."""
     nexus = Dataset(
         self.seq_records,
         format='NEXUS',
         partitioning='by gene',
     )
     # The count is compared against the string '10', not the integer 10.
     self.assertEqual('10', nexus.number_taxa)
Esempio n. 2
0
 def test_dataset(self):
     """FASTA output partitioned by gene must equal the ``dataset.fas`` fixture."""
     dataset = Dataset(self.test_data,
                       format='FASTA',
                       partitioning='by gene')
     result = dataset.dataset_str
     # Read the fixture through a context manager so the file handle is
     # closed deterministically instead of leaking until garbage collection.
     with open(os.path.join(FASTA_DATA_PATH, 'dataset.fas')) as handle:
         expected = handle.read()
     self.assertEqual(expected, result)
Esempio n. 3
0
 def test_generation_of_errors(self):
     """Building an amino-acid NEXUS dataset from 'TTTN--CAGTAG' must raise
     ``TranslationErrorMixedGappedSeq``.
     """
     first_record = self.seq_records[0]
     first_record.seq = 'TTTN--CAGTAG'
     dataset_options = {
         'format': 'NEXUS',
         'partitioning': 'by gene',
         'aminoacids': True,
     }
     # Callable form of assertRaises: the constructor call itself is the
     # only thing expected to raise.
     self.assertRaises(TranslationErrorMixedGappedSeq,
                       Dataset, self.seq_records, **dataset_options)
Esempio n. 4
0
 def test_dataset_as_aminoacids(self):
     """MEGA amino-acid output must equal the ``dataset_aa.meg`` fixture."""
     dataset = Dataset(test_data,
                       format='MEGA',
                       partitioning='by gene',
                       aminoacids=True)
     result = dataset.dataset_str
     # Context manager guarantees the fixture handle is closed promptly.
     with open(os.path.join(MEGA_DATA_PATH, 'dataset_aa.meg')) as handle:
         expected = handle.read()
     self.assertEqual(expected, result)
Esempio n. 5
0
 def test_generation_of_warnings(self):
     """The first warning of an amino-acid NEXUS dataset built from
     'TTTCAGTAG' must report stop codons for ArgKin / CP100_10.
     """
     self.seq_records[0].seq = 'TTTCAGTAG'
     translated = Dataset(
         self.seq_records,
         format='NEXUS',
         partitioning='by gene',
         aminoacids=True,
     )
     first_warning = translated.warnings[0]
     self.assertEqual(
         "Gene ArgKin, sequence CP100_10 contains stop codons '*'",
         first_warning,
     )
Esempio n. 6
0
 def test_dataset_all_codon_positions_partitioned_by_gene(self):
     """NEXUS output with ``codon_positions='ALL'`` partitioned by gene must
     equal the stripped contents of ``dataset.nex``.
     """
     dataset = Dataset(self.seq_records,
                       format='NEXUS',
                       codon_positions='ALL',
                       partitioning='by gene')
     test_data_file = os.path.join(NEXUS_DATA_PATH, 'dataset.nex')
     # Close the fixture deterministically rather than leaking the handle.
     with open(test_data_file, 'r') as handle:
         expected = handle.read()
     result = dataset.dataset_str
     # Only the fixture is stripped; the generated string is compared as-is.
     self.assertEqual(expected.strip(), result)
Esempio n. 7
0
 def test_dataset_as_degenerate(self):
     """MEGA output with ``degenerate='S'`` must equal the
     ``dataset_degenerate.meg`` fixture.
     """
     dataset = Dataset(test_data,
                       format='MEGA',
                       partitioning='by gene',
                       degenerate='S')
     result = dataset.dataset_str
     fixture_path = os.path.join(MEGA_DATA_PATH, 'dataset_degenerate.meg')
     # Context manager guarantees the fixture handle is closed promptly.
     with open(fixture_path) as handle:
         expected = handle.read()
     self.assertEqual(expected, result)
Esempio n. 8
0
 def test_codon_positions_parameter(self):
     """Constructing a ``DatasetFooter`` with ``codon_positions='1st-2nd, 3rd'``
     must raise ``AttributeError``.
     """
     nexus = Dataset(self.seq_records,
                     format='NEXUS',
                     partitioning='by gene')
     # Fetch the data outside the guarded region so that only the footer
     # construction is allowed to raise.
     footer_data = nexus.data
     with self.assertRaises(AttributeError):
         DatasetFooter(data=footer_data,
                       codon_positions='1st-2nd, 3rd',
                       partitioning='by gene')
Esempio n. 9
0
 def test_aminoacid_dataset(self):
     """NEXUS amino-acid output must equal the stripped ``dataset_aa.nex``
     fixture.
     """
     translated = Dataset(
         self.seq_records,
         format='NEXUS',
         partitioning='by gene',
         aminoacids=True,
     )
     produced = translated.dataset_str
     fixture_path = os.path.join(NEXUS_DATA_PATH, 'dataset_aa.nex')
     with open(fixture_path, 'r') as fixture:
         wanted = fixture.read().strip()
     self.assertEqual(wanted, produced)
Esempio n. 10
0
 def test_partitioned_by_1st2nd_3rd_only_3rd_codon_position(self):
     """FASTA output for 3rd codon positions under '1st-2nd, 3rd'
     partitioning must equal the ``dataset_3rd_numbers.fas`` fixture.
     """
     test_data = get_test_data("sample_data_numbers.txt")
     dataset = Dataset(test_data,
                       format='FASTA',
                       partitioning='1st-2nd, 3rd',
                       codon_positions='3rd')
     result = dataset.dataset_str
     fixture_path = os.path.join(FASTA_DATA_PATH, 'dataset_3rd_numbers.fas')
     # Context manager guarantees the fixture handle is closed promptly.
     with open(fixture_path) as handle:
         expected = handle.read()
     self.assertEqual(expected, result)
Esempio n. 11
0
 def test_dataset_1st_2nd_codon_positions_partitioned_by_gene(self):
     """NEXUS output for 1st-2nd codon positions partitioned by gene must
     equal the ``dataset_1st2nd_codons_numbers.nex`` fixture.
     """
     seq_records = get_test_data("seq_records", "sample_data_numbers.txt")
     dataset = Dataset(seq_records,
                       format='NEXUS',
                       codon_positions='1st-2nd',
                       partitioning='by gene')
     test_data_file = os.path.join(NEXUS_DATA_PATH,
                                   'dataset_1st2nd_codons_numbers.nex')
     # Close the fixture deterministically rather than leaking the handle.
     with open(test_data_file, 'r') as handle:
         expected = handle.read()
     result = dataset.dataset_str
     self.assertEqual(expected, result)
Esempio n. 12
0
    def test_dataset__with_long_name(self):
        """Test that we can output FASTA files with long names.

        Test that we don't truncate to 54 characters.
        """
        dataset = Dataset(self.test_data,
                          format='FASTA',
                          partitioning='by gene')
        result = dataset.dataset_str
        # Context manager guarantees the fixture handle is closed promptly.
        with open(os.path.join(FASTA_DATA_PATH, 'dataset.fas')) as handle:
            expected = handle.read()
        self.assertEqual(expected, result)
Esempio n. 13
0
    def test_make_partition_line(self):
        dataset = Dataset(test_data, format='NEXUS', partitioning='by gene')
        footer = DatasetFooter(data=dataset.data,
                               codon_positions='ALL',
                               partitioning='by gene')
        expected = """
partition GENES = 7: ArgKin, COI-begin, COI_end, ef1a, RpS2, RpS5, wingless;

set partition = GENES;
"""
        result = footer.make_partition_line()
        self.assertEqual(expected.strip(), result)
Esempio n. 14
0
    def test_dataset_with_gaps(self):
        """FASTA output must equal ``dataset_with_gaps.fas`` after the first
        three positions of the first record are replaced by gaps.
        """
        # Overwrite the first three characters of the first sequence with '-'.
        seq = list(self.test_data[0].seq)
        seq[:3] = "---"
        self.test_data[0].seq = "".join(seq)

        dataset = Dataset(self.test_data,
                          format='FASTA',
                          partitioning='by gene')
        result = dataset.dataset_str
        fixture_path = os.path.join(FASTA_DATA_PATH, 'dataset_with_gaps.fas')
        # Context manager guarantees the fixture handle is closed promptly.
        with open(fixture_path) as handle:
            expected = handle.read()
        self.assertEqual(expected, result)
Esempio n. 15
0
    def test_dataset_when_seqrecord_taxonomy_is_none(self):
        """A record whose taxonomy is ``None`` must still appear in the NEXUS
        output under its bare voucher code followed by padding.
        """
        raw_data = get_test_data('raw_data')
        raw_data[0]['taxonomy'] = None

        seq_records = [
            SeqRecordExpanded(entry['seq'],
                              voucher_code=entry['voucher_code'],
                              taxonomy=entry['taxonomy'],
                              gene_code=entry['gene_code'],
                              reading_frame=entry['reading_frame'],
                              table=entry['table'])
            for entry in raw_data
        ]

        dataset = Dataset(seq_records, format='NEXUS', partitioning='by gene')
        # assertIn reports both operands on failure, unlike assertTrue(x in y).
        self.assertIn('CP100_10      ', dataset.dataset_str)
Esempio n. 16
0
    def test_make_charset_block(self):
        dataset = Dataset(self.seq_records,
                          format='NEXUS',
                          partitioning='by gene')
        footer = DatasetFooter(data=dataset.data,
                               codon_positions='ALL',
                               partitioning='by gene')
        expected = """
begin mrbayes;
    charset ArgKin = 1-596;
    charset COI-begin = 597-1265;
    charset COI_end = 1266-2071;
    charset ef1a = 2072-3311;
    charset RpS2 = 3312-3722;
    charset RpS5 = 3723-4339;
    charset wingless = 4340-4739;
        """
        result = footer.charset_block
        self.assertEqual(expected.strip(), result)
Esempio n. 17
0
    def test_dataset_when_seqrecord_taxonomy_is_has_family(self):
        """A record with family, genus and species in its taxonomy must appear
        in the NEXUS output as ``CP100_10_Aussidae_Aus_aus`` plus padding.
        """
        raw_data = get_test_data('raw_data')
        raw_data[0]['taxonomy'] = {
            'family': 'Aussidae',
            'genus': 'Aus',
            'species': 'aus'
        }

        seq_records = [
            SeqRecordExpanded(entry['seq'],
                              voucher_code=entry['voucher_code'],
                              taxonomy=entry['taxonomy'],
                              gene_code=entry['gene_code'],
                              reading_frame=entry['reading_frame'],
                              table=entry['table'])
            for entry in raw_data
        ]

        dataset = Dataset(seq_records, format='NEXUS', partitioning='by gene')
        # assertIn reports both operands on failure, unlike assertTrue(x in y).
        self.assertIn('CP100_10_Aussidae_Aus_aus    ', dataset.dataset_str)
Esempio n. 18
0
 def test_charset_file(self):
     """``extra_dataset_str`` of a by-gene PHYLIP dataset must equal the
     stripped ``charset_block_file.txt`` fixture.
     """
     phylip = Dataset(self.test_data,
                      format='PHYLIP',
                      partitioning='by gene')
     produced = phylip.extra_dataset_str
     fixture_path = os.path.join(PHYLIP_DATA_PATH, 'charset_block_file.txt')
     with open(fixture_path, 'r') as fixture:
         wanted = fixture.read().strip()
     self.assertEqual(wanted, produced)
Esempio n. 19
0
 def test_dataset(self):
     """PHYLIP output partitioned by gene must equal ``dataset.phy`` once both
     sides are stripped of surrounding whitespace.
     """
     phylip = Dataset(self.test_data,
                      format='PHYLIP',
                      partitioning='by gene')
     produced = phylip.dataset_str
     fixture_path = os.path.join(PHYLIP_DATA_PATH, 'dataset.phy')
     with open(fixture_path, 'r') as fixture:
         wanted = fixture.read()
     self.assertEqual(wanted.strip(), produced.strip())
Esempio n. 20
0
 def test_dataset_with_degenerate(self):
     """TNT output with ``degenerate='S'`` must equal the right-stripped
     ``dataset_degenerate.tnt`` fixture.
     """
     dataset = Dataset(self.data, format='TNT', partitioning='by gene',
                       degenerate='S')
     result = dataset.dataset_str
     fixture_path = os.path.join(TNT_DATA_PATH, 'dataset_degenerate.tnt')
     # Context manager guarantees the fixture handle is closed promptly.
     with open(fixture_path) as handle:
         expected = handle.read()
     self.assertEqual(expected.rstrip(), result)
Esempio n. 21
0
 def test_aa_dataset_with_outgroup(self):
     """TNT amino-acid output with outgroup 'CP100-15' must equal the
     right-stripped ``dataset_aa_with_outgroup.tnt`` fixture.
     """
     fixture_path = os.path.join(TNT_DATA_PATH,
                                 'dataset_aa_with_outgroup.tnt')
     with open(fixture_path, "r") as fixture:
         wanted = fixture.read().rstrip()
     tnt = Dataset(
         self.data,
         format='TNT',
         partitioning='by gene',
         outgroup='CP100-15',
         aminoacids=True,
     )
     self.assertEqual(wanted, tnt.dataset_str)