def test_nexus_header(self):
    """A NEXUS dataset built from ``self.seq_records`` reports '10' taxa."""
    nexus = Dataset(self.seq_records, format='NEXUS', partitioning='by gene')
    taxa_count = nexus.number_taxa
    # ``number_taxa`` is compared against the string '10', not the integer.
    self.assertEqual('10', taxa_count)
def test_dataset(self):
    """Check the FASTA output against the stored ``dataset.fas`` file.

    A dataset is built from ``self.test_data`` partitioned by gene, and its
    ``dataset_str`` must equal the file contents exactly (no stripping).
    """
    dataset = Dataset(self.test_data, format='FASTA', partitioning='by gene')
    result = dataset.dataset_str
    # Read through a context manager so the handle is closed right away
    # instead of lingering until garbage collection.
    with open(os.path.join(FASTA_DATA_PATH, 'dataset.fas')) as handle:
        expected = handle.read()
    self.assertEqual(expected, result)
def test_generation_of_errors(self):
    """Building an amino-acid dataset from 'TTTN--CAGTAG' must raise.

    The first record's sequence is overwritten with 'TTTN--CAGTAG' and the
    ``Dataset`` constructor is expected to raise
    ``TranslationErrorMixedGappedSeq``.
    """
    first_record = self.seq_records[0]
    first_record.seq = 'TTTN--CAGTAG'
    # Callable form of assertRaises: only the constructor call is guarded.
    self.assertRaises(
        TranslationErrorMixedGappedSeq,
        Dataset,
        self.seq_records,
        format='NEXUS',
        partitioning='by gene',
        aminoacids=True,
    )
def test_dataset_as_aminoacids(self):
    """Check the MEGA amino-acid output against ``dataset_aa.meg``.

    The dataset is built from ``test_data`` with ``aminoacids=True`` and its
    ``dataset_str`` must equal the file contents exactly.
    """
    dataset = Dataset(test_data, format='MEGA', partitioning='by gene',
                      aminoacids=True)
    result = dataset.dataset_str
    # Context manager guarantees the file is closed once it has been read.
    with open(os.path.join(MEGA_DATA_PATH, 'dataset_aa.meg')) as handle:
        expected = handle.read()
    self.assertEqual(expected, result)
def test_generation_of_warnings(self):
    """The first warning reports stop codons for ArgKin / CP100_10.

    The first record's sequence is replaced with 'TTTCAGTAG' before an
    amino-acid NEXUS dataset is built; ``warnings[0]`` must then match the
    expected message exactly.
    """
    self.seq_records[0].seq = 'TTTCAGTAG'
    translated = Dataset(
        self.seq_records,
        format='NEXUS',
        partitioning='by gene',
        aminoacids=True,
    )
    message = "Gene ArgKin, sequence CP100_10 contains stop codons '*'"
    self.assertEqual(message, translated.warnings[0])
def test_dataset_all_codon_positions_partitioned_by_gene(self):
    """Check NEXUS output for all codon positions against ``dataset.nex``.

    The dataset is built with ``codon_positions='ALL'`` and partitioned by
    gene; its ``dataset_str`` must equal the stripped file contents.
    """
    dataset = Dataset(self.seq_records, format='NEXUS',
                      codon_positions='ALL', partitioning='by gene')
    test_data_file = os.path.join(NEXUS_DATA_PATH, 'dataset.nex')
    # Context manager closes the handle instead of leaking it.
    with open(test_data_file, 'r') as handle:
        expected = handle.read()
    result = dataset.dataset_str
    self.assertEqual(expected.strip(), result)
def test_dataset_as_degenerate(self):
    """Check MEGA output with ``degenerate='S'`` against its stored file.

    ``dataset_str`` must equal the contents of ``dataset_degenerate.meg``
    exactly.
    """
    dataset = Dataset(test_data, format='MEGA', partitioning='by gene',
                      degenerate='S')
    result = dataset.dataset_str
    # Context manager guarantees the file is closed once it has been read.
    with open(os.path.join(MEGA_DATA_PATH, 'dataset_degenerate.meg')) as handle:
        expected = handle.read()
    self.assertEqual(expected, result)
def test_codon_positions_parameter(self):
    """``DatasetFooter`` raises AttributeError for '1st-2nd, 3rd'.

    The footer is constructed with ``codon_positions='1st-2nd, 3rd'`` on data
    taken from a NEXUS dataset partitioned by gene.
    """
    nexus = Dataset(self.seq_records, format='NEXUS', partitioning='by gene')
    # Fetch the data before entering the guarded block so that only the
    # DatasetFooter call itself is checked for AttributeError.
    footer_data = nexus.data
    with self.assertRaises(AttributeError):
        DatasetFooter(
            data=footer_data,
            codon_positions='1st-2nd, 3rd',
            partitioning='by gene',
        )
def test_aminoacid_dataset(self):
    """Check the NEXUS amino-acid output against ``dataset_aa.nex``.

    The stored file is stripped before comparison; ``dataset_str`` is
    compared as produced.
    """
    translated = Dataset(
        self.seq_records,
        format='NEXUS',
        partitioning='by gene',
        aminoacids=True,
    )
    produced = translated.dataset_str
    fixture_path = os.path.join(NEXUS_DATA_PATH, 'dataset_aa.nex')
    with open(fixture_path, 'r') as fixture:
        stored = fixture.read()
    self.assertEqual(stored.strip(), produced)
def test_partitioned_by_1st2nd_3rd_only_3rd_codon_position(self):
    """Check FASTA output for 3rd codon positions, '1st-2nd, 3rd' partitions.

    Data is loaded with ``get_test_data("sample_data_numbers.txt")`` and the
    resulting ``dataset_str`` must equal ``dataset_3rd_numbers.fas`` exactly.
    """
    test_data = get_test_data("sample_data_numbers.txt")
    dataset = Dataset(test_data, format='FASTA', partitioning='1st-2nd, 3rd',
                      codon_positions='3rd')
    result = dataset.dataset_str
    # Context manager closes the handle instead of leaking it.
    fixture_path = os.path.join(FASTA_DATA_PATH, 'dataset_3rd_numbers.fas')
    with open(fixture_path) as handle:
        expected = handle.read()
    self.assertEqual(expected, result)
def test_dataset_1st_2nd_codon_positions_partitioned_by_gene(self):
    """Check NEXUS output restricted to 1st-2nd codon positions.

    Records come from
    ``get_test_data("seq_records", "sample_data_numbers.txt")``; the
    ``dataset_str`` must equal ``dataset_1st2nd_codons_numbers.nex`` exactly
    (no stripping).
    """
    seq_records = get_test_data("seq_records", "sample_data_numbers.txt")
    dataset = Dataset(seq_records, format='NEXUS', codon_positions='1st-2nd',
                      partitioning='by gene')
    test_data_file = os.path.join(NEXUS_DATA_PATH,
                                  'dataset_1st2nd_codons_numbers.nex')
    # Context manager closes the handle instead of leaking it.
    with open(test_data_file, 'r') as handle:
        expected = handle.read()
    result = dataset.dataset_str
    self.assertEqual(expected, result)
def test_dataset__with_long_name(self):
    """Test that we can output FASTA files with long names.

    Test that we don't truncate to 54 characters.
    """
    dataset = Dataset(self.test_data, format='FASTA', partitioning='by gene')
    result = dataset.dataset_str
    # Context manager guarantees the file is closed once it has been read.
    with open(os.path.join(FASTA_DATA_PATH, 'dataset.fas')) as handle:
        expected = handle.read()
    self.assertEqual(expected, result)
def test_make_partition_line(self):
    """``make_partition_line`` yields the seven-gene partition statement.

    The footer is built with ``codon_positions='ALL'`` and
    ``partitioning='by gene'`` from a NEXUS dataset's data; the expected text
    is stripped before comparison.
    """
    nexus = Dataset(test_data, format='NEXUS', partitioning='by gene')
    footer = DatasetFooter(
        data=nexus.data,
        codon_positions='ALL',
        partitioning='by gene',
    )
    # NOTE(review): literal copied verbatim from the reviewed text; confirm
    # its internal whitespace/line breaks against the footer's real output.
    wanted = """ partition GENES = 7: ArgKin, COI-begin, COI_end, ef1a, RpS2, RpS5, wingless; set partition = GENES; """
    partition_line = footer.make_partition_line()
    self.assertEqual(wanted.strip(), partition_line)
def test_dataset_with_gaps(self):
    """Check FASTA output when the first sequence starts with gaps.

    The first three characters of ``self.test_data[0].seq`` are replaced
    with '---' before the dataset is built; ``dataset_str`` must equal
    ``dataset_with_gaps.fas`` exactly.
    """
    seq = list(self.test_data[0].seq)
    seq[:3] = "---"
    self.test_data[0].seq = "".join(seq)
    dataset = Dataset(self.test_data, format='FASTA', partitioning='by gene')
    result = dataset.dataset_str
    # Context manager closes the handle instead of leaking it.
    with open(os.path.join(FASTA_DATA_PATH, 'dataset_with_gaps.fas')) as handle:
        expected = handle.read()
    self.assertEqual(expected, result)
def test_dataset_when_seqrecord_taxonomy_is_none(self):
    """A record whose taxonomy is ``None`` is labelled by voucher code only.

    The first raw record's ``taxonomy`` is set to ``None``; the NEXUS output
    must then contain ``'CP100_10 '`` (voucher code followed by a space).
    """
    raw_data = get_test_data('raw_data')
    raw_data[0]['taxonomy'] = None
    seq_records = [
        SeqRecordExpanded(
            record['seq'],
            voucher_code=record['voucher_code'],
            taxonomy=record['taxonomy'],
            gene_code=record['gene_code'],
            reading_frame=record['reading_frame'],
            table=record['table'],
        )
        for record in raw_data
    ]
    dataset = Dataset(seq_records, format='NEXUS', partitioning='by gene')
    # assertIn reports both operands on failure, unlike assertTrue(a in b).
    self.assertIn('CP100_10 ', dataset.dataset_str)
def test_make_charset_block(self):
    """``charset_block`` lists the seven gene charsets in a mrbayes block.

    The footer is built with ``codon_positions='ALL'`` and
    ``partitioning='by gene'``; the expected text is stripped before
    comparison.
    """
    nexus = Dataset(self.seq_records, format='NEXUS', partitioning='by gene')
    footer = DatasetFooter(
        data=nexus.data,
        codon_positions='ALL',
        partitioning='by gene',
    )
    # NOTE(review): literal copied verbatim from the reviewed text; confirm
    # its internal whitespace/line breaks against the footer's real output.
    wanted = """ begin mrbayes; charset ArgKin = 1-596; charset COI-begin = 597-1265; charset COI_end = 1266-2071; charset ef1a = 2072-3311; charset RpS2 = 3312-3722; charset RpS5 = 3723-4339; charset wingless = 4340-4739; """
    charsets = footer.charset_block
    self.assertEqual(wanted.strip(), charsets)
def test_dataset_when_seqrecord_taxonomy_is_has_family(self):
    """A record with family/genus/species gets all three in its label.

    The first raw record's ``taxonomy`` is set to family 'Aussidae', genus
    'Aus', species 'aus'; the NEXUS output must then contain
    ``'CP100_10_Aussidae_Aus_aus '``.
    """
    raw_data = get_test_data('raw_data')
    raw_data[0]['taxonomy'] = {
        'family': 'Aussidae',
        'genus': 'Aus',
        'species': 'aus',
    }
    seq_records = [
        SeqRecordExpanded(
            record['seq'],
            voucher_code=record['voucher_code'],
            taxonomy=record['taxonomy'],
            gene_code=record['gene_code'],
            reading_frame=record['reading_frame'],
            table=record['table'],
        )
        for record in raw_data
    ]
    dataset = Dataset(seq_records, format='NEXUS', partitioning='by gene')
    # assertIn reports both operands on failure, unlike assertTrue(a in b).
    self.assertIn('CP100_10_Aussidae_Aus_aus ', dataset.dataset_str)
def test_charset_file(self):
    """Check PHYLIP ``extra_dataset_str`` against ``charset_block_file.txt``.

    The stored file is stripped before comparison; ``extra_dataset_str`` is
    compared as produced.
    """
    phylip = Dataset(self.test_data, format='PHYLIP', partitioning='by gene')
    produced = phylip.extra_dataset_str
    fixture_path = os.path.join(PHYLIP_DATA_PATH, 'charset_block_file.txt')
    with open(fixture_path, 'r') as fixture:
        stored = fixture.read()
    self.assertEqual(stored.strip(), produced)
def test_dataset(self):
    """Check the PHYLIP output against the stored ``dataset.phy`` file.

    Both the file contents and ``dataset_str`` are stripped before being
    compared.
    """
    phylip = Dataset(self.test_data, format='PHYLIP', partitioning='by gene')
    produced = phylip.dataset_str
    fixture_path = os.path.join(PHYLIP_DATA_PATH, 'dataset.phy')
    with open(fixture_path, 'r') as fixture:
        stored = fixture.read()
    self.assertEqual(stored.strip(), produced.strip())
def test_dataset_with_degenerate(self):
    """Check TNT output with ``degenerate='S'`` against its stored file.

    ``dataset_str`` must equal the contents of ``dataset_degenerate.tnt``
    with trailing whitespace removed.
    """
    dataset = Dataset(self.data, format='TNT', partitioning='by gene',
                      degenerate='S')
    result = dataset.dataset_str
    # Context manager guarantees the file is closed once it has been read.
    with open(os.path.join(TNT_DATA_PATH, 'dataset_degenerate.tnt')) as handle:
        expected = handle.read()
    self.assertEqual(expected.rstrip(), result)
def test_aa_dataset_with_outgroup(self):
    """Check TNT amino-acid output with outgroup 'CP100-15'.

    ``dataset_str`` must equal the contents of
    ``dataset_aa_with_outgroup.tnt`` with trailing whitespace removed.
    """
    fixture_path = os.path.join(TNT_DATA_PATH, 'dataset_aa_with_outgroup.tnt')
    with open(fixture_path, "r") as fixture:
        stored = fixture.read()
    translated = Dataset(
        self.data,
        format='TNT',
        partitioning='by gene',
        outgroup='CP100-15',
        aminoacids=True,
    )
    self.assertEqual(stored.rstrip(), translated.dataset_str)