def _impl_test_tree_estimator(self, name, datatype, partitions, **kwargs):
    """Run the tree estimator called `name` on the 'anolis.fasta' data file.

    The file is read into a MultiLocusDataset as `datatype` and relabelled
    with `relabel_for_sate()` before being passed, with `partitions`, to
    the estimator's `run` method. If `get_tree_estimator` returns None the
    test is logged as skipped and nothing is run.

    Keyword arguments:
        num_cpus -- forwarded to `run` only when truthy; otherwise `run`
                    is called without a `num_cpus` argument at all.
    """
    cpu_count = kwargs.get('num_cpus', None)

    fasta_path = data_source_path('anolis.fasta')
    dataset = MultiLocusDataset()
    dataset.read_files(seq_filename_list=[fasta_path], datatype=datatype)
    dataset.relabel_for_sate()

    estimator = self.get_tree_estimator(name)
    if estimator is None:
        _LOG.warn("test%s skipped" % name)
        return

    # Build the call once; `num_cpus` is added only when it was requested
    # so that the estimator's own default applies otherwise.
    run_args = dict(alignment=dataset,
                    partitions=partitions,
                    tmp_dir_par=self.ts.top_level_temp,
                    delete_temps=True)
    if cpu_count:
        run_args['num_cpus'] = cpu_count
    estimator.run(**run_args)
def setUp(self):
    """Create a temp sub-directory, a FASTA file path and an empty dataset.

    After `self.set_up()` runs, this sets:
        self.tmp_sub_dir -- sub-directory created under
                            `self.ts.top_level_temp`
        self.data_path   -- '<job_name>_test.fasta' inside that directory
                            (only the path is built; no file is written)
        self.mlds        -- a freshly constructed MultiLocusDataset
    """
    self.set_up()

    temp_system = self.ts
    self.tmp_sub_dir = temp_system.create_temp_subdir(
        parent=temp_system.top_level_temp,
        prefix='MultiLocusDatasetTest')

    fasta_name = self.job_name + '_test.fasta'
    self.data_path = os.path.join(self.tmp_sub_dir, fasta_name)

    self.mlds = MultiLocusDataset()
class MultiLocusDatasetTest(SateTestCase):
    """Tests RNA <-> DNA conversion on a MultiLocusDataset read from FASTA.

    NOTE(review): `set_up`, `tear_down`, `register_files`, `parse_fasta_file`,
    `convert_rna_to_dna`, `assertSameDataSet`, `ts` and `job_name` are not
    defined in this class; presumably they come from SateTestCase.
    """

    def setUp(self):
        """Create a temp sub-directory, a FASTA path and an empty dataset."""
        self.set_up()
        self.tmp_sub_dir = self.ts.create_temp_subdir(
            parent=self.ts.top_level_temp,
            prefix='MultiLocusDatasetTest')
        self.data_path = os.path.join(self.tmp_sub_dir,
                                      self.job_name + '_test.fasta')
        self.mlds = MultiLocusDataset()

    def tearDown(self):
        """Register files, remove the temp sub-directory, then tear down."""
        self.register_files()
        self.ts.remove_dir(self.tmp_sub_dir)
        self.tear_down()

    def _create_seq_file(self, seq_str):
        """Write `seq_str` to `self.data_path`, replacing any existing file."""
        # The context manager closes the handle even if write() raises.
        with open(self.data_path, 'w') as out:
            out.write(seq_str)

    def _parse_seq_dataset(self, sd):
        """Return a dict of taxon label -> sequence string.

        The sequences are taken from the first character matrix of
        `sd.dataset`; each character vector is joined into one string.
        """
        d = {}
        for taxon, char_vec in sd.dataset.char_matrices[0].iteritems():
            d[taxon.label] = ''.join(char_vec)
        return d

    def _assert_matches_dataset(self, seqs):
        """Assert that `seqs` equals the sequences in the first locus."""
        self.assertSameDataSet([seqs, self._parse_seq_dataset(self.mlds[0])])

    def testRNAConversion(self):
        """Round-trip an RNA alignment through DNA and back again."""
        sf = StringIO()
        sf.write('>a\nAUGCAUGC\n')
        sf.write('>b\nAUGCAUGC\n')
        self._create_seq_file(sf.getvalue())
        # Rewind so the same buffer can be parsed as the expected data.
        sf.seek(0)
        seqs = self.parse_fasta_file(sf)

        self.mlds.read_files([self.data_path],
                             datatype='RNA',
                             file_format='FASTA')
        self.assertEqual(len(self.mlds), 1)
        self._assert_matches_dataset(seqs)

        # RNA -> DNA: every sequence must now contain 'T'.
        self.mlds.convert_rna_to_dna()
        seqs = self.convert_rna_to_dna(seqs)
        for _label, v in seqs.iteritems():
            self.assertTrue('T' in v)
        self._assert_matches_dataset(seqs)

        # Converting again must leave the already-DNA data unchanged.
        self.mlds.convert_rna_to_dna()
        self._assert_matches_dataset(seqs)

        # DNA -> RNA: no sequence may contain 'T' any more.
        self.mlds.convert_dna_to_rna()
        seqs = self.convert_rna_to_dna(seqs, reverse=True)
        for _label, v in seqs.iteritems():
            self.assertFalse('T' in v)
        self._assert_matches_dataset(seqs)

        # And forward once more to complete the round trip.
        self.mlds.convert_rna_to_dna()
        seqs = self.convert_rna_to_dna(seqs)
        for _label, v in seqs.iteritems():
            self.assertTrue('T' in v)
        self._assert_matches_dataset(seqs)
def read_input_sequences(seq_filename_list, datatype, missing=None):
    """Read sequence files into a new MultiLocusDataset and return it.

    All three arguments are forwarded unchanged, by keyword, to
    `MultiLocusDataset.read_files`.
    """
    dataset = MultiLocusDataset()
    read_options = dict(seq_filename_list=seq_filename_list,
                        datatype=datatype,
                        missing=missing)
    dataset.read_files(**read_options)
    return dataset