def _impl_test_aligner(self, name, fn):
    """Run the aligner called ``name`` on data file ``fn`` and verify its output.

    The result is compared with the reference alignment ``<name>.<fn>``.
    If that differs, numbered alternatives (``<name>.<fn>.1``, ``.2``, ...)
    are tried in order until one matches or no further file exists.

    Returns ``None`` when the aligner is unavailable (the test is skipped)
    or when the primary reference matches, and ``True`` when a numbered
    alternative matches.
    """
    filename = data_source_path(fn)
    alignment = Alignment()
    alignment.read_filepath(filename, 'FASTA')

    aln = self.get_aligner('%s' % name)
    if aln is None:
        _LOG.warn("test%s skipped" % name)
        return
    a = aln.run(alignment,
                tmp_dir_par=self.ts.top_level_temp,
                delete_temps=True)

    reference_fn = data_source_path('%s.%s' % (name, fn))
    reference_aln = Alignment()
    reference_aln.read_filepath(reference_fn, 'FASTA')
    _LOG.debug('Checking results from %s against %s' % (name, reference_fn))
    if reference_aln != a:
        i = 1
        while True:
            nrfn = reference_fn + '.' + str(i)
            if not os.path.exists(nrfn):
                # Out of alternatives: report the mismatch against the last
                # reference that was read.
                self.assertEqual(reference_aln, a)
                # Leave explicitly so the loop cannot spin forever should the
                # assertion pass (e.g. if != and == disagree).
                return
            reference_aln = Alignment()
            reference_aln.read_filepath(nrfn, 'FASTA')
            _LOG.debug('Checking results from %s against %s' % (name, nrfn))
            if reference_aln == a:
                self.assertEqual(reference_aln, a)
                return True
            i += 1
def test1000T(self):
    """Check that exactly one tree is read from ``1000T.tree``."""
    sd = SequenceDataset()
    # Context managers close the handles even when reading raises.
    # NOTE(review): assumes sd.read consumes the stream eagerly -- confirm.
    with open(data_source_path('1000T.fasta'), 'rU') as seq_stream:
        sd.read(seq_stream, file_format='FASTA', datatype='DNA')
    with open(data_source_path('1000T.tree'), 'rU') as tree_stream:
        tree_list = read_and_encode_splits(sd.dataset, tree_stream)
        self.assertEqual(len(tree_list), 1)
def setUp(self):
    """Create the top-level temp directory and load ``mafft.anolis.fasta``."""
    temp_fs = TempFS()
    self.ts = temp_fs
    temp_fs.create_top_level_temp(prefix='treeEstimatorTest',
                                  parent=os.curdir)

    self.filename = data_source_path('mafft.anolis.fasta')

    loaded = Alignment()
    self.alignment = loaded
    loaded.read_filepath(data_source_path('mafft.anolis.fasta'), 'FASTA')
def testCentroidEdge(self):
    """Read the single tree in ``100T.tree`` and run the centroid check on it."""
    sd = SequenceDataset()
    # Context managers close the handles even when reading raises.
    # NOTE(review): assumes sd.read consumes the stream eagerly -- confirm.
    with open(data_source_path('100T.fasta'), 'rU') as seq_stream:
        sd.read(seq_stream, file_format='FASTA', datatype='DNA')
    # The tree stream stays open until the test is finished with the trees.
    with open(data_source_path('100T.tree'), 'rU') as tree_stream:
        tree_list = read_and_encode_splits(sd.dataset, tree_stream)
        self.assertEqual(len(tree_list), 1)
        t = PhylogeneticTree(tree_list[0])
        self._do_test_centroid(t)
def testConcatenateAlignments(self):
    """Load ``small.fasta`` into two separate DNA alignments."""
    paths = [data_source_path('small.fasta'),
             data_source_path('small.fasta')]
    alignments = [Alignment(), Alignment()]
    for aln in alignments:
        aln.datatype = "DNA"
    for aln, path in zip(alignments, paths):
        aln.read_filepath(path, 'FASTA')
def setUp(self):
    """Record the input data paths and the ``get_path`` results used by the tests."""
    self.set_up()

    # Attributes resolved through data_source_path.
    for attr, data_name in (
        ("tiny_rna", "tinyrna.fasta"),
        ("small_rna", "smallrna.fasta"),
        ("small_tree", "small.tree"),
    ):
        setattr(self, attr, data_source_path(data_name))

    # Attributes resolved through self.get_path.
    for attr, path_name in (
        ("tiny_aln_path", ".marker001.tinyrna.aln"),
        ("small_aln_path", ".marker001.smallrna.aln"),
        ("init_aln_path", "_temp_iteration_initialsearch_seq_alignment.txt"),
        ("iter_aln_path", "_temp_iteration_0_seq_alignment.txt"),
        ("cfg_path", "_temp_sate_config.txt"),
    ):
        setattr(self, attr, self.get_path(path_name))
def setUp(self):
    """Record the input data paths and the ``get_path`` results used by the tests."""
    self.set_up()

    # Attributes resolved through data_source_path.
    data_attrs = (
        ('tiny_rna', 'tinyrna.fasta'),
        ('small_rna', 'smallrna.fasta'),
        ('small_tree', 'small.tree'),
    )
    for attr, data_name in data_attrs:
        setattr(self, attr, data_source_path(data_name))

    # Attributes resolved through self.get_path.
    path_attrs = (
        ('tiny_aln_path', '.marker001.tinyrna.aln'),
        ('small_aln_path', '.marker001.smallrna.aln'),
        ('init_aln_path', '_temp_iteration_initialsearch_seq_alignment.txt'),
        ('iter_aln_path', '_temp_iteration_0_seq_alignment.txt'),
        ('cfg_path', '_temp_sate_config.txt'),
    )
    for attr, path_name in path_attrs:
        setattr(self, attr, self.get_path(path_name))
def setUp(self):
    """Record DNA/RNA input paths plus per-datatype sub-directories and paths."""
    self.set_up()

    for attr, data_name in (
        ("dna", "small.fasta"),
        ("rna", "smallrna.fasta"),
        ("tree", "small.tree"),
    ):
        setattr(self, attr, data_source_path(data_name))

    self.dna_tmp = self.get_subdir("dna")
    self.rna_tmp = self.get_subdir("rna")

    # (attribute, name passed to get_path, parent directory), in the
    # original assignment order.
    for attr, path_name, parent in (
        ("dna_aln", ".marker001.small.aln", self.dna_tmp),
        ("dna_tree", ".tre", self.dna_tmp),
        ("rna_aln", ".marker001.smallrna.aln", self.rna_tmp),
        ("rna_tree", ".tre", self.rna_tmp),
        ("dna_score", ".score.txt", self.dna_tmp),
        ("rna_score", ".score.txt", self.rna_tmp),
        ("dna_tmp_aln", "_temp_iteration_0_seq_alignment.txt", self.dna_tmp),
        ("rna_tmp_aln", "_temp_iteration_0_seq_alignment.txt", self.rna_tmp),
    ):
        setattr(self, attr, self.get_path(name=path_name, parent_dir=parent))
def _impl_test_tree_estimator(self, name, datatype, partitions, **kwargs):
    """Run the tree estimator called ``name`` on ``anolis.fasta``.

    ``num_cpus`` may be given as a keyword argument; it is forwarded to the
    estimator only when truthy. Returns ``None`` early (after logging) when
    no estimator is available for ``name``.
    """
    num_cpus = kwargs.get('num_cpus', None)

    dataset = MultiLocusDataset()
    dataset.read_files(seq_filename_list=[data_source_path('anolis.fasta')],
                       datatype=datatype)
    dataset.relabel_for_pasta()

    te = self.get_tree_estimator(name)
    if te is None:
        _LOG.warn("test%s skipped" % name)
        return

    run_args = {
        'alignment': dataset,
        'partitions': partitions,
        'tmp_dir_par': self.ts.top_level_temp,
        'delete_temps': True,
    }
    if num_cpus:
        run_args['num_cpus'] = num_cpus
    a = te.run(**run_args)
def testDiagnoseBogus(self):
    """Expect parsing ``caenophidia_mos_bogus.fasta`` to raise in non-careful mode."""
    bogus_path = data_source_path('caenophidia_mos_bogus.fasta')
    candidate_types = ["DNA", "RNA", "PROTEIN"]
    self.assertRaises(Exception,
                      summary_stats_from_parse,
                      [bogus_path],
                      candidate_types,
                      careful_parse=False)
    _LOG.warn("WARNING: summary_stats_from_parse does not distinguish between all bogus sequences in 'careful' mode")
def testAlignment(self):
    """Read ``small.fasta``, check its taxon count and write it in other formats.

    Output files are written next to the source file, using the suffixes
    ``.phy``, ``.raw`` and ``.partial.raw``.
    """
    source = data_source_path('small.fasta')
    aln = Alignment()
    aln.read_filepath(source, 'FASTA')

    self.assertEqual(aln.get_num_taxa(), 32)

    aln.write_filepath(source + '.phy', 'PHYLIP')
    aln.write_unaligned_fasta(source + '.raw')

    # Sub-alignment restricted to the first two keys.
    first_two = aln.keys()[0:2]
    partial = aln.sub_alignment(first_two)
    partial.write_unaligned_fasta(source + '.partial.raw')
def setUp(self):
    """Record the mixed-datatype input files and the ``get_path`` results for them."""
    self.set_up()

    multi_dir = data_source_path("testmulti/")
    self.multi_dir = multi_dir
    mixed_dir = os.path.join(multi_dir, "mixed")
    self.multi_mixed_dir = mixed_dir

    self.in_path1 = os.path.join(mixed_dir, "tinydna.fasta")
    self.in_path2 = os.path.join(mixed_dir, "tinyrna.fasta")

    for attr, path_name in (
        ("aln_path1", ".marker001.tinydna.aln"),
        ("aln_path2", ".marker002.tinyrna.aln"),
        ("cfg_path", "_temp_sate_config.txt"),
        ("concat_path", "_temp_iteration_0_seq_alignment.txt"),
    ):
        setattr(self, attr, self.get_path(path_name))
def setUp(self):
    """Record the mixed-datatype input files and the ``get_path`` results for them."""
    self.set_up()

    self.multi_dir = data_source_path('testmulti/')
    self.multi_mixed_dir = os.path.join(self.multi_dir, 'mixed')

    # Input files inside the "mixed" directory.
    for attr, file_name in (('in_path1', 'tinydna.fasta'),
                            ('in_path2', 'tinyrna.fasta')):
        setattr(self, attr, os.path.join(self.multi_mixed_dir, file_name))

    # Paths resolved through self.get_path.
    for attr, path_name in (
        ('aln_path1', '.marker001.tinydna.aln'),
        ('aln_path2', '.marker002.tinyrna.aln'),
        ('cfg_path', '_temp_sate_config.txt'),
        ('concat_path', '_temp_iteration_0_seq_alignment.txt'),
    ):
        setattr(self, attr, self.get_path(path_name))
def testLongestBipartition(self):
    """Split the ``small.tree`` tree on its longest edge and check the leaf counts."""
    tree_path = data_source_path('small.tree')
    pt = self.phylogeneticTreeFromFile(tree_path, file_format='NEWICK')
    self.assertEqual(pt.n_leaves, 32)

    longest = pt.get_longest_edge()
    subtree1, subtree2 = pt.bipartition_by_edge(longest)

    # Compare sizes independently of the order the halves are returned in.
    sizes = sorted([subtree1.n_leaves, subtree2.n_leaves])
    self.assertEqual(sizes, [1, 31])
def _impl_test_merger(self, name):
    """Merge ``merger1.fasta`` and ``merger2.fasta`` with the merger called ``name``.

    The merged result must equal the reference in ``merger_result.fasta``.
    Returns ``None`` early (after logging) when the merger is unavailable.
    """
    filename = data_source_path('merger1.fasta')
    alignment1 = Alignment()
    alignment1.read_filepath(filename, 'FASTA')

    filename = data_source_path('merger2.fasta')
    alignment2 = Alignment()
    alignment2.read_filepath(filename, 'FASTA')

    aln = self.get_merger('%s merger' % name)
    if aln is None:
        _LOG.warn("test%s skipped" % name)
        return
    a = aln.run(alignment1,
                alignment2,
                tmp_dir_par=self.ts.top_level_temp,
                delete_temps=True)

    reference_fn = data_source_path('merger_result.fasta')
    reference_aln = Alignment()
    reference_aln.read_filepath(reference_fn, 'FASTA')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(reference_aln, a)
def setUp(self):
    """Copy ``tiny.fasta`` to a path whose directory and file name are non-ASCII.

    The sub-directory comes from ``self.get_subdir`` and the destination
    path from ``self.get_path``; the latter is stored in ``self.data_path``.
    """
    self.set_up()
    data_file = data_source_path("tiny.fasta")
    unicode_name = u"m\xe9ss\xfdp\xe4th"
    self.tmp_sub_dir = self.get_subdir(unicode_name)
    self.data_path = self.get_path(name=unicode_name + ".fasta",
                                   parent_dir=self.tmp_sub_dir)
    # Context managers close both handles even if the copy fails part-way.
    with open(data_file, "rU") as src:
        with open(self.data_path, "w") as out:
            for line in src:
                out.write(line)
def setUp(self):
    """Copy ``tiny.fasta`` to a path whose directory and file name contain spaces.

    The sub-directory comes from ``self.get_subdir`` and the destination
    path from ``self.get_path``; the latter is stored in ``self.data_path``.
    """
    self.set_up()
    data_file = data_source_path("tiny.fasta")
    space_name = "a path with a lot of spaces"
    self.tmp_sub_dir = self.get_subdir(space_name)
    self.data_path = self.get_path(name=space_name + ".fasta",
                                   parent_dir=self.tmp_sub_dir)
    # Context managers close both handles even if the copy fails part-way.
    with open(data_file, "rU") as src:
        with open(self.data_path, "w") as out:
            for line in src:
                out.write(line)
def testCentroidBipartition(self):
    """Split the ``diffDecomp.nex`` tree on its centroid edge and check the leaf counts."""
    tree_path = data_source_path('diffDecomp.nex')
    pt = self.phylogeneticTreeFromFile(tree_path, file_format='NEXUS')
    self.assertEqual(pt.n_leaves, 484)

    centroid = pt.get_centroid_edge()
    subtree1, subtree2 = pt.bipartition_by_edge(centroid)

    # Compare sizes independently of the order the halves are returned in.
    sizes = sorted([subtree1.n_leaves, subtree2.n_leaves])
    self.assertEqual(sizes, [231, 253])
def testMulti(self):
    """Run the main entry point on ``testmulti`` when EXHAUSTIVE tests are enabled."""
    # "<ClassName>.<method name>", used as the module_name for the gate.
    test_id = ".".join([self.__class__.__name__,
                        sys._getframe().f_code.co_name])
    if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
        return

    arguments = [
        '-m',
        '-i', data_source_path('testmulti'),
        '-o', self.ts.top_level_temp,
        '--temporaries=%s' % self.ts.top_level_temp,
        '-j', self.job_name,
        '--iter-limit=1',
    ]
    self._main_execution(arguments)
def testDiagnoseMulti(self):
    """Check ``summary_stats_from_parse`` on several pairs of input files.

    Each successful parse is checked on the first four entries of its
    result; the protein/RNA pair is expected to raise.
    """
    def check_stats(paths, expected):
        # Parse in non-careful mode and compare the leading result entries.
        stats = summary_stats_from_parse(paths, ["DNA", "RNA", "PROTEIN"],
                                         careful_parse=False)
        for position, wanted in enumerate(expected):
            self.assertEqual(stats[position], wanted)

    def check_mixed_rejected():
        # Protein + RNA input must raise; the warning records the limitation.
        self.assertRaises(Exception, summary_stats_from_parse,
                          [fp, fp3], ["DNA", "RNA", "PROTEIN"],
                          careful_parse=False)
        _LOG.warn("WARNING: summary_stats_from_parse will read multi with dna and protein as entirely protein. MIXED data type support is needed!")

    multi_dir = data_source_path('testmulti/caenophidia')
    fp = os.path.join(multi_dir, 'caenophidia_mos.fasta')
    fp2 = os.path.join(multi_dir, 'caenophidia_mos2.fasta')
    # two taxa names were changed and 5 were deleted, so the union is 116
    check_stats([fp, fp2], ["PROTEIN", [(114, 189), (109, 202)], 116, False])

    fp3 = data_source_path('smallrna.fasta')
    check_stats([fp3, fp3], ["RNA", [(32, 1650), (32, 1650)], 32, True])
    check_mixed_rejected()

    fp4 = data_source_path('small.fasta')
    fp5 = data_source_path('smallunaligned.fasta')
    check_stats([fp4, fp4], ["DNA", [(32, 1650), (32, 1650)], 32, True])
    check_mixed_rejected()

    fp4 = data_source_path('small.fasta')
    fp5 = data_source_path('smallunaligned.fasta')
    check_stats([fp4, fp5], ["DNA", [(32, 1650), (32, 1650)], 32, False])
    check_mixed_rejected()
def setUp(self):
    """Copy ``tiny.fasta`` to a path whose directory and file name contain spaces.

    The sub-directory comes from ``self.get_subdir`` and the destination
    path from ``self.get_path``; the latter is stored in ``self.data_path``.
    """
    self.set_up()
    data_file = data_source_path('tiny.fasta')
    space_name = 'a path with a lot of spaces'
    self.tmp_sub_dir = self.get_subdir(space_name)
    self.data_path = self.get_path(name=space_name + '.fasta',
                                   parent_dir=self.tmp_sub_dir)
    # Context managers close both handles even if the copy fails part-way.
    with open(data_file, 'rU') as src:
        with open(self.data_path, 'w') as out:
            for line in src:
                out.write(line)
def setUp(self):
    """Copy ``tiny.fasta`` to a path whose directory and file name use high-byte escapes.

    The sub-directory comes from ``self.get_subdir`` and the destination
    path from ``self.get_path``; the latter is stored in ``self.data_path``.
    """
    self.set_up()
    data_file = data_source_path('tiny.fasta')
    unicode_name = 'm\xe9ss\xfdp\xe4th'
    self.tmp_sub_dir = self.get_subdir(unicode_name)
    self.data_path = self.get_path(name=unicode_name + '.fasta',
                                   parent_dir=self.tmp_sub_dir)
    # Context managers close both handles even if the copy fails part-way.
    with open(data_file, 'rU') as src:
        with open(self.data_path, 'w') as out:
            for line in src:
                out.write(line)
def setUp(self):
    """Record the single-locus, multi-locus and ambiguity-code input paths."""
    self.set_up()

    for attr, data_name in (
        ('anolis_file', 'anolis.fasta'),
        ('caenophidia_file', 'caenophidia_mos.fasta'),
        ('multi_dir', 'testmulti/'),
    ):
        setattr(self, attr, data_source_path(data_name))

    # Sub-directories of the multi-locus directory.
    for attr, sub_name in (
        ('multi_aa_dir', 'caenophidia'),
        ('figwasp_dir', 'figwasps'),
        ('hummingbird_dir', 'hummingbirds'),
    ):
        setattr(self, attr, os.path.join(self.multi_dir, sub_name))

    for attr, data_name in (
        ('ambig_dna', 'small.ambiguities.fasta'),
        ('ambig_dna_tree', 'small.tree'),
        ('ambig_aa', 'caenophidia_mos.ambiguities.fasta'),
        ('ambig_aa_tree', 'caenophidia_mos.tre'),
    ):
        setattr(self, attr, data_source_path(data_name))
def setUp(self):
    """Record DNA/RNA input paths plus per-datatype sub-directories and paths."""
    self.set_up()

    self.dna = data_source_path('small.fasta')
    self.rna = data_source_path('smallrna.fasta')
    self.tree = data_source_path('small.tree')

    dna_dir = self.get_subdir('dna')
    self.dna_tmp = dna_dir
    rna_dir = self.get_subdir('rna')
    self.rna_tmp = rna_dir

    # (attribute, name passed to get_path, parent directory), in the
    # original assignment order.
    layout = (
        ('dna_aln', '.marker001.small.aln', dna_dir),
        ('dna_tree', '.tre', dna_dir),
        ('rna_aln', '.marker001.smallrna.aln', rna_dir),
        ('rna_tree', '.tre', rna_dir),
        ('dna_score', '.score.txt', dna_dir),
        ('rna_score', '.score.txt', rna_dir),
        ('dna_tmp_aln', '_temp_iteration_0_seq_alignment.txt', dna_dir),
        ('rna_tmp_aln', '_temp_iteration_0_seq_alignment.txt', rna_dir),
    )
    for attr, path_name, parent in layout:
        setattr(self, attr, self.get_path(name=path_name, parent_dir=parent))
def testDiagnoseProt(self):
    """Check that ``caenophidia_mos.fasta`` is diagnosed as PROTEIN.

    Both the careful and non-careful parse must report the same summary,
    and restricting the candidate datatypes to DNA/RNA must raise.
    """
    fp = data_source_path("caenophidia_mos.fasta")
    # Parenthesised form is valid on both Python 2 and Python 3.
    print(fp)
    s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"], careful_parse=False)
    self.assertEqual(s[0], "PROTEIN")
    self.assertEqual(s[1], [(114, 189)])
    self.assertEqual(s[2], 114)
    self.assertEqual(s[3], False)
    s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"], careful_parse=True)
    self.assertEqual(s[0], "PROTEIN")
    self.assertEqual(s[1], [(114, 189)])
    self.assertEqual(s[2], 114)
    self.assertEqual(s[3], False)
    self.assertRaises(Exception, summary_stats_from_parse, [fp], ["DNA", "RNA"], careful_parse=False)
    self.assertRaises(Exception, summary_stats_from_parse, [fp], ["DNA", "RNA"], careful_parse=True)
def testDiagnoseRNA(self):
    """Check that ``smallrna.fasta`` is diagnosed as RNA.

    Both the careful and non-careful parse must report the same summary,
    and a non-careful parse restricted to DNA/PROTEIN must raise.
    """
    fp = data_source_path("smallrna.fasta")
    # Parenthesised form is valid on both Python 2 and Python 3.
    print(fp)
    s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"], careful_parse=False)
    self.assertEqual(s[0], "RNA")
    self.assertEqual(s[1], [(32, 1650)])
    self.assertEqual(s[2], 32)
    self.assertEqual(s[3], True)
    s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"], careful_parse=True)
    self.assertEqual(s[0], "RNA")
    self.assertEqual(s[1], [(32, 1650)])
    self.assertEqual(s[2], 32)
    self.assertEqual(s[3], True)
    self.assertRaises(Exception, summary_stats_from_parse, [fp], ["DNA", "PROTEIN"], careful_parse=False)
    _LOG.warn("WARNING: summary_stats_from_parse does not distinguish between RNA and DNA in 'careful' mode")
def testDiagnoseDNA(self):
    """Check that ``small.fasta`` is diagnosed as DNA.

    Both the careful and non-careful parse must report the same summary,
    and restricting the candidate datatypes to RNA must raise.
    """
    fp = data_source_path("small.fasta")
    # Parenthesised form is valid on both Python 2 and Python 3.
    print(fp)
    s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"], careful_parse=False)
    self.assertEqual(s[0], "DNA")
    self.assertEqual(s[1], [(32, 1650)])
    self.assertEqual(s[2], 32)
    self.assertEqual(s[3], True)
    s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"], careful_parse=True)
    self.assertEqual(s[0], "DNA")
    self.assertEqual(s[1], [(32, 1650)])
    self.assertEqual(s[2], 32)
    self.assertEqual(s[3], True)
    self.assertRaises(Exception, summary_stats_from_parse, [fp], ["RNA"], careful_parse=False)
    self.assertRaises(Exception, summary_stats_from_parse, [fp], ["RNA"], careful_parse=True)
def testMulti(self):
    """Run the main entry point on ``testmulti`` when EXHAUSTIVE tests are enabled."""
    # "<ClassName>.<method name>", used as the module_name for the gate.
    name_parts = [self.__class__.__name__, sys._getframe().f_code.co_name]
    enabled = is_test_enabled(TestLevel.EXHAUSTIVE, _LOG,
                              module_name=".".join(name_parts))
    if enabled:
        command_line = ["-m", "-i", data_source_path("testmulti")]
        command_line += ["-o", self.ts.top_level_temp]
        command_line += ["--temporaries=%s" % self.ts.top_level_temp]
        command_line += ["-j", self.job_name, "--iter-limit=1"]
        self._main_execution(command_line)
def testDiagnoseMulti(self):
    """Check ``summary_stats_from_parse`` on several pairs of input files.

    Each successful parse is checked on the first four entries of its
    result; the protein/RNA pair is expected to raise.
    """
    mixed_warning = "WARNING: summary_stats_from_parse will read multi with dna and protein as entirely protein. MIXED data type support is needed!"

    def parse(paths):
        # Non-careful parse over the three candidate datatypes.
        return summary_stats_from_parse(paths, ["DNA", "RNA", "PROTEIN"], careful_parse=False)

    def expect(stats, datatype, shapes, taxa, aligned):
        self.assertEqual(stats[0], datatype)
        self.assertEqual(stats[1], shapes)
        self.assertEqual(stats[2], taxa)
        self.assertEqual(stats[3], aligned)

    def expect_mixed_failure():
        # Protein + RNA input must raise; the warning records the limitation.
        self.assertRaises(
            Exception, summary_stats_from_parse, [fp, fp3], ["DNA", "RNA", "PROTEIN"], careful_parse=False
        )
        _LOG.warn(mixed_warning)

    multi_dir = data_source_path("testmulti/caenophidia")
    fp = os.path.join(multi_dir, "caenophidia_mos.fasta")
    fp2 = os.path.join(multi_dir, "caenophidia_mos2.fasta")
    # two taxa names were changed and 5 were deleted, so the union is 116
    expect(parse([fp, fp2]), "PROTEIN", [(114, 189), (109, 202)], 116, False)

    fp3 = data_source_path("smallrna.fasta")
    expect(parse([fp3, fp3]), "RNA", [(32, 1650), (32, 1650)], 32, True)
    expect_mixed_failure()

    fp4 = data_source_path("small.fasta")
    fp5 = data_source_path("smallunaligned.fasta")
    expect(parse([fp4, fp4]), "DNA", [(32, 1650), (32, 1650)], 32, True)
    expect_mixed_failure()

    fp4 = data_source_path("small.fasta")
    fp5 = data_source_path("smallunaligned.fasta")
    expect(parse([fp4, fp5]), "DNA", [(32, 1650), (32, 1650)], 32, False)
    expect_mixed_failure()
def testDiagnoseDNA(self):
    """Check that ``small.fasta`` is diagnosed as DNA in both parse modes."""
    fp = data_source_path('small.fasta')
    print(fp)

    # Same expectations for the non-careful and then the careful parse.
    for careful in (False, True):
        stats = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"],
                                         careful_parse=careful)
        self.assertEqual(stats[0], "DNA")
        self.assertEqual(stats[1], [(32, 1650)])
        self.assertEqual(stats[2], 32)
        self.assertEqual(stats[3], True)

    # With only RNA as a candidate datatype, both modes must raise.
    for careful in (False, True):
        self.assertRaises(Exception, summary_stats_from_parse,
                          [fp], ["RNA"], careful_parse=careful)
def testDiagnoseProt(self):
    """Check that ``caenophidia_mos.fasta`` is diagnosed as PROTEIN in both parse modes."""
    fp = data_source_path('caenophidia_mos.fasta')
    print(fp)

    # Same expectations for the non-careful and then the careful parse.
    for careful in (False, True):
        stats = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"],
                                         careful_parse=careful)
        self.assertEqual(stats[0], "PROTEIN")
        self.assertEqual(stats[1], [(114, 189)])
        self.assertEqual(stats[2], 114)
        self.assertEqual(stats[3], False)

    # With only DNA/RNA as candidate datatypes, both modes must raise.
    for careful in (False, True):
        self.assertRaises(Exception, summary_stats_from_parse,
                          [fp], ["DNA", "RNA"], careful_parse=careful)
def testDiagnoseRNA(self):
    """Check that ``smallrna.fasta`` is diagnosed as RNA in both parse modes."""
    fp = data_source_path('smallrna.fasta')
    print(fp)

    # Same expectations for the non-careful and then the careful parse.
    for careful in (False, True):
        stats = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"],
                                         careful_parse=careful)
        self.assertEqual(stats[0], "RNA")
        self.assertEqual(stats[1], [(32, 1650)])
        self.assertEqual(stats[2], 32)
        self.assertEqual(stats[3], True)

    # Only the non-careful mode is expected to reject DNA/PROTEIN here.
    self.assertRaises(Exception, summary_stats_from_parse,
                      [fp], ["DNA", "PROTEIN"], careful_parse=False)
    _LOG.warn("WARNING: summary_stats_from_parse does not distinguish between RNA and DNA in 'careful' mode")
def testDNAFasta(self):
    """Read ``anolis.fasta`` as DNA FASTA into a ``SequenceDataset``."""
    sd = SequenceDataset()
    fp = data_source_path('anolis.fasta')
    # The context manager closes the handle even when reading raises.
    with open(fp, 'rU') as seq_stream:
        sd.read(seq_stream, file_format='FASTA', datatype='DNA')
def testDiagnoseBogus(self):
    """Expect parsing ``caenophidia_mos_bogus.fasta`` to raise in non-careful mode."""
    input_files = [data_source_path("caenophidia_mos_bogus.fasta")]
    self.assertRaises(
        Exception,
        summary_stats_from_parse,
        input_files,
        ["DNA", "RNA", "PROTEIN"],
        careful_parse=False,
    )
    _LOG.warn("WARNING: summary_stats_from_parse does not distinguish between all bogus sequences in 'careful' mode")
def setUp(self):
    """Run the shared set-up and record the path to ``tiny.lowercase.fasta``."""
    self.set_up()
    lowercase_input = data_source_path("tiny.lowercase.fasta")
    self.data = lowercase_input
def setUp(self):
    """Run the shared set-up and record the sequence and tree input paths."""
    self.set_up()
    for attr, data_name in (("data", "tiny.fasta"),
                            ("tree", "tiny_name_mismatch.tre")):
        setattr(self, attr, data_source_path(data_name))
def setUp(self):
    """Run the shared set-up and record the sequence and tree input paths."""
    self.set_up()
    sequence_path = data_source_path('tiny.fasta')
    self.data = sequence_path
    tree_path = data_source_path('tiny_name_mismatch.tre')
    self.tree = tree_path
def setUp(self):
    """Run the shared set-up and record the path to ``tiny.lowercase.fasta``."""
    self.set_up()
    setattr(self, 'data', data_source_path('tiny.lowercase.fasta'))