Example #1
0
    def _impl_test_aligner(self, name, fn):
        """Run aligner ``name`` on data file ``fn`` and compare with reference output.

        The result is compared with ``<name>.<fn>`` and, if that differs, with the
        numbered alternatives ``<name>.<fn>.1``, ``<name>.<fn>.2``, ... in turn.
        When no alternative matches, the test fails against the last reference read.
        If ``get_aligner`` returns ``None`` a warning is logged and nothing is checked.

        Returns ``True`` when a numbered alternative matched, ``None`` otherwise.
        """
        filename = data_source_path(fn)
        alignment = Alignment()
        alignment.read_filepath(filename, 'FASTA')

        aln = self.get_aligner('%s' % name)
        if aln is None:
            _LOG.warn("test%s skipped" % name)
            return
        a = aln.run(alignment,
                    tmp_dir_par=self.ts.top_level_temp,
                    delete_temps=True)

        reference_fn = data_source_path('%s.%s' % (name, fn))
        reference_aln = Alignment()
        reference_aln.read_filepath(reference_fn, 'FASTA')
        _LOG.debug('Checking results from %s against %s' % (name, reference_fn))
        if reference_aln != a:
            i = 1
            while True:
                nrfn = reference_fn + '.' + str(i)
                if not os.path.exists(nrfn):
                    # Out of alternatives: assert against the last reference read.
                    # The explicit return ends the loop even if the assertion
                    # passes (possible when `!=` and `==` disagree).
                    self.assertEqual(reference_aln, a)
                    return
                reference_aln = Alignment()
                reference_aln.read_filepath(nrfn, 'FASTA')
                _LOG.debug('Checking results from %s against %s' % (name, nrfn))
                if reference_aln == a:
                    self.assertEqual(reference_aln, a)
                    return True
                i += 1
Example #2
0
    def _impl_test_aligner(self, name, fn):
        """Align data file ``fn`` with aligner ``name`` and check the result.

        The output is accepted if it equals the reference ``<name>.<fn>`` or any
        of the numbered alternatives ``<name>.<fn>.1``, ``<name>.<fn>.2``, ...
        probed in increasing order until one is missing. A warning is logged and
        the check is skipped when ``get_aligner`` returns ``None``.

        Returns ``True`` when a numbered alternative matched, ``None`` otherwise.
        """
        filename = data_source_path(fn)
        alignment = Alignment()
        alignment.read_filepath(filename, 'FASTA')

        aln = self.get_aligner('%s' % name)
        if aln is None:
            _LOG.warn("test%s skipped" % name)
            return
        a = aln.run(alignment,
                    tmp_dir_par=self.ts.top_level_temp,
                    delete_temps=True)

        reference_fn = data_source_path('%s.%s' % (name, fn))
        reference_aln = Alignment()
        reference_aln.read_filepath(reference_fn, 'FASTA')
        _LOG.debug('Checking results from %s against %s' %
                   (name, reference_fn))
        if reference_aln != a:
            i = 1
            while True:
                nrfn = reference_fn + '.' + str(i)
                if not os.path.exists(nrfn):
                    # No further alternative exists: fail against the last
                    # reference read. Returning here guarantees the loop
                    # terminates even when the assertion does not raise.
                    self.assertEqual(reference_aln, a)
                    return
                reference_aln = Alignment()
                reference_aln.read_filepath(nrfn, 'FASTA')
                _LOG.debug('Checking results from %s against %s' %
                           (name, nrfn))
                if reference_aln == a:
                    self.assertEqual(reference_aln, a)
                    return True
                i += 1
Example #3
0
 def test1000T(self):
     # Read the 1000T sequences as DNA, then the 1000T tree file, and expect
     # exactly one tree in the result.
     sd = SequenceDataset()
     fp = data_source_path('1000T.fasta')
     # Context managers close each handle even if parsing raises.
     # NOTE(review): the 'U' mode flag was removed in Python 3.11; drop it once
     # Python 2 support is no longer required.
     with open(fp, 'rU') as seq_stream:
         sd.read(seq_stream, file_format='FASTA', datatype='DNA')
     fp = data_source_path('1000T.tree')
     with open(fp, 'rU') as tree_stream:
         tree_list = read_and_encode_splits(sd.dataset, tree_stream)
     self.assertEqual(len(tree_list), 1)
Example #4
0
 def setUp(self):
     """Create a top-level temp area rooted at ``os.curdir`` and load the test alignment."""
     fasta_name = 'mafft.anolis.fasta'
     self.ts = TempFS()
     self.ts.create_top_level_temp(parent=os.curdir,
                                   prefix='treeEstimatorTest')
     self.filename = data_source_path(fasta_name)
     loaded = Alignment()
     self.alignment = loaded
     loaded.read_filepath(data_source_path(fasta_name), 'FASTA')
Example #5
0
 def test1000T(self):
     # Parse the 1000T FASTA file as DNA, read the 1000T tree against the
     # resulting dataset, and require that exactly one tree comes back.
     sd = SequenceDataset()
     fp = data_source_path('1000T.fasta')
     # `with` releases the file handles deterministically, also on failure.
     # NOTE(review): the 'U' mode flag was removed in Python 3.11; drop it once
     # Python 2 support is no longer required.
     with open(fp, 'rU') as fasta_in:
         sd.read(fasta_in, file_format='FASTA', datatype='DNA')
     fp = data_source_path('1000T.tree')
     with open(fp, 'rU') as tree_in:
         tree_list = read_and_encode_splits(sd.dataset, tree_in)
     self.assertEqual(len(tree_list), 1)
Example #6
0
 def testCentroidEdge(self):
     # Read the 100T sequences and tree, wrap the single tree in a
     # PhylogeneticTree and hand it to the shared centroid check.
     sd = SequenceDataset()
     fp = data_source_path('100T.fasta')
     # Context managers close each handle even if parsing raises.
     # NOTE(review): the 'U' mode flag was removed in Python 3.11; drop it once
     # Python 2 support is no longer required.
     with open(fp, 'rU') as seq_stream:
         sd.read(seq_stream, file_format='FASTA', datatype='DNA')
     fp = data_source_path('100T.tree')
     with open(fp, 'rU') as tree_stream:
         tree_list = read_and_encode_splits(sd.dataset, tree_stream)
     self.assertEqual(len(tree_list), 1)
     t = PhylogeneticTree(tree_list[0])
     self._do_test_centroid(t)
Example #7
0
 def testConcatenateAlignments(self):
     # Load small.fasta into two independent alignments, both typed as DNA.
     filename1 = data_source_path('small.fasta')
     filename2 = data_source_path('small.fasta')
     a, b = Alignment(), Alignment()
     a.datatype = b.datatype = "DNA"
     for target, path in ((a, filename1), (b, filename2)):
         target.read_filepath(path, 'FASTA')
Example #8
0
 def testConcatenateAlignments(self):
     # Two DNA alignments are populated from the same small.fasta source.
     source_name = 'small.fasta'
     filename1 = data_source_path(source_name)
     filename2 = data_source_path(source_name)
     a = Alignment()
     b = Alignment()
     for aln in (a, b):
         aln.datatype = "DNA"
     for aln, path in zip((a, b), (filename1, filename2)):
         aln.read_filepath(path, 'FASTA')
Example #9
0
 def testCentroidEdge(self):
     # Build a PhylogeneticTree from the single tree in 100T.tree (read against
     # the 100T.fasta dataset) and run the shared centroid check on it.
     sd = SequenceDataset()
     fp = data_source_path('100T.fasta')
     # `with` releases the file handles deterministically, also on failure.
     # NOTE(review): the 'U' mode flag was removed in Python 3.11; drop it once
     # Python 2 support is no longer required.
     with open(fp, 'rU') as fasta_in:
         sd.read(fasta_in, file_format='FASTA', datatype='DNA')
     fp = data_source_path('100T.tree')
     with open(fp, 'rU') as tree_in:
         tree_list = read_and_encode_splits(sd.dataset, tree_in)
     self.assertEqual(len(tree_list), 1)
     t = PhylogeneticTree(tree_list[0])
     self._do_test_centroid(t)
Example #10
0
 def setUp(self):
     """Run ``set_up`` and record data-source paths and ``get_path`` results."""
     self.set_up()

     # Paths resolved through data_source_path
     source = data_source_path
     self.tiny_rna = source("tinyrna.fasta")
     self.small_rna = source("smallrna.fasta")
     self.small_tree = source("small.tree")

     # Paths resolved through self.get_path
     located = self.get_path
     self.tiny_aln_path = located(".marker001.tinyrna.aln")
     self.small_aln_path = located(".marker001.smallrna.aln")
     self.init_aln_path = located("_temp_iteration_initialsearch_seq_alignment.txt")
     self.iter_aln_path = located("_temp_iteration_0_seq_alignment.txt")
     self.cfg_path = located("_temp_sate_config.txt")
Example #11
0
 def setUp(self):
     """Run ``set_up`` and store source and ``get_path`` paths as attributes."""
     self.set_up()

     # attribute name -> file name resolved with data_source_path
     source_files = (
         ('tiny_rna', 'tinyrna.fasta'),
         ('small_rna', 'smallrna.fasta'),
         ('small_tree', 'small.tree'),
     )
     for attr, fname in source_files:
         setattr(self, attr, data_source_path(fname))

     # attribute name -> file name resolved with self.get_path
     located_files = (
         ('tiny_aln_path', '.marker001.tinyrna.aln'),
         ('small_aln_path', '.marker001.smallrna.aln'),
         ('init_aln_path', '_temp_iteration_initialsearch_seq_alignment.txt'),
         ('iter_aln_path', '_temp_iteration_0_seq_alignment.txt'),
         ('cfg_path', '_temp_sate_config.txt'),
     )
     for attr, fname in located_files:
         setattr(self, attr, self.get_path(fname))
Example #12
0
 def setUp(self):
     """Run ``set_up``, then record DNA/RNA inputs, sub-directories and per-directory paths."""
     self.set_up()
     self.dna = data_source_path("small.fasta")
     self.rna = data_source_path("smallrna.fasta")
     self.tree = data_source_path("small.tree")
     self.dna_tmp = self.get_subdir("dna")
     self.rna_tmp = self.get_subdir("rna")

     def dna_path(name):
         # get_path result inside the DNA sub-directory
         return self.get_path(name=name, parent_dir=self.dna_tmp)

     def rna_path(name):
         # get_path result inside the RNA sub-directory
         return self.get_path(name=name, parent_dir=self.rna_tmp)

     self.dna_aln = dna_path(".marker001.small.aln")
     self.dna_tree = dna_path(".tre")
     self.rna_aln = rna_path(".marker001.smallrna.aln")
     self.rna_tree = rna_path(".tre")
     self.dna_score = dna_path(".score.txt")
     self.rna_score = rna_path(".score.txt")
     self.dna_tmp_aln = dna_path("_temp_iteration_0_seq_alignment.txt")
     self.rna_tmp_aln = rna_path("_temp_iteration_0_seq_alignment.txt")
Example #13
0
    def _impl_test_tree_estimator(self, name, datatype, partitions, **kwargs):
        """Run tree estimator ``name`` on ``anolis.fasta`` read as ``datatype``.

        ``num_cpus`` may be given as a keyword argument; it is forwarded to the
        estimator's ``run`` only when truthy. A warning is logged and the method
        returns early when ``get_tree_estimator`` yields ``None``.
        """
        filename = data_source_path('anolis.fasta')

        md = MultiLocusDataset()
        md.read_files(seq_filename_list=[filename], datatype=datatype)
        md.relabel_for_pasta()

        te = self.get_tree_estimator(name)
        if te is None:
            _LOG.warn("test%s skipped" % name)
            return

        # Arguments common to both call shapes; num_cpus is optional
        run_args = dict(alignment=md,
                        partitions=partitions,
                        tmp_dir_par=self.ts.top_level_temp,
                        delete_temps=True)
        num_cpus = kwargs.get('num_cpus')
        if num_cpus:
            run_args['num_cpus'] = num_cpus
        a = te.run(**run_args)
Example #14
0
 def testDiagnoseBogus(self):
     # Non-careful parsing of the bogus caenophidia file must raise.
     bogus_path = data_source_path('caenophidia_mos_bogus.fasta')
     with self.assertRaises(Exception):
         summary_stats_from_parse([bogus_path], ["DNA", "RNA", "PROTEIN"],
                                  careful_parse=False)
     message = "WARNING: summary_stats_from_parse does not distinguish between all bogus sequences in 'careful' mode"
     _LOG.warn(message)
Example #15
0
 def testAlignment(self):
     # Read small.fasta, check the taxon count, then exercise the PHYLIP and
     # unaligned-FASTA writers (full alignment and a two-key sub-alignment).
     # NOTE(review): the output files are written next to the source data file
     # and are not removed afterwards.
     filename = data_source_path('small.fasta')
     alignment = Alignment()
     alignment.read_filepath(filename, 'FASTA')
     num_taxa = alignment.get_num_taxa()
     self.assertEqual(num_taxa, 32)
     alignment.write_filepath(filename + '.phy', 'PHYLIP')
     alignment.write_unaligned_fasta(filename + '.raw')
     # list() keeps the slice valid when keys() returns a view instead of a list
     first_two = list(alignment.keys())[0:2]
     alignment.sub_alignment(first_two).write_unaligned_fasta(filename + '.partial.raw')
Example #16
0
 def setUp(self):
     """Run ``set_up`` and record the mixed multi-locus inputs and ``get_path`` results."""
     self.set_up()
     self.multi_dir = multi_root = data_source_path("testmulti/")
     self.multi_mixed_dir = mixed_dir = os.path.join(multi_root, "mixed")
     self.in_path1 = os.path.join(mixed_dir, "tinydna.fasta")
     self.in_path2 = os.path.join(mixed_dir, "tinyrna.fasta")

     located = self.get_path
     self.aln_path1 = located(".marker001.tinydna.aln")
     self.aln_path2 = located(".marker002.tinyrna.aln")
     self.cfg_path = located("_temp_sate_config.txt")
     self.concat_path = located("_temp_iteration_0_seq_alignment.txt")
Example #17
0
 def setUp(self):
     """Run ``set_up`` and store the mixed-data input paths plus ``get_path`` results."""
     self.set_up()
     self.multi_dir = data_source_path('testmulti/')
     self.multi_mixed_dir = os.path.join(self.multi_dir, 'mixed')

     # Input files inside the mixed directory
     for attr, fname in (('in_path1', 'tinydna.fasta'),
                         ('in_path2', 'tinyrna.fasta')):
         setattr(self, attr, os.path.join(self.multi_mixed_dir, fname))

     # Paths resolved through self.get_path
     for attr, fname in (('aln_path1', '.marker001.tinydna.aln'),
                         ('aln_path2', '.marker002.tinyrna.aln'),
                         ('cfg_path', '_temp_sate_config.txt'),
                         ('concat_path', '_temp_iteration_0_seq_alignment.txt')):
         setattr(self, attr, self.get_path(fname))
Example #18
0
    def testLongestBipartition(self):
        # Splitting small.tree (32 leaves) on its longest edge must yield
        # subtrees with 1 and 31 leaves.
        tree_path = data_source_path('small.tree')
        pt = self.phylogeneticTreeFromFile(tree_path, file_format='NEWICK')
        self.assertEqual(pt.n_leaves, 32)

        longest_edge = pt.get_longest_edge()
        first_half, second_half = pt.bipartition_by_edge(longest_edge)

        leaf_counts = sorted([first_half.n_leaves, second_half.n_leaves])
        self.assertEqual(leaf_counts, [1, 31])
Example #19
0
    def testLongestBipartition(self):
        # The longest edge of the 32-leaf small.tree separates a single leaf
        # from the remaining 31.
        pt = self.phylogeneticTreeFromFile(data_source_path('small.tree'),
                                           file_format='NEWICK')
        self.assertEqual(pt.n_leaves, 32)

        part_a, part_b = pt.bipartition_by_edge(pt.get_longest_edge())

        sizes = [part_a.n_leaves, part_b.n_leaves]
        self.assertEqual(sorted(sizes), [1, 31])
Example #20
0
    def _impl_test_merger(self, name):
        """Merge ``merger1.fasta`` and ``merger2.fasta`` with the ``name`` merger.

        The merged alignment must equal the one stored in ``merger_result.fasta``.
        A warning is logged and nothing is checked when ``get_merger`` returns
        ``None``.
        """
        filename = data_source_path('merger1.fasta')
        alignment1 = Alignment()
        alignment1.read_filepath(filename, 'FASTA')
        filename = data_source_path('merger2.fasta')
        alignment2 = Alignment()
        alignment2.read_filepath(filename, 'FASTA')

        aln = self.get_merger('%s merger' % name)
        if aln is None:
            _LOG.warn("test%s skipped" % name)
            return
        a = aln.run(alignment1,
                    alignment2,
                    tmp_dir_par=self.ts.top_level_temp,
                    delete_temps=True)

        reference_fn = data_source_path('merger_result.fasta')
        reference_aln = Alignment()
        reference_aln.read_filepath(reference_fn, 'FASTA')
        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12)
        self.assertEqual(reference_aln, a)
Example #21
0
    def _impl_test_merger(self, name):
        """Run the ``name`` merger on the two merger fixtures and compare the output.

        Inputs are ``merger1.fasta`` and ``merger2.fasta``; the expected result is
        read from ``merger_result.fasta``. When ``get_merger`` returns ``None`` a
        warning is logged and the comparison is skipped.
        """
        filename = data_source_path('merger1.fasta')
        alignment1 = Alignment()
        alignment1.read_filepath(filename, 'FASTA')
        filename = data_source_path('merger2.fasta')
        alignment2 = Alignment()
        alignment2.read_filepath(filename, 'FASTA')

        aln = self.get_merger('%s merger' % name)
        if aln is None:
            _LOG.warn("test%s skipped" % name)
            return
        a = aln.run(alignment1,
                    alignment2,
                    tmp_dir_par=self.ts.top_level_temp,
                    delete_temps=True)

        reference_fn = data_source_path('merger_result.fasta')
        reference_aln = Alignment()
        reference_aln.read_filepath(reference_fn, 'FASTA')
        # The assertEquals alias is deprecated and absent from Python 3.12+
        self.assertEqual(reference_aln, a)
Example #22
0
 def setUp(self):
     """Copy ``tiny.fasta`` to a path whose directory and file names are non-ASCII.

     The destination is stored in ``self.data_path`` and its directory in
     ``self.tmp_sub_dir``.
     """
     self.set_up()
     data_file = data_source_path("tiny.fasta")
     unicode_name = u"m\xe9ss\xfdp\xe4th"
     self.tmp_sub_dir = self.get_subdir(unicode_name)
     self.data_path = self.get_path(name=unicode_name + ".fasta", parent_dir=self.tmp_sub_dir)
     # Nested context managers close both handles even if the copy fails midway.
     # NOTE(review): the 'U' mode flag was removed in Python 3.11; drop it once
     # Python 2 support is no longer required.
     with open(data_file, "rU") as src:
         with open(self.data_path, "w") as out:
             for line in src:
                 out.write(line)
Example #23
0
 def setUp(self):
     """Copy ``tiny.fasta`` to a path whose directory and file names contain spaces.

     The destination is stored in ``self.data_path`` and its directory in
     ``self.tmp_sub_dir``.
     """
     self.set_up()
     data_file = data_source_path("tiny.fasta")
     space_name = "a path with a lot of spaces"
     self.tmp_sub_dir = self.get_subdir(space_name)
     self.data_path = self.get_path(name=space_name + ".fasta", parent_dir=self.tmp_sub_dir)
     # Nested context managers close both handles even if the copy fails midway.
     # NOTE(review): the 'U' mode flag was removed in Python 3.11; drop it once
     # Python 2 support is no longer required.
     with open(data_file, "rU") as src:
         with open(self.data_path, "w") as out:
             for line in src:
                 out.write(line)
Example #24
0
    def testCentroidBipartition(self):
        # Splitting the 484-leaf diffDecomp.nex tree on its centroid edge must
        # give subtrees of 231 and 253 leaves.
        nexus_path = data_source_path('diffDecomp.nex')
        pt = self.phylogeneticTreeFromFile(nexus_path, file_format='NEXUS')
        self.assertEqual(pt.n_leaves, 484)

        centroid_edge = pt.get_centroid_edge()
        first_half, second_half = pt.bipartition_by_edge(centroid_edge)

        leaf_counts = sorted([first_half.n_leaves, second_half.n_leaves])
        self.assertEqual(leaf_counts, [231, 253])
Example #25
0
    def testCentroidBipartition(self):
        # The centroid edge of diffDecomp.nex (484 leaves) separates the tree
        # into parts with 231 and 253 leaves.
        pt = self.phylogeneticTreeFromFile(data_source_path('diffDecomp.nex'),
                                           file_format='NEXUS')
        self.assertEqual(pt.n_leaves, 484)

        part_a, part_b = pt.bipartition_by_edge(pt.get_centroid_edge())

        sizes = [part_a.n_leaves, part_b.n_leaves]
        self.assertEqual(sorted(sizes), [231, 253])
Example #26
0
 def testMulti(self):
     # Runs only when the EXHAUSTIVE test level is enabled for this test:
     # one iteration in multi mode over the testmulti data directory.
     test_id = "%s.%s" % (self.__class__.__name__,
                          sys._getframe().f_code.co_name)
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
         return

     # Assemble the command line piece by piece, in the order it is passed
     argv = ['-m', '-i', data_source_path('testmulti')]
     argv += ['-o', self.ts.top_level_temp]
     argv.append('--temporaries=%s' % self.ts.top_level_temp)
     argv += ['-j', self.job_name, '--iter-limit=1']
     self._main_execution(argv)
Example #27
0
    def testDiagnoseMulti(self):
        # Exercise summary_stats_from_parse (careful_parse=False) on pairs of
        # input files, checking the first four fields of each result, and
        # confirm that the caenophidia + smallrna pair is rejected.
        mixed_warning = "WARNING: summary_stats_from_parse will read multi with dna and protein as entirely protein. MIXED data type support is needed!"

        def parse(paths):
            # A fresh datatype list is built for every call
            return summary_stats_from_parse(paths, ["DNA", "RNA", "PROTEIN"],
                                            careful_parse=False)

        def check(stats, first, second, third, fourth):
            # Compare fields 0..3 of the result, in order
            for index, wanted in enumerate((first, second, third, fourth)):
                self.assertEqual(stats[index], wanted)

        def expect_failure_then_warn(paths):
            with self.assertRaises(Exception):
                parse(paths)
            _LOG.warn(mixed_warning)

        multi_dir = data_source_path('testmulti/caenophidia')
        fp = os.path.join(multi_dir, 'caenophidia_mos.fasta')
        fp2 = os.path.join(multi_dir, 'caenophidia_mos2.fasta')
        # two taxa names were changed and 5 were deleted, so the union is 116
        check(parse([fp, fp2]), "PROTEIN", [(114, 189), (109, 202)], 116, False)

        fp3 = data_source_path('smallrna.fasta')
        check(parse([fp3, fp3]), "RNA", [(32, 1650), (32, 1650)], 32, True)
        expect_failure_then_warn([fp, fp3])

        fp4 = data_source_path('small.fasta')
        fp5 = data_source_path('smallunaligned.fasta')
        check(parse([fp4, fp4]), "DNA", [(32, 1650), (32, 1650)], 32, True)
        expect_failure_then_warn([fp, fp3])

        fp4 = data_source_path('small.fasta')
        fp5 = data_source_path('smallunaligned.fasta')
        check(parse([fp4, fp5]), "DNA", [(32, 1650), (32, 1650)], 32, False)
        expect_failure_then_warn([fp, fp3])
Example #28
0
 def setUp(self):
     """Copy ``tiny.fasta`` to a path whose directory and file names contain spaces.

     The destination is stored in ``self.data_path`` and its directory in
     ``self.tmp_sub_dir``.
     """
     self.set_up()
     data_file = data_source_path('tiny.fasta')
     space_name = 'a path with a lot of spaces'
     self.tmp_sub_dir = self.get_subdir(space_name)
     self.data_path = self.get_path(name=space_name + '.fasta',
                                    parent_dir=self.tmp_sub_dir)
     # `with` guarantees both handles are closed even if a read or write raises.
     # NOTE(review): the 'U' mode flag was removed in Python 3.11; drop it once
     # Python 2 support is no longer required.
     with open(data_file, 'rU') as src:
         with open(self.data_path, 'w') as out:
             for line in src:
                 out.write(line)
Example #29
0
 def setUp(self):
     """Copy ``tiny.fasta`` to a path whose directory and file names are non-ASCII.

     The destination is stored in ``self.data_path`` and its directory in
     ``self.tmp_sub_dir``.
     """
     self.set_up()
     data_file = data_source_path('tiny.fasta')
     unicode_name = 'm\xe9ss\xfdp\xe4th'
     self.tmp_sub_dir = self.get_subdir(unicode_name)
     self.data_path = self.get_path(name=unicode_name + '.fasta',
                                    parent_dir=self.tmp_sub_dir)
     # `with` guarantees both handles are closed even if a read or write raises.
     # NOTE(review): the 'U' mode flag was removed in Python 3.11; drop it once
     # Python 2 support is no longer required.
     with open(data_file, 'rU') as src:
         with open(self.data_path, 'w') as out:
             for line in src:
                 out.write(line)
Example #30
0
 def setUp(self):
     """Run ``set_up`` and record the data files and multi-locus directories used by the tests."""
     self.set_up()
     source = data_source_path
     join = os.path.join

     self.anolis_file = source('anolis.fasta')
     self.caenophidia_file = source('caenophidia_mos.fasta')

     # Sub-directories of the testmulti data directory
     self.multi_dir = multi_root = source('testmulti/')
     self.multi_aa_dir = join(multi_root, 'caenophidia')
     self.figwasp_dir = join(multi_root, 'figwasps')
     self.hummingbird_dir = join(multi_root, 'hummingbirds')

     # Ambiguity fixtures and their trees
     self.ambig_dna = source('small.ambiguities.fasta')
     self.ambig_dna_tree = source('small.tree')
     self.ambig_aa = source('caenophidia_mos.ambiguities.fasta')
     self.ambig_aa_tree = source('caenophidia_mos.tre')
Example #31
0
 def setUp(self):
     """Run ``set_up`` and store data-file and multi-locus directory paths as attributes."""
     self.set_up()

     for attr, fname in (('anolis_file', 'anolis.fasta'),
                         ('caenophidia_file', 'caenophidia_mos.fasta'),
                         ('multi_dir', 'testmulti/')):
         setattr(self, attr, data_source_path(fname))

     # Directories located beneath self.multi_dir
     for attr, subdir in (('multi_aa_dir', 'caenophidia'),
                          ('figwasp_dir', 'figwasps'),
                          ('hummingbird_dir', 'hummingbirds')):
         setattr(self, attr, os.path.join(self.multi_dir, subdir))

     for attr, fname in (('ambig_dna', 'small.ambiguities.fasta'),
                         ('ambig_dna_tree', 'small.tree'),
                         ('ambig_aa', 'caenophidia_mos.ambiguities.fasta'),
                         ('ambig_aa_tree', 'caenophidia_mos.tre')):
         setattr(self, attr, data_source_path(fname))
Example #32
0
 def setUp(self):
     """Run ``set_up``, then record DNA/RNA inputs, sub-directories and per-directory paths."""
     self.set_up()
     self.dna = data_source_path('small.fasta')
     self.rna = data_source_path('smallrna.fasta')
     self.tree = data_source_path('small.tree')
     self.dna_tmp = self.get_subdir('dna')
     self.rna_tmp = self.get_subdir('rna')

     # (attribute, file name, parent directory) resolved through self.get_path
     iteration_aln = '_temp_iteration_0_seq_alignment.txt'
     layout = (
         ('dna_aln', '.marker001.small.aln', self.dna_tmp),
         ('dna_tree', '.tre', self.dna_tmp),
         ('rna_aln', '.marker001.smallrna.aln', self.rna_tmp),
         ('rna_tree', '.tre', self.rna_tmp),
         ('dna_score', '.score.txt', self.dna_tmp),
         ('rna_score', '.score.txt', self.rna_tmp),
         ('dna_tmp_aln', iteration_aln, self.dna_tmp),
         ('rna_tmp_aln', iteration_aln, self.rna_tmp),
     )
     for attr, fname, parent in layout:
         setattr(self, attr, self.get_path(name=fname, parent_dir=parent))
Example #33
0
 def testDiagnoseProt(self):
     # caenophidia_mos.fasta must be diagnosed as PROTEIN in both parse modes,
     # and parsing must raise when PROTEIN is not among the allowed datatypes.
     fp = data_source_path("caenophidia_mos.fasta")
     # Call form is valid on Python 3 and prints the same text on Python 2
     print(fp)
     s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"], careful_parse=False)
     self.assertEqual(s[0], "PROTEIN")
     self.assertEqual(s[1], [(114, 189)])
     self.assertEqual(s[2], 114)
     self.assertEqual(s[3], False)
     s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"], careful_parse=True)
     self.assertEqual(s[0], "PROTEIN")
     self.assertEqual(s[1], [(114, 189)])
     self.assertEqual(s[2], 114)
     self.assertEqual(s[3], False)
     self.assertRaises(Exception, summary_stats_from_parse, [fp], ["DNA", "RNA"], careful_parse=False)
     self.assertRaises(Exception, summary_stats_from_parse, [fp], ["DNA", "RNA"], careful_parse=True)
Example #34
0
 def testDiagnoseRNA(self):
     # smallrna.fasta must be diagnosed as RNA in both parse modes, and
     # non-careful parsing must raise when RNA is not an allowed datatype.
     fp = data_source_path("smallrna.fasta")
     # Call form is valid on Python 3 and prints the same text on Python 2
     print(fp)
     s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"], careful_parse=False)
     self.assertEqual(s[0], "RNA")
     self.assertEqual(s[1], [(32, 1650)])
     self.assertEqual(s[2], 32)
     self.assertEqual(s[3], True)
     s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"], careful_parse=True)
     self.assertEqual(s[0], "RNA")
     self.assertEqual(s[1], [(32, 1650)])
     self.assertEqual(s[2], 32)
     self.assertEqual(s[3], True)
     self.assertRaises(Exception, summary_stats_from_parse, [fp], ["DNA", "PROTEIN"], careful_parse=False)
     _LOG.warn("WARNING: summary_stats_from_parse does not distinguish between RNA and DNA in 'careful' mode")
Example #35
0
 def testDiagnoseDNA(self):
     # small.fasta must be diagnosed as DNA in both parse modes, and parsing
     # must raise when only RNA is allowed.
     fp = data_source_path("small.fasta")
     # Call form is valid on Python 3 and prints the same text on Python 2
     print(fp)
     s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"], careful_parse=False)
     self.assertEqual(s[0], "DNA")
     self.assertEqual(s[1], [(32, 1650)])
     self.assertEqual(s[2], 32)
     self.assertEqual(s[3], True)
     s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"], careful_parse=True)
     self.assertEqual(s[0], "DNA")
     self.assertEqual(s[1], [(32, 1650)])
     self.assertEqual(s[2], 32)
     self.assertEqual(s[3], True)
     self.assertRaises(Exception, summary_stats_from_parse, [fp], ["RNA"], careful_parse=False)
     self.assertRaises(Exception, summary_stats_from_parse, [fp], ["RNA"], careful_parse=True)
Example #36
0
 def testMulti(self):
     # Gated on the EXHAUSTIVE test level: one iteration in multi mode over
     # the testmulti data directory.
     test_id = "%s.%s" % (self.__class__.__name__, sys._getframe().f_code.co_name)
     if not is_test_enabled(TestLevel.EXHAUSTIVE, _LOG, module_name=test_id):
         return

     # Command line assembled in the same order the options are passed
     argv = ["-m", "-i", data_source_path("testmulti")]
     argv.extend(["-o", self.ts.top_level_temp])
     argv.append("--temporaries=%s" % self.ts.top_level_temp)
     argv.extend(["-j", self.job_name, "--iter-limit=1"])
     self._main_execution(argv)
Example #37
0
    def testDiagnoseMulti(self):
        # Run summary_stats_from_parse (careful_parse=False) on several pairs of
        # files, verify the first four result fields each time, and confirm
        # that the caenophidia + smallrna pair raises.
        mixed_warning = "WARNING: summary_stats_from_parse will read multi with dna and protein as entirely protein. MIXED data type support is needed!"

        def parse(paths):
            # A fresh datatype list is built for every call
            return summary_stats_from_parse(paths, ["DNA", "RNA", "PROTEIN"], careful_parse=False)

        def check(stats, *wanted):
            # Compare leading result fields with the expected values, in order
            for index, value in enumerate(wanted):
                self.assertEqual(stats[index], value)

        def expect_failure_then_warn(paths):
            with self.assertRaises(Exception):
                parse(paths)
            _LOG.warn(mixed_warning)

        multi_dir = data_source_path("testmulti/caenophidia")
        fp = os.path.join(multi_dir, "caenophidia_mos.fasta")
        fp2 = os.path.join(multi_dir, "caenophidia_mos2.fasta")
        # two taxa names were changed and 5 were deleted, so the union is 116
        check(parse([fp, fp2]), "PROTEIN", [(114, 189), (109, 202)], 116, False)

        fp3 = data_source_path("smallrna.fasta")
        check(parse([fp3, fp3]), "RNA", [(32, 1650), (32, 1650)], 32, True)
        expect_failure_then_warn([fp, fp3])

        fp4 = data_source_path("small.fasta")
        fp5 = data_source_path("smallunaligned.fasta")
        check(parse([fp4, fp4]), "DNA", [(32, 1650), (32, 1650)], 32, True)
        expect_failure_then_warn([fp, fp3])

        fp4 = data_source_path("small.fasta")
        fp5 = data_source_path("smallunaligned.fasta")
        check(parse([fp4, fp5]), "DNA", [(32, 1650), (32, 1650)], 32, False)
        expect_failure_then_warn([fp, fp3])
Example #38
0
 def testDiagnoseDNA(self):
     # small.fasta must be diagnosed as DNA under both parse modes and must
     # be rejected when only RNA is allowed.
     fp = data_source_path('small.fasta')
     print(fp)
     parse_modes = (False, True)
     for careful in parse_modes:
         s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"],
                                      careful_parse=careful)
         self.assertEqual(s[0], "DNA")
         self.assertEqual(s[1], [(32, 1650)])
         self.assertEqual(s[2], 32)
         self.assertEqual(s[3], True)
     for careful in parse_modes:
         with self.assertRaises(Exception):
             summary_stats_from_parse([fp], ["RNA"], careful_parse=careful)
Example #39
0
 def testDiagnoseProt(self):
     # caenophidia_mos.fasta must be diagnosed as PROTEIN under both parse
     # modes and must be rejected when only DNA and RNA are allowed.
     fp = data_source_path('caenophidia_mos.fasta')
     print(fp)
     parse_modes = (False, True)
     for careful in parse_modes:
         s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"],
                                      careful_parse=careful)
         self.assertEqual(s[0], "PROTEIN")
         self.assertEqual(s[1], [(114, 189)])
         self.assertEqual(s[2], 114)
         self.assertEqual(s[3], False)
     for careful in parse_modes:
         with self.assertRaises(Exception):
             summary_stats_from_parse([fp], ["DNA", "RNA"],
                                      careful_parse=careful)
Example #40
0
 def testDiagnoseRNA(self):
     # smallrna.fasta must be diagnosed as RNA under both parse modes; the
     # non-careful parse must raise when RNA is not an allowed datatype.
     fp = data_source_path('smallrna.fasta')
     print(fp)
     for careful in (False, True):
         s = summary_stats_from_parse([fp], ["DNA", "RNA", "PROTEIN"],
                                      careful_parse=careful)
         self.assertEqual(s[0], "RNA")
         self.assertEqual(s[1], [(32, 1650)])
         self.assertEqual(s[2], 32)
         self.assertEqual(s[3], True)
     with self.assertRaises(Exception):
         summary_stats_from_parse([fp], ["DNA", "PROTEIN"],
                                  careful_parse=False)
     message = "WARNING: summary_stats_from_parse does not distinguish between RNA and DNA in 'careful' mode"
     _LOG.warn(message)
Example #41
0
 def testDNAFasta(self):
     # Reading anolis.fasta as DNA FASTA should complete without raising.
     sd = SequenceDataset()
     fp = data_source_path('anolis.fasta')
     # The context manager closes the handle instead of leaking it.
     # NOTE(review): the 'U' mode flag was removed in Python 3.11; drop it once
     # Python 2 support is no longer required.
     with open(fp, 'rU') as stream:
         sd.read(stream, file_format='FASTA', datatype='DNA')
Example #42
0
 def testDiagnoseBogus(self):
     # Non-careful parsing of the bogus caenophidia file must raise.
     bogus_path = data_source_path("caenophidia_mos_bogus.fasta")
     with self.assertRaises(Exception):
         summary_stats_from_parse([bogus_path], ["DNA", "RNA", "PROTEIN"], careful_parse=False)
     message = "WARNING: summary_stats_from_parse does not distinguish between all bogus sequences in 'careful' mode"
     _LOG.warn(message)
Example #43
0
 def testDNAFasta(self):
     # The anolis.fasta fixture must be readable as a DNA FASTA dataset.
     sd = SequenceDataset()
     fp = data_source_path('anolis.fasta')
     # `with` closes the file deterministically, also when read() raises.
     # NOTE(review): the 'U' mode flag was removed in Python 3.11; drop it once
     # Python 2 support is no longer required.
     with open(fp, 'rU') as fasta_in:
         sd.read(fasta_in, file_format='FASTA', datatype='DNA')
Example #44
0
 def setUp(self):
     """Run ``set_up`` and point ``self.data`` at ``tiny.lowercase.fasta``."""
     self.set_up()
     lowercase_fasta = data_source_path("tiny.lowercase.fasta")
     self.data = lowercase_fasta
Example #45
0
 def setUp(self):
     """Run ``set_up`` and store the ``tiny.fasta`` and ``tiny_name_mismatch.tre`` paths."""
     self.set_up()
     fasta_name, tree_name = "tiny.fasta", "tiny_name_mismatch.tre"
     self.data = data_source_path(fasta_name)
     self.tree = data_source_path(tree_name)
Example #46
0
 def setUp(self):
     """Run ``set_up`` and record the sequence file and the name-mismatch tree file."""
     self.set_up()
     for attr, fname in (('data', 'tiny.fasta'),
                         ('tree', 'tiny_name_mismatch.tre')):
         setattr(self, attr, data_source_path(fname))
Example #47
0
 def setUp(self):
     """Run ``set_up`` and resolve the lowercase tiny FASTA into ``self.data``."""
     self.set_up()
     fasta_name = 'tiny.lowercase.fasta'
     self.data = data_source_path(fasta_name)