Ejemplo n.º 1
0
 def setUp(self):
     # Fixture: load the weighted and unweighted reference distance tables
     # from CSV, read the "pythonidae.mle.nex" tree, and build the
     # phylogenetic distance matrix that the tests exercise.
     #
     # R commands recorded for the weighted reference table:
     #   library(ape)
     #   tree = read.nexus("data/pythonidae.mle.nex")
     #   pdm = cophenetic.phylo(tree)
     #   write.csv(format(pdm,digits=22), "pythonidae.mle.weighted.pdm.csv")
     table_options = dict(default_data_type=float, delimiter=",")

     weighted_csv_path = pathmap.other_source_path("pythonidae.mle.weighted.pdm.csv")
     with open(weighted_csv_path) as weighted_csv:
         self.reference_pdm_weighted_table = container.DataTable.from_csv(
             weighted_csv, **table_options
         )

     unweighted_csv_path = pathmap.other_source_path("pythonidae.mle.unweighted.pdm.csv")
     with open(unweighted_csv_path) as unweighted_csv:
         self.reference_pdm_unweighted_table = container.DataTable.from_csv(
             unweighted_csv, **table_options
         )

     nexus_path = pathmap.tree_source_path("pythonidae.mle.nex")
     self.tree = dendropy.Tree.get(
         path=nexus_path,
         schema="nexus",
         preserve_underscores=True,
     )
     self.pdm = self.tree.phylogenetic_distance_matrix()
 def test_distances(self):
     # Compare every node-to-node distance reported by the tree's node
     # distance matrix with a reference table read from CSV, once with
     # weighted edge distances and once without, for each test tree.
     #
     # R commands recorded for generating the reference distances:
     #   library(ape)
     #   tr = read.nexus("pythonidae.mle.nex")
     #   tr$node.label <- (Ntip(tr)+1):(nrow(tr$edge)+1)
     #   tr$tip.label <- (1:Ntip(tr))
     #   write.tree(tr)
     #   d = dist.nodes(tr)
     #   write.csv(d, "file.csv")
     #
     # Each entry: (tree file, use weighted edge distances?, reference CSV).
     test_runs = [
         ("hiv1.newick", True, "hiv1.node-to-node-dists.csv"),
         ("pythonidae.mle.numbered-nodes.newick", True, "pythonidae.mle.node-to-node-dists.csv"),
         ("hiv1.newick", False, "hiv1.unweighted.node-to-node-dists.csv"),
         ("pythonidae.mle.numbered-nodes.newick", False, "pythonidae.mle.unweighted.node-to-node-dists.csv"),
     ]
     for tree_filename, is_weighted, distances_filename in test_runs:
         tree = dendropy.Tree.get_from_path(
             src=pathmap.tree_source_path(tree_filename), schema="newick", suppress_leaf_node_taxa=True
         )
         ndm = tree.node_distance_matrix()
         # Open the reference file in a context manager so the handle is
         # closed as soon as the table is parsed, instead of being left
         # for the garbage collector to reclaim.
         with open(pathmap.other_source_path(distances_filename)) as src:
             reference_table = container.DataTable.from_csv(
                 src=src, default_data_type=float, delimiter=","
             )
         # All ordered pairs of nodes, including each node with itself.
         for nd1 in tree.postorder_node_iter():
             for nd2 in tree.postorder_node_iter():
                 d = ndm.distance(nd1, nd2, is_weighted_edge_distances=is_weighted)
                 # The reference table is indexed by the two node labels.
                 e = reference_table[nd1.label, nd2.label]
                 self.assertAlmostEqual(d, e)
 def test_upgma_average_from_distance_matrices(self):
     # Build a UPGMA tree from each CSV distance matrix and compare it,
     # through self.check_tree, with the expected Newick tree for that data.
     #
     # R commands recorded for generating the expected trees:
     #   library(phangorn)
     #   d = read.csv("wpupgmaex.csv", header=T, row.names=1)
     #   upgma(d)
     #
     # Each case: (distance matrix CSV, expected tree as a Newick string).
     upgma_cases = (
         ("wpupgmaex.csv", "((e:11,(a:8.5,b:8.5):2.5):5.5,(c:14,d:14):2.5);"),
         (
             "pythonidae.mle.weighted.pdm.csv",
             "(Candoia_aspera:0.3358679339,(Loxocemus_bicolor:0.239139024,(Xenopeltis_unicolor:0.2306593655,((Python_regius:0.1021235458,(Python_curtus:0.0883212354,(Python_sebae:0.0482829776,Python_molurus:0.0482829776):0.0400382578):0.01380231042):0.04278465647,((Python_timoriensis:0.06537053625,Python_reticulatus:0.06537053625):0.06029549731,(((Liasis_albertisii:0.0590368856,Bothrochilus_boa:0.0590368856):0.03300696511,(Morelia_boeleni:0.08681248898,((Antaresia_melanocephalus:0.03530849105,Antaresia_ramsayi:0.03530849105):0.04351804164,((Liasis_fuscus:0.0168409813,Liasis_mackloti:0.0168409813):0.04845559302,(Apodora_papuana:0.05507916735,Liasis_olivaceus:0.05507916735):0.01021740697):0.01352995836):0.007985956296):0.005231361731):0.005812651357,(((Morelia_tracyae:0.0359450459,(Morelia_amethistina:0.02553168923,(Morelia_clastolepis:0.0084128718,(Morelia_kinghorni:0.0080990783,Morelia_nauta:0.0080990783):0.0003137935):0.01711881743):0.01041335667):0.02235606786,(Morelia_oenpelliensis:0.05722136872,(Morelia_bredli:0.02582922315,Morelia_spilota:0.02582922315):0.03139214558):0.001079745035):0.03700402529,((Morelia_carinata:0.06795956147,(Morelia_viridisN:0.04255445115,Morelia_viridisS:0.04255445115):0.02540511032):0.01460551598,(Antaresia_maculosa:0.07026059995,(Antaresia_perthensis:0.06383429933,(Antaresia_stimsoni:0.01919038275,Antaresia_childreni:0.01919038275):0.04464391658):0.006426300625):0.0123044775):0.01274006159):0.002551363025):0.02780953149):0.01924216872):0.08575116319):0.008479658536):0.09672890989);",
         ),
         (
             "laurasiatherian.distances.ml.csv",
             "(Platypus:0.115554082,((Opposum:0.04554785647,(Bandicoot:0.03760589713,(Wallaroo:0.02994721074,Possum:0.02994721074):0.00765868639):0.007941959338):0.05561264108,(Elephant:0.09767314505,(Tenrec:0.09197201988,(Hedghog:0.08893681608,((Cebus:0.07429923126,(Baboon:0.06398456889,Human:0.06398456889):0.01031466238):0.01347573751,((Mouse:0.04952979734,Vole:0.04952979734):0.03752885067,(Gymnure:0.08423117145,((GuineaPig:0.0740040685,CaneRat:0.0740040685):0.009033085883,(Armadillo:0.0815558576,((Squirrel:0.0719024199,Dormouse:0.0719024199):0.006011912967,(Loris:0.07535539083,((Rabbit:0.06082665549,Pika:0.06082665549):0.01312116941,(LongTBat:0.07253285264,(Aardvark:0.07207716472,(FruitBat:0.06375844723,((((FurSeal:0.03311262951,(HarbSeal:0.004907721762,GraySeal:0.004907721762):0.02820490774):0.01108478546,(Cat:0.04153849221,Dog:0.04153849221):0.002658922751):0.01112227127,((Mole:0.04702531576,Shrew:0.04702531576):0.006612877503,((Rbat:0.0483680062,(FlyingFox:0.01426606479,RyFlyFox:0.01426606479):0.03410194142):0.002903547772,(Pig:0.04899261297,((Horse:0.009073986173,Donkey:0.009073986173):0.02905609806,(WhiteRhino:0.02269245743,IndianRhin:0.02269245743):0.0154376268):0.01086252874):0.002278941003):0.002366639287):0.001681492971):0.004794138467,(Alpaca:0.05736084145,(Hippo:0.05532140845,((Cow:0.02807794212,Sheep:0.02807794212):0.02602799145,(SpermWhale:0.03397215079,(FinWhale:0.01328705847,BlueWhale:0.01328705847):0.02068509233):0.02013378278):0.001215474875):0.002039433007):0.002752983249):0.003644622527):0.008318717494):0.0004556879157):0.001414972262):0.001407565924):0.00255894204):0.003641524737):0.001481296783):0.001194017065):0.002827476566):0.0007163207591):0.001161847302):0.003035203803):0.005701125173):0.0034873525):0.01439358448);",
         ),
         ## note:following fails, probably due to different arbitrary resolutions of equal distances
         # ("pythonidae.mle.unweighted.pdm.csv", "((((Morelia_carinata:1.5,(Morelia_viridisN:1,Morelia_viridisS:1):0.5):1.458333333,((Antaresia_stimsoni:1,Antaresia_childreni:1):0.75,(Antaresia_maculosa:1.5,Antaresia_perthensis:1.5):0.25):1.208333333):1.416666667,((Morelia_bredli:1,Morelia_spilota:1):2.166666667,((Morelia_clastolepis:1.5,(Morelia_kinghorni:1,Morelia_nauta:1):0.5):0.8333333333,(Morelia_oenpelliensis:1.75,(Morelia_tracyae:1.5,Morelia_amethistina:1.5):0.25):0.5833333333):0.8333333333):1.208333333):0.6861111111,(((Morelia_boeleni:2,(Liasis_albertisii:1,Bothrochilus_boa:1):1):0.8333333333,((Antaresia_melanocephalus:1,Antaresia_ramsayi:1):1.5,((Apodora_papuana:1,Liasis_olivaceus:1):1,(Liasis_fuscus:1,Liasis_mackloti:1):1):0.5):0.3333333333):1.888888889,(((Python_sebae:1,Python_molurus:1):0.75,(Python_regius:1.5,Python_curtus:1.5):0.25):1.275,((Python_timoriensis:1,Python_reticulatus:1):1.833333333,(Loxocemus_bicolor:1.5,(Xenopeltis_unicolor:1,Candoia_aspera:1):0.5):1.333333333):0.1916666667):1.697222222):0.3388888889);"),
     )
     # Options shared by every matrix read: the first row and first column
     # hold the names, and taxa not yet in the namespace may be created.
     csv_options = dict(
         is_first_row_column_names=True,
         is_first_column_row_names=True,
         is_allow_new_taxa=True,
         delimiter=",",
     )
     for csv_name, expected_newick in upgma_cases:
         matrix_path = pathmap.other_source_path(csv_name)
         with open(matrix_path) as matrix_csv:
             distance_matrix = dendropy.PhylogeneticDistanceMatrix.from_csv(matrix_csv, **csv_options)
         upgma_tree = distance_matrix.upgma_tree()
         # NOTE: no rooting= argument is passed here; a "force-unrooted"
         # setting was left commented out in the earlier version.
         reference_tree = dendropy.Tree.get(
             data=expected_newick,
             schema="newick",
             taxon_namespace=distance_matrix.taxon_namespace,
             preserve_underscores=True,
         )
         self.check_tree(obs_tree=upgma_tree, expected_tree=reference_tree)
Ejemplo n.º 4
0
 def test1(self):
     # For every regime in the JSON fixture, score each coalescent tree
     # against the regime's species tree and check that the resulting
     # log-likelihood agrees with the stored value to 2 decimal places.
     fixture_path = pathmap.other_source_path("multispecies_coalescent_test_data.json")
     with open(fixture_path) as fixture:
         regimes = json.load(fixture)
     for regime in regimes:
         species_tree = dendropy.Tree.get(
             data=regime["species_tree"],
             schema="newick",
             rooting="force-rooted",
         )
         species_tree.taxon_namespace.is_mutable = False
         msc = multispeciescoalescent.MultispeciesCoalescent(species_tree=species_tree)
         lineage_to_species_label = regime["coalescent_species_lineage_label_map"]

         def species_taxon_for(lineage_taxon):
             # Map a coalescent-tree taxon to the species-tree taxon whose
             # label the fixture assigns to it.
             return species_tree.taxon_namespace.require_taxon(
                 lineage_to_species_label[lineage_taxon.label]
             )

         lineage_labels = sorted(lineage_to_species_label)
         coalescent_taxa = dendropy.TaxonNamespace(lineage_labels)
         coalescent_taxa.is_mutable = False
         for case in regime["coalescent_trees"]:
             gene_tree = dendropy.Tree.get(
                 data=case["coalescent_tree"],
                 schema="newick",
                 rooting="force-rooted",
                 taxon_namespace=coalescent_taxa,
             )
             observed = msc.score_coalescent_tree(
                 coalescent_tree=gene_tree,
                 coalescent_species_lineage_map_fn=species_taxon_for,
             )
             expected = case["log_likelihood"]
             self.assertAlmostEqual(observed, expected, places=2)
Ejemplo n.º 5
0
 def test1(self):
     # Walk the multispecies-coalescent JSON fixture: each regime supplies a
     # species tree, a lineage-label -> species-label map, and coalescent
     # trees with stored log-likelihoods. Each tree's score must match the
     # stored value to 2 decimal places.
     data_path = pathmap.other_source_path("multispecies_coalescent_test_data.json")
     with open(data_path) as data_file:
         all_regimes = json.load(data_file)
     for current_regime in all_regimes:
         species_tree = dendropy.Tree.get(
             data=current_regime["species_tree"], schema="newick", rooting="force-rooted"
         )
         species_tree.taxon_namespace.is_mutable = False
         msc = multispeciescoalescent.MultispeciesCoalescent(species_tree=species_tree)
         label_map = current_regime["coalescent_species_lineage_label_map"]

         def lookup_species_taxon(taxon):
             # Resolve the species-tree taxon for a coalescent-tree taxon
             # by way of the fixture's label map.
             species_label = label_map[taxon.label]
             return species_tree.taxon_namespace.require_taxon(species_label)

         coalescent_taxa = dendropy.TaxonNamespace(sorted(label_map))
         coalescent_taxa.is_mutable = False
         for tree_record in current_regime["coalescent_trees"]:
             coalescent_tree = dendropy.Tree.get(
                 data=tree_record["coalescent_tree"],
                 schema="newick",
                 rooting="force-rooted",
                 taxon_namespace=coalescent_taxa,
             )
             scored_ln_likelihood = msc.score_coalescent_tree(
                 coalescent_tree=coalescent_tree,
                 coalescent_species_lineage_map_fn=lookup_species_taxon,
             )
             stored_ln_likelihood = tree_record["log_likelihood"]
             self.assertAlmostEqual(scored_ln_likelihood, stored_ln_likelihood, places=2)
 def setUp(self):
     # Fixture: read the rooted community tree, derive its phylogenetic
     # distance matrix, and load the assemblage data both as a DataTable
     # and as assemblage membership definitions.
     community_tree_path = pathmap.tree_source_path("community.tree.newick")
     self.tree = dendropy.Tree.get_from_path(
         src=community_tree_path,
         schema="newick",
         rooting="force-rooted",
     )
     self.pdm = dendropy.PhylogeneticDistanceMatrix.from_tree(self.tree)

     # The same tab-delimited file is read twice: once through an open
     # handle into a DataTable, once by path into membership definitions.
     community_data_path = pathmap.other_source_path("community.data.tsv")
     with open(community_data_path) as community_data:
         self.data_table = container.DataTable.from_csv(
             community_data,
             default_data_type=int,
             delimiter="\t",
         )
     membership_definitions = self.pdm.assemblage_membership_definitions_from_csv(
         community_data_path,
         delimiter="\t",
     )
     self.assemblage_membership_definitions = membership_definitions
     self.assemblage_memberships = self.assemblage_membership_definitions.values()
    def test_njtree_from_distance_matrices(self):
        # Build a neighbor-joining tree from each CSV distance matrix and
        # compare it, through self.check_tree, with the expected Newick tree
        # (parsed as unrooted) for that data.
        #
        # R commands recorded for generating the expected trees:
        #   library(ape)
        #   ---
        #   z = matrix( c(0,5,9,9,8, 5,0,10,10,9, 9,10,0,8,7, 9,10,8,0,3, 8,9,7,3,0), byrow=T, nrow=5)
        #   rownames(z)  <- c("a", "b", "c", "d", "e")
        #   colnames(z)  <- c("a", "b", "c", "d", "e")
        #   t = nj(z)
        #   write.tree(t)
        #   ---
        #   p1 = read.csv("pythonidae.mle.unweighted.pdm.csv", header=T, row.names=1)
        #   m1 = as.matrix(p1)
        #   nj(m1)
        #   t = nj(m1)
        #   write.tree(t)
        #
        # Each case: (distance matrix CSV, expected tree as a Newick string).
        nj_cases = (
            ("wpnjex.csv", "(e:1,d:2,((a:2,b:3):3,c:4):2);"),
            ("saitou_and_nei_1987_table1.csv", "(h:6,g:2,((((a:5,b:2):2,c:1):1,d:3):2,(e:1,f:4):2):1);"),
            (
                "pythonidae.mle.unweighted.pdm.csv",
                "(Morelia_spilota:1,Morelia_bredli:1,((((((Morelia_kinghorni:1,Morelia_nauta:1):1,Morelia_clastolepis:1):1,Morelia_amethistina:1):1,Morelia_tracyae:1):1,Morelia_oenpelliensis:1):1,(((((Liasis_albertisii:1,Bothrochilus_boa:1):1,((Antaresia_melanocephalus:1,Antaresia_ramsayi:1):1,((Liasis_fuscus:1,Liasis_mackloti:1):1,(Apodora_papuana:1,Liasis_olivaceus:1):1):1):1):1,Morelia_boeleni:1):1,((Python_timoriensis:1,Python_reticulatus:1):1,((((Python_sebae:1,Python_molurus:1):1,Python_curtus:1):1,Python_regius:1):1,((Xenopeltis_unicolor:1,Candoia_aspera:1):1,Loxocemus_bicolor:1):1):1):1):1,((((Antaresia_stimsoni:1,Antaresia_childreni:1):1,Antaresia_perthensis:1):1,Antaresia_maculosa:1):1,((Morelia_viridisN:1,Morelia_viridisS:1):1,Morelia_carinata:1):1):1):1):1);",
            ),
            (
                "pythonidae.mle.weighted.pdm.csv",
                "((Liasis_albertisii:0.0542142498,Bothrochilus_boa:0.0638595214):0.038444,(((Apodora_papuana:0.0670782319,Liasis_olivaceus:0.0430801028):0.010168,(Liasis_fuscus:0.0194903208,Liasis_mackloti:0.0141916418):0.048505):0.013422,(Antaresia_melanocephalus:0.0380695554,Antaresia_ramsayi:0.0325474267):0.043626):0.007734,(((((((Antaresia_stimsoni:0.0152390165,Antaresia_childreni:0.023141749):0.032397,Antaresia_perthensis:0.0760812159):0.012848,Antaresia_maculosa:0.0679212061):0.011617,((Morelia_viridisN:0.0377499268,Morelia_viridisS:0.0473589755):0.027329,Morelia_carinata:0.0660356718):0.013482):0.015469,((((((Morelia_kinghorni:0.0075825724,Morelia_nauta:0.0086155842):0.004182,Morelia_clastolepis:0.0045446653):0.018597,Morelia_amethistina:0.0227641045):0.007181,Morelia_tracyae:0.0377936102):0.024796,Morelia_oenpelliensis:0.0579745143):0.004283,(Morelia_bredli:0.0274921037,Morelia_spilota:0.0241663426):0.026356):0.031732):0.006602,(((((Python_sebae:0.0629755585,Python_molurus:0.0335903967):0.02165,Python_curtus:0.1067094932):0.016163,Python_regius:0.1058922755):0.032743,((Xenopeltis_unicolor:0.1983677797,Candoia_aspera:0.4092923305):0.048508,Loxocemus_bicolor:0.2627888765):0.060789):0.030952,(Python_timoriensis:0.074479767,Python_reticulatus:0.0562613055):0.06004):0.027099):0.002859,Morelia_boeleni:0.0843874314):0.002713);",
            ),
        )
        # Options shared by every matrix read: the first row and first column
        # hold the names, and taxa not yet in the namespace may be created.
        csv_options = dict(
            is_first_row_column_names=True,
            is_first_column_row_names=True,
            is_allow_new_taxa=True,
            delimiter=",",
        )
        for csv_name, expected_newick in nj_cases:
            matrix_path = pathmap.other_source_path(csv_name)
            with open(matrix_path) as matrix_csv:
                distance_matrix = dendropy.PhylogeneticDistanceMatrix.from_csv(matrix_csv, **csv_options)
            nj_tree = distance_matrix.nj_tree()
            # Debugging aids, left disabled:
            # print(nj_tree.as_string("newick"))
            # print(nj_tree.as_ascii_plot(plot_metric="length"))
            reference_tree = dendropy.Tree.get(
                data=expected_newick,
                schema="newick",
                rooting="force-unrooted",
                taxon_namespace=distance_matrix.taxon_namespace,
                preserve_underscores=True,
            )
            self.check_tree(obs_tree=nj_tree, expected_tree=reference_tree)
 def test_nonabundance_edgeweighted_unnormalized_ses_mntd(self):
     # Compute the standardized effect size of the mean nearest taxon
     # distance (edge-weighted, not normalized by tree size) for every
     # assemblage, then compare each result field with the matching column
     # of the reference CSV using self._low_precision_equality.
     #
     # R commands recorded for generating the reference CSV:
     #   suppressMessages(library(picante))
     #   dists = as.matrix(read.csv("data/dist.csv",header=T,row.names=1))
     #   comm = as.matrix(read.csv("data/community.data.tsv",sep="\t",header=T,row.names=1))
     #   results.mntd = ses.mntd(comm, dists, null.model="taxa.labels",abundance.weighted=F,runs=100000)
     #   write.csv(format(results.mntd,digits=22), quote=F, "community.data.weighted.unnormalized.ses.mntd.csv")
     reference_path = pathmap.other_source_path("community.data.weighted.unnormalized.ses.mntd.csv")
     with open(reference_path) as reference_csv:
         expected_table = container.DataTable.from_csv(reference_csv, default_data_type=float, delimiter=",")

     observed_results = self.pdm.standardized_effect_size_mean_nearest_taxon_distance(
         assemblage_memberships=self.assemblage_memberships,
         num_randomization_replicates=100,
         is_weighted_edge_distances=True,
         is_normalize_by_tree_size=False,
     )
     self.assertEqual(len(observed_results), expected_table.num_rows())

     # (attribute on the observed result, column in the reference table),
     # checked in this order for every assemblage.
     field_to_column = (
         ("obs", "mntd.obs"),
         ("null_model_mean", "mntd.rand.mean"),
         ("null_model_sd", "mntd.rand.sd"),
         ("z", "mntd.obs.z"),
         ("p", "mntd.obs.p"),
     )
     # Results are paired with reference rows by position.
     for result, row_name in zip(observed_results, expected_table.row_name_iter()):
         for attribute_name, column_name in field_to_column:
             self.assertTrue(
                 self._low_precision_equality(
                     getattr(result, attribute_name), expected_table[row_name, column_name]
                 )
             )