Python assign_traits_to_treeの例、picrust.predict_traits.assign_traits_to_tree Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_predict_traits.py プロジェクト: yucy207/picrust

 def test_assign_traits_to_tree(self):
     """assign_traits_to_tree should map reconstructed traits to tree nodes"""
     trait_map = self.SimpleTreeTraits

     # Run the identical check on the simple tree first, then on the
     # polytomy tree. Attributes are looked up lazily so the second tree
     # is only fetched once the first one has passed.
     for tree_attr in ("SimpleTree", "SimplePolytomyTree"):
         annotated = assign_traits_to_tree(trait_map, getattr(self, tree_attr))

         # Each node must carry the trait stored under its name, or None
         # when the trait dict has no entry for that name.
         for node in annotated.preorder():
             observed = node.Reconstruction
             expected = trait_map.get(node.Name)
             self.assertEqual(observed, expected)

コード例 #2

0

ファイルを表示

ファイル: test_predict_traits.py プロジェクト: adamrp/picrust

 def test_assign_traits_to_tree(self):
     """assign_traits_to_tree should map reconstructed traits to tree nodes"""
     trait_map = self.SimpleTreeTraits

     def check_all_nodes(annotated_tree):
         # A node's Reconstruction must equal the dict entry for its name
         # (None when the name is absent from the trait dict).
         for node in annotated_tree.preorder():
             observed = node.Reconstruction
             expected = trait_map.get(node.Name)
             self.assertEqual(observed, expected)

     # Simple tree
     check_all_nodes(assign_traits_to_tree(trait_map, self.SimpleTree))

     # Polytomy tree
     check_all_nodes(assign_traits_to_tree(trait_map, self.SimplePolytomyTree))

コード例 #3

0

ファイルを表示

ファイル: test_predict_traits.py プロジェクト: adamrp/picrust

    def test_assign_traits_to_tree_quoted_node_name(self):
        """Assign_traits_to_tree should remove quotes from node names"""
        trait_map = self.SimpleTreeTraits
        simple_tree = self.SimpleTree

        # Wrap two node labels in quotes: one single-quoted, one double-quoted.
        for bare_name, quoted_name in (('A', "'A'"), ('B', '"B"')):
            simple_tree.getNodeMatchingName(bare_name).Name = quoted_name

        # fix_bad_labels=True is needed here: per the original author's note,
        # setting it to False produces NoneType predictions for quoted labels.
        annotated = assign_traits_to_tree(trait_map, simple_tree,
                                          fix_bad_labels=True)

        # Compare each node against the trait stored under its unquoted name.
        for node in annotated.preorder():
            observed = node.Reconstruction
            expected = trait_map.get(node.Name.strip("'").strip('"'))
            self.assertEqual(observed, expected)

        # Polytomy tree: labels are untouched, so names are looked up as-is
        # and the default label handling is used.
        annotated = assign_traits_to_tree(trait_map, self.SimplePolytomyTree)
        for node in annotated.preorder():
            observed = node.Reconstruction
            expected = trait_map.get(node.Name)
            self.assertEqual(observed, expected)

コード例 #4

0

ファイルを表示

ファイル: test_predict_traits.py プロジェクト: yucy207/picrust

    def test_assign_traits_to_tree_quoted_node_name(self):
        """Assign_traits_to_tree should remove quotes from node names"""
        trait_map = self.SimpleTreeTraits
        quoted_tree = self.SimpleTree

        # Give node A a single-quoted label and node B a double-quoted one.
        node_a = quoted_tree.getNodeMatchingName('A')
        node_a.Name = "'A'"
        node_b = quoted_tree.getNodeMatchingName('B')
        node_b.Name = '"B"'

        # With fix_bad_labels disabled, quoted labels yield NoneType
        # predictions (original author's note), hence fix_bad_labels=True.
        result_tree = assign_traits_to_tree(
            trait_map, quoted_tree, fix_bad_labels=True)

        # The expected trait is found under the label with quotes stripped.
        for node in result_tree.preorder():
            observed = node.Reconstruction
            lookup_name = node.Name.strip("'").strip('"')
            self.assertEqual(observed, trait_map.get(lookup_name))

        # Same check on the polytomy tree, whose labels are not quoted.
        result_tree = assign_traits_to_tree(trait_map, self.SimplePolytomyTree)
        for node in result_tree.preorder():
            observed = node.Reconstruction
            self.assertEqual(observed, trait_map.get(node.Name))

コード例 #5

0

ファイルを表示

    def test_get_brownian_motion_param_from_confidence_intervals(self):
        """Get brownian motion parameters from confidence intervals"""
        #TODO: Ensure this works with arrays of brownian motions

        # Keyword arguments shared by both estimation calls below.
        ci_kwargs = {
            "upper_bound_trait_label": "upper_bound",
            "lower_bound_trait_label": "lower_bound",
            "trait_label": "Reconstruction",
            "confidence": 0.95,
        }

        # ---- One-trait case ----
        tree = self.SimpleTree
        single_traits = {"A": [1.0], "C": [2.0], "E": [1.0], "F": [1.0]}
        tree = assign_traits_to_tree(single_traits, tree,
                                     trait_label="Reconstruction")

        # Attach confidence bounds to the two internal nodes E and F.
        for node_name, attr_name, bound in (('E', 'upper_bound', [2.0]),
                                            ('F', 'upper_bound', [1.0]),
                                            ('E', 'lower_bound', [0.0]),
                                            ('F', 'lower_bound', [1.0])):
            setattr(tree.getNodeMatchingName(node_name), attr_name, bound)

        brownian_motion_parameter = \
            get_brownian_motion_param_from_confidence_intervals(tree,
                                                                **ci_kwargs)

        # Only the number of returned parameters is checked; the exact-value
        # assertion was already disabled in the original test.
        self.assertEqual(len(brownian_motion_parameter), 1)

        # ---- Two-trait case ----
        # Trait dict and tree used here (as set up by the fixture):
        #   {"A":[1.0,1.0],"E":[1.0,1.0],"F":[0.0,1.0],"D":[0.0,0.0]}
        #   DndParser("((A:0.02,B:0.01)E:0.05,(C:0.01,D:0.01)F:0.05)root;")
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree
        # The returned tree is not used; the steps below keep operating on
        # self.SimpleTree.
        assign_traits_to_tree(traits, tree, trait_label="Reconstruction")

        for node_name, attr_name, bound in (('E', 'upper_bound', [1.0, 1.0]),
                                            ('F', 'upper_bound', [1.0, 2.0]),
                                            ('E', 'lower_bound', [-2.0, -2.0]),
                                            ('F', 'lower_bound', [-1.0, 0.0])):
            setattr(tree.getNodeMatchingName(node_name), attr_name, bound)

        brownian_motion_parameter = \
            get_brownian_motion_param_from_confidence_intervals(tree,
                                                                **ci_kwargs)

        # Again only the length is asserted, not the values.
        self.assertEqual(len(brownian_motion_parameter), 2)

コード例 #6

0

ファイルを表示

ファイル: test_predict_traits.py プロジェクト: adamrp/picrust

    def test_get_brownian_motion_param_from_confidence_intervals(self):
        """Get brownian motion parameters from confidence intervals"""
        #TODO: Ensure this works with arrays of brownian motions

        def estimate(annotated_tree):
            # Both cases call the estimator with identical labels/confidence.
            return get_brownian_motion_param_from_confidence_intervals(
                annotated_tree,
                upper_bound_trait_label="upper_bound",
                lower_bound_trait_label="lower_bound",
                trait_label="Reconstruction",
                confidence=0.95)

        # One-trait case
        tree = self.SimpleTree
        one_trait = {"A": [1.0], "C": [2.0], "E": [1.0], "F": [1.0]}
        tree = assign_traits_to_tree(one_trait, tree,
                                     trait_label="Reconstruction")
        tree.getNodeMatchingName('E').upper_bound = [2.0]
        tree.getNodeMatchingName('F').upper_bound = [1.0]
        tree.getNodeMatchingName('E').lower_bound = [0.0]
        tree.getNodeMatchingName('F').lower_bound = [1.0]

        brownian_motion_parameter = estimate(tree)
        # The value check is disabled in the original; only the count of
        # returned parameters is verified.
        self.assertEqual(len(brownian_motion_parameter), 1)

        # Two-trait case, using the fixture traits and tree:
        #   {"A":[1.0,1.0],"E":[1.0,1.0],"F":[0.0,1.0],"D":[0.0,0.0]}
        #   DndParser("((A:0.02,B:0.01)E:0.05,(C:0.01,D:0.01)F:0.05)root;")
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree
        # Return value is not used; later steps work on self.SimpleTree.
        assign_traits_to_tree(traits, tree, trait_label="Reconstruction")

        # Set up the tree with confidence intervals on E and F.
        tree.getNodeMatchingName('E').upper_bound = [1.0, 1.0]
        tree.getNodeMatchingName('F').upper_bound = [1.0, 2.0]
        tree.getNodeMatchingName('E').lower_bound = [-2.0, -2.0]
        tree.getNodeMatchingName('F').lower_bound = [-1.0, 0.0]

        brownian_motion_parameter = estimate(tree)
        self.assertEqual(len(brownian_motion_parameter), 2)

コード例 #7

0

ファイルを表示

ファイル: test_predict_traits.py プロジェクト: yucy207/picrust

    def test_calc_nearest_sequenced_taxon_index(self):
        """calc_nearest_sequenced_taxon_index calculates the NSTI measure"""
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree
        # The returned tree is not used; the NSTI calls below take
        # self.SimpleTree directly.
        assign_traits_to_tree(traits, tree, trait_label="Reconstruction")

        # Expected distances:
        #   A --> A 0.0
        #   B --> A 0.03
        #   C --> D 0.02
        #   D --> D 0.0
        # NSTI = 0.05/4.0 = 0.0125
        all_tip_distances = (("A", 0.0), ("B", 0.03), ("C", 0.02), ("D", 0.00))

        # Default options
        obs_nsti, obs_distances = calc_nearest_sequenced_taxon_index(
            tree, verbose=False)
        self.assertFloatEqual(obs_nsti, 0.0125)
        for tip_name, expected_distance in all_tip_distances:
            self.assertFloatEqual(obs_distances[tip_name], expected_distance)

        # Restrict the index to tips B and C:
        #   B --> A 0.03
        #   C --> D 0.02
        # NSTI = 0.05/2.0 = 0.025
        obs_nsti, obs_distances = calc_nearest_sequenced_taxon_index(
            tree, limit_to_tips=["B", "C"], verbose=False)
        self.assertFloatEqual(obs_nsti, 0.025)
        for tip_name, expected_distance in (("B", 0.03), ("C", 0.02)):
            self.assertFloatEqual(obs_distances[tip_name], expected_distance)

コード例 #8

0

ファイルを表示

ファイル: test_predict_traits.py プロジェクト: adamrp/picrust

    def test_calc_nearest_sequenced_taxon_index(self):
        """calc_nearest_sequenced_taxon_index calculates the NSTI measure"""
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree
        # Annotate the tree; the return value is not needed because the
        # calls below are made on self.SimpleTree.
        assign_traits_to_tree(traits, tree, trait_label="Reconstruction")

        def check(limit_to_tips, expected_nsti, expected_by_tip):
            # Run the index (optionally restricted to some tips) and compare
            # the overall value first, then each per-tip distance in order.
            if limit_to_tips is None:
                nsti, distances = calc_nearest_sequenced_taxon_index(
                    tree, verbose=False)
            else:
                nsti, distances = calc_nearest_sequenced_taxon_index(
                    tree, limit_to_tips=limit_to_tips, verbose=False)
            self.assertFloatEqual(nsti, expected_nsti)
            for tip_name, expected_distance in expected_by_tip:
                self.assertFloatEqual(distances[tip_name], expected_distance)

        # Default options. Expected distances:
        #   A --> A 0.0, B --> A 0.03, C --> D 0.02, D --> D 0.0
        #   = 0.05/4.0 = 0.0125
        check(None, 0.0125,
              (("A", 0.0), ("B", 0.03), ("C", 0.02), ("D", 0.00)))

        # Limiting prediction to B and C:
        #   B --> A 0.03, C --> D 0.02
        check(["B", "C"], 0.025, (("B", 0.03), ("C", 0.02)))

コード例 #9

0

ファイルを表示

    def test_nearest_neighbor_prediction(self):
        """nearest_neighbor_prediction predicts nearest neighbor's traits"""
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree
        # The annotated return value is not used; predictions are made on
        # self.SimpleTree.
        assign_traits_to_tree(traits, tree, trait_label="Reconstruction")

        # Default options
        default_preds = predict_nearest_neighbor(tree,
                                                 nodes_to_predict=["B", "C"])
        for name, expected in (("B", [1.0, 1.0]), ("C", [0.0, 0.0])):
            self.assertEqual(default_preds[name], array(expected))

        # Allowing ancestral nearest neighbors
        ancestral_preds = predict_nearest_neighbor(
            tree, nodes_to_predict=["B", "C"], tips_only=False)
        self.assertEqual(ancestral_preds["C"], array([0.0, 1.0]))

        # Allowing self to be the nearest neighbor AND ancestral neighbors
        self_preds = predict_nearest_neighbor(
            tree,
            nodes_to_predict=["A", "B", "C", "D"],
            tips_only=False,
            use_self_in_prediction=True)
        for name, expected in (("A", [1.0, 1.0]),
                               ("B", [1.0, 1.0]),
                               ("C", [0.0, 1.0]),
                               ("D", [0.0, 0.0])):
            self.assertEqual(self_preds[name], array(expected))

コード例 #10

0

ファイルを表示

ファイル: test_predict_traits.py プロジェクト: yucy207/picrust

    def test_get_nearest_annotated_neightbor(self):
        """get_nearest_annotated_neighbor finds nearest relative with traits"""
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree
        # Return value unused; lookups below are made on self.SimpleTree.
        assign_traits_to_tree(traits, tree)

        # (tips_only flag, ((query node, expected neighbor name), ...))
        scenarios = (
            # Ancestral NN matching
            (False, (('A', 'E'), ('B', 'E'), ('C', 'F'), ('D', 'F'))),
            # Tip only, non-self matching
            (True, (('A', 'D'), ('B', 'A'), ('C', 'D'), ('D', 'A'))),
        )

        for tips_only, expectations in scenarios:
            for query_name, expected_name in expectations:
                nn = get_nearest_annotated_neighbor(tree, query_name,
                                                    tips_only=tips_only,
                                                    include_self=False)
                self.assertEqual(nn.Name, expected_name)

コード例 #11

0

ファイルを表示

ファイル: test_predict_traits.py プロジェクト: adamrp/picrust

    def test_get_nearest_annotated_neightbor(self):
        """get_nearest_annotated_neighbor finds nearest relative with traits"""
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree
        # The annotated return value is not used further.
        assign_traits_to_tree(traits, tree)

        def neighbor_name(query_name, tips_only):
            # Name of the nearest annotated neighbor, never the node itself.
            return get_nearest_annotated_neighbor(
                tree, query_name,
                tips_only=tips_only, include_self=False).Name

        # Test ancestral NN matching
        self.assertEqual(neighbor_name('A', False), 'E')
        self.assertEqual(neighbor_name('B', False), 'E')
        self.assertEqual(neighbor_name('C', False), 'F')
        self.assertEqual(neighbor_name('D', False), 'F')

        # Test tip only, non-self matching
        self.assertEqual(neighbor_name('A', True), 'D')
        self.assertEqual(neighbor_name('B', True), 'A')
        self.assertEqual(neighbor_name('C', True), 'D')
        self.assertEqual(neighbor_name('D', True), 'A')

コード例 #12

0

ファイルを表示

ファイル: test_predict_traits.py プロジェクト: gregcaporaso/picrust

    def test_biom_table_from_predictions(self):
        """format predictions into biom format"""
        # Annotate the simple tree with the fixture traits.
        annotated_tree = assign_traits_to_tree(self.SimpleTreeTraits,
                                               self.SimpleTree)

        # Predict every tip of the annotated tree.
        tip_names = [tip.Name for tip in annotated_tree.tips()]
        predictions = predict_traits_from_ancestors(annotated_tree, tip_names)

        # NOTE(review): the visible code builds the table but asserts nothing
        # about it -- confirm whether the checks were cut off upstream.
        biom_table = biom_table_from_predictions(predictions,
                                                 ["trait1", "trait2"])

コード例 #13

0

ファイルを表示

ファイル: test_predict_traits.py プロジェクト: yucy207/picrust

    def test_biom_table_from_predictions(self):
        """format predictions into biom format"""
        trait_map = self.SimpleTreeTraits
        simple_tree = self.SimpleTree

        # Test on simple tree: annotate it, then predict all of its tips.
        annotated = assign_traits_to_tree(trait_map, simple_tree)
        nodes_to_predict = []
        for tip in annotated.tips():
            nodes_to_predict.append(tip.Name)
        predictions = predict_traits_from_ancestors(annotated,
                                                    nodes_to_predict)

        # NOTE(review): no assertion is made on the resulting table in the
        # code shown here -- verify against the full test file.
        trait_ids = ["trait1", "trait2"]
        biom_table = biom_table_from_predictions(predictions, trait_ids)

コード例 #14

0

ファイルを表示

    def test_get_nn_by_tree_descent(self):
        """get_nn_by_tree_descent finds the nearest neighbor and its distance"""
        # The original docstring was copied from the NSTI test and described
        # calc_nearest_sequenced_taxon_index; unittest reports the first
        # docstring line as the test description, so it is corrected here.
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree
        # The returned tree is not used; lookups below run on self.SimpleTree.
        assign_traits_to_tree(traits,
                              tree,
                              trait_label="Reconstruction")
        #Expected distances:
        # A --> A 0.0
        # B --> A 0.03
        # C --> D 0.02
        # D --> D 0.0

        #Test with default options
        nn, distance = get_nn_by_tree_descent(tree, "B", verbose=True)
        self.assertEqual(nn.Name, "A")
        self.assertFloatEqual(distance, 0.03)

        nn, distance = get_nn_by_tree_descent(tree, "A", verbose=True)
        self.assertEqual(nn.Name, "A")
        self.assertFloatEqual(distance, 0.00)

        #With filter_by_property=False the expected neighbor of A is B
        #rather than A itself.
        nn, distance = get_nn_by_tree_descent(tree,
                                              "A",
                                              filter_by_property=False,
                                              verbose=True)
        self.assertEqual(nn.Name, "B")
        self.assertFloatEqual(distance, 0.03)

        nn, distance = get_nn_by_tree_descent(tree, "C", verbose=True)
        self.assertEqual(nn.Name, "D")
        self.assertFloatEqual(distance, 0.02)

        #Test calcing the index while
        #limiting prediction to B and C

        # B --> A 0.03
        # C --> D 0.02
        # = 0.05/2.0 = 0.025
        exp = 0.025
        obs_nsti, obs_distances = calc_nearest_sequenced_taxon_index(
            tree, limit_to_tips=["B", "C"], verbose=False)
        self.assertFloatEqual(obs_nsti, exp)
        self.assertFloatEqual(obs_distances["B"], 0.03)
        self.assertFloatEqual(obs_distances["C"], 0.02)

コード例 #15

0

ファイルを表示

ファイル: test_predict_traits.py プロジェクト: yucy207/picrust

    def test_predict_random_neighbor(self):
        """predict_random_neighbor predicts randomly"""
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree
        # Return value unused; predictions are made on self.SimpleTree.
        assign_traits_to_tree(traits, tree)

        # Fixture traits:
        #   {"A":[1.0,1.0],"E":[1.0,1.0],"F":[0.0,1.0],"D":[0.0,0.0]}
        #
        # If there is only one other valid result, it should always be
        # predicted: with self predictions disallowed, the prediction for A
        # should always come from node D, and be 0,0.
        forced = predict_random_neighbor(tree, ['A'],
                                         trait_label="Reconstruction",
                                         use_self_in_prediction=False)
        self.assertEqual(forced['A'], [0.0, 0.0])

        # With use_self_in_prediction=True, ~50% of predictions should be
        # [1.0,1.0] and the rest [0.0,0.0]. Sample repeatedly and check that
        # the frequency is reasonable; the technique is fast, so many
        # iterations are affordable.
        iterations = 100000
        a_predictions = 0
        d_predictions = 0
        for _ in range(iterations):
            sampled = predict_random_neighbor(tree, ['A'],
                                              trait_label="Reconstruction",
                                              use_self_in_prediction=True)
            if sampled['A'] == [1.0, 1.0]:
                a_predictions += 1
                continue
            if sampled['A'] == [0.0, 0.0]:
                d_predictions += 1
                continue
            # Any other value means neither A's nor D's traits were used.
            raise RuntimeError(
                "Bad prediction result: Neither node A nor node D traits used in prediction")

        ratio = float(a_predictions) / float(iterations)
        self.assertFloatEqual(ratio, 0.5, eps=1e-2)

コード例 #16

0

ファイルを表示

ファイル: test_predict_traits.py プロジェクト: adamrp/picrust

    def test_predict_random_neighbor(self):
        """predict_random_neighbor predicts randomly"""
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree
        # The annotated return value is not used afterwards.
        assign_traits_to_tree(traits, tree)

        def predict_a(use_self):
            # One random-neighbor prediction for node A.
            return predict_random_neighbor(
                tree, ['A'],
                trait_label="Reconstruction",
                use_self_in_prediction=use_self)

        # Fixture traits:
        #   {"A":[1.0,1.0],"E":[1.0,1.0],"F":[0.0,1.0],"D":[0.0,0.0]}
        # If self predictions are disallowed, the prediction for A should
        # always come from node D, and be 0,0.
        results = predict_a(False)
        self.assertEqual(results['A'], [0.0, 0.0])

        # If use_self is True, ~50% of predictions should be [1.0,1.0] and
        # half should be [0.0,0.0]. Pick repeatedly and make sure the
        # frequencies are reasonable.
        iterations = 100000
        a_predictions = 0
        d_predictions = 0
        for _ in range(iterations):
            results = predict_a(True)
            if results['A'] == [1.0, 1.0]:
                a_predictions += 1
            elif results['A'] == [0.0, 0.0]:
                d_predictions += 1
            else:
                raise RuntimeError(
                    "Bad prediction result: Neither node A nor node D traits used in prediction")

        ratio = float(a_predictions) / float(iterations)
        self.assertFloatEqual(ratio, 0.5, eps=1e-2)

コード例 #17

0

ファイルを表示

ファイル: test_predict_traits.py プロジェクト: gregcaporaso/picrust

    def test_predict_traits_from_ancestors(self):
        """predict_traits_from_ancestors should propagate ancestral states"""
        # When the node is very close to I3, the prediction should be
        # approximately the traits of I3.
        traits = self.PartialReconstructionTraits
        annotated_tree = assign_traits_to_tree(traits, self.CloseToI3Tree)

        targets = ["A"]
        prediction = predict_traits_from_ancestors(tree=annotated_tree,
                                                   nodes_to_predict=targets)

        # Rounded predictions must match the traits recorded for I3.
        expected = traits["I3"]
        for node_name in targets:
            rounded = around(prediction[node_name])
            self.assertFloatEqual(rounded, expected)

コード例 #18

0

ファイルを表示

ファイル: test_predict_traits.py プロジェクト: yucy207/picrust

    def test_predict_traits_from_ancestors(self):
        """predict_traits_from_ancestors should propagate ancestral states"""
        # Point predictions are tested first (these are easiest):
        # when the node is very close to I3, prediction should be approx. I3.
        trait_map = self.PartialReconstructionTraits
        close_to_i3 = assign_traits_to_tree(trait_map, self.CloseToI3Tree)

        nodes_to_predict = ['A']
        prediction = predict_traits_from_ancestors(
            tree=close_to_i3, nodes_to_predict=nodes_to_predict)

        # Compare each rounded prediction with the I3 entry of the traits.
        exp = trait_map["I3"]
        for node_name in nodes_to_predict:
            self.assertFloatEqual(around(prediction[node_name]), exp)

コード例 #19

0

ファイルを表示

ファイル: test_predict_traits.py プロジェクト: adamrp/picrust

    def test_get_nn_by_tree_descent(self):
        """get_nn_by_tree_descent finds the nearest neighbor and its distance"""
        # The docstring previously duplicated the one from the NSTI test
        # (calc_nearest_sequenced_taxon_index), which made the two tests
        # indistinguishable in verbose test-runner output.
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree
        # Return value unused; the lookups below operate on self.SimpleTree.
        assign_traits_to_tree(traits, tree, trait_label="Reconstruction")

        #Expected (query, nearest neighbor, distance) with default options:
        # B --> A 0.03
        # A --> A 0.0
        # C --> D 0.02
        nn, distance = get_nn_by_tree_descent(tree, "B", verbose=True)
        self.assertEqual(nn.Name, "A")
        self.assertFloatEqual(distance, 0.03)

        nn, distance = get_nn_by_tree_descent(tree, "A", verbose=True)
        self.assertEqual(nn.Name, "A")
        self.assertFloatEqual(distance, 0.00)

        #With filter_by_property=False, A's expected neighbor is B
        #instead of A itself.
        nn, distance = get_nn_by_tree_descent(tree, "A",
                                              filter_by_property=False,
                                              verbose=True)
        self.assertEqual(nn.Name, "B")
        self.assertFloatEqual(distance, 0.03)

        nn, distance = get_nn_by_tree_descent(tree, "C", verbose=True)
        self.assertEqual(nn.Name, "D")
        self.assertFloatEqual(distance, 0.02)

        #Test calcing the index while
        #limiting prediction to B and C

        # B --> A 0.03
        # C --> D 0.02
        # = 0.05/2.0 = 0.025
        exp = 0.025
        obs_nsti, obs_distances = calc_nearest_sequenced_taxon_index(
            tree, limit_to_tips=["B", "C"], verbose=False)
        self.assertFloatEqual(obs_nsti, exp)
        self.assertFloatEqual(obs_distances["B"], 0.03)
        self.assertFloatEqual(obs_distances["C"], 0.02)

コード例 #20

0

ファイルを表示

ファイル: test_predict_traits.py プロジェクト: adamrp/picrust

    def test_nearest_neighbor_prediction(self):
        """nearest_neighbor_prediction predicts nearest neighbor's traits"""
        traits = self.SimpleTreeTraits
        tree = self.SimpleTree
        # The annotated return value is not used afterwards.
        assign_traits_to_tree(traits, tree, trait_label="Reconstruction")

        def check(results, expected_by_node):
            # Compare predictions node by node, in the listed order.
            for node_name, expected in expected_by_node:
                self.assertEqual(results[node_name], array(expected))

        # Test with default options
        check(predict_nearest_neighbor(tree, nodes_to_predict=["B", "C"]),
              (("B", [1.0, 1.0]), ("C", [0.0, 0.0])))

        # Test allowing ancestral NNs
        check(predict_nearest_neighbor(tree,
                                       nodes_to_predict=["B", "C"],
                                       tips_only=False),
              (("C", [0.0, 1.0]),))

        # Test allowing self to be NN AND ancestral NNs
        check(predict_nearest_neighbor(tree,
                                       nodes_to_predict=["A", "B", "C", "D"],
                                       tips_only=False,
                                       use_self_in_prediction=True),
              (("A", [1.0, 1.0]),
               ("B", [1.0, 1.0]),
               ("C", [0.0, 1.0]),
               ("D", [0.0, 0.0])))

コード例 #21

0

ファイルを表示

ファイル: test_predict_traits.py プロジェクト: adamrp/picrust

    def setUp(self):
        """Build the trees, trait dicts and temporary trait files for the tests.

        Sets the tree fixtures (parsed from Newick strings), the trait
        dictionaries keyed by node name, and writes three temporary .tsv
        trait files whose paths are collected in self.files_to_remove.
        """
        self.SimpleTree = \
          DndParser("((A:0.02,B:0.01)E:0.05,(C:0.01,D:0.01)F:0.05)root;")

        #Set up a tree with obvious differences in the rate of gene content
        #evolution to test confidence interval estimation
        #Features:
        # --trait 1 has ~ 10 fold higher confidence intervals than trait 0.
        # Trait 2 is 10 fold higher than trait 1

        # -- of predicted nodes B and D, D has a ~10 fold longer branch

        self.SimpleUnequalVarianceTree =\
          DndParser("((A:0.01,B:0.01)E:0.05,(C:0.01,D:0.10)F:0.05)root;")
        traits = {"A":[1.0,1.0,1.0],"C":[1.0,1.0,1.0],"E":[1.0,1.0,1.0],"F":[1.0,1.0,1.0]}
        self.SimpleUnequalVarianceTree = assign_traits_to_tree(traits,\
          self.SimpleUnequalVarianceTree,trait_label="Reconstruction")
        self.SimpleUnequalVarianceTree.getNodeMatchingName('E').upper_bound = [2.0,20.0,200.0]
        self.SimpleUnequalVarianceTree.getNodeMatchingName('E').lower_bound = [-1.0,-19.0,-199.0]
        self.SimpleUnequalVarianceTree.getNodeMatchingName('F').upper_bound = [2.0,20.0,200.0]
        self.SimpleUnequalVarianceTree.getNodeMatchingName('F').lower_bound = [-1.0,-19.0,-199.0]

        #Set up a tree with a three-way polytomy
        self.SimplePolytomyTree = \
          DndParser("((A:0.02,B:0.01,B_prime:0.03)E:0.05,(C:0.01,D:0.01)F:0.05)root;")

        self.SimpleTreeTraits =\
            {"A":[1.0,1.0],"E":[1.0,1.0],"F":[0.0,1.0],"D":[0.0,0.0]}

        self.PartialReconstructionTree =\
                DndParser("((((B:0.01,C:0.01)I3:0.01,A:0.01)I2:0.01,D:0.01)I1:0.01)root;")

        self.CloseToI3Tree =\
                DndParser("((((B:0.01,C:0.95)I3:0.01,A:0.01)I2:0.95,D:0.05)I1:0.95)root;")

        self.CloseToI1Tree =\
                DndParser("((((B:0.95,C:0.95)I3:0.95,A:0.01)I2:0.02,D:0.05)I1:0.05)root;")

        self.BetweenI3AndI1Tree=\
                DndParser("((((B:0.01,C:0.1)I3:0.02,A:0.01)I2:0.02,D:0.05)I1:0.02)root;")


        self.PartialReconstructionTraits =\
                {"B":[1.0,1.0],"C":[1.0,1.0],"I3":[1.0,1.0],"I1":[0.0,1.0],"D":[0.0,1.0]}

        self.GeneCountTraits =\
                {"B":[1.0,1.0],"C":[1.0,2.0],"I3":[1.0,1.0],"I1":[0.0,3.0],"D":[0.0,5.0]}

        # The temp files are written inside `with` blocks so each handle is
        # closed even if write() raises. Binding the handle to the same
        # self.* attribute as before leaves a closed file object there,
        # exactly as the previous open/write/close sequence did.

        #create a tmp trait file
        self.in_trait1_fp = get_tmp_filename(prefix='Predict_Traits_Tests',suffix='.tsv')
        with open(self.in_trait1_fp,'w') as self.in_trait1_file:
            self.in_trait1_file.write(in_trait1)

        #create another tmp trait file (with columns in different order)
        self.in_trait2_fp = get_tmp_filename(prefix='Predict_Traits_Tests',suffix='.tsv')
        with open(self.in_trait2_fp,'w') as self.in_trait2_file:
            self.in_trait2_file.write(in_trait2)

        #create a tmp trait file with an incorrect trait name
        self.in_bad_trait_fp = get_tmp_filename(prefix='Predict_Traits_Tests',suffix='.tsv')
        with open(self.in_bad_trait_fp,'w') as self.in_bad_trait_file:
            self.in_bad_trait_file.write(in_bad_trait)

        # Paths of the temp files created above (presumably deleted in
        # tearDown, which is not visible here).
        self.files_to_remove = [self.in_trait1_fp,self.in_trait2_fp,self.in_bad_trait_fp]

コード例 #22

0

ファイルを表示

ファイル: test_predict_traits.py プロジェクト: adamrp/picrust

    def test_weighted_average_tip_prediction(self):
        """weighted_average_tip_prediction should follow the nearest trait-bearing nodes"""

        def predict_for_tip_a(trait_table, base_tree):
            # Steps shared by every scenario: decorate the tree with the
            # traits, look up tip "A" and its most recent reconstructed
            # ancestor, then run the prediction for that tip.
            decorated = assign_traits_to_tree(trait_table, base_tree)
            tip = decorated.getNodeMatchingName("A")
            ancestor = get_most_recent_reconstructed_ancestor(tip)
            result = weighted_average_tip_prediction(
                tree=decorated,
                node=tip,
                most_recent_reconstructed_ancestor=ancestor)
            return decorated, result

        partial_traits = self.PartialReconstructionTraits

        # Scenario 1: tip A is very close to I3, so the rounded
        # prediction is expected to match the I3 traits.
        _, predicted = predict_for_tip_a(partial_traits, self.CloseToI3Tree)
        expected = partial_traits["I3"]
        self.assertFloatEqual(around(predicted), expected)

        # Scenario 2: tip A is very close to I1, so the rounded
        # prediction is expected to match the I1 traits.
        decorated_tree, predicted = \
            predict_for_tip_a(partial_traits, self.CloseToI1Tree)
        expected = partial_traits["I1"]
        # NOTE(review): tip A is looked up again here but the node is never
        # used; the lookup is retained only to mirror the existing test.
        unused_tip = decorated_tree.getNodeMatchingName('A')
        self.assertFloatEqual(around(predicted), expected)

        # Scenario 3: the "B case" on the close-to-I3 tree.
        # NOTE(review): the exponential weight function is built in the
        # remaining scenarios but never handed to
        # weighted_average_tip_prediction, so whatever that function uses
        # by default is what actually gets exercised.
        unused_weight_fn = make_neg_exponential_weight_fn(exp_base=e)
        _, predicted = predict_for_tip_a(partial_traits, self.CloseToI3Tree)
        expected = partial_traits["B"]
        self.assertFloatEqual(around(predicted), expected)

        # Scenario 4: the I1 case on the close-to-I1 tree.
        unused_weight_fn = make_neg_exponential_weight_fn(exp_base=e)
        _, predicted = predict_for_tip_a(partial_traits, self.CloseToI1Tree)
        expected = partial_traits["I1"]
        self.assertFloatEqual(around(predicted), expected)

        # Scenario 5: balanced case with full gene-count data, where the
        # prediction (compared without rounding) is expected to be the
        # plain mean of the I1 and I3 trait vectors.
        count_traits = self.GeneCountTraits
        unused_weight_fn = make_neg_exponential_weight_fn(exp_base=e)
        _, predicted = predict_for_tip_a(count_traits, self.BetweenI3AndI1Tree)
        expected = (array(count_traits["I1"]) + array(count_traits["I3"]))/2.0
        self.assertFloatEqual(predicted, expected)

コード例 #23

0

ファイルを表示

ファイル: predict_traits.py プロジェクト: picrust/picrust

def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    #if we specify we want NSTI only then we have to calculate it first
    if opts.output_accuracy_metrics_only:
        opts.calculate_accuracy_metrics=True

    if opts.verbose:
        print "Loading tree from file:", opts.tree

    if opts.no_round:
        round_opt = False 
    else:
        round_opt = True

    # Load Tree
    tree = load_picrust_tree(opts.tree, opts.verbose)

    table_headers=[]
    traits={}
    #load the asr trait table using the previous list of functions to order the arrays
    if opts.reconstructed_trait_table:
        table_headers,traits =\
                update_trait_dict_from_file(opts.reconstructed_trait_table)

        #Only load confidence intervals on the reconstruction
        #If we actually have ASR values in the analysis
        if opts.reconstruction_confidence:
            if opts.verbose:
                print "Loading ASR confidence data from file:",\
                opts.reconstruction_confidence
                print "Assuming confidence data is of type:",opts.confidence_format

            asr_confidence_output = open(opts.reconstruction_confidence)
            asr_min_vals,asr_max_vals, params,column_mapping =\
              parse_asr_confidence_output(asr_confidence_output,format=opts.confidence_format)
            if 'sigma' in params:
                brownian_motion_parameter = params['sigma'][0]
            else:
                brownian_motion_parameter = None

            if opts.verbose:
                print "Done. Loaded %i confidence interval values." %(len(asr_max_vals))
                print "Brownian motion parameter:",brownian_motion_parameter
        else:
            brownian_motion_parameter = None

    #load the trait table into a dict with organism names as keys and arrays as functions
    table_headers,genome_traits =\
            update_trait_dict_from_file(opts.observed_trait_table,table_headers)


    #Combine the trait tables overwriting the asr ones if they exist in the genome trait table.
    traits.update(genome_traits)

    # Specify the attribute where we'll store the reconstructions
    trait_label = "Reconstruction"

    if opts.verbose:
        print "Assigning traits to tree..."

    # Decorate tree using the traits
    tree = assign_traits_to_tree(traits,tree, trait_label=trait_label)


    if opts.reconstruction_confidence:
        if opts.verbose:
            print "Assigning trait confidence intervals to tree..."
        tree = assign_traits_to_tree(asr_min_vals,tree,\
            trait_label="lower_bound")

        tree = assign_traits_to_tree(asr_max_vals,tree,\
            trait_label="upper_bound")

        if brownian_motion_parameter is None:

             if opts.verbose:
                 print "No Brownian motion parameters loaded. Inferring these from 95% confidence intervals..."
             brownian_motion_parameter = get_brownian_motion_param_from_confidence_intervals(tree,\
                      upper_bound_trait_label="upper_bound",\
                      lower_bound_trait_label="lower_bound",\
                      trait_label=trait_label,\
                      confidence=0.95)
             if opts.verbose:
                 print "Inferred the following rate parameters:",brownian_motion_parameter
    if opts.verbose:
        print "Collecting list of nodes to predict..."

    #Start by predict all tip nodes.
    nodes_to_predict = [tip.Name for tip in tree.tips()]

    if opts.verbose:
        print "Found %i nodes to predict." % len(nodes_to_predict)

    if opts.limit_predictions_to_organisms:
        organism_id_str = opts.limit_predictions_to_organisms
        ok_organism_ids = organism_id_str.split(',')
        ok_organism_ids = [n.strip() for n in ok_organism_ids]
        for f in set_label_conversion_fns(True,True):
            ok_organism_ids = [f(i) for i in ok_organism_ids]

        if opts.verbose:
            print "Limiting predictions to user-specified ids:",\
              ",".join(ok_organism_ids)


        if not ok_organism_ids:
            raise RuntimeError(\
              "Found no valid ids in input: %s. Were comma-separated ids specified on the command line?"\
              % opts.limit_predictions_to_organisms)

        nodes_to_predict =\
          [n for n in nodes_to_predict if n in ok_organism_ids]

        if not nodes_to_predict:
            raise RuntimeError(\
              "Filtering by user-specified ids resulted in an empty set of nodes to predict.   Are the ids on the commmand-line and tree ids in the same format?  Example tree tip name: %s, example OTU id name: %s" %([tip.Name for tip in tree.tips()][0],ok_organism_ids[0]))

        if opts.verbose:
            print "After filtering organisms to predict by the ids specified on the commandline, %i nodes remain to be predicted" %(len(nodes_to_predict))

    if opts.limit_predictions_by_otu_table:
        if opts.verbose:
            print "Limiting predictions to ids in user-specified OTU table:",\
              opts.limit_predictions_by_otu_table
        otu_table = open(opts.limit_predictions_by_otu_table,"U")
        #Parse OTU table for ids

        otu_ids =\
          extract_ids_from_table(otu_table.readlines(),delimiter="\t")

        if not otu_ids:
            raise RuntimeError(\
              "Found no valid ids in input OTU table: %s.  Is the path correct?"\
              % opts.limit_predictions_by_otu_table)

        nodes_to_predict =\
          [n for n in nodes_to_predict if n in otu_ids]

        if not nodes_to_predict:
            raise RuntimeError(\
              "Filtering by OTU table resulted in an empty set of nodes to predict.   Are the OTU ids and tree ids in the same format?  Example tree tip name: %s, example OTU id name: %s" %([tip.Name for tip in tree.tips()][0],otu_ids[0]))

        if opts.verbose:
            print "After filtering by OTU table, %i nodes remain to be predicted" %(len(nodes_to_predict))

    # Calculate accuracy of PICRUST for the given tree, sequenced genomes
    # and set of ndoes to predict
    accuracy_metrics = ['NSTI']
    accuracy_metric_results = None
    if opts.calculate_accuracy_metrics:
        if opts.verbose:
            print "Calculating accuracy metrics: %s" %([",".join(accuracy_metrics)])
        accuracy_metric_results = {}
        if 'NSTI' in accuracy_metrics:

            nsti_result,min_distances =\
                calc_nearest_sequenced_taxon_index(tree,\
                limit_to_tips = nodes_to_predict,\
                trait_label = trait_label, verbose=opts.verbose)

            #accuracy_metric_results['NSTI'] = nsti_result
            for organism in min_distances.keys():
                accuracy_metric_results[organism] = {'NSTI': min_distances[organism]}

            if opts.verbose:
                print "NSTI:", nsti_result

        if opts.output_accuracy_metrics_only:
            #Write accuracy metrics to file
            if opts.verbose:
                print "Writing accuracy metrics to file:",opts.output_accuracy_metrics

            f = open(opts.output_accuracy_metrics_only,'w+')
            f.write("metric\torganism\tvalue\n")
            lines =[]
            for organism in accuracy_metric_results.keys():
                for metric in accuracy_metric_results[organism].keys():
                    lines.append('\t'.join([metric,organism,\
                      str(accuracy_metric_results[organism][metric])])+'\n')
            f.writelines(sorted(lines))
            f.close()
            exit()


    if opts.verbose:
        print "Generating predictions using method:",opts.prediction_method

    if opts.weighting_method == 'exponential':
        #For now, use exponential weighting
        weight_fn = make_neg_exponential_weight_fn(e)

    variances=None #Overwritten by methods that calc variance
    confidence_intervals=None #Overwritten by methods that calc variance

    if opts.prediction_method == 'asr_and_weighting':
        # Perform predictions using reconstructed ancestral states

        if opts.reconstruction_confidence:
            predictions,variances,confidence_intervals =\
              predict_traits_from_ancestors(tree,nodes_to_predict,\
              trait_label=trait_label,\
              lower_bound_trait_label="lower_bound",\
              upper_bound_trait_label="upper_bound",\
              calc_confidence_intervals = True,\
              brownian_motion_parameter=brownian_motion_parameter,\
              weight_fn=weight_fn,verbose=opts.verbose,
              round_predictions=round_opt)

        else:
             predictions =\
              predict_traits_from_ancestors(tree,nodes_to_predict,\
              trait_label=trait_label,\
              weight_fn =weight_fn,verbose=opts.verbose,
              round_predictions=round_opt)

    elif opts.prediction_method == 'weighting_only':
        #Ignore ancestral information
        predictions =\
          weighted_average_tip_prediction(tree,nodes_to_predict,\
          trait_label=trait_label,\
          weight_fn =weight_fn,verbose=opts.verbose)



    elif opts.prediction_method == 'nearest_neighbor':

        predictions = predict_nearest_neighbor(tree,nodes_to_predict,\
          trait_label=trait_label,tips_only = True)

    elif opts.prediction_method == 'random_neighbor':

        predictions = predict_random_neighbor(tree,\
          nodes_to_predict,trait_label=trait_label)

    if opts.verbose:
        print "Done making predictions."

    make_output_dir_for_file(opts.output_trait_table)

    out_fh=open(opts.output_trait_table,'w')
    #Generate the table of biom predictions
    if opts.verbose:
        print "Converting results to .biom format for output..."

    biom_predictions=biom_table_from_predictions(predictions,table_headers,\
                                                         observation_metadata=None,\
                                                         sample_metadata=accuracy_metric_results,convert_to_int=False)
    if opts.verbose:
        print "Writing prediction results to file: ",opts.output_trait_table

    if opts.output_precalc_file_in_biom:

        #write biom table to file
        write_biom_table(biom_predictions, opts.output_trait_table)

    else:
        #convert to precalc (tab-delimited) format

        out_fh = open(opts.output_trait_table, 'w')
        out_fh.write(convert_biom_to_precalc(biom_predictions))
        out_fh.close()

    #Write out variance information to file
    if variances:

        if opts.verbose:
            print "Converting variances to BIOM format"

        if opts.output_precalc_file_in_biom:
            suffix='.biom'
        else:
            suffix='.tab'

        biom_prediction_variances=biom_table_from_predictions({k:v['variance'] for k,v in variances.iteritems()},table_headers,\
        observation_metadata=None,\
        sample_metadata=None,convert_to_int=False)
        outfile_base,extension = splitext(opts.output_trait_table)
        variance_outfile = outfile_base+"_variances"+suffix
        make_output_dir_for_file(variance_outfile)

        if opts.verbose:
            print "Writing variance information to file:",variance_outfile

        if opts.output_precalc_file_in_biom:
            write_biom_table(biom_prediction_variances, variance_outfile)
        else:
            open(variance_outfile,'w').write(\
                convert_biom_to_precalc(biom_prediction_variances))


    if confidence_intervals:

        if opts.verbose:
            print "Converting upper confidence interval values to BIOM format"

        biom_prediction_upper_CI=biom_table_from_predictions({k:v['upper_CI'] for k,v in confidence_intervals.iteritems()},table_headers,\
          observation_metadata=None,\
          sample_metadata=None,convert_to_int=False)

        outfile_base,extension = splitext(opts.output_trait_table)
        upper_CI_outfile = outfile_base+"_upper_CI"+suffix
        make_output_dir_for_file(upper_CI_outfile)

        if opts.verbose:
            print "Writing upper confidence limit information to file:",upper_CI_outfile

        if opts.output_precalc_file_in_biom:
            write_biom_table(biom_prediction_upper_CI, upper_CI_outfile)
        else:
            open(upper_CI_outfile,'w').write(\
                convert_biom_to_precalc(biom_prediction_upper_CI))

        biom_prediction_lower_CI=biom_table_from_predictions({k:v['lower_CI'] for k,v in confidence_intervals.iteritems()},table_headers,\
          observation_metadata=None,\
          sample_metadata=None,convert_to_int=False)

        outfile_base,extension = splitext(opts.output_trait_table)
        lower_CI_outfile = outfile_base+"_lower_CI"+suffix
        make_output_dir_for_file(lower_CI_outfile)

        if opts.verbose:
            print "Writing lower confidence limit information to file",lower_CI_outfile

        if opts.output_precalc_file_in_biom:
            write_biom_table(biom_prediction_lower_CI, lower_CI_outfile)
        else:
            open(lower_CI_outfile,'w').write(\
                convert_biom_to_precalc(biom_prediction_lower_CI))

コード例 #24

0

ファイルを表示

ファイル: predict_traits.py プロジェクト: cleme/picrust

def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)
    
    if opts.verbose:
        print "Loading tree from file:", opts.tree
    
    # Load Tree
    #tree = LoadTree(opts.tree)
    tree = load_picrust_tree(opts.tree, opts.verbose)

    table_headers =[]
    traits={}
    #load the asr trait table using the previous list of functions to order the arrays
    if opts.reconstructed_trait_table:
        table_headers,traits =\
                update_trait_dict_from_file(opts.reconstructed_trait_table)

        #Only load confidence intervals on the reconstruction
        #If we actually have ASR values in the analysis
        if opts.reconstruction_confidence:
            if opts.verbose:
                print "Loading ASR confidence data from file:",\
                opts.reconstruction_confidence
            
            asr_confidence_output = open(opts.reconstruction_confidence)
            asr_min_vals,asr_max_vals, params,column_mapping =\
              parse_asr_confidence_output(asr_confidence_output)
            brownian_motion_parameter = params['sigma'][0]
            brownian_motion_error = params['sigma'][1]
            if opts.verbose:
                print "Done. Loaded %i confidence interval values." %(len(asr_max_vals))
                print "Brownian motion parameter:",brownian_motion_parameter
        else:
            brownian_motion_parameter = None

    #load the trait table into a dict with organism names as keys and arrays as functions
    table_headers,genome_traits =\
            update_trait_dict_from_file(opts.observed_trait_table,table_headers)


    #Combine the trait tables overwriting the asr ones if they exist in the genome trait table.
    traits.update(genome_traits)
        
    # Specify the attribute where we'll store the reconstructions
    trait_label = "Reconstruction"
   
    if opts.verbose:
        print "Assigning traits to tree..."

    # Decorate tree using the traits
    tree = assign_traits_to_tree(traits,tree, trait_label=trait_label)

    
    if opts.reconstruction_confidence: 
        if opts.verbose:
            print "Assigning trait confidence intervals to tree..."
        tree = assign_traits_to_tree(asr_min_vals,tree,\
            trait_label="lower_bound")

        tree = assign_traits_to_tree(asr_max_vals,tree,\
            trait_label="upper_bound")


    if opts.verbose:
        print "Collecting list of nodes to predict..."

    #Start by predict all tip nodes.
    nodes_to_predict = [tip.Name for tip in tree.tips()]
    
    if opts.verbose:
        print "Found %i nodes to predict." % len(nodes_to_predict)

    if opts.limit_predictions_to_organisms:
        organism_id_str = opts.limit_predictions_to_organisms
        ok_organism_ids = organism_id_str.split(',')
        ok_organism_ids = [n.strip() for n in ok_organism_ids]
        for f in set_label_conversion_fns(True,True):
            ok_organism_ids = [f(i) for i in ok_organism_ids]
        
        if opts.verbose:
            print "Limiting predictions to user-specified ids:",\
              ",".join(ok_organism_ids)
        
        
        if not ok_organism_ids:
            raise RuntimeError(\
              "Found no valid ids in input: %s. Were comma-separated ids specified on the command line?"\
              % opts.limit_predictions_to_organisms)

        nodes_to_predict =\
          [n for n in nodes_to_predict if n in ok_organism_ids]

        if not nodes_to_predict:
            raise RuntimeError(\
              "Filtering by user-specified ids resulted in an empty set of nodes to predict.   Are the ids on the commmand-line and tree ids in the same format?  Example tree tip name: %s, example OTU id name: %s" %([tip.Name for tip in tree.tips()][0],ok_organism_ids[0]))
        
        if opts.verbose:
            print "After filtering organisms to predict by the ids specified on the commandline, %i nodes remain to be predicted" %(len(nodes_to_predict))
    
    if opts.limit_predictions_by_otu_table:
        if opts.verbose:
            print "Limiting predictions to ids in user-specified OTU table:",\
              opts.limit_predictions_by_otu_table
        otu_table = open(opts.limit_predictions_by_otu_table,"U")
        #Parse OTU table for ids
        
        otu_ids =\
          extract_ids_from_table(otu_table.readlines(),delimiter="\t")
        
        if not otu_ids:
            raise RuntimeError(\
              "Found no valid ids in input OTU table: %s.  Is the path correct?"\
              % opts.limit_predictions_by_otu_table)

        nodes_to_predict =\
          [n for n in nodes_to_predict if n in otu_ids]

        if not nodes_to_predict:
            raise RuntimeError(\
              "Filtering by OTU table resulted in an empty set of nodes to predict.   Are the OTU ids and tree ids in the same format?  Example tree tip name: %s, example OTU id name: %s" %([tip.Name for tip in tree.tips()][0],otu_ids[0]))
        
        if opts.verbose:
            print "After filtering by OTU table, %i nodes remain to be predicted" %(len(nodes_to_predict))

    # Calculate accuracy of PICRUST for the given tree, sequenced genomes
    # and set of ndoes to predict
    accuracy_metrics = ['NSTI']
    accuracy_metric_results = None
    if opts.output_accuracy_metrics:
        if opts.verbose:
            print "Calculating accuracy metrics: %s" %([",".join(accuracy_metrics)])
        accuracy_metric_results = {}
        if 'NSTI' in accuracy_metrics:

            nsti_result,min_distances =\
                calc_nearest_sequenced_taxon_index(tree,\
                limit_to_tips = nodes_to_predict,\
                trait_label = trait_label, verbose=opts.verbose)
            
            #accuracy_metric_results['NSTI'] = nsti_result
            for organism in min_distances.keys():
                accuracy_metric_results[organism] = {'NSTI': min_distances[organism]}
        
            if opts.verbose:
                print "NSTI:", nsti_result
   
        #Write accuracy metrics to file
        if opts.verbose:
            print "Writing accuracy metrics to file:",opts.output_accuracy_metrics
   
        f = open(opts.output_accuracy_metrics,'w+')
        lines = ["metric\torganism\tvalue\n"]
        for organism in accuracy_metric_results.keys():
            for metric in accuracy_metric_results[organism].keys():
                lines.append('\t'.join([metric,organism,\
                  str(accuracy_metric_results[organism][metric])])+'\n')
        f.writelines(sorted(lines))
        f.close()


    if opts.verbose:
        print "Generating predictions using method:",opts.prediction_method

    if opts.weighting_method == 'exponential':
        #For now, use exponential weighting
        weight_fn = make_neg_exponential_weight_fn(e)
    elif opts.weighting_method == 'linear':
        #Linear weight function
        weight_fn = linear_weight
    elif opts.weighting_method == 'equal_weight':
        weight_fn = equal_weight

    variances=None #Overwritten by methods that calc variance

    if opts.prediction_method == 'asr_and_weighting': 
  
        if opts.reconstruction_confidence:
        # Perform predictions using reconstructed ancestral states
            predictions,variances =\
              predict_traits_from_ancestors(tree,nodes_to_predict,\
              trait_label=trait_label,\
              lower_bound_trait_label="lower_bound",\
              upper_bound_trait_label="upper_bound",\
              calc_confidence_intervals = True,\
              brownian_motion_parameter=brownian_motion_parameter,\
              use_self_in_prediction = True,\
              weight_fn =weight_fn,verbose=opts.verbose)
    
        else:
             predictions =\
              predict_traits_from_ancestors(tree,nodes_to_predict,\
              trait_label=trait_label,\
              use_self_in_prediction = True,\
              weight_fn =weight_fn,verbose=opts.verbose)
    
    elif opts.prediction_method == 'weighting_only':
        #Ignore ancestral information
        predictions =\
          weighted_average_tip_prediction(tree,nodes_to_predict,\
          trait_label=trait_label,\
          use_self_in_prediction = True,\
          weight_fn =weight_fn,verbose=opts.verbose)
        


    elif opts.prediction_method == 'nearest_neighbor':
        
        predictions = predict_nearest_neighbor(tree,nodes_to_predict,\
          trait_label=trait_label,\
          use_self_in_prediction = True, tips_only = True)

    elif opts.prediction_method == 'random_neighbor':
        
        predictions = predict_random_neighbor(tree,\
          nodes_to_predict,trait_label=trait_label,\
          use_self_in_prediction = True)
    else:
        error_template =\
          "Prediction method '%s' is not supported.  Valid methods are: %s'"
        
        error_text = error_template %(opts.prediction_method,\
          ", ".join(METHOD_CHOICES))

    if opts.verbose:
        print "Converting results to .biom format for output..."
    #convert to biom format (and transpose)
    biom_predictions=biom_table_from_predictions(predictions,table_headers)
    #In the .biom table, organisms are 'samples' and traits are 'observations 
    #(by analogy with a metagenomic sample)
    
    #Therefore, we associate the trait variances with the per-observation metadata
    
    #print "variances:",variances
    #print "BIOM observations:", [o for o in biom_predictions.iterObservations()] 
    #print "BIOM samples:", [s for s in biom_predictions.iterSamples()] 
    
    if variances is not None:
        if opts.verbose:
            print "Adding variance information to output .biom table, as per-observation metadata with key 'variance'..."
        biom_predictions.addSampleMetadata(variances)
    
    if accuracy_metric_results is not None:
        if opts.verbose:
            print "Adding accuracy metrics (%s) to biom table as per-observation metadata..." %(",".join(accuracy_metrics))
        biom_predictions.addSampleMetadata(accuracy_metric_results)
        
    #Add variance information as per observation metadata
    
    if opts.verbose:
        print "Writing biom format prediction results to file: ",opts.output_trait_table
    #write biom table to file
    make_output_dir_for_file(opts.output_trait_table)
    open(opts.output_trait_table,'w').write(\
     format_biom_table(biom_predictions))

コード例 #25

0

ファイルを表示

ファイル: test_predict_traits.py プロジェクト: yucy207/picrust

    def test_weighted_average_tip_prediction(self):
        """weighted_average_tip_prediction should follow the nearest trait-bearing nodes"""

        def predict_for_tip_a(trait_table, base_tree):
            # Steps shared by every scenario: decorate the tree with the
            # traits, look up tip "A" and its most recent reconstructed
            # ancestor, then run the prediction for that tip.
            decorated = assign_traits_to_tree(trait_table, base_tree)
            tip = decorated.getNodeMatchingName("A")
            ancestor = get_most_recent_reconstructed_ancestor(tip)
            result = weighted_average_tip_prediction(
                tree=decorated,
                node=tip,
                most_recent_reconstructed_ancestor=ancestor)
            return decorated, result

        partial_traits = self.PartialReconstructionTraits

        # Scenario 1: tip A is very close to I3, so the rounded
        # prediction is expected to match the I3 traits.
        _, predicted = predict_for_tip_a(partial_traits, self.CloseToI3Tree)
        expected = partial_traits["I3"]
        self.assertFloatEqual(around(predicted), expected)

        # Scenario 2: tip A is very close to I1, so the rounded
        # prediction is expected to match the I1 traits.
        decorated_tree, predicted = \
            predict_for_tip_a(partial_traits, self.CloseToI1Tree)
        expected = partial_traits["I1"]
        # NOTE(review): tip A is looked up again here but the node is never
        # used; the lookup is retained only to mirror the existing test.
        unused_tip = decorated_tree.getNodeMatchingName('A')
        self.assertFloatEqual(around(predicted), expected)

        # Scenario 3: the "B case" on the close-to-I3 tree.
        # NOTE(review): the exponential weight function is built in the
        # remaining scenarios but never handed to
        # weighted_average_tip_prediction, so whatever that function uses
        # by default is what actually gets exercised.
        unused_weight_fn = make_neg_exponential_weight_fn(exp_base=e)
        _, predicted = predict_for_tip_a(partial_traits, self.CloseToI3Tree)
        expected = partial_traits["B"]
        self.assertFloatEqual(around(predicted), expected)

        # Scenario 4: the I1 case on the close-to-I1 tree.
        unused_weight_fn = make_neg_exponential_weight_fn(exp_base=e)
        _, predicted = predict_for_tip_a(partial_traits, self.CloseToI1Tree)
        expected = partial_traits["I1"]
        self.assertFloatEqual(around(predicted), expected)

        # Scenario 5: balanced case with full gene-count data, where the
        # prediction (compared without rounding) is expected to be the
        # plain mean of the I1 and I3 trait vectors.
        count_traits = self.GeneCountTraits
        unused_weight_fn = make_neg_exponential_weight_fn(exp_base=e)
        _, predicted = predict_for_tip_a(count_traits, self.BetweenI3AndI1Tree)
        expected = (array(count_traits["I1"]) + array(count_traits["I3"]))/2.0
        self.assertFloatEqual(predicted, expected)

コード例 #26

0

ファイルを表示

def main():
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    #if we specify we want NSTI only then we have to calculate it first
    if opts.output_accuracy_metrics_only:
        opts.calculate_accuracy_metrics = True

    if opts.verbose:
        print "Loading tree from file:", opts.tree

    # Load Tree
    #tree = LoadTree(opts.tree)
    tree = load_picrust_tree(opts.tree, opts.verbose)

    table_headers = []
    traits = {}
    #load the asr trait table using the previous list of functions to order the arrays
    if opts.reconstructed_trait_table:
        table_headers,traits =\
                update_trait_dict_from_file(opts.reconstructed_trait_table)

        #Only load confidence intervals on the reconstruction
        #If we actually have ASR values in the analysis
        if opts.reconstruction_confidence:
            if opts.verbose:
                print "Loading ASR confidence data from file:",\
                opts.reconstruction_confidence
                print "Assuming confidence data is of type:", opts.confidence_format

            asr_confidence_output = open(opts.reconstruction_confidence)
            asr_min_vals,asr_max_vals, params,column_mapping =\
              parse_asr_confidence_output(asr_confidence_output,format=opts.confidence_format)
            if 'sigma' in params:
                brownian_motion_parameter = params['sigma'][0]
            else:
                brownian_motion_parameter = None

            if opts.verbose:
                print "Done. Loaded %i confidence interval values." % (
                    len(asr_max_vals))
                print "Brownian motion parameter:", brownian_motion_parameter
        else:
            brownian_motion_parameter = None

    #load the trait table into a dict with organism names as keys and arrays as functions
    table_headers,genome_traits =\
            update_trait_dict_from_file(opts.observed_trait_table,table_headers)

    #Combine the trait tables overwriting the asr ones if they exist in the genome trait table.
    traits.update(genome_traits)

    # Specify the attribute where we'll store the reconstructions
    trait_label = "Reconstruction"

    if opts.verbose:
        print "Assigning traits to tree..."

    # Decorate tree using the traits
    tree = assign_traits_to_tree(traits, tree, trait_label=trait_label)

    if opts.reconstruction_confidence:
        if opts.verbose:
            print "Assigning trait confidence intervals to tree..."
        tree = assign_traits_to_tree(asr_min_vals,tree,\
            trait_label="lower_bound")

        tree = assign_traits_to_tree(asr_max_vals,tree,\
            trait_label="upper_bound")

        if brownian_motion_parameter is None:

            if opts.verbose:
                print "No Brownian motion parameters loaded. Inferring these from 95% confidence intervals..."
            brownian_motion_parameter = get_brownian_motion_param_from_confidence_intervals(tree,\
                     upper_bound_trait_label="upper_bound",\
                     lower_bound_trait_label="lower_bound",\
                     trait_label=trait_label,\
                     confidence=0.95)
            if opts.verbose:
                print "Inferred the following rate parameters:", brownian_motion_parameter
    if opts.verbose:
        print "Collecting list of nodes to predict..."

    #Start by predict all tip nodes.
    nodes_to_predict = [tip.Name for tip in tree.tips()]

    if opts.verbose:
        print "Found %i nodes to predict." % len(nodes_to_predict)

    if opts.limit_predictions_to_organisms:
        organism_id_str = opts.limit_predictions_to_organisms
        ok_organism_ids = organism_id_str.split(',')
        ok_organism_ids = [n.strip() for n in ok_organism_ids]
        for f in set_label_conversion_fns(True, True):
            ok_organism_ids = [f(i) for i in ok_organism_ids]

        if opts.verbose:
            print "Limiting predictions to user-specified ids:",\
              ",".join(ok_organism_ids)

        if not ok_organism_ids:
            raise RuntimeError(\
              "Found no valid ids in input: %s. Were comma-separated ids specified on the command line?"\
              % opts.limit_predictions_to_organisms)

        nodes_to_predict =\
          [n for n in nodes_to_predict if n in ok_organism_ids]

        if not nodes_to_predict:
            raise RuntimeError(\
              "Filtering by user-specified ids resulted in an empty set of nodes to predict.   Are the ids on the commmand-line and tree ids in the same format?  Example tree tip name: %s, example OTU id name: %s" %([tip.Name for tip in tree.tips()][0],ok_organism_ids[0]))

        if opts.verbose:
            print "After filtering organisms to predict by the ids specified on the commandline, %i nodes remain to be predicted" % (
                len(nodes_to_predict))

    if opts.limit_predictions_by_otu_table:
        if opts.verbose:
            print "Limiting predictions to ids in user-specified OTU table:",\
              opts.limit_predictions_by_otu_table
        otu_table = open(opts.limit_predictions_by_otu_table, "U")
        #Parse OTU table for ids

        otu_ids =\
          extract_ids_from_table(otu_table.readlines(),delimiter="\t")

        if not otu_ids:
            raise RuntimeError(\
              "Found no valid ids in input OTU table: %s.  Is the path correct?"\
              % opts.limit_predictions_by_otu_table)

        nodes_to_predict =\
          [n for n in nodes_to_predict if n in otu_ids]

        if not nodes_to_predict:
            raise RuntimeError(\
              "Filtering by OTU table resulted in an empty set of nodes to predict.   Are the OTU ids and tree ids in the same format?  Example tree tip name: %s, example OTU id name: %s" %([tip.Name for tip in tree.tips()][0],otu_ids[0]))

        if opts.verbose:
            print "After filtering by OTU table, %i nodes remain to be predicted" % (
                len(nodes_to_predict))

    # Calculate accuracy of PICRUST for the given tree, sequenced genomes
    # and set of ndoes to predict
    accuracy_metrics = ['NSTI']
    accuracy_metric_results = None
    if opts.calculate_accuracy_metrics:
        if opts.verbose:
            print "Calculating accuracy metrics: %s" % (
                [",".join(accuracy_metrics)])
        accuracy_metric_results = {}
        if 'NSTI' in accuracy_metrics:

            nsti_result,min_distances =\
                calc_nearest_sequenced_taxon_index(tree,\
                limit_to_tips = nodes_to_predict,\
                trait_label = trait_label, verbose=opts.verbose)

            #accuracy_metric_results['NSTI'] = nsti_result
            for organism in min_distances.keys():
                accuracy_metric_results[organism] = {
                    'NSTI': min_distances[organism]
                }

            if opts.verbose:
                print "NSTI:", nsti_result

        if opts.output_accuracy_metrics_only:
            #Write accuracy metrics to file
            if opts.verbose:
                print "Writing accuracy metrics to file:", opts.output_accuracy_metrics

            f = open(opts.output_accuracy_metrics_only, 'w+')
            f.write("metric\torganism\tvalue\n")
            lines = []
            for organism in accuracy_metric_results.keys():
                for metric in accuracy_metric_results[organism].keys():
                    lines.append('\t'.join([metric,organism,\
                      str(accuracy_metric_results[organism][metric])])+'\n')
            f.writelines(sorted(lines))
            f.close()
            exit()

    if opts.verbose:
        print "Generating predictions using method:", opts.prediction_method

    if opts.weighting_method == 'exponential':
        #For now, use exponential weighting
        weight_fn = make_neg_exponential_weight_fn(e)

    variances = None  #Overwritten by methods that calc variance
    confidence_intervals = None  #Overwritten by methods that calc variance

    if opts.prediction_method == 'asr_and_weighting':
        # Perform predictions using reconstructed ancestral states

        if opts.reconstruction_confidence:
            predictions,variances,confidence_intervals =\
              predict_traits_from_ancestors(tree,nodes_to_predict,\
              trait_label=trait_label,\
              lower_bound_trait_label="lower_bound",\
              upper_bound_trait_label="upper_bound",\
              calc_confidence_intervals = True,\
              brownian_motion_parameter=brownian_motion_parameter,\
              weight_fn =weight_fn,verbose=opts.verbose)

        else:
            predictions =\
             predict_traits_from_ancestors(tree,nodes_to_predict,\
             trait_label=trait_label,\
             weight_fn =weight_fn,verbose=opts.verbose)

    elif opts.prediction_method == 'weighting_only':
        #Ignore ancestral information
        predictions =\
          weighted_average_tip_prediction(tree,nodes_to_predict,\
          trait_label=trait_label,\
          weight_fn =weight_fn,verbose=opts.verbose)

    elif opts.prediction_method == 'nearest_neighbor':

        predictions = predict_nearest_neighbor(tree,nodes_to_predict,\
          trait_label=trait_label,tips_only = True)

    elif opts.prediction_method == 'random_neighbor':

        predictions = predict_random_neighbor(tree,\
          nodes_to_predict,trait_label=trait_label)

    if opts.verbose:
        print "Done making predictions."

    make_output_dir_for_file(opts.output_trait_table)

    out_fh = open(opts.output_trait_table, 'w')
    #Generate the table of biom predictions
    if opts.verbose:
        print "Converting results to .biom format for output..."

    biom_predictions=biom_table_from_predictions(predictions,table_headers,\
                                                         observation_metadata=None,\
                                                         sample_metadata=accuracy_metric_results,convert_to_int=False)
    if opts.verbose:
        print "Writing prediction results to file: ", opts.output_trait_table

    if opts.output_precalc_file_in_biom:

        #write biom table to file
        write_biom_table(biom_predictions, opts.output_trait_table)

    else:
        #convert to precalc (tab-delimited) format

        out_fh = open(opts.output_trait_table, 'w')
        out_fh.write(convert_biom_to_precalc(biom_predictions))
        out_fh.close()

    #Write out variance information to file
    if variances:

        if opts.verbose:
            print "Converting variances to BIOM format"

        if opts.output_precalc_file_in_biom:
            suffix = '.biom'
        else:
            suffix = '.tab'

        biom_prediction_variances=biom_table_from_predictions({k:v['variance'] for k,v in variances.iteritems()},table_headers,\
        observation_metadata=None,\
        sample_metadata=None,convert_to_int=False)
        outfile_base, extension = splitext(opts.output_trait_table)
        variance_outfile = outfile_base + "_variances" + suffix
        make_output_dir_for_file(variance_outfile)

        if opts.verbose:
            print "Writing variance information to file:", variance_outfile

        if opts.output_precalc_file_in_biom:
            write_biom_table(biom_prediction_variances, variance_outfile)
        else:
            open(variance_outfile,'w').write(\
                convert_biom_to_precalc(biom_prediction_variances))

    if confidence_intervals:

        if opts.verbose:
            print "Converting upper confidence interval values to BIOM format"

        biom_prediction_upper_CI=biom_table_from_predictions({k:v['upper_CI'] for k,v in confidence_intervals.iteritems()},table_headers,\
          observation_metadata=None,\
          sample_metadata=None,convert_to_int=False)

        outfile_base, extension = splitext(opts.output_trait_table)
        upper_CI_outfile = outfile_base + "_upper_CI" + suffix
        make_output_dir_for_file(upper_CI_outfile)

        if opts.verbose:
            print "Writing upper confidence limit information to file:", upper_CI_outfile

        if opts.output_precalc_file_in_biom:
            write_biom_table(biom_prediction_upper_CI, upper_CI_outfile)
        else:
            open(upper_CI_outfile,'w').write(\
                convert_biom_to_precalc(biom_prediction_upper_CI))

        biom_prediction_lower_CI=biom_table_from_predictions({k:v['lower_CI'] for k,v in confidence_intervals.iteritems()},table_headers,\
          observation_metadata=None,\
          sample_metadata=None,convert_to_int=False)

        outfile_base, extension = splitext(opts.output_trait_table)
        lower_CI_outfile = outfile_base + "_lower_CI" + suffix
        make_output_dir_for_file(lower_CI_outfile)

        if opts.verbose:
            print "Writing lower confidence limit information to file", lower_CI_outfile

        if opts.output_precalc_file_in_biom:
            write_biom_table(biom_prediction_lower_CI, lower_CI_outfile)
        else:
            open(lower_CI_outfile,'w').write(\
                convert_biom_to_precalc(biom_prediction_lower_CI))

コード例 #27

0

ファイルを表示

ファイル: test_predict_traits.py プロジェクト: yucy207/picrust

    def setUp(self):
        """Build the tree, trait and temporary-file fixtures used by the tests.

        Creates several small trees parsed with DndParser, trait dicts keyed
        by node name, and three temporary trait-table files whose paths are
        collected in self.files_to_remove.
        """
        # Balanced four-tip tree with named internal nodes E and F
        self.SimpleTree = \
          DndParser("((A:0.02,B:0.01)E:0.05,(C:0.01,D:0.01)F:0.05)root;")
        
        
        # Set up a tree with obvious differences in the rate of gene content
        # evolution to test confidence interval estimation.
        # Features:
        # -- trait 1 has ~10 fold wider confidence intervals than trait 0,
        #    and trait 2 is ~10 fold wider again than trait 1
        #    (bounds of [-1, 2], [-19, 20] and [-199, 200] respectively)
        
        # -- of the nodes without traits (B and D), D has a 10 fold longer
        #    branch (0.10 vs 0.01)

        self.SimpleUnequalVarianceTree =\
          DndParser("((A:0.01,B:0.01)E:0.05,(C:0.01,D:0.10)F:0.05)root;")
        # Only A, C, E and F receive trait values; B and D are left without
        traits = {"A":[1.0,1.0,1.0],"C":[1.0,1.0,1.0],"E":[1.0,1.0,1.0],"F":[1.0,1.0,1.0]}
        self.SimpleUnequalVarianceTree = assign_traits_to_tree(traits,\
          self.SimpleUnequalVarianceTree,trait_label="Reconstruction")
        # Attach identical confidence bounds directly to internal nodes E and F
        self.SimpleUnequalVarianceTree.getNodeMatchingName('E').upper_bound = [2.0,20.0,200.0]
        self.SimpleUnequalVarianceTree.getNodeMatchingName('E').lower_bound = [-1.0,-19.0,-199.0]
        self.SimpleUnequalVarianceTree.getNodeMatchingName('F').upper_bound = [2.0,20.0,200.0]
        self.SimpleUnequalVarianceTree.getNodeMatchingName('F').lower_bound = [-1.0,-19.0,-199.0]
        
        # Set up a tree with a three-way polytomy (E has children A, B, B_prime)
        self.SimplePolytomyTree = \
          DndParser("((A:0.02,B:0.01,B_prime:0.03)E:0.05,(C:0.01,D:0.01)F:0.05)root;")
    
        # Traits for a subset of node names (no entries for B, C or root)
        self.SimpleTreeTraits =\
            {"A":[1.0,1.0],"E":[1.0,1.0],"F":[0.0,1.0],"D":[0.0,0.0]}
        
        # Ladder-shaped tree with uniform 0.01 branch lengths
        self.PartialReconstructionTree =\
                DndParser("((((B:0.01,C:0.01)I3:0.01,A:0.01)I2:0.01,D:0.01)I1:0.01)root;")

        # Same topology; tip A is 0.02 from I3 but 0.96 from I1
        self.CloseToI3Tree =\
                DndParser("((((B:0.01,C:0.95)I3:0.01,A:0.01)I2:0.95,D:0.05)I1:0.95)root;")
        
        # Same topology; tip A is 0.03 from I1 but 0.96 from I3
        self.CloseToI1Tree =\
                DndParser("((((B:0.95,C:0.95)I3:0.95,A:0.01)I2:0.02,D:0.05)I1:0.05)root;")

        # Same topology; tip A is equidistant (0.03) from I3 and I1
        self.BetweenI3AndI1Tree=\
                DndParser("((((B:0.01,C:0.1)I3:0.02,A:0.01)I2:0.02,D:0.05)I1:0.02)root;")


        # Traits for the ladder-shaped trees above; A, I2 and root have no entry
        self.PartialReconstructionTraits =\
                {"B":[1.0,1.0],"C":[1.0,1.0],"I3":[1.0,1.0],"I1":[0.0,1.0],"D":[0.0,1.0]}

        # Same node names, but with count values beyond 0/1
        self.GeneCountTraits =\
                {"B":[1.0,1.0],"C":[1.0,2.0],"I3":[1.0,1.0],"I1":[0.0,3.0],"D":[0.0,5.0]}

        # Create a tmp trait file
        # (in_trait1 is defined outside this method -- presumably module-level
        # test data; confirm)
        self.in_trait1_fp = get_tmp_filename(prefix='Predict_Traits_Tests',suffix='.tsv')
        self.in_trait1_file=open(self.in_trait1_fp,'w')
        self.in_trait1_file.write(in_trait1)
        self.in_trait1_file.close()

        # Create another tmp trait file (with columns in different order)
        self.in_trait2_fp = get_tmp_filename(prefix='Predict_Traits_Tests',suffix='.tsv')
        self.in_trait2_file=open(self.in_trait2_fp,'w')
        self.in_trait2_file.write(in_trait2)
        self.in_trait2_file.close()


        # Create a tmp trait file with an incorrect trait name
        self.in_bad_trait_fp = get_tmp_filename(prefix='Predict_Traits_Tests',suffix='.tsv')
        self.in_bad_trait_file=open(self.in_bad_trait_fp,'w')
        self.in_bad_trait_file.write(in_bad_trait)
        self.in_bad_trait_file.close()

        # Paths of the temp files created above
        # (presumably deleted in tearDown, which is not visible here)
        self.files_to_remove = [self.in_trait1_fp,self.in_trait2_fp,self.in_bad_trait_fp]