def test_assign_traits_to_tree(self):
    """assign_traits_to_tree should attach each trait to the matching node"""
    trait_lookup = self.SimpleTreeTraits

    # Run the same check on the bifurcating tree, then on the polytomy tree
    for input_tree in (self.SimpleTree, self.SimplePolytomyTree):
        decorated = assign_traits_to_tree(trait_lookup, input_tree)

        # Nodes with no entry in the trait dict are expected to carry None
        for current in decorated.preorder():
            observed = current.Reconstruction
            expected = trait_lookup.get(current.Name, None)
            self.assertEqual(observed, expected)
def test_assign_traits_to_tree_quoted_node_name(self):
    """assign_traits_to_tree should remove quotes from node names"""
    trait_lookup = self.SimpleTreeTraits
    quoted_tree = self.SimpleTree

    # Quote two tip names: single quotes around A, double quotes around B
    quoted_tree.getNodeMatchingName('A').Name = "'A'"
    quoted_tree.getNodeMatchingName('B').Name = '"B"'

    # Setting fix_bad_labels to False produces NoneType predictions when
    # labels are quoted, so it is switched on for the quoted tree
    decorated = assign_traits_to_tree(trait_lookup, quoted_tree,
                                      fix_bad_labels=True)
    for current in decorated.preorder():
        observed = current.Reconstruction
        # Compare against the trait stored under the unquoted name
        bare_name = current.Name.strip("'").strip('"')
        self.assertEqual(observed, trait_lookup.get(bare_name, None))

    # The polytomy tree has no quoted names and uses the default options
    decorated = assign_traits_to_tree(trait_lookup, self.SimplePolytomyTree)
    for current in decorated.preorder():
        observed = current.Reconstruction
        self.assertEqual(observed, trait_lookup.get(current.Name, None))
def test_get_brownian_motion_param_from_confidence_intervals(self):
    """Brownian motion parameters are inferred per trait from CI bounds"""
    #TODO: Ensure this works with arrays of brownian motions

    # Keyword arguments shared by both calls under test
    ci_kwargs = dict(upper_bound_trait_label="upper_bound",
                     lower_bound_trait_label="lower_bound",
                     trait_label="Reconstruction",
                     confidence=0.95)

    # --- One-trait case ---
    one_trait = {"A": [1.0], "C": [2.0], "E": [1.0], "F": [1.0]}
    decorated = assign_traits_to_tree(one_trait, self.SimpleTree,
                                      trait_label="Reconstruction")
    one_trait_bounds = (('E', 'upper_bound', [2.0]),
                        ('F', 'upper_bound', [1.0]),
                        ('E', 'lower_bound', [0.0]),
                        ('F', 'lower_bound', [1.0]))
    for node_name, attr, bound in one_trait_bounds:
        setattr(decorated.getNodeMatchingName(node_name), attr, bound)

    inferred = get_brownian_motion_param_from_confidence_intervals(
        decorated, **ci_kwargs)
    # Only the number of parameters is checked, not their values
    self.assertEqual(len(inferred), 1)

    # --- Two-trait case ---
    # Tree: ((A:0.02,B:0.01)E:0.05,(C:0.01,D:0.01)F:0.05)root;
    simple_tree = self.SimpleTree
    # The return value is not needed: the calls below operate on the tree
    # object that was passed in.
    assign_traits_to_tree(self.SimpleTreeTraits, simple_tree,
                          trait_label="Reconstruction")
    two_trait_bounds = (('E', 'upper_bound', [1.0, 1.0]),
                        ('F', 'upper_bound', [1.0, 2.0]),
                        ('E', 'lower_bound', [-2.0, -2.0]),
                        ('F', 'lower_bound', [-1.0, 0.0]))
    for node_name, attr, bound in two_trait_bounds:
        setattr(simple_tree.getNodeMatchingName(node_name), attr, bound)

    inferred = get_brownian_motion_param_from_confidence_intervals(
        simple_tree, **ci_kwargs)
    self.assertEqual(len(inferred), 2)
def test_get_brownian_motion_param_from_confidence_intervals(self):
    """Brownian motion parameters are inferred per trait from CI bounds"""
    #TODO: Ensure this works with arrays of brownian motions

    # Keyword arguments shared by both calls under test
    ci_kwargs = dict(upper_bound_trait_label="upper_bound",
                     lower_bound_trait_label="lower_bound",
                     trait_label="Reconstruction",
                     confidence=0.95)

    # --- One-trait case ---
    one_trait = {"A": [1.0], "C": [2.0], "E": [1.0], "F": [1.0]}
    decorated = assign_traits_to_tree(one_trait, self.SimpleTree,
                                      trait_label="Reconstruction")
    one_trait_bounds = (('E', 'upper_bound', [2.0]),
                        ('F', 'upper_bound', [1.0]),
                        ('E', 'lower_bound', [0.0]),
                        ('F', 'lower_bound', [1.0]))
    for node_name, attr, bound in one_trait_bounds:
        setattr(decorated.getNodeMatchingName(node_name), attr, bound)

    inferred = get_brownian_motion_param_from_confidence_intervals(
        decorated, **ci_kwargs)
    # Only the number of parameters is checked, not their values
    self.assertEqual(len(inferred), 1)

    # --- Two-trait case ---
    # Tree: ((A:0.02,B:0.01)E:0.05,(C:0.01,D:0.01)F:0.05)root;
    simple_tree = self.SimpleTree
    # The return value is not needed: the calls below operate on the tree
    # object that was passed in.
    assign_traits_to_tree(self.SimpleTreeTraits, simple_tree,
                          trait_label="Reconstruction")
    two_trait_bounds = (('E', 'upper_bound', [1.0, 1.0]),
                        ('F', 'upper_bound', [1.0, 2.0]),
                        ('E', 'lower_bound', [-2.0, -2.0]),
                        ('F', 'lower_bound', [-1.0, 0.0]))
    for node_name, attr, bound in two_trait_bounds:
        setattr(simple_tree.getNodeMatchingName(node_name), attr, bound)

    inferred = get_brownian_motion_param_from_confidence_intervals(
        simple_tree, **ci_kwargs)
    self.assertEqual(len(inferred), 2)
def test_calc_nearest_sequenced_taxon_index(self):
    """calc_nearest_sequenced_taxon_index returns NSTI and tip distances"""
    simple_tree = self.SimpleTree
    # The return value is not needed: the assertions below query the tree
    # object that was passed in.
    assign_traits_to_tree(self.SimpleTreeTraits, simple_tree,
                          trait_label="Reconstruction")

    # Expected distances over all four tips:
    #   A --> A 0.0
    #   B --> A 0.03
    #   C --> D 0.02
    #   D --> D 0.0
    #   NSTI = 0.05/4.0 = 0.0125
    nsti, tip_distances = calc_nearest_sequenced_taxon_index(
        simple_tree, verbose=False)
    self.assertFloatEqual(nsti, 0.0125)
    all_tip_expectations = (("A", 0.0), ("B", 0.03), ("C", 0.02), ("D", 0.00))
    for tip_name, expected in all_tip_expectations:
        self.assertFloatEqual(tip_distances[tip_name], expected)

    # Limiting the calculation to B and C:
    #   B --> A 0.03
    #   C --> D 0.02
    #   NSTI = 0.025
    nsti, tip_distances = calc_nearest_sequenced_taxon_index(
        simple_tree, limit_to_tips=["B", "C"], verbose=False)
    self.assertFloatEqual(nsti, 0.025)
    for tip_name, expected in (("B", 0.03), ("C", 0.02)):
        self.assertFloatEqual(tip_distances[tip_name], expected)
def test_nearest_neighbor_prediction(self):
    """predict_nearest_neighbor should copy the nearest neighbor's traits"""
    simple_tree = self.SimpleTree
    # The return value is not needed: predictions below are made on the
    # tree object that was passed in.
    assign_traits_to_tree(self.SimpleTreeTraits, simple_tree,
                          trait_label="Reconstruction")

    # Test with default options
    by_tip = predict_nearest_neighbor(simple_tree,
                                      nodes_to_predict=["B", "C"])
    for tip_name, expected in (("B", [1.0, 1.0]), ("C", [0.0, 0.0])):
        self.assertEqual(by_tip[tip_name], array(expected))

    # Test allowing ancestral nearest neighbors
    by_tip = predict_nearest_neighbor(simple_tree,
                                      nodes_to_predict=["B", "C"],
                                      tips_only=False)
    self.assertEqual(by_tip["C"], array([0.0, 1.0]))

    # Test allowing self to be the nearest neighbor AND ancestral neighbors
    by_tip = predict_nearest_neighbor(simple_tree,
                                      nodes_to_predict=["A", "B", "C", "D"],
                                      tips_only=False,
                                      use_self_in_prediction=True)
    with_self_expectations = (("A", [1.0, 1.0]), ("B", [1.0, 1.0]),
                              ("C", [0.0, 1.0]), ("D", [0.0, 0.0]))
    for tip_name, expected in with_self_expectations:
        self.assertEqual(by_tip[tip_name], array(expected))
def test_get_nearest_annotated_neightbor(self):
    """get_nearest_annotated_neighbor finds nearest relative with traits"""
    simple_tree = self.SimpleTree
    # The return value is not needed: lookups below use the tree object
    # that was passed in.
    assign_traits_to_tree(self.SimpleTreeTraits, simple_tree)

    # Each scenario: (tips_only flag, ((query tip, expected neighbor), ...)).
    # First ancestral matching, then tip-only matching; self is never allowed.
    scenarios = (
        (False, (('A', 'E'), ('B', 'E'), ('C', 'F'), ('D', 'F'))),
        (True, (('A', 'D'), ('B', 'A'), ('C', 'D'), ('D', 'A'))),
    )
    for tips_only_flag, expectations in scenarios:
        for query, expected_name in expectations:
            neighbor = get_nearest_annotated_neighbor(
                simple_tree, query,
                tips_only=tips_only_flag, include_self=False)
            self.assertEqual(neighbor.Name, expected_name)
def test_biom_table_from_predictions(self):
    """biom_table_from_predictions should accept tip predictions"""
    decorated = assign_traits_to_tree(self.SimpleTreeTraits, self.SimpleTree)

    # Predict every tip of the simple tree
    tip_names = [tip.Name for tip in decorated.tips()]
    tip_predictions = predict_traits_from_ancestors(decorated, tip_names)

    # NOTE: nothing is asserted about the resulting table; this only checks
    # that the conversion runs without raising.
    biom_table = biom_table_from_predictions(tip_predictions,
                                             ["trait1", "trait2"])
def test_biom_table_from_predictions(self):
    """biom_table_from_predictions should accept tip predictions"""
    decorated = assign_traits_to_tree(self.SimpleTreeTraits, self.SimpleTree)

    # Predict every tip of the simple tree
    tip_names = [tip.Name for tip in decorated.tips()]
    tip_predictions = predict_traits_from_ancestors(decorated, tip_names)

    # NOTE: nothing is asserted about the resulting table; this only checks
    # that the conversion runs without raising.
    biom_table = biom_table_from_predictions(tip_predictions,
                                             ["trait1", "trait2"])
def test_get_nn_by_tree_descent(self):
    """get_nn_by_tree_descent finds the nearest neighbor and its distance"""
    traits = self.SimpleTreeTraits
    tree = self.SimpleTree
    # The return value is not needed: the queries below use the tree object
    # that was passed in.
    assign_traits_to_tree(traits, tree, trait_label="Reconstruction")

    # Expected nearest annotated neighbors and distances:
    #   A --> A 0.0
    #   B --> A 0.03
    #   C --> D 0.02
    #   D --> D 0.0

    # Test with default options
    nn, distance = get_nn_by_tree_descent(tree, "B", verbose=True)
    self.assertEqual(nn.Name, "A")
    self.assertFloatEqual(distance, 0.03)

    # With default options tip A is reported as its own nearest neighbor
    nn, distance = get_nn_by_tree_descent(tree, "A", verbose=True)
    self.assertEqual(nn.Name, "A")
    self.assertFloatEqual(distance, 0.00)

    # With filter_by_property switched off the result for A is B instead
    nn, distance = get_nn_by_tree_descent(tree, "A",
                                          filter_by_property=False,
                                          verbose=True)
    self.assertEqual(nn.Name, "B")
    self.assertFloatEqual(distance, 0.03)

    nn, distance = get_nn_by_tree_descent(tree, "C", verbose=True)
    self.assertEqual(nn.Name, "D")
    self.assertFloatEqual(distance, 0.02)

    # Cross-check against the NSTI calculation limited to B and C:
    #   B --> A 0.03
    #   C --> D 0.02
    exp = 0.025
    obs_nsti, obs_distances = calc_nearest_sequenced_taxon_index(
        tree, limit_to_tips=["B", "C"], verbose=False)
    self.assertFloatEqual(obs_nsti, exp)
    self.assertFloatEqual(obs_distances["B"], 0.03)
    self.assertFloatEqual(obs_distances["C"], 0.02)
def test_predict_random_neighbor(self):
    """predict_random_neighbor should draw from the annotated tips"""
    simple_tree = self.SimpleTree
    # The return value is not needed: predictions below are made on the
    # tree object that was passed in.
    assign_traits_to_tree(self.SimpleTreeTraits, simple_tree)

    # SimpleTreeTraits annotates only tips A ([1.0,1.0]) and D ([0.0,0.0]).
    # If self predictions are disallowed, the prediction for A should
    # always come from node D, and be 0,0.
    drawn = predict_random_neighbor(simple_tree, ['A'],
                                    trait_label="Reconstruction",
                                    use_self_in_prediction=False)
    self.assertEqual(drawn['A'], [0.0, 0.0])

    # With use_self_in_prediction=True roughly half of the draws should be
    # A's own traits and half D's. The technique is fast, so many draws
    # are affordable.
    n_draws = 100000
    tallies = {'A': 0, 'D': 0}
    for _ in range(n_draws):
        picked = predict_random_neighbor(simple_tree, ['A'],
                                         trait_label="Reconstruction",
                                         use_self_in_prediction=True)['A']
        if picked == [1.0, 1.0]:
            tallies['A'] += 1
            continue
        if picked == [0.0, 0.0]:
            tallies['D'] += 1
            continue
        raise RuntimeError(
            "Bad prediction result: Neither node A nor node D traits used in prediction")

    # Only the share of A draws is checked against 0.5
    share_of_a = float(tallies['A']) / float(n_draws)
    self.assertFloatEqual(share_of_a, 0.5, eps=1e-2)
def test_predict_traits_from_ancestors(self):
    """predict_traits_from_ancestors should propagate ancestral states"""
    known_traits = self.PartialReconstructionTraits

    # When the node is very close to I3, prediction should be approx. I3
    decorated = assign_traits_to_tree(known_traits, self.CloseToI3Tree)
    targets = ["A"]
    predicted = predict_traits_from_ancestors(tree=decorated,
                                              nodes_to_predict=targets)
    expected = known_traits["I3"]

    # Predictions are rounded before being compared with I3's traits
    for name in targets:
        self.assertFloatEqual(around(predicted[name]), expected)
def test_predict_traits_from_ancestors(self):
    """predict_traits_from_ancestors should propagate ancestral states"""
    known_traits = self.PartialReconstructionTraits

    # Point predictions only.
    # When the node is very close to I3, prediction should be approx. I3
    decorated = assign_traits_to_tree(known_traits, self.CloseToI3Tree)
    targets = ['A']
    predicted = predict_traits_from_ancestors(tree=decorated,
                                              nodes_to_predict=targets)
    expected = known_traits["I3"]

    # Predictions are rounded before being compared with I3's traits
    for name in targets:
        self.assertFloatEqual(around(predicted[name]), expected)
def test_get_nn_by_tree_descent(self):
    """get_nn_by_tree_descent finds the nearest neighbor and its distance"""
    traits = self.SimpleTreeTraits
    tree = self.SimpleTree
    # The return value is not needed: the queries below use the tree object
    # that was passed in.
    assign_traits_to_tree(traits, tree, trait_label="Reconstruction")

    # Expected nearest annotated neighbors and distances:
    #   A --> A 0.0
    #   B --> A 0.03
    #   C --> D 0.02
    #   D --> D 0.0

    # Test with default options
    nn, distance = get_nn_by_tree_descent(tree, "B", verbose=True)
    self.assertEqual(nn.Name, "A")
    self.assertFloatEqual(distance, 0.03)

    # With default options tip A is reported as its own nearest neighbor
    nn, distance = get_nn_by_tree_descent(tree, "A", verbose=True)
    self.assertEqual(nn.Name, "A")
    self.assertFloatEqual(distance, 0.00)

    # With filter_by_property switched off the result for A is B instead
    nn, distance = get_nn_by_tree_descent(tree, "A",
                                          filter_by_property=False,
                                          verbose=True)
    self.assertEqual(nn.Name, "B")
    self.assertFloatEqual(distance, 0.03)

    nn, distance = get_nn_by_tree_descent(tree, "C", verbose=True)
    self.assertEqual(nn.Name, "D")
    self.assertFloatEqual(distance, 0.02)

    # Cross-check against the NSTI calculation limited to B and C:
    #   B --> A 0.03
    #   C --> D 0.02
    exp = 0.025
    obs_nsti, obs_distances = calc_nearest_sequenced_taxon_index(
        tree, limit_to_tips=["B", "C"], verbose=False)
    self.assertFloatEqual(obs_nsti, exp)
    self.assertFloatEqual(obs_distances["B"], 0.03)
    self.assertFloatEqual(obs_distances["C"], 0.02)
def test_nearest_neighbor_prediction(self):
    """predict_nearest_neighbor should copy the nearest neighbor's traits"""
    simple_tree = self.SimpleTree
    # The return value is not needed: predictions below are made on the
    # tree object that was passed in.
    assign_traits_to_tree(self.SimpleTreeTraits, simple_tree,
                          trait_label="Reconstruction")

    # Test with default options
    by_tip = predict_nearest_neighbor(simple_tree,
                                      nodes_to_predict=["B", "C"])
    for tip_name, expected in (("B", [1.0, 1.0]), ("C", [0.0, 0.0])):
        self.assertEqual(by_tip[tip_name], array(expected))

    # Test allowing ancestral nearest neighbors
    by_tip = predict_nearest_neighbor(simple_tree,
                                      nodes_to_predict=["B", "C"],
                                      tips_only=False)
    self.assertEqual(by_tip["C"], array([0.0, 1.0]))

    # Test allowing self to be the nearest neighbor AND ancestral neighbors
    by_tip = predict_nearest_neighbor(simple_tree,
                                      nodes_to_predict=["A", "B", "C", "D"],
                                      tips_only=False,
                                      use_self_in_prediction=True)
    with_self_expectations = (("A", [1.0, 1.0]), ("B", [1.0, 1.0]),
                              ("C", [0.0, 1.0]), ("D", [0.0, 0.0]))
    for tip_name, expected in with_self_expectations:
        self.assertEqual(by_tip[tip_name], array(expected))
def setUp(self):
    """Build the trees, trait dicts and temporary trait files for the tests."""

    def write_fixture(contents):
        """Write contents to a new temp .tsv; return (path, closed handle)."""
        path = get_tmp_filename(prefix='Predict_Traits_Tests', suffix='.tsv')
        handle = open(path, 'w')
        handle.write(contents)
        handle.close()
        return path, handle

    self.SimpleTree = DndParser(
        "((A:0.02,B:0.01)E:0.05,(C:0.01,D:0.01)F:0.05)root;")

    # A tree with obvious differences in the rate of gene content
    # evolution, for testing confidence interval estimation. Features:
    #  -- trait 1 has ~10 fold higher confidence intervals than trait 0,
    #     and trait 2 is 10 fold higher than trait 1
    #  -- of predicted nodes B and D, D has a ~10 fold longer branch
    uniform_traits = {
        "A": [1.0, 1.0, 1.0],
        "C": [1.0, 1.0, 1.0],
        "E": [1.0, 1.0, 1.0],
        "F": [1.0, 1.0, 1.0],
    }
    unequal = assign_traits_to_tree(
        uniform_traits,
        DndParser("((A:0.01,B:0.01)E:0.05,(C:0.01,D:0.10)F:0.05)root;"),
        trait_label="Reconstruction")
    self.SimpleUnequalVarianceTree = unequal
    # Both internal nodes get the same bounds
    for internal_name in ('E', 'F'):
        unequal.getNodeMatchingName(internal_name).upper_bound = \
            [2.0, 20.0, 200.0]
    for internal_name in ('E', 'F'):
        unequal.getNodeMatchingName(internal_name).lower_bound = \
            [-1.0, -19.0, -199.0]

    # A tree with a three-way polytomy
    self.SimplePolytomyTree = DndParser(
        "((A:0.02,B:0.01,B_prime:0.03)E:0.05,(C:0.01,D:0.01)F:0.05)root;")

    self.SimpleTreeTraits = {
        "A": [1.0, 1.0],
        "E": [1.0, 1.0],
        "F": [0.0, 1.0],
        "D": [0.0, 0.0],
    }

    # Same topology, different branch lengths around tip A
    self.PartialReconstructionTree = DndParser(
        "((((B:0.01,C:0.01)I3:0.01,A:0.01)I2:0.01,D:0.01)I1:0.01)root;")
    self.CloseToI3Tree = DndParser(
        "((((B:0.01,C:0.95)I3:0.01,A:0.01)I2:0.95,D:0.05)I1:0.95)root;")
    self.CloseToI1Tree = DndParser(
        "((((B:0.95,C:0.95)I3:0.95,A:0.01)I2:0.02,D:0.05)I1:0.05)root;")
    self.BetweenI3AndI1Tree = DndParser(
        "((((B:0.01,C:0.1)I3:0.02,A:0.01)I2:0.02,D:0.05)I1:0.02)root;")

    self.PartialReconstructionTraits = {
        "B": [1.0, 1.0],
        "C": [1.0, 1.0],
        "I3": [1.0, 1.0],
        "I1": [0.0, 1.0],
        "D": [0.0, 1.0],
    }
    self.GeneCountTraits = {
        "B": [1.0, 1.0],
        "C": [1.0, 2.0],
        "I3": [1.0, 1.0],
        "I1": [0.0, 3.0],
        "D": [0.0, 5.0],
    }

    # Temp trait file
    self.in_trait1_fp, self.in_trait1_file = write_fixture(in_trait1)
    # Another temp trait file (with columns in different order)
    self.in_trait2_fp, self.in_trait2_file = write_fixture(in_trait2)
    # A temp trait file with an incorrect trait name
    self.in_bad_trait_fp, self.in_bad_trait_file = \
        write_fixture(in_bad_trait)

    self.files_to_remove = [self.in_trait1_fp, self.in_trait2_fp,
                            self.in_bad_trait_fp]
def test_weighted_average_tip_prediction(self):
    """Weighted average node prediction should predict node values"""

    def predict_tip_a(decorated):
        """Predict tip A from its most recent reconstructed ancestor."""
        tip = decorated.getNodeMatchingName("A")
        anchor = get_most_recent_reconstructed_ancestor(tip)
        return weighted_average_tip_prediction(
            tree=decorated, node=tip,
            most_recent_reconstructed_ancestor=anchor)

    partial = self.PartialReconstructionTraits

    # When the node is very close to I3, prediction should be approx. I3
    near_i3 = assign_traits_to_tree(partial, self.CloseToI3Tree)
    predicted = predict_tip_a(near_i3)
    self.assertFloatEqual(around(predicted), partial["I3"])

    # When the node is very close to I1, prediction should be approx. I1
    near_i1 = assign_traits_to_tree(partial, self.CloseToI1Tree)
    predicted = predict_tip_a(near_i1)
    expected = partial["I1"]
    # Extra lookup of tip A; its result is not used by the assertion
    a_node = near_i1.getNodeMatchingName('A')
    self.assertFloatEqual(around(predicted), expected)

    # NOTE(review): in the three cases below a negative-exponential weight
    # function is built but never passed to weighted_average_tip_prediction,
    # so these calls run with that function's default weighting. Confirm
    # whether weight_fn was meant to be supplied.

    # Try out the B case with exponential weighting
    near_i3 = assign_traits_to_tree(partial, self.CloseToI3Tree)
    weight_fn = make_neg_exponential_weight_fn(exp_base=e)
    predicted = predict_tip_a(near_i3)
    self.assertFloatEqual(around(predicted), partial["B"])

    # Try out the I1 case with exponential weighting
    near_i1 = assign_traits_to_tree(partial, self.CloseToI1Tree)
    weight_fn = make_neg_exponential_weight_fn(exp_base=e)
    predicted = predict_tip_a(near_i1)
    self.assertFloatEqual(around(predicted), partial["I1"])

    # Try out the balanced case where children and ancestors should be
    # weighted equally. Full gene count data is used so that case is
    # covered, and the prediction is compared without rounding.
    gene_counts = self.GeneCountTraits
    balanced = assign_traits_to_tree(gene_counts, self.BetweenI3AndI1Tree)
    weight_fn = make_neg_exponential_weight_fn(exp_base=e)
    predicted = predict_tip_a(balanced)
    midpoint = (array(gene_counts["I1"]) + array(gene_counts["I3"])) / 2.0
    self.assertFloatEqual(predicted, midpoint)
def main(): option_parser, opts, args =\ parse_command_line_parameters(**script_info) #if we specify we want NSTI only then we have to calculate it first if opts.output_accuracy_metrics_only: opts.calculate_accuracy_metrics=True if opts.verbose: print "Loading tree from file:", opts.tree if opts.no_round: round_opt = False else: round_opt = True # Load Tree tree = load_picrust_tree(opts.tree, opts.verbose) table_headers=[] traits={} #load the asr trait table using the previous list of functions to order the arrays if opts.reconstructed_trait_table: table_headers,traits =\ update_trait_dict_from_file(opts.reconstructed_trait_table) #Only load confidence intervals on the reconstruction #If we actually have ASR values in the analysis if opts.reconstruction_confidence: if opts.verbose: print "Loading ASR confidence data from file:",\ opts.reconstruction_confidence print "Assuming confidence data is of type:",opts.confidence_format asr_confidence_output = open(opts.reconstruction_confidence) asr_min_vals,asr_max_vals, params,column_mapping =\ parse_asr_confidence_output(asr_confidence_output,format=opts.confidence_format) if 'sigma' in params: brownian_motion_parameter = params['sigma'][0] else: brownian_motion_parameter = None if opts.verbose: print "Done. Loaded %i confidence interval values." %(len(asr_max_vals)) print "Brownian motion parameter:",brownian_motion_parameter else: brownian_motion_parameter = None #load the trait table into a dict with organism names as keys and arrays as functions table_headers,genome_traits =\ update_trait_dict_from_file(opts.observed_trait_table,table_headers) #Combine the trait tables overwriting the asr ones if they exist in the genome trait table. traits.update(genome_traits) # Specify the attribute where we'll store the reconstructions trait_label = "Reconstruction" if opts.verbose: print "Assigning traits to tree..." 
# Decorate tree using the traits tree = assign_traits_to_tree(traits,tree, trait_label=trait_label) if opts.reconstruction_confidence: if opts.verbose: print "Assigning trait confidence intervals to tree..." tree = assign_traits_to_tree(asr_min_vals,tree,\ trait_label="lower_bound") tree = assign_traits_to_tree(asr_max_vals,tree,\ trait_label="upper_bound") if brownian_motion_parameter is None: if opts.verbose: print "No Brownian motion parameters loaded. Inferring these from 95% confidence intervals..." brownian_motion_parameter = get_brownian_motion_param_from_confidence_intervals(tree,\ upper_bound_trait_label="upper_bound",\ lower_bound_trait_label="lower_bound",\ trait_label=trait_label,\ confidence=0.95) if opts.verbose: print "Inferred the following rate parameters:",brownian_motion_parameter if opts.verbose: print "Collecting list of nodes to predict..." #Start by predict all tip nodes. nodes_to_predict = [tip.Name for tip in tree.tips()] if opts.verbose: print "Found %i nodes to predict." % len(nodes_to_predict) if opts.limit_predictions_to_organisms: organism_id_str = opts.limit_predictions_to_organisms ok_organism_ids = organism_id_str.split(',') ok_organism_ids = [n.strip() for n in ok_organism_ids] for f in set_label_conversion_fns(True,True): ok_organism_ids = [f(i) for i in ok_organism_ids] if opts.verbose: print "Limiting predictions to user-specified ids:",\ ",".join(ok_organism_ids) if not ok_organism_ids: raise RuntimeError(\ "Found no valid ids in input: %s. Were comma-separated ids specified on the command line?"\ % opts.limit_predictions_to_organisms) nodes_to_predict =\ [n for n in nodes_to_predict if n in ok_organism_ids] if not nodes_to_predict: raise RuntimeError(\ "Filtering by user-specified ids resulted in an empty set of nodes to predict. Are the ids on the commmand-line and tree ids in the same format? 
Example tree tip name: %s, example OTU id name: %s" %([tip.Name for tip in tree.tips()][0],ok_organism_ids[0])) if opts.verbose: print "After filtering organisms to predict by the ids specified on the commandline, %i nodes remain to be predicted" %(len(nodes_to_predict)) if opts.limit_predictions_by_otu_table: if opts.verbose: print "Limiting predictions to ids in user-specified OTU table:",\ opts.limit_predictions_by_otu_table otu_table = open(opts.limit_predictions_by_otu_table,"U") #Parse OTU table for ids otu_ids =\ extract_ids_from_table(otu_table.readlines(),delimiter="\t") if not otu_ids: raise RuntimeError(\ "Found no valid ids in input OTU table: %s. Is the path correct?"\ % opts.limit_predictions_by_otu_table) nodes_to_predict =\ [n for n in nodes_to_predict if n in otu_ids] if not nodes_to_predict: raise RuntimeError(\ "Filtering by OTU table resulted in an empty set of nodes to predict. Are the OTU ids and tree ids in the same format? Example tree tip name: %s, example OTU id name: %s" %([tip.Name for tip in tree.tips()][0],otu_ids[0])) if opts.verbose: print "After filtering by OTU table, %i nodes remain to be predicted" %(len(nodes_to_predict)) # Calculate accuracy of PICRUST for the given tree, sequenced genomes # and set of ndoes to predict accuracy_metrics = ['NSTI'] accuracy_metric_results = None if opts.calculate_accuracy_metrics: if opts.verbose: print "Calculating accuracy metrics: %s" %([",".join(accuracy_metrics)]) accuracy_metric_results = {} if 'NSTI' in accuracy_metrics: nsti_result,min_distances =\ calc_nearest_sequenced_taxon_index(tree,\ limit_to_tips = nodes_to_predict,\ trait_label = trait_label, verbose=opts.verbose) #accuracy_metric_results['NSTI'] = nsti_result for organism in min_distances.keys(): accuracy_metric_results[organism] = {'NSTI': min_distances[organism]} if opts.verbose: print "NSTI:", nsti_result if opts.output_accuracy_metrics_only: #Write accuracy metrics to file if opts.verbose: print "Writing accuracy metrics to 
file:",opts.output_accuracy_metrics f = open(opts.output_accuracy_metrics_only,'w+') f.write("metric\torganism\tvalue\n") lines =[] for organism in accuracy_metric_results.keys(): for metric in accuracy_metric_results[organism].keys(): lines.append('\t'.join([metric,organism,\ str(accuracy_metric_results[organism][metric])])+'\n') f.writelines(sorted(lines)) f.close() exit() if opts.verbose: print "Generating predictions using method:",opts.prediction_method if opts.weighting_method == 'exponential': #For now, use exponential weighting weight_fn = make_neg_exponential_weight_fn(e) variances=None #Overwritten by methods that calc variance confidence_intervals=None #Overwritten by methods that calc variance if opts.prediction_method == 'asr_and_weighting': # Perform predictions using reconstructed ancestral states if opts.reconstruction_confidence: predictions,variances,confidence_intervals =\ predict_traits_from_ancestors(tree,nodes_to_predict,\ trait_label=trait_label,\ lower_bound_trait_label="lower_bound",\ upper_bound_trait_label="upper_bound",\ calc_confidence_intervals = True,\ brownian_motion_parameter=brownian_motion_parameter,\ weight_fn=weight_fn,verbose=opts.verbose, round_predictions=round_opt) else: predictions =\ predict_traits_from_ancestors(tree,nodes_to_predict,\ trait_label=trait_label,\ weight_fn =weight_fn,verbose=opts.verbose, round_predictions=round_opt) elif opts.prediction_method == 'weighting_only': #Ignore ancestral information predictions =\ weighted_average_tip_prediction(tree,nodes_to_predict,\ trait_label=trait_label,\ weight_fn =weight_fn,verbose=opts.verbose) elif opts.prediction_method == 'nearest_neighbor': predictions = predict_nearest_neighbor(tree,nodes_to_predict,\ trait_label=trait_label,tips_only = True) elif opts.prediction_method == 'random_neighbor': predictions = predict_random_neighbor(tree,\ nodes_to_predict,trait_label=trait_label) if opts.verbose: print "Done making predictions." 
make_output_dir_for_file(opts.output_trait_table) out_fh=open(opts.output_trait_table,'w') #Generate the table of biom predictions if opts.verbose: print "Converting results to .biom format for output..." biom_predictions=biom_table_from_predictions(predictions,table_headers,\ observation_metadata=None,\ sample_metadata=accuracy_metric_results,convert_to_int=False) if opts.verbose: print "Writing prediction results to file: ",opts.output_trait_table if opts.output_precalc_file_in_biom: #write biom table to file write_biom_table(biom_predictions, opts.output_trait_table) else: #convert to precalc (tab-delimited) format out_fh = open(opts.output_trait_table, 'w') out_fh.write(convert_biom_to_precalc(biom_predictions)) out_fh.close() #Write out variance information to file if variances: if opts.verbose: print "Converting variances to BIOM format" if opts.output_precalc_file_in_biom: suffix='.biom' else: suffix='.tab' biom_prediction_variances=biom_table_from_predictions({k:v['variance'] for k,v in variances.iteritems()},table_headers,\ observation_metadata=None,\ sample_metadata=None,convert_to_int=False) outfile_base,extension = splitext(opts.output_trait_table) variance_outfile = outfile_base+"_variances"+suffix make_output_dir_for_file(variance_outfile) if opts.verbose: print "Writing variance information to file:",variance_outfile if opts.output_precalc_file_in_biom: write_biom_table(biom_prediction_variances, variance_outfile) else: open(variance_outfile,'w').write(\ convert_biom_to_precalc(biom_prediction_variances)) if confidence_intervals: if opts.verbose: print "Converting upper confidence interval values to BIOM format" biom_prediction_upper_CI=biom_table_from_predictions({k:v['upper_CI'] for k,v in confidence_intervals.iteritems()},table_headers,\ observation_metadata=None,\ sample_metadata=None,convert_to_int=False) outfile_base,extension = splitext(opts.output_trait_table) upper_CI_outfile = outfile_base+"_upper_CI"+suffix 
make_output_dir_for_file(upper_CI_outfile) if opts.verbose: print "Writing upper confidence limit information to file:",upper_CI_outfile if opts.output_precalc_file_in_biom: write_biom_table(biom_prediction_upper_CI, upper_CI_outfile) else: open(upper_CI_outfile,'w').write(\ convert_biom_to_precalc(biom_prediction_upper_CI)) biom_prediction_lower_CI=biom_table_from_predictions({k:v['lower_CI'] for k,v in confidence_intervals.iteritems()},table_headers,\ observation_metadata=None,\ sample_metadata=None,convert_to_int=False) outfile_base,extension = splitext(opts.output_trait_table) lower_CI_outfile = outfile_base+"_lower_CI"+suffix make_output_dir_for_file(lower_CI_outfile) if opts.verbose: print "Writing lower confidence limit information to file",lower_CI_outfile if opts.output_precalc_file_in_biom: write_biom_table(biom_prediction_lower_CI, lower_CI_outfile) else: open(lower_CI_outfile,'w').write(\ convert_biom_to_precalc(biom_prediction_lower_CI))
def main():
    """Predict traits for tree tips and write them to a .biom table.

    Driven entirely by the parsed command-line options (``opts``):

    1. Load the tree, the observed trait table and, when given, the
       reconstructed (ASR) trait table; observed traits overwrite ASR
       traits for the same id. Attach the traits to the tree.
    2. When ASR confidence data is given, attach the lower/upper bounds
       to the tree as well.
    3. Start from all tip names and optionally narrow them to the ids given
       on the command line and/or the ids found in an OTU table.
    4. When ``opts.output_accuracy_metrics`` is set, compute NSTI for the
       tips to predict and write one row per organism to that file.
    5. Predict traits with ``opts.prediction_method`` and write the
       formatted .biom table to ``opts.output_trait_table``.

    Raises:
        RuntimeError: if an id filter yields no ids or removes every tip,
            if a weighting function is needed but
            ``opts.weighting_method`` is not supported, or if
            ``opts.prediction_method`` is not supported.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    verbose = opts.verbose

    if verbose:
        print("Loading tree from file: %s" % opts.tree)
    tree = load_picrust_tree(opts.tree, verbose)

    table_headers = []
    traits = {}
    # Defined up front so the name exists on every path below.
    brownian_motion_parameter = None

    # Load the ASR trait table first so its headers fix the column order.
    if opts.reconstructed_trait_table:
        table_headers, traits = \
            update_trait_dict_from_file(opts.reconstructed_trait_table)

        # Only load confidence intervals on the reconstruction if we
        # actually have ASR values in the analysis.
        if opts.reconstruction_confidence:
            if verbose:
                print("Loading ASR confidence data from file: %s"
                      % opts.reconstruction_confidence)
            with open(opts.reconstruction_confidence) as asr_confidence_output:
                asr_min_vals, asr_max_vals, params, column_mapping = \
                    parse_asr_confidence_output(asr_confidence_output)
            # Only the first entry of params['sigma'] is used as the rate.
            brownian_motion_parameter = params['sigma'][0]
            if verbose:
                print("Done. Loaded %i confidence interval values."
                      % len(asr_max_vals))
                print("Brownian motion parameter: %s"
                      % (brownian_motion_parameter,))

    # Load the observed traits, reusing the header order established above.
    table_headers, genome_traits = \
        update_trait_dict_from_file(opts.observed_trait_table, table_headers)

    # Observed values win over reconstructed ones for the same id.
    traits.update(genome_traits)

    # Attribute on each node where the traits are stored.
    trait_label = "Reconstruction"

    if verbose:
        print("Assigning traits to tree...")
    tree = assign_traits_to_tree(traits, tree, trait_label=trait_label)

    if opts.reconstruction_confidence:
        if verbose:
            print("Assigning trait confidence intervals to tree...")
        tree = assign_traits_to_tree(asr_min_vals, tree,
                                     trait_label="lower_bound")
        tree = assign_traits_to_tree(asr_max_vals, tree,
                                     trait_label="upper_bound")

    if verbose:
        print("Collecting list of nodes to predict...")

    # Start by predicting all tip nodes.
    nodes_to_predict = [tip.Name for tip in tree.tips()]

    if verbose:
        print("Found %i nodes to predict." % len(nodes_to_predict))

    if opts.limit_predictions_to_organisms:
        organism_id_str = opts.limit_predictions_to_organisms
        ok_organism_ids = [n.strip() for n in organism_id_str.split(',')]
        # Apply the same label conversions to the user-supplied ids.
        for f in set_label_conversion_fns(True, True):
            ok_organism_ids = [f(i) for i in ok_organism_ids]

        if verbose:
            print("Limiting predictions to user-specified ids: %s"
                  % ",".join(ok_organism_ids))

        if not ok_organism_ids:
            raise RuntimeError(
                "Found no valid ids in input: %s. Were comma-separated ids specified on the command line?"
                % opts.limit_predictions_to_organisms)

        # Set membership keeps the filter linear in the number of tips.
        ok_id_lookup = set(ok_organism_ids)
        nodes_to_predict = [n for n in nodes_to_predict if n in ok_id_lookup]

        if not nodes_to_predict:
            raise RuntimeError(
                "Filtering by user-specified ids resulted in an empty set of nodes to predict. Are the ids on the commmand-line and tree ids in the same format? Example tree tip name: %s, example OTU id name: %s"
                % ([tip.Name for tip in tree.tips()][0], ok_organism_ids[0]))

        if verbose:
            print("After filtering organisms to predict by the ids specified on the commandline, %i nodes remain to be predicted"
                  % len(nodes_to_predict))

    if opts.limit_predictions_by_otu_table:
        if verbose:
            print("Limiting predictions to ids in user-specified OTU table: %s"
                  % opts.limit_predictions_by_otu_table)
        # Parse the OTU table for ids; the handle is closed once read.
        with open(opts.limit_predictions_by_otu_table, "U") as otu_table:
            otu_ids = extract_ids_from_table(otu_table.readlines(),
                                             delimiter="\t")

        if not otu_ids:
            raise RuntimeError(
                "Found no valid ids in input OTU table: %s. Is the path correct?"
                % opts.limit_predictions_by_otu_table)

        otu_id_lookup = set(otu_ids)
        nodes_to_predict = [n for n in nodes_to_predict if n in otu_id_lookup]

        if not nodes_to_predict:
            raise RuntimeError(
                "Filtering by OTU table resulted in an empty set of nodes to predict. Are the OTU ids and tree ids in the same format? Example tree tip name: %s, example OTU id name: %s"
                % ([tip.Name for tip in tree.tips()][0], otu_ids[0]))

        if verbose:
            print("After filtering by OTU table, %i nodes remain to be predicted"
                  % len(nodes_to_predict))

    # Calculate accuracy of PICRUSt for the given tree, sequenced genomes
    # and set of nodes to predict.
    accuracy_metrics = ['NSTI']
    accuracy_metric_results = None
    if opts.output_accuracy_metrics:
        if verbose:
            print("Calculating accuracy metrics: %s"
                  % ([",".join(accuracy_metrics)]))
        accuracy_metric_results = {}
        if 'NSTI' in accuracy_metrics:
            nsti_result, min_distances = \
                calc_nearest_sequenced_taxon_index(
                    tree,
                    limit_to_tips=nodes_to_predict,
                    trait_label=trait_label, verbose=verbose)

            for organism, distance in min_distances.items():
                accuracy_metric_results[organism] = {'NSTI': distance}

            if verbose:
                print("NSTI: %s" % (nsti_result,))

        if verbose:
            print("Writing accuracy metrics to file: %s"
                  % opts.output_accuracy_metrics)

        rows = []
        for organism, metrics in accuracy_metric_results.items():
            for metric, value in metrics.items():
                rows.append('\t'.join([metric, organism, str(value)]) + '\n')

        # The header is written separately: sorting it together with the
        # data rows would push it below them (uppercase metric names sort
        # before the lowercase "metric" header).
        with open(opts.output_accuracy_metrics, 'w') as f:
            f.write("metric\torganism\tvalue\n")
            f.writelines(sorted(rows))

    if verbose:
        print("Generating predictions using method: %s"
              % opts.prediction_method)

    weight_fn = None
    if opts.weighting_method == 'exponential':
        # Negative exponential weighting with base e.
        weight_fn = make_neg_exponential_weight_fn(e)
    elif opts.weighting_method == 'linear':
        weight_fn = linear_weight
    elif opts.weighting_method == 'equal_weight':
        weight_fn = equal_weight
    elif opts.prediction_method in ('asr_and_weighting', 'weighting_only'):
        # Only these methods consume weight_fn; fail clearly instead of
        # hitting an undefined name further down.
        raise RuntimeError(
            "Weighting method '%s' is not supported."
            % opts.weighting_method)

    variances = None  # Overwritten by methods that calculate variance.

    if opts.prediction_method == 'asr_and_weighting':
        if opts.reconstruction_confidence:
            # Predict from reconstructed ancestral states, with variances.
            predictions, variances = \
                predict_traits_from_ancestors(
                    tree, nodes_to_predict,
                    trait_label=trait_label,
                    lower_bound_trait_label="lower_bound",
                    upper_bound_trait_label="upper_bound",
                    calc_confidence_intervals=True,
                    brownian_motion_parameter=brownian_motion_parameter,
                    use_self_in_prediction=True,
                    weight_fn=weight_fn, verbose=verbose)
        else:
            predictions = \
                predict_traits_from_ancestors(
                    tree, nodes_to_predict,
                    trait_label=trait_label,
                    use_self_in_prediction=True,
                    weight_fn=weight_fn, verbose=verbose)

    elif opts.prediction_method == 'weighting_only':
        # Ignore ancestral information.
        predictions = \
            weighted_average_tip_prediction(
                tree, nodes_to_predict,
                trait_label=trait_label,
                use_self_in_prediction=True,
                weight_fn=weight_fn, verbose=verbose)

    elif opts.prediction_method == 'nearest_neighbor':
        predictions = predict_nearest_neighbor(
            tree, nodes_to_predict,
            trait_label=trait_label,
            use_self_in_prediction=True, tips_only=True)

    elif opts.prediction_method == 'random_neighbor':
        predictions = predict_random_neighbor(
            tree, nodes_to_predict, trait_label=trait_label,
            use_self_in_prediction=True)

    else:
        error_template = \
            "Prediction method '%s' is not supported. Valid methods are: %s"
        raise RuntimeError(error_template % (opts.prediction_method,
                                             ", ".join(METHOD_CHOICES)))

    if verbose:
        print("Converting results to .biom format for output...")

    # Convert to biom format (and transpose). In the .biom table,
    # organisms are 'samples' and traits are 'observations' (by analogy
    # with a metagenomic sample).
    biom_predictions = biom_table_from_predictions(predictions, table_headers)

    if variances is not None:
        if verbose:
            print("Adding variance information to output .biom table, as per-observation metadata with key 'variance'...")
        biom_predictions.addSampleMetadata(variances)

    if accuracy_metric_results is not None:
        if verbose:
            print("Adding accuracy metrics (%s) to biom table as per-observation metadata..."
                  % (",".join(accuracy_metrics)))
        biom_predictions.addSampleMetadata(accuracy_metric_results)

    if verbose:
        print("Writing biom format prediction results to file:  %s"
              % opts.output_trait_table)

    make_output_dir_for_file(opts.output_trait_table)
    with open(opts.output_trait_table, 'w') as out_fh:
        out_fh.write(format_biom_table(biom_predictions))
def _write_trait_table(table, outfile, as_biom):
    """Write *table* to *outfile* as .biom, or as tab-delimited precalc text."""
    if as_biom:
        write_biom_table(table, outfile)
    else:
        with open(outfile, 'w') as out_fh:
            out_fh.write(convert_biom_to_precalc(table))


def main():
    """Predict traits for tree tips and write the prediction tables.

    Driven entirely by the parsed command-line options (``opts``):

    1. Load the tree, the observed trait table and, when given, the
       reconstructed (ASR) trait table; observed traits overwrite ASR
       traits for the same id. Attach the traits to the tree.
    2. When ASR confidence data is given, attach the lower/upper bounds to
       the tree; if no 'sigma' parameter was loaded, infer the Brownian
       motion parameter from the 95% confidence intervals.
    3. Start from all tip names and optionally narrow them to the ids given
       on the command line and/or the ids found in an OTU table.
    4. When accuracy metrics are requested, compute NSTI per organism. With
       ``opts.output_accuracy_metrics_only`` set, write them to that path
       and exit without predicting.
    5. Predict traits with ``opts.prediction_method`` and write the
       predictions, plus variance and confidence-interval tables when they
       were computed, either as .biom or as tab-delimited precalc files.

    Side effects:
        Sets ``opts.calculate_accuracy_metrics`` to True when
        ``opts.output_accuracy_metrics_only`` is given.

    Raises:
        RuntimeError: if an id filter yields no ids or removes every tip,
            if a weighting function is needed but
            ``opts.weighting_method`` is not supported, or if
            ``opts.prediction_method`` is not supported.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # If we want NSTI only then we have to calculate it first.
    if opts.output_accuracy_metrics_only:
        opts.calculate_accuracy_metrics = True

    verbose = opts.verbose

    if verbose:
        print("Loading tree from file: %s" % opts.tree)
    tree = load_picrust_tree(opts.tree, verbose)

    table_headers = []
    traits = {}
    # Defined up front so the name exists on every path below.
    brownian_motion_parameter = None

    # Load the ASR trait table first so its headers fix the column order.
    if opts.reconstructed_trait_table:
        table_headers, traits = \
            update_trait_dict_from_file(opts.reconstructed_trait_table)

        # Only load confidence intervals on the reconstruction if we
        # actually have ASR values in the analysis.
        if opts.reconstruction_confidence:
            if verbose:
                print("Loading ASR confidence data from file: %s"
                      % opts.reconstruction_confidence)
                print("Assuming confidence data is of type: %s"
                      % (opts.confidence_format,))

            with open(opts.reconstruction_confidence) as asr_confidence_output:
                asr_min_vals, asr_max_vals, params, column_mapping = \
                    parse_asr_confidence_output(
                        asr_confidence_output,
                        format=opts.confidence_format)

            if 'sigma' in params:
                # Only the first entry of params['sigma'] is used.
                brownian_motion_parameter = params['sigma'][0]

            if verbose:
                print("Done. Loaded %i confidence interval values."
                      % len(asr_max_vals))
                print("Brownian motion parameter: %s"
                      % (brownian_motion_parameter,))

    # Load the observed traits, reusing the header order established above.
    table_headers, genome_traits = \
        update_trait_dict_from_file(opts.observed_trait_table, table_headers)

    # Observed values win over reconstructed ones for the same id.
    traits.update(genome_traits)

    # Attribute on each node where the traits are stored.
    trait_label = "Reconstruction"

    if verbose:
        print("Assigning traits to tree...")
    tree = assign_traits_to_tree(traits, tree, trait_label=trait_label)

    if opts.reconstruction_confidence:
        if verbose:
            print("Assigning trait confidence intervals to tree...")
        tree = assign_traits_to_tree(asr_min_vals, tree,
                                     trait_label="lower_bound")
        tree = assign_traits_to_tree(asr_max_vals, tree,
                                     trait_label="upper_bound")

        if brownian_motion_parameter is None:
            if verbose:
                print("No Brownian motion parameters loaded. Inferring these from 95% confidence intervals...")
            brownian_motion_parameter = \
                get_brownian_motion_param_from_confidence_intervals(
                    tree,
                    upper_bound_trait_label="upper_bound",
                    lower_bound_trait_label="lower_bound",
                    trait_label=trait_label,
                    confidence=0.95)
            if verbose:
                print("Inferred the following rate parameters: %s"
                      % (brownian_motion_parameter,))

    if verbose:
        print("Collecting list of nodes to predict...")

    # Start by predicting all tip nodes.
    nodes_to_predict = [tip.Name for tip in tree.tips()]

    if verbose:
        print("Found %i nodes to predict." % len(nodes_to_predict))

    if opts.limit_predictions_to_organisms:
        organism_id_str = opts.limit_predictions_to_organisms
        ok_organism_ids = [n.strip() for n in organism_id_str.split(',')]
        # Apply the same label conversions to the user-supplied ids.
        for f in set_label_conversion_fns(True, True):
            ok_organism_ids = [f(i) for i in ok_organism_ids]

        if verbose:
            print("Limiting predictions to user-specified ids: %s"
                  % ",".join(ok_organism_ids))

        if not ok_organism_ids:
            raise RuntimeError(
                "Found no valid ids in input: %s. Were comma-separated ids specified on the command line?"
                % opts.limit_predictions_to_organisms)

        # Set membership keeps the filter linear in the number of tips.
        ok_id_lookup = set(ok_organism_ids)
        nodes_to_predict = [n for n in nodes_to_predict if n in ok_id_lookup]

        if not nodes_to_predict:
            raise RuntimeError(
                "Filtering by user-specified ids resulted in an empty set of nodes to predict. Are the ids on the commmand-line and tree ids in the same format? Example tree tip name: %s, example OTU id name: %s"
                % ([tip.Name for tip in tree.tips()][0], ok_organism_ids[0]))

        if verbose:
            print("After filtering organisms to predict by the ids specified on the commandline, %i nodes remain to be predicted"
                  % len(nodes_to_predict))

    if opts.limit_predictions_by_otu_table:
        if verbose:
            print("Limiting predictions to ids in user-specified OTU table: %s"
                  % opts.limit_predictions_by_otu_table)
        # Parse the OTU table for ids; the handle is closed once read.
        with open(opts.limit_predictions_by_otu_table, "U") as otu_table:
            otu_ids = extract_ids_from_table(otu_table.readlines(),
                                             delimiter="\t")

        if not otu_ids:
            raise RuntimeError(
                "Found no valid ids in input OTU table: %s. Is the path correct?"
                % opts.limit_predictions_by_otu_table)

        otu_id_lookup = set(otu_ids)
        nodes_to_predict = [n for n in nodes_to_predict if n in otu_id_lookup]

        if not nodes_to_predict:
            raise RuntimeError(
                "Filtering by OTU table resulted in an empty set of nodes to predict. Are the OTU ids and tree ids in the same format? Example tree tip name: %s, example OTU id name: %s"
                % ([tip.Name for tip in tree.tips()][0], otu_ids[0]))

        if verbose:
            print("After filtering by OTU table, %i nodes remain to be predicted"
                  % len(nodes_to_predict))

    # Calculate accuracy of PICRUSt for the given tree, sequenced genomes
    # and set of nodes to predict.
    accuracy_metrics = ['NSTI']
    accuracy_metric_results = None
    if opts.calculate_accuracy_metrics:
        if verbose:
            print("Calculating accuracy metrics: %s"
                  % ([",".join(accuracy_metrics)]))
        accuracy_metric_results = {}
        if 'NSTI' in accuracy_metrics:
            nsti_result, min_distances = \
                calc_nearest_sequenced_taxon_index(
                    tree,
                    limit_to_tips=nodes_to_predict,
                    trait_label=trait_label, verbose=verbose)

            for organism, distance in min_distances.items():
                accuracy_metric_results[organism] = {'NSTI': distance}

            if verbose:
                print("NSTI: %s" % (nsti_result,))

        if opts.output_accuracy_metrics_only:
            # Report the path that is actually written below.
            if verbose:
                print("Writing accuracy metrics to file: %s"
                      % opts.output_accuracy_metrics_only)

            rows = []
            for organism, metrics in accuracy_metric_results.items():
                for metric, value in metrics.items():
                    rows.append(
                        '\t'.join([metric, organism, str(value)]) + '\n')

            # Header first, then the data rows in sorted order.
            with open(opts.output_accuracy_metrics_only, 'w') as f:
                f.write("metric\torganism\tvalue\n")
                f.writelines(sorted(rows))
            # Metrics-only mode: stop before making any predictions.
            exit()

    if verbose:
        print("Generating predictions using method: %s"
              % opts.prediction_method)

    weight_fn = None
    if opts.weighting_method == 'exponential':
        # Negative exponential weighting with base e.
        weight_fn = make_neg_exponential_weight_fn(e)
    elif opts.prediction_method in ('asr_and_weighting', 'weighting_only'):
        # Only these methods consume weight_fn; fail clearly instead of
        # hitting an undefined name further down.
        raise RuntimeError(
            "Weighting method '%s' is not supported."
            % opts.weighting_method)

    variances = None  # Overwritten by methods that calculate variance.
    confidence_intervals = None  # Overwritten by methods that calculate CIs.

    if opts.prediction_method == 'asr_and_weighting':
        # Perform predictions using reconstructed ancestral states.
        if opts.reconstruction_confidence:
            predictions, variances, confidence_intervals = \
                predict_traits_from_ancestors(
                    tree, nodes_to_predict,
                    trait_label=trait_label,
                    lower_bound_trait_label="lower_bound",
                    upper_bound_trait_label="upper_bound",
                    calc_confidence_intervals=True,
                    brownian_motion_parameter=brownian_motion_parameter,
                    weight_fn=weight_fn, verbose=verbose)
        else:
            predictions = \
                predict_traits_from_ancestors(
                    tree, nodes_to_predict,
                    trait_label=trait_label,
                    weight_fn=weight_fn, verbose=verbose)

    elif opts.prediction_method == 'weighting_only':
        # Ignore ancestral information.
        predictions = \
            weighted_average_tip_prediction(
                tree, nodes_to_predict,
                trait_label=trait_label,
                weight_fn=weight_fn, verbose=verbose)

    elif opts.prediction_method == 'nearest_neighbor':
        predictions = predict_nearest_neighbor(
            tree, nodes_to_predict,
            trait_label=trait_label, tips_only=True)

    elif opts.prediction_method == 'random_neighbor':
        predictions = predict_random_neighbor(
            tree, nodes_to_predict, trait_label=trait_label)

    else:
        raise RuntimeError(
            "Prediction method '%s' is not supported."
            % opts.prediction_method)

    if verbose:
        print("Done making predictions.")

    as_biom = opts.output_precalc_file_in_biom
    # Shared by every side table; computed once so it is defined whether or
    # not variances were produced.
    suffix = '.biom' if as_biom else '.tab'
    outfile_base, extension = splitext(opts.output_trait_table)

    make_output_dir_for_file(opts.output_trait_table)

    # Generate the table of biom predictions.
    if verbose:
        print("Converting results to .biom format for output...")

    biom_predictions = biom_table_from_predictions(
        predictions, table_headers,
        observation_metadata=None,
        sample_metadata=accuracy_metric_results, convert_to_int=False)

    if verbose:
        print("Writing prediction results to file:  %s"
              % opts.output_trait_table)

    _write_trait_table(biom_predictions, opts.output_trait_table, as_biom)

    # Write out variance information to file.
    if variances:
        if verbose:
            print("Converting variances to BIOM format")

        biom_prediction_variances = biom_table_from_predictions(
            {k: v['variance'] for k, v in variances.items()},
            table_headers,
            observation_metadata=None,
            sample_metadata=None, convert_to_int=False)
        variance_outfile = outfile_base + "_variances" + suffix
        make_output_dir_for_file(variance_outfile)

        if verbose:
            print("Writing variance information to file: %s"
                  % variance_outfile)

        _write_trait_table(biom_prediction_variances, variance_outfile,
                           as_biom)

    if confidence_intervals:
        if verbose:
            print("Converting upper confidence interval values to BIOM format")

        biom_prediction_upper_CI = biom_table_from_predictions(
            {k: v['upper_CI'] for k, v in confidence_intervals.items()},
            table_headers,
            observation_metadata=None,
            sample_metadata=None, convert_to_int=False)
        upper_CI_outfile = outfile_base + "_upper_CI" + suffix
        make_output_dir_for_file(upper_CI_outfile)

        if verbose:
            print("Writing upper confidence limit information to file: %s"
                  % upper_CI_outfile)

        _write_trait_table(biom_prediction_upper_CI, upper_CI_outfile,
                           as_biom)

        biom_prediction_lower_CI = biom_table_from_predictions(
            {k: v['lower_CI'] for k, v in confidence_intervals.items()},
            table_headers,
            observation_metadata=None,
            sample_metadata=None, convert_to_int=False)
        lower_CI_outfile = outfile_base + "_lower_CI" + suffix
        make_output_dir_for_file(lower_CI_outfile)

        if verbose:
            print("Writing lower confidence limit information to file %s"
                  % lower_CI_outfile)

        _write_trait_table(biom_prediction_lower_CI, lower_CI_outfile,
                           as_biom)