def process_snp(snp, full_tree_string):
    """
    Compute the MAPP impact score and p-value for the mutant of one SNP.

    @param snp: an object providing variant_id, column, mutant_aa
    and get_pruned_tree(full_tree_string)
    @param full_tree_string: handed unchanged to snp.get_pruned_tree
    @return: one tab separated line holding the variant id, the impact
    score and the p-value; the last two fields are '*' when fewer than
    seven entries of snp.column are non-empty
    """
    # columns with too few aligned residues get placeholder fields
    min_aligned_residues = 7
    if sum(1 for aa in snp.column if aa) < min_aligned_residues:
        return '\t'.join(str(x) for x in (snp.variant_id, '*', '*'))
    pruned_tree = snp.get_pruned_tree(full_tree_string)
    # define the map from the taxon to the amino acid,
    # skipping the taxa whose entry in the column is empty
    taxon_to_aa_letter = dict(
            (taxon, aa)
            for taxon, aa in zip(g_ordered_taxon_names, snp.column) if aa)
    # get the weights of the taxa
    taxon_weight_pairs = LeafWeights.get_stone_weights(pruned_tree)
    # calculate the standardized physicochemical property table
    standardized_property_array = MAPP.get_standardized_property_array(
            MAPP.g_property_array)
    # calculate the physicochemical property correlation matrix
    correlation_matrix = MAPP.get_property_correlation_matrix(
            standardized_property_array)
    # estimate the amino acid distribution for the column,
    # taking into account the tree and a uniform prior.
    weights = [weight for taxon, weight in taxon_weight_pairs]
    aa_indices = [
            aa_letter_to_aa_index(taxon_to_aa_letter[taxon])
            for taxon, weight in taxon_weight_pairs]
    aa_distribution = MAPP.estimate_aa_distribution(weights, aa_indices)
    # estimate the mean and variance of each physicochemical property
    est_pc_means = MAPP.estimate_property_means(
            standardized_property_array, aa_distribution)
    est_pc_variances = MAPP.estimate_property_variances(
            standardized_property_array, aa_distribution)
    # calculate the deviation from each property mean
    # for each possible amino acid
    deviations = MAPP.get_deviations(
            est_pc_means, est_pc_variances, standardized_property_array)
    # calculate the impact scores
    impact_scores = MAPP.get_impact_scores(correlation_matrix, deviations)
    # calculate the p-values;
    # the taxon count is the same for every score so it is computed once
    ntaxa = len(taxon_weight_pairs)
    p_values = [MAPP.get_p_value(score, ntaxa) for score in impact_scores]
    # look up the impact score and p-value for the mutant
    letter_to_impact = dict(zip(Codon.g_aa_letters, impact_scores))
    letter_to_pvalue = dict(zip(Codon.g_aa_letters, p_values))
    impact = letter_to_impact[snp.mutant_aa]
    pvalue = letter_to_pvalue[snp.mutant_aa]
    return '\t'.join(str(x) for x in (snp.variant_id, impact, pvalue))
def process_snp(snp, full_tree_string):
    """
    Compute the MAPP impact score and p-value for the mutant of one SNP.

    @param snp: an object providing variant_id, column, mutant_aa
    and get_pruned_tree(full_tree_string)
    @param full_tree_string: handed unchanged to snp.get_pruned_tree
    @return: one tab separated line holding the variant id, the impact
    score and the p-value; the last two fields are '*' when fewer than
    seven entries of snp.column are non-empty
    """
    # columns with too few aligned residues get placeholder fields
    min_aligned_residues = 7
    if sum(1 for aa in snp.column if aa) < min_aligned_residues:
        return '\t'.join(str(x) for x in (snp.variant_id, '*', '*'))
    pruned_tree = snp.get_pruned_tree(full_tree_string)
    # define the map from the taxon to the amino acid,
    # skipping the taxa whose entry in the column is empty
    taxon_to_aa_letter = dict(
            (taxon, aa)
            for taxon, aa in zip(g_ordered_taxon_names, snp.column) if aa)
    # get the weights of the taxa
    taxon_weight_pairs = LeafWeights.get_stone_weights(pruned_tree)
    # calculate the standardized physicochemical property table
    standardized_property_array = MAPP.get_standardized_property_array(
            MAPP.g_property_array)
    # calculate the physicochemical property correlation matrix
    correlation_matrix = MAPP.get_property_correlation_matrix(
            standardized_property_array)
    # estimate the amino acid distribution for the column,
    # taking into account the tree and a uniform prior.
    weights = [weight for taxon, weight in taxon_weight_pairs]
    aa_indices = [
            aa_letter_to_aa_index(taxon_to_aa_letter[taxon])
            for taxon, weight in taxon_weight_pairs]
    aa_distribution = MAPP.estimate_aa_distribution(weights, aa_indices)
    # estimate the mean and variance of each physicochemical property
    est_pc_means = MAPP.estimate_property_means(
            standardized_property_array, aa_distribution)
    est_pc_variances = MAPP.estimate_property_variances(
            standardized_property_array, aa_distribution)
    # calculate the deviation from each property mean
    # for each possible amino acid
    deviations = MAPP.get_deviations(
            est_pc_means, est_pc_variances, standardized_property_array)
    # calculate the impact scores
    impact_scores = MAPP.get_impact_scores(correlation_matrix, deviations)
    # calculate the p-values;
    # the taxon count is the same for every score so it is computed once
    ntaxa = len(taxon_weight_pairs)
    p_values = [MAPP.get_p_value(score, ntaxa) for score in impact_scores]
    # look up the impact score and p-value for the mutant
    letter_to_impact = dict(zip(Codon.g_aa_letters, impact_scores))
    letter_to_pvalue = dict(zip(Codon.g_aa_letters, p_values))
    impact = letter_to_impact[snp.mutant_aa]
    pvalue = letter_to_pvalue[snp.mutant_aa]
    return '\t'.join(str(x) for x in (snp.variant_id, impact, pvalue))
def get_response_content(fs):
    """
    Build the html report for the MAPP analysis of one column.

    The analysis steps always run; each show_* attribute of fs only
    controls whether the result of that step is written to the report.
    @param fs: the form data; its show_* flags are read here and the
    object itself is handed to get_tree_and_column
    @return: the html response as a string
    """
    out = StringIO()

    def write_table(col_labels, row_labels, table):
        # shared layout of every table section, written after its title
        print >> out, '<br/>'
        print >> out, HtmlTable.get_labeled_table_string(
                col_labels, row_labels, table)
        print >> out, '<br/><br/>'

    # start writing the html response
    print >> out, '<html>'
    print >> out, '<body>'
    # get the tree and the column sent by the user
    pruned_tree, taxon_to_aa_letter = get_tree_and_column(fs)
    # get the weights of the taxa
    taxon_weight_pairs = LeafWeights.get_stone_weights(pruned_tree)
    # show the raw physicochemical property table
    if fs.show_raw_pc_table:
        print >> out, 'raw physicochemical property table:'
        write_table(
                MAPP.g_property_names, Codon.g_aa_letters,
                MAPP.g_property_array)
    # calculate the standardized physicochemical property table
    standardized_property_array = MAPP.get_standardized_property_array(
            MAPP.g_property_array)
    # show the standardized physicochemical property table
    if fs.show_standardized_pc_table:
        print >> out, 'standardized physicochemical property table:'
        write_table(
                MAPP.g_property_names, Codon.g_aa_letters,
                standardized_property_array)
    # calculate the physicochemical property correlation matrix
    correlation_matrix = MAPP.get_property_correlation_matrix(
            standardized_property_array)
    # show the physicochemical property correlation matrix
    if fs.show_pc_correlation_matrix:
        print >> out, 'physicochemical property correlation matrix:'
        write_table(
                MAPP.g_property_names, MAPP.g_property_names,
                correlation_matrix)
    # show the pruned tree
    if fs.show_tree:
        tree_string = NewickIO.get_narrow_newick_string(pruned_tree, 80)
        print >> out, 'pruned phylogenetic tree in newick format:'
        print >> out, '<pre>'
        for line in StringIO(tree_string).readlines():
            print >> out, cgi.escape(line.rstrip())
        print >> out, '</pre>'
        print >> out, '<br/>'
    # show the weights
    if fs.show_weights:
        # NOTE(review): the taxon names originate from the user input;
        # confirm that HtmlTable escapes the labels it is given
        taxa, shown_weights = zip(*taxon_weight_pairs)
        print >> out, 'taxon weights:'
        write_table(taxa, ['weight'], [shown_weights])
    # estimate the amino acid distribution for the column,
    # taking into account the tree and a uniform prior.
    weights = [weight for taxon, weight in taxon_weight_pairs]
    aa_indices = [
            aa_letter_to_aa_index(taxon_to_aa_letter[taxon])
            for taxon, weight in taxon_weight_pairs]
    aa_distribution = MAPP.estimate_aa_distribution(weights, aa_indices)
    # show the estimated amino acid distribution
    if fs.show_aa_distribution:
        print >> out, 'estimated amino acid distribution:'
        write_table(Codon.g_aa_letters, ['weight'], [aa_distribution])
    # estimate the mean and variance of each physicochemical property
    est_pc_means = MAPP.estimate_property_means(
            standardized_property_array, aa_distribution)
    est_pc_variances = MAPP.estimate_property_variances(
            standardized_property_array, aa_distribution)
    # show the estimated mean and variance of each physicochemical property
    if fs.show_pc_distribution:
        print >> out, 'estimated physicochemical property moments:'
        write_table(
                MAPP.g_property_names, ['mean', 'variance'],
                [est_pc_means, est_pc_variances])
    # calculate the deviation from each property mean
    # for each possible amino acid
    deviations = MAPP.get_deviations(
            est_pc_means, est_pc_variances, standardized_property_array)
    # show the deviation from each property mean for each possible amino acid
    if fs.show_deviations:
        print >> out, 'deviations of amino acids from the normal distribution'
        print >> out, 'estimated for each property:'
        write_table(MAPP.g_property_names, Codon.g_aa_letters, deviations)
    # calculate the impact scores
    impact_scores = MAPP.get_impact_scores(correlation_matrix, deviations)
    # show the impact scores
    if fs.show_impact_scores:
        print >> out, 'impact scores:'
        write_table(Codon.g_aa_letters, ['impact'], [impact_scores])
    # calculate the p-values;
    # the taxon count is the same for every score so it is computed once
    ntaxa = len(taxon_weight_pairs)
    p_values = [MAPP.get_p_value(score, ntaxa) for score in impact_scores]
    # show the p-values
    if fs.show_p_values:
        print >> out, 'p-values:'
        write_table(Codon.g_aa_letters, ['p-value'], [p_values])
    # write the html footer
    print >> out, '</body>'
    print >> out, '</html>'
    # return the response
    return out.getvalue()
def get_response_content(fs):
    """
    Report the MAPP p-value and statistic of one amino acid at a position.

    @param fs: the form data; the aminoacid, tree, chromosome and
    position attributes are read
    @return: the plain text response as a string
    @raise HandlingError: when the amino acid letter is not in
    Codon.g_aa_letters or when fewer than seven taxa of the column
    carry a letter from Codon.g_aa_letters
    """
    # get the upper case amino acid letter
    aa_of_interest = fs.aminoacid.upper()
    if aa_of_interest not in Codon.g_aa_letters:
        raise HandlingError('invalid amino acid: ' + fs.aminoacid)
    # get the newick tree
    tree = NewickIO.parse(fs.tree, Newick.NewickTree)
    out = StringIO()
    try:
        # get the alignment column
        finder = KGEA.Finder(
                g_index_dir,
                g_valid_chromosome_strings_pathname,
                g_fasta_dir)
        # note that some of these amino acids can be gaps
        column = list(finder.gen_taxon_aa_pairs(fs.chromosome, fs.position))
        column_lines = [name + '\t' + letter for name, letter in column]
        # an empty column leaves nothing to analyze
        if not column:
            print >> out, 'no aligned amino acids were found at this position'
            return out.getvalue()
        # get the map from the taxon to the amino acid, dropping the gaps
        taxon_to_aa_letter = {}
        for name, letter in column:
            if letter in Codon.g_aa_letters:
                taxon_to_aa_letter[name] = letter
        ntaxa = len(taxon_to_aa_letter)
        # assert that we have enough taxa
        # to calculate the p-value from the MAPP statistic
        mintaxa = 7
        if ntaxa < mintaxa:
            raise HandlingError(
                    'this column has only %d aligned amino acids but we want at least %d' % (ntaxa, mintaxa))
        # modify the tree so that we keep only the taxa of interest
        modify_tree(tree, set(taxon_to_aa_letter))
        # walk the taxon weights from the tree once, collecting the
        # inputs of the estimate of the amino acid distribution
        weights = []
        aa_indices = []
        for name, weight in LeafWeights.get_stone_weights(tree):
            weights.append(weight)
            aa_indices.append(aa_letter_to_aa_index(taxon_to_aa_letter[name]))
        # calculate the standardized physicochemical property table
        # and the correlation matrix of the properties
        standardized = MAPP.get_standardized_property_array(
                MAPP.g_property_array)
        correlation_matrix = MAPP.get_property_correlation_matrix(
                standardized)
        # estimate the amino acid distribution for the column
        aa_distribution = MAPP.estimate_aa_distribution(weights, aa_indices)
        # estimate the mean and variance of each physicochemical property
        pc_means = MAPP.estimate_property_means(standardized, aa_distribution)
        pc_variances = MAPP.estimate_property_variances(
                standardized, aa_distribution)
        # calculate the deviation from each property mean
        # for each possible amino acid
        deviations = MAPP.get_deviations(pc_means, pc_variances, standardized)
        # calculate the impact scores and their p-values
        impact_scores = MAPP.get_impact_scores(correlation_matrix, deviations)
        p_values = []
        for score in impact_scores:
            p_values.append(MAPP.get_p_value(score, ntaxa))
        # both reported numbers belong to the amino acid of interest
        index_of_interest = aa_letter_to_aa_index(aa_of_interest)
        # show the p-value of the amino acid of interest
        print >> out, 'MAPP p-value:'
        print >> out, p_values[index_of_interest]
        print >> out
        # show the MAPP statistic of the amino acid of interest
        print >> out, 'MAPP statistic:'
        print >> out, impact_scores[index_of_interest]
        print >> out
        # show the aligned column
        print >> out, 'aligned column:'
        print >> out, '\n'.join(column_lines)
    except KGEA.KGEAError as e:
        print >> out, e
    return out.getvalue()
def get_response_content(fs):
    """
    Report the MAPP p-value and statistic of one amino acid at a position.

    @param fs: the form data; the aminoacid, tree, chromosome and
    position attributes are read
    @return: the plain text response as a string
    @raise HandlingError: when the amino acid letter is not in
    Codon.g_aa_letters or when fewer than seven taxa of the column
    carry a letter from Codon.g_aa_letters
    """
    # get the upper case amino acid letter
    aa_of_interest = fs.aminoacid.upper()
    if aa_of_interest not in Codon.g_aa_letters:
        raise HandlingError('invalid amino acid: ' + fs.aminoacid)
    # get the newick tree
    tree = NewickIO.parse(fs.tree, Newick.NewickTree)
    out = StringIO()
    try:
        # get the alignment column
        finder = KGEA.Finder(
                g_index_dir,
                g_valid_chromosome_strings_pathname,
                g_fasta_dir)
        # note that some of these amino acids can be gaps
        column = list(finder.gen_taxon_aa_pairs(fs.chromosome, fs.position))
        column_lines = [name + '\t' + letter for name, letter in column]
        # an empty column leaves nothing to analyze
        if not column:
            print >> out, 'no aligned amino acids were found at this position'
            return out.getvalue()
        # get the map from the taxon to the amino acid, dropping the gaps
        taxon_to_aa_letter = {}
        for name, letter in column:
            if letter in Codon.g_aa_letters:
                taxon_to_aa_letter[name] = letter
        ntaxa = len(taxon_to_aa_letter)
        # assert that we have enough taxa
        # to calculate the p-value from the MAPP statistic
        mintaxa = 7
        if ntaxa < mintaxa:
            raise HandlingError(
                    'this column has only %d aligned amino acids but we want at least %d' % (ntaxa, mintaxa))
        # modify the tree so that we keep only the taxa of interest
        modify_tree(tree, set(taxon_to_aa_letter))
        # walk the taxon weights from the tree once, collecting the
        # inputs of the estimate of the amino acid distribution
        weights = []
        aa_indices = []
        for name, weight in LeafWeights.get_stone_weights(tree):
            weights.append(weight)
            aa_indices.append(aa_letter_to_aa_index(taxon_to_aa_letter[name]))
        # calculate the standardized physicochemical property table
        # and the correlation matrix of the properties
        standardized = MAPP.get_standardized_property_array(
                MAPP.g_property_array)
        correlation_matrix = MAPP.get_property_correlation_matrix(
                standardized)
        # estimate the amino acid distribution for the column
        aa_distribution = MAPP.estimate_aa_distribution(weights, aa_indices)
        # estimate the mean and variance of each physicochemical property
        pc_means = MAPP.estimate_property_means(standardized, aa_distribution)
        pc_variances = MAPP.estimate_property_variances(
                standardized, aa_distribution)
        # calculate the deviation from each property mean
        # for each possible amino acid
        deviations = MAPP.get_deviations(pc_means, pc_variances, standardized)
        # calculate the impact scores and their p-values
        impact_scores = MAPP.get_impact_scores(correlation_matrix, deviations)
        p_values = []
        for score in impact_scores:
            p_values.append(MAPP.get_p_value(score, ntaxa))
        # both reported numbers belong to the amino acid of interest
        index_of_interest = aa_letter_to_aa_index(aa_of_interest)
        # show the p-value of the amino acid of interest
        print >> out, 'MAPP p-value:'
        print >> out, p_values[index_of_interest]
        print >> out
        # show the MAPP statistic of the amino acid of interest
        print >> out, 'MAPP statistic:'
        print >> out, impact_scores[index_of_interest]
        print >> out
        # show the aligned column
        print >> out, 'aligned column:'
        print >> out, '\n'.join(column_lines)
    except KGEA.KGEAError as e:
        print >> out, e
    return out.getvalue()
def get_response_content(fs):
    """
    Build the html report for the MAPP analysis of one column.

    The analysis steps always run; each show_* attribute of fs only
    controls whether the result of that step is written to the report.
    @param fs: the form data; its show_* flags are read here and the
    object itself is handed to get_tree_and_column
    @return: the html response as a string
    """
    out = StringIO()

    def write_table(col_labels, row_labels, table):
        # shared layout of every table section, written after its title
        print >> out, '<br/>'
        print >> out, HtmlTable.get_labeled_table_string(
                col_labels, row_labels, table)
        print >> out, '<br/><br/>'

    # start writing the html response
    print >> out, '<html>'
    print >> out, '<body>'
    # get the tree and the column sent by the user
    pruned_tree, taxon_to_aa_letter = get_tree_and_column(fs)
    # get the weights of the taxa
    taxon_weight_pairs = LeafWeights.get_stone_weights(pruned_tree)
    # show the raw physicochemical property table
    if fs.show_raw_pc_table:
        print >> out, 'raw physicochemical property table:'
        write_table(
                MAPP.g_property_names, Codon.g_aa_letters,
                MAPP.g_property_array)
    # calculate the standardized physicochemical property table
    standardized_property_array = MAPP.get_standardized_property_array(
            MAPP.g_property_array)
    # show the standardized physicochemical property table
    if fs.show_standardized_pc_table:
        print >> out, 'standardized physicochemical property table:'
        write_table(
                MAPP.g_property_names, Codon.g_aa_letters,
                standardized_property_array)
    # calculate the physicochemical property correlation matrix
    correlation_matrix = MAPP.get_property_correlation_matrix(
            standardized_property_array)
    # show the physicochemical property correlation matrix
    if fs.show_pc_correlation_matrix:
        print >> out, 'physicochemical property correlation matrix:'
        write_table(
                MAPP.g_property_names, MAPP.g_property_names,
                correlation_matrix)
    # show the pruned tree
    if fs.show_tree:
        tree_string = NewickIO.get_narrow_newick_string(pruned_tree, 80)
        print >> out, 'pruned phylogenetic tree in newick format:'
        print >> out, '<pre>'
        for line in StringIO(tree_string).readlines():
            print >> out, cgi.escape(line.rstrip())
        print >> out, '</pre>'
        print >> out, '<br/>'
    # show the weights
    if fs.show_weights:
        # NOTE(review): the taxon names originate from the user input;
        # confirm that HtmlTable escapes the labels it is given
        taxa, shown_weights = zip(*taxon_weight_pairs)
        print >> out, 'taxon weights:'
        write_table(taxa, ['weight'], [shown_weights])
    # estimate the amino acid distribution for the column,
    # taking into account the tree and a uniform prior.
    weights = [weight for taxon, weight in taxon_weight_pairs]
    aa_indices = [
            aa_letter_to_aa_index(taxon_to_aa_letter[taxon])
            for taxon, weight in taxon_weight_pairs]
    aa_distribution = MAPP.estimate_aa_distribution(weights, aa_indices)
    # show the estimated amino acid distribution
    if fs.show_aa_distribution:
        print >> out, 'estimated amino acid distribution:'
        write_table(Codon.g_aa_letters, ['weight'], [aa_distribution])
    # estimate the mean and variance of each physicochemical property
    est_pc_means = MAPP.estimate_property_means(
            standardized_property_array, aa_distribution)
    est_pc_variances = MAPP.estimate_property_variances(
            standardized_property_array, aa_distribution)
    # show the estimated mean and variance of each physicochemical property
    if fs.show_pc_distribution:
        print >> out, 'estimated physicochemical property moments:'
        write_table(
                MAPP.g_property_names, ['mean', 'variance'],
                [est_pc_means, est_pc_variances])
    # calculate the deviation from each property mean
    # for each possible amino acid
    deviations = MAPP.get_deviations(
            est_pc_means, est_pc_variances, standardized_property_array)
    # show the deviation from each property mean for each possible amino acid
    if fs.show_deviations:
        print >> out, 'deviations of amino acids from the normal distribution'
        print >> out, 'estimated for each property:'
        write_table(MAPP.g_property_names, Codon.g_aa_letters, deviations)
    # calculate the impact scores
    impact_scores = MAPP.get_impact_scores(correlation_matrix, deviations)
    # show the impact scores
    if fs.show_impact_scores:
        print >> out, 'impact scores:'
        write_table(Codon.g_aa_letters, ['impact'], [impact_scores])
    # calculate the p-values;
    # the taxon count is the same for every score so it is computed once
    ntaxa = len(taxon_weight_pairs)
    p_values = [MAPP.get_p_value(score, ntaxa) for score in impact_scores]
    # show the p-values
    if fs.show_p_values:
        print >> out, 'p-values:'
        write_table(Codon.g_aa_letters, ['p-value'], [p_values])
    # write the html footer
    print >> out, '</body>'
    print >> out, '</html>'
    # return the response
    return out.getvalue()