Beispiel #1
0
def process_snp(snp, full_tree_string):
    """
    Score the mutant amino acid of one SNP and format the result as a line.

    The snp object must provide column, variant_id, mutant_aa,
    and a get_pruned_tree method that accepts full_tree_string.
    The returned string is tab separated and holds the variant id,
    the impact score, and the p-value of the mutant amino acid.
    When the column has fewer than seven non-empty entries
    the score and the p-value are both replaced by '*'.
    """
    # columns with too few observed residues are reported but not scored
    observed = [entry for entry in snp.column if entry]
    if len(observed) < 7:
        return '\t'.join([str(snp.variant_id), '*', '*'])
    pruned_tree = snp.get_pruned_tree(full_tree_string)
    # map each taxon to its amino acid, skipping the empty entries
    taxon_to_aa_letter = dict(
            (taxon, letter)
            for taxon, letter in zip(g_ordered_taxon_names, snp.column)
            if letter)
    # weight the taxa according to the pruned tree
    taxon_weight_pairs = LeafWeights.get_stone_weights(pruned_tree)
    # standardize the physicochemical property table,
    # then get the correlation matrix of the properties
    standardized = MAPP.get_standardized_property_array(
            MAPP.g_property_array)
    correlation_matrix = MAPP.get_property_correlation_matrix(standardized)
    # estimate the amino acid distribution for the column
    # from the taxon weights and the observed amino acids
    weights = [weight for _, weight in taxon_weight_pairs]
    aa_indices = [
            aa_letter_to_aa_index(taxon_to_aa_letter[taxon])
            for taxon, _ in taxon_weight_pairs]
    aa_distribution = MAPP.estimate_aa_distribution(weights, aa_indices)
    # estimate the first two moments of each physicochemical property
    est_pc_means = MAPP.estimate_property_means(
            standardized, aa_distribution)
    est_pc_variances = MAPP.estimate_property_variances(
            standardized, aa_distribution)
    # get the deviation of every amino acid from every property mean
    deviations = MAPP.get_deviations(
            est_pc_means, est_pc_variances, standardized)
    # turn the deviations into one impact score per amino acid
    impact_scores = MAPP.get_impact_scores(correlation_matrix, deviations)
    # every p-value uses the same number of weighted taxa
    ntaxa = len(taxon_weight_pairs)
    p_values = [MAPP.get_p_value(score, ntaxa) for score in impact_scores]
    # look up the results for the mutant amino acid
    letter_to_impact = dict(zip(Codon.g_aa_letters, impact_scores))
    letter_to_pvalue = dict(zip(Codon.g_aa_letters, p_values))
    mutant_impact = letter_to_impact[snp.mutant_aa]
    mutant_pvalue = letter_to_pvalue[snp.mutant_aa]
    return '\t'.join(map(str, (snp.variant_id, mutant_impact, mutant_pvalue)))
Beispiel #2
0
def process_snp(snp, full_tree_string):
    """
    Return a tab separated result line for a single SNP.

    The line holds snp.variant_id followed by the impact score
    and the p-value computed for snp.mutant_aa.
    If fewer than seven entries of snp.column are non-empty
    then no analysis is done and both values are written as '*'.
    The tree used for the taxon weights is obtained by passing
    full_tree_string to snp.get_pruned_tree.
    """
    # bail out early when the column is too sparse to be scored
    n_observed = len([residue for residue in snp.column if residue])
    if n_observed < 7:
        return '\t'.join((str(snp.variant_id), '*', '*'))
    pruned_tree = snp.get_pruned_tree(full_tree_string)
    # associate each taxon with its amino acid when one is present
    taxon_to_aa_letter = dict(
        (name, residue)
        for name, residue in zip(g_ordered_taxon_names, snp.column)
        if residue)
    # the taxon weights come from the pruned tree
    taxon_weight_pairs = LeafWeights.get_stone_weights(pruned_tree)
    # standardized physicochemical property table
    property_table = MAPP.get_standardized_property_array(
        MAPP.g_property_array)
    # correlation matrix of the physicochemical properties
    correlation_matrix = MAPP.get_property_correlation_matrix(property_table)
    # collect the weight and the amino acid index of each weighted taxon
    weights = []
    aa_indices = []
    for taxon, weight in taxon_weight_pairs:
        letter = taxon_to_aa_letter[taxon]
        weights.append(weight)
        aa_indices.append(aa_letter_to_aa_index(letter))
    # estimated amino acid distribution for the column
    aa_distribution = MAPP.estimate_aa_distribution(weights, aa_indices)
    # estimated mean and variance of each physicochemical property
    est_pc_means = MAPP.estimate_property_means(
        property_table, aa_distribution)
    est_pc_variances = MAPP.estimate_property_variances(
        property_table, aa_distribution)
    # deviation from each property mean for each possible amino acid
    deviations = MAPP.get_deviations(
        est_pc_means, est_pc_variances, property_table)
    # one impact score per amino acid
    impact_scores = MAPP.get_impact_scores(correlation_matrix, deviations)
    # one p-value per impact score, all for the same number of taxa
    ntaxa = len(taxon_weight_pairs)
    p_values = [MAPP.get_p_value(score, ntaxa) for score in impact_scores]
    # report the impact score and the p-value of the mutant amino acid
    letter_to_impact = dict(zip(Codon.g_aa_letters, impact_scores))
    letter_to_pvalue = dict(zip(Codon.g_aa_letters, p_values))
    impact = letter_to_impact[snp.mutant_aa]
    pvalue = letter_to_pvalue[snp.mutant_aa]
    fields = (snp.variant_id, impact, pvalue)
    return '\t'.join([str(field) for field in fields])
Beispiel #3
0
def get_response_content(fs):
    """
    Build the html page that reports the steps of a MAPP analysis.

    The tree and the amino acid column are obtained from the request
    through get_tree_and_column.
    Every numerical step of the analysis is always computed,
    but a step is written to the page only when
    the matching show_* flag of the request is set.
    The return value is the html page as a string.
    """
    out = StringIO()

    def write_table(headings, col_labels, row_labels, table):
        # write the heading lines, a line break,
        # the labeled html table, and a trailing spacer
        for heading in headings:
            print >> out, heading
        print >> out, '<br/>'
        print >> out, HtmlTable.get_labeled_table_string(
                col_labels, row_labels, table)
        print >> out, '<br/><br/>'

    # open the html response
    print >> out, '<html>'
    print >> out, '<body>'
    # the tree and the column are sent by the user
    pruned_tree, taxon_to_aa_letter = get_tree_and_column(fs)
    # the taxon weights come from the pruned tree
    taxon_weight_pairs = LeafWeights.get_stone_weights(pruned_tree)
    # raw physicochemical property table
    if fs.show_raw_pc_table:
        write_table(
                ['raw physicochemical property table:'],
                MAPP.g_property_names, Codon.g_aa_letters,
                MAPP.g_property_array)
    # standardized physicochemical property table
    standardized_property_array = MAPP.get_standardized_property_array(
            MAPP.g_property_array)
    if fs.show_standardized_pc_table:
        write_table(
                ['standardized physicochemical property table:'],
                MAPP.g_property_names, Codon.g_aa_letters,
                standardized_property_array)
    # physicochemical property correlation matrix
    correlation_matrix = MAPP.get_property_correlation_matrix(
            standardized_property_array)
    if fs.show_pc_correlation_matrix:
        write_table(
                ['physicochemical property correlation matrix:'],
                MAPP.g_property_names, MAPP.g_property_names,
                correlation_matrix)
    # pruned tree, shown as escaped preformatted text
    if fs.show_tree:
        tree_string = NewickIO.get_narrow_newick_string(pruned_tree, 80)
        tree_lines = [
                raw.rstrip() for raw in StringIO(tree_string).readlines()]
        print >> out, 'pruned phylogenetic tree in newick format:'
        print >> out, '<pre>'
        for tree_line in tree_lines:
            print >> out, cgi.escape(tree_line)
        print >> out, '</pre>'
        print >> out, '<br/>'
    # taxon weights, shown as a table with a single row
    if fs.show_weights:
        taxa, shown_weights = zip(*taxon_weight_pairs)
        write_table(['taxon weights:'], taxa, ['weight'], [shown_weights])
    # estimate the amino acid distribution for the column
    # from the taxon weights and the observed amino acids
    weights = [weight for _, weight in taxon_weight_pairs]
    aa_indices = [
            aa_letter_to_aa_index(taxon_to_aa_letter[taxon])
            for taxon, _ in taxon_weight_pairs]
    aa_distribution = MAPP.estimate_aa_distribution(weights, aa_indices)
    if fs.show_aa_distribution:
        write_table(
                ['estimated amino acid distribution:'],
                Codon.g_aa_letters, ['weight'], [aa_distribution])
    # estimated mean and variance of each physicochemical property
    est_pc_means = MAPP.estimate_property_means(
            standardized_property_array, aa_distribution)
    est_pc_variances = MAPP.estimate_property_variances(
            standardized_property_array, aa_distribution)
    if fs.show_pc_distribution:
        write_table(
                ['estimated physicochemical property moments:'],
                MAPP.g_property_names, ['mean', 'variance'],
                [est_pc_means, est_pc_variances])
    # deviation from each property mean for each possible amino acid
    deviations = MAPP.get_deviations(
            est_pc_means, est_pc_variances, standardized_property_array)
    if fs.show_deviations:
        write_table(
                [
                    'deviations of amino acids from the normal distribution',
                    'estimated for each property:',
                    ],
                MAPP.g_property_names, Codon.g_aa_letters, deviations)
    # one impact score per amino acid
    impact_scores = MAPP.get_impact_scores(correlation_matrix, deviations)
    if fs.show_impact_scores:
        write_table(
                ['impact scores:'],
                Codon.g_aa_letters, ['impact'], [impact_scores])
    # one p-value per impact score, all for the same number of taxa
    ntaxa = len(taxon_weight_pairs)
    p_values = [MAPP.get_p_value(score, ntaxa) for score in impact_scores]
    if fs.show_p_values:
        write_table(
                ['p-values:'],
                Codon.g_aa_letters, ['p-value'], [p_values])
    # close the html response
    print >> out, '</body>'
    print >> out, '</html>'
    return out.getvalue()
Beispiel #4
0
def get_response_content(fs):
    """
    Report the MAPP p-value and statistic of an amino acid at a position.

    The request provides the amino acid letter, a newick tree,
    and the chromosome and position of the alignment column.
    The response is plain text with the p-value, the statistic,
    and the aligned column, or a short message when
    no aligned amino acids are found at the position.
    A KGEA.KGEAError is not propagated;
    its message is written to the response instead.
    HandlingError is raised when the amino acid letter is invalid
    or when fewer than seven taxa have an aligned amino acid.
    """
    # validate the upper case amino acid letter
    aa_of_interest = fs.aminoacid.upper()
    if aa_of_interest not in Codon.g_aa_letters:
        raise HandlingError('invalid amino acid: ' + fs.aminoacid)
    # parse the newick tree
    tree = NewickIO.parse(fs.tree, Newick.NewickTree)
    out = StringIO()
    try:
        finder = KGEA.Finder(g_index_dir, g_valid_chromosome_strings_pathname,
                             g_fasta_dir)
        # note that some of these amino acids can be gaps
        taxon_aa_pairs = list(
            finder.gen_taxon_aa_pairs(fs.chromosome, fs.position))
        # nothing can be analyzed without an aligned column
        if not taxon_aa_pairs:
            print >> out, 'no aligned amino acids were found at this position'
            return out.getvalue()
        column_lines = [taxon + '\t' + aa for taxon, aa in taxon_aa_pairs]
        # keep only the taxa whose entry is an amino acid letter
        taxon_to_aa_letter = dict(
            (taxon, aa) for taxon, aa in taxon_aa_pairs
            if aa in Codon.g_aa_letters)
        selected_taxa = set(taxon_to_aa_letter)
        ntaxa = len(selected_taxa)
        # we need enough taxa
        # to calculate the p-value from the MAPP statistic
        mintaxa = 7
        if ntaxa < mintaxa:
            message = (
                'this column has only %d aligned amino acids but we want at least %d'
                % (ntaxa, mintaxa))
            raise HandlingError(message)
        # restrict the tree to the taxa of interest
        modify_tree(tree, selected_taxa)
        # the taxon weights come from the modified tree
        taxon_weight_pairs = LeafWeights.get_stone_weights(tree)
        # standardized physicochemical property table
        property_table = MAPP.get_standardized_property_array(
            MAPP.g_property_array)
        # physicochemical property correlation matrix
        correlation_matrix = MAPP.get_property_correlation_matrix(
            property_table)
        # estimate the amino acid distribution for the column
        weights = [weight for _, weight in taxon_weight_pairs]
        aa_indices = [
            aa_letter_to_aa_index(taxon_to_aa_letter[taxon])
            for taxon, _ in taxon_weight_pairs
        ]
        aa_distribution = MAPP.estimate_aa_distribution(weights, aa_indices)
        # estimated mean and variance of each physicochemical property
        est_pc_means = MAPP.estimate_property_means(
            property_table, aa_distribution)
        est_pc_variances = MAPP.estimate_property_variances(
            property_table, aa_distribution)
        # deviation from each property mean for each possible amino acid
        deviations = MAPP.get_deviations(
            est_pc_means, est_pc_variances, property_table)
        # one impact score and one p-value per amino acid
        impact_scores = MAPP.get_impact_scores(correlation_matrix, deviations)
        p_values = [MAPP.get_p_value(score, ntaxa) for score in impact_scores]
        # both results are reported for the amino acid of interest
        aa_index = aa_letter_to_aa_index(aa_of_interest)
        print >> out, 'MAPP p-value:'
        print >> out, p_values[aa_index]
        print >> out
        print >> out, 'MAPP statistic:'
        print >> out, impact_scores[aa_index]
        print >> out
        # finish with the aligned column
        print >> out, 'aligned column:'
        print >> out, '\n'.join(column_lines)
    except KGEA.KGEAError as e:
        print >> out, e
    return out.getvalue()
Beispiel #5
0
def get_response_content(fs):
    """
    Compute the MAPP p-value and statistic for one amino acid substitution.

    The amino acid letter, the newick tree, the chromosome,
    and the position are all read from the request object.
    The plain text response lists the p-value, the statistic,
    and the aligned column at the requested position.
    If no aligned amino acids are found then the response says so.
    If a KGEA.KGEAError occurs then it is written to the response.
    HandlingError is raised for an invalid amino acid letter
    and for a column with fewer than seven aligned amino acids.
    """
    # the amino acid letter is compared in upper case
    aa_of_interest = fs.aminoacid.upper()
    if aa_of_interest not in Codon.g_aa_letters:
        raise HandlingError('invalid amino acid: ' + fs.aminoacid)
    # read the newick tree
    tree = NewickIO.parse(fs.tree, Newick.NewickTree)
    out = StringIO()
    try:
        finder = KGEA.Finder(
                g_index_dir, g_valid_chromosome_strings_pathname, g_fasta_dir)
        # note that some of these amino acids can be gaps
        taxon_aa_pairs = list(
                finder.gen_taxon_aa_pairs(fs.chromosome, fs.position))
        # an empty column gets a short message instead of an analysis
        if not taxon_aa_pairs:
            print >> out, 'no aligned amino acids were found at this position'
            return out.getvalue()
        lines = [taxon + '\t' + aa for taxon, aa in taxon_aa_pairs]
        # map each taxon to its amino acid, dropping the other entries
        taxon_to_aa_letter = dict(
                (taxon, aa) for taxon, aa in taxon_aa_pairs
                if aa in Codon.g_aa_letters)
        selected_taxa = set(taxon_to_aa_letter)
        ntaxa = len(selected_taxa)
        # the p-value of the MAPP statistic needs enough taxa
        mintaxa = 7
        if ntaxa < mintaxa:
            raise HandlingError(
                    'this column has only %d aligned amino acids but we want at least %d'
                    % (ntaxa, mintaxa))
        # keep only the taxa of interest in the tree
        modify_tree(tree, selected_taxa)
        # get the taxon weights from the tree
        taxon_weight_pairs = LeafWeights.get_stone_weights(tree)
        # standardize the physicochemical property table
        standardized = MAPP.get_standardized_property_array(
                MAPP.g_property_array)
        # correlate the physicochemical properties
        correlation_matrix = MAPP.get_property_correlation_matrix(
                standardized)
        # collect the weight and the amino acid index of each weighted taxon
        weights = []
        aa_indices = []
        for taxon, weight in taxon_weight_pairs:
            letter = taxon_to_aa_letter[taxon]
            weights.append(weight)
            aa_indices.append(aa_letter_to_aa_index(letter))
        # estimate the amino acid distribution for the column
        aa_distribution = MAPP.estimate_aa_distribution(weights, aa_indices)
        # estimate the mean and variance of each physicochemical property
        est_pc_means = MAPP.estimate_property_means(
                standardized, aa_distribution)
        est_pc_variances = MAPP.estimate_property_variances(
                standardized, aa_distribution)
        # get the deviation from each property mean
        # for each possible amino acid
        deviations = MAPP.get_deviations(
                est_pc_means, est_pc_variances, standardized)
        # get the impact scores and their p-values
        impact_scores = MAPP.get_impact_scores(correlation_matrix, deviations)
        p_values = [MAPP.get_p_value(score, ntaxa) for score in impact_scores]
        # the same index selects both results for the amino acid of interest
        index_of_interest = aa_letter_to_aa_index(aa_of_interest)
        # show the p-value
        print >> out, 'MAPP p-value:'
        print >> out, p_values[index_of_interest]
        print >> out
        # show the MAPP statistic
        print >> out, 'MAPP statistic:'
        print >> out, impact_scores[index_of_interest]
        print >> out
        # show the aligned column
        print >> out, 'aligned column:'
        print >> out, '\n'.join(lines)
    except KGEA.KGEAError as e:
        print >> out, e
    return out.getvalue()
Beispiel #6
0
def get_response_content(fs):
    """
    Write an html report of a MAPP analysis of one alignment column.

    The pruned tree and the map from taxon to amino acid letter
    are provided by get_tree_and_column for the given request.
    All of the numerical steps are computed regardless of the flags,
    and each show_* flag of the request decides whether
    the corresponding section is written to the page.
    The html page is returned as a string.
    """
    out = StringIO()

    def emit_section(title_lines, col_labels, row_labels, table):
        # a section is its title, a line break,
        # a labeled html table, and a trailing spacer
        for title_line in title_lines:
            print >> out, title_line
        print >> out, '<br/>'
        print >> out, HtmlTable.get_labeled_table_string(
            col_labels, row_labels, table)
        print >> out, '<br/><br/>'

    # write the html header
    print >> out, '<html>'
    print >> out, '<body>'
    # get the tree and the column sent by the user
    pruned_tree, taxon_to_aa_letter = get_tree_and_column(fs)
    # weight the taxa according to the pruned tree
    taxon_weight_pairs = LeafWeights.get_stone_weights(pruned_tree)
    # section: raw physicochemical property table
    if fs.show_raw_pc_table:
        emit_section(['raw physicochemical property table:'],
                     MAPP.g_property_names, Codon.g_aa_letters,
                     MAPP.g_property_array)
    # standardize the physicochemical property table
    standardized = MAPP.get_standardized_property_array(
        MAPP.g_property_array)
    # section: standardized physicochemical property table
    if fs.show_standardized_pc_table:
        emit_section(['standardized physicochemical property table:'],
                     MAPP.g_property_names, Codon.g_aa_letters,
                     standardized)
    # correlate the physicochemical properties
    correlation_matrix = MAPP.get_property_correlation_matrix(standardized)
    # section: physicochemical property correlation matrix
    if fs.show_pc_correlation_matrix:
        emit_section(['physicochemical property correlation matrix:'],
                     MAPP.g_property_names, MAPP.g_property_names,
                     correlation_matrix)
    # section: the pruned tree as escaped preformatted text
    if fs.show_tree:
        tree_string = NewickIO.get_narrow_newick_string(pruned_tree, 80)
        stripped_lines = []
        for raw_line in StringIO(tree_string).readlines():
            stripped_lines.append(raw_line.rstrip())
        print >> out, 'pruned phylogenetic tree in newick format:'
        print >> out, '<pre>'
        for stripped_line in stripped_lines:
            print >> out, cgi.escape(stripped_line)
        print >> out, '</pre>'
        print >> out, '<br/>'
    # section: the taxon weights as a single table row
    if fs.show_weights:
        taxa, weight_row = zip(*taxon_weight_pairs)
        emit_section(['taxon weights:'], taxa, ['weight'], [weight_row])
    # collect the weight and the amino acid index of each weighted taxon
    weights = []
    aa_indices = []
    for taxon, weight in taxon_weight_pairs:
        letter = taxon_to_aa_letter[taxon]
        weights.append(weight)
        aa_indices.append(aa_letter_to_aa_index(letter))
    # estimate the amino acid distribution for the column
    aa_distribution = MAPP.estimate_aa_distribution(weights, aa_indices)
    # section: estimated amino acid distribution
    if fs.show_aa_distribution:
        emit_section(['estimated amino acid distribution:'],
                     Codon.g_aa_letters, ['weight'], [aa_distribution])
    # estimate the mean and variance of each physicochemical property
    est_pc_means = MAPP.estimate_property_means(
        standardized, aa_distribution)
    est_pc_variances = MAPP.estimate_property_variances(
        standardized, aa_distribution)
    # section: estimated moments of each physicochemical property
    if fs.show_pc_distribution:
        emit_section(['estimated physicochemical property moments:'],
                     MAPP.g_property_names, ['mean', 'variance'],
                     [est_pc_means, est_pc_variances])
    # get the deviation from each property mean
    # for each possible amino acid
    deviations = MAPP.get_deviations(
        est_pc_means, est_pc_variances, standardized)
    # section: deviations, with a title that spans two lines
    if fs.show_deviations:
        emit_section(
            ['deviations of amino acids from the normal distribution',
             'estimated for each property:'],
            MAPP.g_property_names, Codon.g_aa_letters, deviations)
    # get one impact score per amino acid
    impact_scores = MAPP.get_impact_scores(correlation_matrix, deviations)
    # section: impact scores
    if fs.show_impact_scores:
        emit_section(['impact scores:'],
                     Codon.g_aa_letters, ['impact'], [impact_scores])
    # get one p-value per impact score, all for the same number of taxa
    ntaxa = len(taxon_weight_pairs)
    p_values = [MAPP.get_p_value(score, ntaxa) for score in impact_scores]
    # section: p-values
    if fs.show_p_values:
        emit_section(['p-values:'],
                     Codon.g_aa_letters, ['p-value'], [p_values])
    # write the html footer
    print >> out, '</body>'
    print >> out, '</html>'
    return out.getvalue()