Beispiel #1
0
def preprocess_otu_table(otu_sample_ids,
                         otu_table,
                         lineages,
                         coords_data,
                         coords_headers,
                         N=0):
    """Preprocess the OTU table to generate the required data for the biplots

    Input:
    otu_sample_ids: sample identifiers for the otu_table
    otu_table: contingency table
    lineages: taxonomic assignments for the OTUs in the otu_table
    coords_data: principal coordinates data where the taxa will be mapped; if
    both this and coords_headers are lists, only their first element is used
    coords_headers: sample identifiers for coords_data; see coords_data
    N: number of most prevalent taxa to keep, by default will use all

    Output:
    otu_coords: coordinates representing the N most prevalent taxa in otu_table
    otu_table: N most prevalent OTUs from the input otu_table
    otu_lineages: taxonomic assignments corresponding to the N most prevalent
    OTUs
    otu_prevalence: vector with the prevalence scores of the N highest values
    lines: coords where the N most prevalent taxa will be positioned in the
    biplot

    If any of otu_sample_ids, otu_table or lineages is empty, the tuple
    ([], [], [], [], '') is returned instead.

    Raises:
    EmperorUnsupportedComputation: if otu_table or lineages have one or fewer
    rows
    """

    # return empty values if any of the taxa data is empty; lengths are used
    # instead of `==` because comparing a numpy array against an empty array
    # is an elementwise operation that never yields a usable boolean
    if (len(otu_sample_ids) == 0 or len(otu_table) == 0 or
            len(lineages) == 0):
        return [], [], [], [], ''

    # this means there's only one or fewer rows in the contingency table
    if len(otu_table) <= 1 or len(lineages) <= 1:
        raise EmperorUnsupportedComputation(
            "Biplots are not supported for "
            "contingency tables with one or fewer rows")

    # if this element is a list take the first headers and coordinates
    # both of these will be the master coordinates, i. e. where data is centered
    if isinstance(coords_data, list) and isinstance(coords_headers, list):
        coords_data = coords_data[0]
        coords_headers = coords_headers[0]

    # re-arrange the otu table so it matches the order of the samples in the
    # coordinates data & remove any sample that is not in the coordinates header
    otu_sample_ids, otu_table = sort_taxa_table_by_pcoa_coords(
        coords_headers, otu_table, otu_sample_ids)

    # retrieve the prevalence and the coords prior to the filtering
    prevalence = get_taxa_prevalence(otu_table)
    bi_plot_coords = get_taxa_coords(otu_table, coords_data)

    o_otu_coords, o_otu_table, o_otu_lineages, o_prevalence = \
        extract_taxa_data(bi_plot_coords, otu_table, lineages, prevalence, N)

    # the biplot scores come back as a sequence of lines; join them into a
    # single newline-separated string
    lines = '\n'.join(
        make_biplot_scores_output({
            'coord': o_otu_coords,
            'lineages': o_otu_lineages
        }))

    return o_otu_coords, o_otu_table, o_otu_lineages, o_prevalence, lines
Beispiel #2
0
    def test_get_taxa_coords(self):
        """Check the coordinates get_taxa_coords computes for a small table

        The expected value for each row of the table is the sum of the sample
        coordinates, each weighted by that sample's count divided by the row
        total.
        """
        otu_table = np.array([[2, 0, 0, 1], [1, 1, 1, 1], [0, 2, 2, 1]], float)

        res = bp.get_taxa_coords(otu_table, [.4, .2, .1, .9])

        # a list literal is used because range() objects do not support item
        # assignment in Python 3
        otu_coords = [
            .4 * 2 / 3 + .9 * 1 / 3,
            .4 * 1 / 4 + .2 * 1 / 4 + .1 * 1 / 4 + .9 * 1 / 4,
            .4 * 0 / 5 + .2 * 2 / 5 + .1 * 2 / 5 + .9 * 1 / 5,
        ]
        assert_almost_equal(res, otu_coords)
Beispiel #3
0
def preprocess_otu_table(otu_sample_ids, otu_table, lineages,
                        coords_data, coords_headers, N=0):
    """Preprocess the OTU table to generate the required data for the biplots

    Input:
    otu_sample_ids: sample identifiers for the otu_table
    otu_table: contingency table
    lineages: taxonomic assignments for the OTUs in the otu_table
    coords_data: principal coordinates data where the taxa will be mapped; if
    both this and coords_headers are lists, only their first element is used
    coords_headers: sample identifiers for coords_data; see coords_data
    N: number of most prevalent taxa to keep, by default will use all

    Output:
    otu_coords: coordinates representing the N most prevalent taxa in otu_table
    otu_table: N most prevalent OTUs from the input otu_table
    otu_lineages: taxonomic assignments corresponding to the N most prevalent
    OTUs
    otu_prevalence: vector with the prevalence scores of the N highest values
    lines: coords where the N most prevalent taxa will be positioned in the
    biplot

    If any of otu_sample_ids, otu_table or lineages is empty, the tuple
    ([], [], [], [], '') is returned instead.

    Raises:
    EmperorUnsupportedComputation: if otu_table or lineages have one or fewer
    rows
    """

    # return empty values if any of the taxa data is empty; check lengths
    # rather than using `==`, since comparing a numpy array against an empty
    # array is elementwise and does not produce a single True/False answer
    taxa_inputs = (otu_sample_ids, otu_table, lineages)
    if any(len(element) == 0 for element in taxa_inputs):
        return [], [], [], [], ''

    # this means there's only one or fewer rows in the contingency table
    if len(otu_table) <= 1 or len(lineages) <= 1:
        raise EmperorUnsupportedComputation(
            "Biplots are not supported for "
            "contingency tables with one or fewer rows")

    # if this element is a list take the first headers and coordinates
    # both of these will be the master coordinates, i. e. where data is centered
    if isinstance(coords_data, list) and isinstance(coords_headers, list):
        coords_data = coords_data[0]
        coords_headers = coords_headers[0]

    # re-arrange the otu table so it matches the order of the samples in the
    # coordinates data & remove any sample that is not in the coordinates header
    otu_sample_ids, otu_table = sort_taxa_table_by_pcoa_coords(coords_headers,
        otu_table, otu_sample_ids)

    # retrieve the prevalence and the coords prior to the filtering
    prevalence = get_taxa_prevalence(otu_table)
    bi_plot_coords = get_taxa_coords(otu_table, coords_data)

    o_otu_coords, o_otu_table, o_otu_lineages, o_prevalence = \
        extract_taxa_data(bi_plot_coords, otu_table, lineages, prevalence, N)

    # the biplot scores come back as a sequence of lines; join them into a
    # single newline-separated string
    lines = '\n'.join(make_biplot_scores_output({'coord': o_otu_coords,
        'lineages': o_otu_lineages}))

    return o_otu_coords, o_otu_table, o_otu_lineages, o_prevalence, lines
 def test_get_taxa_coords(self):
     """Check the coordinates get_taxa_coords computes for a small table

     The expected value for each row of the table is the sum of the sample
     coordinates, each weighted by that sample's count divided by the row
     total.
     """
     otu_table = np.array([  [2,0,0,1],
                             [1,1,1,1],
                             [0,2,2,1]],float)

     res = bp.get_taxa_coords(otu_table, [.4,.2,.1,.9])

     # a list literal is used because range() objects do not support item
     # assignment in Python 3
     otu_coords = [.4*2/3 + .9*1/3,
                   .4*1/4 + .2*1/4 + .1*1/4 + .9*1/4,
                   .4*0/5 + .2*2/5 + .1*2/5 + .9*1/5]
     assert_almost_equal(res, otu_coords)