コード例 #1
0
 def test_get_custom_coords(self):
     """get_custom_coords: Gets custom axis coords from the mapping file."""
     custom_axes = ['Height', 'Weight']
     coords = [self.coord_header, self.coords]

     # the return value is ignored on purpose: the assertion below inspects
     # coords[1], so get_custom_coords is expected to update ``coords``
     # in place
     get_custom_coords(custom_axes, self.mapping2, coords)

     # presumably the two leading columns are the Height and Weight values
     # read from self.mapping2 -- TODO confirm against the fixture
     exp = array([[10, 60, -0.219044992, 0.079674486, 0.09233683],
                  [20, 55, -0.042258081, 0.000204041, 0.024837603],
                  [30, 50, 0.080504323, -0.212014503, -0.088353435]])
     assert_almost_equal(coords[1], exp)
コード例 #2
0
 def test_get_custom_coords(self):
     """get_custom_coords: Gets custom axis coords from the mapping file."""
     custom_axes = ['Height', 'Weight']
     coords = [self.coord_header, self.coords]

     # the call's return value is not used; the check below reads coords[1],
     # i.e. the helper is expected to modify ``coords`` in place
     get_custom_coords(custom_axes, self.mapping2, coords)

     # presumably the first two columns hold the Height and Weight values
     # coming from self.mapping2 -- TODO confirm against the fixture
     exp = array([[10, 60, -0.219044992, 0.079674486, 0.09233683],
                  [20, 55, -0.042258081, 0.000204041, 0.024837603],
                  [30, 50, 0.080504323, -0.212014503, -0.088353435]])
     assert_almost_equal(coords[1], exp)
コード例 #3
0
ファイル: util.py プロジェクト: jairideout/emperor
def preprocess_coords_file(coords_header, coords_data, coords_eigenvals,
                        coords_pct, mapping_header, mapping_data,
                        custom_axes=None, jackknifing_method=None,
                        is_comparison=False):
    """Process a PCoA data and handle customizations in the contents

    This controller function handles any customization that has to be done to
    the PCoA data prior to the formatting. Note that the first element in each
    list (coords, headers, eigenvalues & percents) will be considered the master
    set of coordinates.

    Three situations are handled:
    - coords_data is a single numpy array: custom axes are added if requested.
    - coords_data is a list and is_comparison is false: the files are
      summarized with summarize_pcoas (jackknifing) and the function returns
      from that branch with coords_low and coords_high filled in.
    - coords_data is a list and is_comparison is true: the coordinates of all
      the files are stacked vertically and every sample identifier gets a
      '_<index of the file>' suffix.

    Inputs:
    coords_header: list of sample identifiers in the PCoA file _or_ list of
    lists with sample identifiers for each coordinate file (if jackknifing or
    comparing plots)
    coords_data: matrix of coordinates in the PCoA file _or_ list of numpy
    arrays with coordinates for each file (if jackknifing or comparing plots)
    coords_eigenvals: numpy array with eigenvalues for the coordinates file _or_
    list of numpy arrays with the eigenvalues (if jackknifing or comparing
    plots)
    coords_pct: numpy array with the percent explained by each principal
    coordinates axis _or_ a list of lists with numpy arrays (if jackknifing or
    comparing plots)
    mapping_header: mapping file headers names
    mapping_data: mapping file data
    custom_axes: names of the mapping data fields to add to coords_data
    jackknifing_method: one of 'sdev' or 'IRQ', defaults to None, for more info
    see qiime.util.summarize_pcoas
    is_comparison: whether or not the inputs should be considered as the ones
    for a comparison plot

    Outputs:
    coords_header: list of sample identifiers in the PCoA file
    coords_data: matrix of coordinates in the PCoA file with custom_axes if
    provided
    coords_eigenvalues: either the eigenvalues of the input coordinates or the
    average eigenvalues of the multiple coords that were passed in
    coords_pct: list of percents explained by each axis as given by the master
    coordinates i. e. the center around where the values revolve
    coords_low: coordinates representing the lower edges of an ellipse; None if
    no jackknifing is applied
    coords_high: coordinates representing the higher edges of an ellipse; None
    if no jackknifing is applied
    clones: total number of input files; zero unless a comparison plot is
    requested

    Raises: AssertionError if a comparison plot is requested but a list of data
    is not passed as input
    """
    # function-scope import so that this block does not depend on the
    # module-level import list providing hstack
    from numpy import hstack

    has_multiple_files = isinstance(coords_data, list)

    # prevent obscure and obfuscated errors; raised explicitly instead of
    # using an assert statement so the check is not stripped by ``python -O``
    if is_comparison and not has_multiple_files:
        raise AssertionError("Cannot process a comparison with the data from "
                             "a single coordinates file")

    mapping_file = [mapping_header] + mapping_data
    coords_file = [coords_header, coords_data]

    # number PCoA files; zero for any case except for comparison plots
    clones = 0

    if custom_axes and isinstance(coords_data, ndarray):
        # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
        get_custom_coords(custom_axes, mapping_file, coords_file)
        remove_nans(coords_file)
        scale_custom_coords(custom_axes, coords_file)
    elif has_multiple_files and not is_comparison:
        # take the first pcoa file as the master set of coordinates
        master_pcoa = [coords_header[0], coords_data[0],
                       coords_eigenvals[0], coords_pct[0]]

        # support pcoas must be a list of lists where each list contain
        # all the elements that compose a coordinates file
        support_pcoas = [[h, d, e, p] for h, d, e, p in zip(
            coords_header, coords_data, coords_eigenvals, coords_pct)]

        # do not apply procrustes, at least not for now
        (coords_data, coords_low, coords_high, eigenvalues_average,
         identifiers) = summarize_pcoas(master_pcoa, support_pcoas,
                                        method=jackknifing_method,
                                        apply_procrustes=False)

        # custom axes and jackknifing is a tricky thing to do, you only have to
        # add the custom values to the master file which is represented as the
        # coords_data return value. Since there is really no variation in that
        # axis then you have to change the values of coords_high and of
        # coords_low to something really small so that WebGL work properly
        if custom_axes:
            coords_file = [master_pcoa[0], coords_data]
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

            # this opens support for as many custom axes as needed.
            # NOTE(review): get_custom_coords is expected to prepend one
            # column per custom axis to the coordinates -- confirm. The
            # ellipse edges therefore get new leading columns too, instead
            # of having their first principal coordinates overwritten, so
            # that the columns stay aligned with coords_data
            axes = len(custom_axes)
            coords_low = hstack(
                [zeros([coords_low.shape[0], axes]), coords_low])
            coords_high = hstack(
                [ones([coords_high.shape[0], axes]) * 0.00001, coords_high])
            coords_data = coords_file[1]

        # return a value containing coords_low and coords_high
        return (identifiers, coords_data, eigenvalues_average, master_pcoa[3],
                coords_low, coords_high, clones)
    # comparison plots are processed almost individually
    elif has_multiple_files:
        # indicates the number of files that were totally processed so other
        # functions/APIs are aware of how many times to replicate the metadata
        clones = len(coords_data)
        out_headers, out_coords = [], []

        for index in range(clones):
            headers_i = coords_header[index]
            coords_i = coords_data[index]

            # tag each header with the number in which those coords came in
            out_headers.extend(
                [element + '_%d' % index for element in headers_i])

            if index == 0:
                # numpy can only stack things if they have the same shape
                out_coords = coords_i

                # the eigenvalues and percents explained are really the ones
                # belonging to the first set of coordinates that was passed
                coords_eigenvals = coords_eigenvals[index]
                coords_pct = coords_pct[index]
            else:
                out_coords = vstack((out_coords, coords_i))

        coords_file = [out_headers, out_coords]

        if custom_axes:
            # this condition deals with the fact that in order for the custom
            # axes to be added into the original coordinates, we have to add the
            # suffix for the sample identifiers that the coordinates have
            if clones:
                out_data = []
                for index in range(clones):
                    out_data.extend(
                        [[element[0] + '_%d' % index] + element[1:]
                         for element in mapping_data])
                mapping_file = [mapping_header] + out_data

            # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

    # if no coords summary is applied, return None in the corresponding values
    # note that the value of clones will be != 0 for a comparison plot
    return (coords_file[0], coords_file[1], coords_eigenvals, coords_pct,
            None, None, clones)
コード例 #4
0
ファイル: util.py プロジェクト: thermokarst-forks/emperor
def preprocess_coords_file(coords_header,
                           coords_data,
                           coords_eigenvals,
                           coords_pct,
                           mapping_header,
                           mapping_data,
                           custom_axes=None,
                           jackknifing_method=None,
                           is_comparison=False,
                           pct_variation_below_one=False):
    """Process a PCoA data and handle customizations in the contents

    This controller function handles any customization that has to be done to
    the PCoA data prior to the formatting. Note that the first element in each
    list (coords, headers, eigenvalues & percents) will be considered the
    master set of coordinates.

    Parameters
    ----------
    coords_header: 1d or 2d array of str
        If 1d array of str, the sample identifiers in the PCoA file
        If 2d array of str, the sample identifiers for each coordinate
        file (if jackknifing or comparing plots)
    coords_data: 2d array of float or list of 2d array of float
        If 2d array of float, matrix of coordinates in the PCoA file
        If list of 2d array of float, the coordinates for each file
        (if jackknifing or comparing plots)
    coords_eigenvals: 1d or 2d array of float
        If 1d array, eigenvalues for the coordinates file
        If 2d array, list of arrays with the eigenvalues
        (if jackknifing or comparing plots)
    coords_pct: 1d or 2d array of float
        If 1d array, the percent explained by each principal coordinates axis
        If 2d array, a list of lists with numpy arrays (if jackknifing or
        comparing plots)
    mapping_header: list of str
        mapping file headers names
    mapping_data: list of lists of str
        mapping file data
    custom_axes: list of str, optional
        names of the mapping data fields to add to coords_data; one column
        is added to coords_low and coords_high per name. Default: None
    jackknifing_method: {'sdev', 'IRQ', None}, optional
        For more info see qiime.util.summarize_pcoas. Default: None
    is_comparison: bool, optional
        whether or not the inputs should be considered as the ones for a
        comparison plot. Default: false
    pct_variation_below_one: bool, optional
        boolean to allow percent variation of the axes be under one. When
        false and the first percentage is below 1.0, the percentages are
        multiplied by 100. Default: false

    Returns
    -------
    coords_header: list of str
        Sample identifiers in the PCoA file
    coords_data: 2d array of float
        matrix of coordinates in the PCoA file with custom_axes if provided
    coords_eigenvals: array of float
        either the eigenvalues of the input coordinates or the average
        eigenvalues of the multiple coords that were passed in
    coords_pct: array of float
        list of percents explained by each axis as given by the master
        coordinates i. e. the center around where the values revolve
    coords_low: 2d array of float
        coordinates representing the lower edges of an ellipse; None if no
        jackknifing is applied
    coords_high: 2d array of float
        coordinates representing the higher edges of an ellipse; None if no
        jackknifing is applied
    clones: int
        total number of input files; zero unless a comparison plot is
        requested

    Raises
    ------
    AssertionError
        if a comparison plot is requested but a list of data is not passed
        as input
    """
    has_multiple_files = isinstance(coords_data, list)

    # prevent obscure and obfuscated errors; raised explicitly instead of
    # using an assert statement so the check is not stripped by ``python -O``
    if is_comparison and not has_multiple_files:
        raise AssertionError("Cannot process a comparison with the data from "
                             "a single coordinates file")

    mapping_file = [mapping_header] + mapping_data
    coords_file = [coords_header, coords_data]

    # number PCoA files; zero for any case except for comparison plots
    clones = 0

    if custom_axes and isinstance(coords_data, np.ndarray):
        # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
        get_custom_coords(custom_axes, mapping_file, coords_file)
        remove_nans(coords_file)
        scale_custom_coords(custom_axes, coords_file)
    elif has_multiple_files and not is_comparison:
        # take the first pcoa file as the master set of coordinates
        master_pcoa = [
            coords_header[0], coords_data[0], coords_eigenvals[0],
            coords_pct[0]
        ]

        # support pcoas must be a list of lists where each list contain
        # all the elements that compose a coordinates file
        support_pcoas = [[h, d, e, p] for h, d, e, p in zip(
            coords_header, coords_data, coords_eigenvals, coords_pct)]

        # do not apply procrustes, at least not for now
        (coords_data, coords_low, coords_high, eigenvalues_average,
         identifiers) = summarize_pcoas(master_pcoa, support_pcoas,
                                        method=jackknifing_method,
                                        apply_procrustes=False)

        # custom axes and jackknifing is a tricky thing to do, you only have to
        # add the custom values to the master file which is represented as the
        # coords_data return value. Since there is really no variation in that
        # axis then you have to change the values of coords_high and of
        # coords_low to something really small so that WebGL work properly
        if custom_axes:
            coords_file = [master_pcoa[0], coords_data]
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

            # this opens support for as many custom axes as needed: one
            # leading column per custom axis is added to the ellipse edges
            axes = len(custom_axes)
            rows = coords_low.shape[0]

            coords_low = np.hstack([np.zeros((rows, axes)), coords_low])
            coords_high = np.hstack([
                np.full((rows, axes), fill_value=0.00001),
                coords_high
            ])

            coords_data = coords_file[1]

        if master_pcoa[3][0] < 1.0 and not pct_variation_below_one:
            # asarray: multiplying a plain list by 100 would repeat it
            # instead of scaling its values
            master_pcoa[3] = np.asarray(master_pcoa[3]) * 100

        # return a value containing coords_low and coords_high
        return (identifiers, coords_data, eigenvalues_average, master_pcoa[3],
                coords_low, coords_high, clones)
    # comparison plots are processed almost individually
    elif has_multiple_files:
        # indicates the number of files that were totally processed so other
        # functions/APIs are aware of how many times to replicate the metadata
        clones = len(coords_data)
        out_headers, out_coords = [], []

        for index in range(clones):
            headers_i = coords_header[index]
            coords_i = coords_data[index]

            # tag each header with the number in which those coords came in
            out_headers.extend(
                [element + '_%d' % index for element in headers_i])

            if index == 0:
                # numpy can only stack things if they have the same shape
                out_coords = coords_i

                # the eigenvalues and percents explained are really the ones
                # belonging to the first set of coordinates that was passed
                coords_eigenvals = coords_eigenvals[index]
                coords_pct = coords_pct[index]
            else:
                out_coords = np.vstack((out_coords, coords_i))

        coords_file = [out_headers, out_coords]

        if custom_axes:
            # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

    if coords_pct[0] < 1.0 and not pct_variation_below_one:
        # asarray: multiplying a plain list by 100 would repeat it instead of
        # scaling its values
        coords_pct = np.asarray(coords_pct) * 100

    # if no coords summary is applied, return None in the corresponding values
    # note that the value of clones will be != 0 for a comparison plot
    return (coords_file[0], coords_file[1], coords_eigenvals, coords_pct,
            None, None, clones)
コード例 #5
0
def preprocess_coords_file(coords_header,
                           coords_data,
                           coords_eigenvals,
                           coords_pct,
                           mapping_header,
                           mapping_data,
                           custom_axes=None,
                           jackknifing_method=None,
                           is_comparison=False):
    """Process a PCoA data and handle customizations in the contents

    This controller function handles any customization that has to be done to
    the PCoA data prior to the formatting. Note that the first element in each
    list (coords, headers, eigenvalues & percents) will be considered the master
    set of coordinates.

    Depending on the inputs one of three things happens: a single numpy array
    of coordinates only gets the custom axes added (if requested); a list of
    coordinates with is_comparison false is summarized with summarize_pcoas
    and returned together with coords_low and coords_high; a list of
    coordinates with is_comparison true is stacked vertically, with every
    sample identifier suffixed by '_<index of the file>'.

    Inputs:
    coords_header: list of sample identifiers in the PCoA file _or_ list of
    lists with sample identifiers for each coordinate file (if jackknifing or
    comparing plots)
    coords_data: matrix of coordinates in the PCoA file _or_ list of numpy
    arrays with coordinates for each file (if jackknifing or comparing plots)
    coords_eigenvals: numpy array with eigenvalues for the coordinates file _or_
    list of numpy arrays with the eigenvalues (if jackknifing or comparing
    plots)
    coords_pct: numpy array with the percent explained by each principal
    coordinates axis _or_ a list of lists with numpy arrays (if jackknifing or
    comparing plots)
    mapping_header: mapping file headers names
    mapping_data: mapping file data
    custom_axes: names of the mapping data fields to add to coords_data
    jackknifing_method: one of 'sdev' or 'IRQ', defaults to None, for more info
    see qiime.util.summarize_pcoas
    is_comparison: whether or not the inputs should be considered as the ones
    for a comparison plot

    Outputs:
    coords_header: list of sample identifiers in the PCoA file
    coords_data: matrix of coordinates in the PCoA file with custom_axes if
    provided
    coords_eigenvalues: either the eigenvalues of the input coordinates or the
    average eigenvalues of the multiple coords that were passed in
    coords_pct: list of percents explained by each axis as given by the master
    coordinates i. e. the center around where the values revolve
    coords_low: coordinates representing the lower edges of an ellipse; None if
    no jackknifing is applied
    coords_high: coordinates representing the higher edges of an ellipse; None
    if no jackknifing is applied
    clones: total number of input files; zero unless a comparison plot is
    requested

    Raises: AssertionError if a comparison plot is requested but a list of data
    is not passed as input
    """
    # imported locally so the block does not rely on hstack being part of the
    # module-level imports
    from numpy import hstack

    data_is_list = isinstance(coords_data, list)

    # prevent obscure and obfuscated errors; an explicit raise is used so the
    # validation is kept when Python runs with assertions disabled (-O)
    if is_comparison and not data_is_list:
        raise AssertionError("Cannot process a comparison with the data from "
                             "a single coordinates file")

    mapping_file = [mapping_header] + mapping_data
    coords_file = [coords_header, coords_data]

    # number PCoA files; zero for any case except for comparison plots
    clones = 0

    if custom_axes and isinstance(coords_data, ndarray):
        # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
        get_custom_coords(custom_axes, mapping_file, coords_file)
        remove_nans(coords_file)
        scale_custom_coords(custom_axes, coords_file)
    elif data_is_list and not is_comparison:
        # take the first pcoa file as the master set of coordinates
        master_pcoa = [
            coords_header[0], coords_data[0], coords_eigenvals[0],
            coords_pct[0]
        ]

        # support pcoas must be a list of lists where each list contain
        # all the elements that compose a coordinates file
        support_pcoas = [[h, d, e, p] for h, d, e, p in zip(
            coords_header, coords_data, coords_eigenvals, coords_pct)]

        # do not apply procrustes, at least not for now
        (coords_data, coords_low, coords_high, eigenvalues_average,
         identifiers) = summarize_pcoas(master_pcoa, support_pcoas,
                                        method=jackknifing_method,
                                        apply_procrustes=False)

        # custom axes and jackknifing is a tricky thing to do, you only have to
        # add the custom values to the master file which is represented as the
        # coords_data return value. Since there is really no variation in that
        # axis then you have to change the values of coords_high and of
        # coords_low to something really small so that WebGL work properly
        if custom_axes:
            coords_file = [master_pcoa[0], coords_data]
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

            # this opens support for as many custom axes as needed.
            # NOTE(review): get_custom_coords is expected to prepend one
            # column per custom axis to the coordinates -- confirm. New
            # leading columns are added to the ellipse edges as well, rather
            # than overwriting their first principal coordinates, to keep
            # the columns aligned with coords_data
            axes = len(custom_axes)
            coords_low = hstack(
                [zeros([coords_low.shape[0], axes]), coords_low])
            coords_high = hstack(
                [ones([coords_high.shape[0], axes]) * 0.00001, coords_high])
            coords_data = coords_file[1]

        # return a value containing coords_low and coords_high
        return (identifiers, coords_data, eigenvalues_average, master_pcoa[3],
                coords_low, coords_high, clones)
    # comparison plots are processed almost individually
    elif data_is_list:
        # indicates the number of files that were totally processed so other
        # functions/APIs are aware of how many times to replicate the metadata
        clones = len(coords_data)
        out_headers, out_coords = [], []

        for index in range(clones):
            headers_i = coords_header[index]
            coords_i = coords_data[index]

            # tag each header with the number in which those coords came in
            out_headers.extend(
                [element + '_%d' % index for element in headers_i])

            if index == 0:
                # numpy can only stack things if they have the same shape
                out_coords = coords_i

                # the eigenvalues and percents explained are really the ones
                # belonging to the first set of coordinates that was passed
                coords_eigenvals = coords_eigenvals[index]
                coords_pct = coords_pct[index]
            else:
                out_coords = vstack((out_coords, coords_i))

        coords_file = [out_headers, out_coords]

        if custom_axes:
            # this condition deals with the fact that in order for the custom
            # axes to be added into the original coordinates, we have to add the
            # suffix for the sample identifiers that the coordinates have
            if clones:
                out_data = []
                for index in range(clones):
                    out_data.extend(
                        [[element[0] + '_%d' % index] + element[1:]
                         for element in mapping_data])
                mapping_file = [mapping_header] + out_data

            # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

    # if no coords summary is applied, return None in the corresponding values
    # note that the value of clones will be != 0 for a comparison plot
    return (coords_file[0], coords_file[1], coords_eigenvals, coords_pct,
            None, None, clones)