def test_get_custom_coords(self):
    """get_custom_coords: Gets custom axis coords from the mapping file."""
    custom_axes = ['Height', 'Weight']
    coords = [self.coord_header, self.coords]

    # the return value is ignored on purpose: the assertion below inspects
    # what the call left in coords[1]
    get_custom_coords(custom_axes, self.mapping2, coords)

    # expected matrix after the call: five columns per sample
    exp = array([[10, 60, -0.219044992, 0.079674486, 0.09233683],
                 [20, 55, -0.042258081, 0.000204041, 0.024837603],
                 [30, 50, 0.080504323, -0.212014503, -0.088353435]])
    assert_almost_equal(coords[1], exp)
def test_get_custom_coords(self):
    """get_custom_coords: Gets custom axis coords from the mapping file."""
    custom_axes = ['Height', 'Weight']

    # coords is handed over as [headers, data]; the call's return value is
    # not used, the check is made against coords[1] afterwards
    coords = [self.coord_header, self.coords]
    get_custom_coords(custom_axes, self.mapping2, coords)

    exp = array([
        [10, 60, -0.219044992, 0.079674486, 0.09233683],
        [20, 55, -0.042258081, 0.000204041, 0.024837603],
        [30, 50, 0.080504323, -0.212014503, -0.088353435],
    ])
    assert_almost_equal(coords[1], exp)
def preprocess_coords_file(coords_header, coords_data, coords_eigenvals,
                           coords_pct, mapping_header, mapping_data,
                           custom_axes=None, jackknifing_method=None,
                           is_comparison=False):
    """Process a PCoA data and handle customizations in the contents

    This controller function handles any customization that has to be done to
    the PCoA data prior to the formatting. Note that the first element in each
    list (coords, headers, eigenvalues & percents) will be considered the
    master set of coordinates.

    Inputs:
    coords_header: list of sample identifiers in the PCoA file _or_ list of
    lists with sample identifiers for each coordinate file (if jackknifing or
    comparing plots)
    coords_data: matrix of coordinates in the PCoA file _or_ list of numpy
    arrays with coordinates for each file (if jackknifing or comparing plots)
    coords_eigenvals: numpy array with eigenvalues for the coordinates file
    _or_ list of numpy arrays with the eigenvalues (if jackknifing or
    comparing plots)
    coords_pct: numpy array with the percent explained by each principal
    coordinates axis _or_ a list of lists with numpy arrays (if jackknifing or
    comparing plots)
    mapping_header: mapping file headers names
    mapping_data: mapping file data
    custom_axes: name of the mapping data fields to add to coords_data
    jackknifing_method: one of 'sdev' or 'IRQ', defaults to None, for more
    info see qiime.util.summarize_pcoas
    is_comparison: whether or not the inputs should be considered as the ones
    for a comparison plot

    Outputs:
    coords_header: list of sample identifiers in the PCoA file
    coords_data: matrix of coordinates in the PCoA file with custom_axes if
    provided
    coords_eigenvalues: either the eigenvalues of the input coordinates or the
    average eigenvalues of the multiple coords that were passed in
    coords_pct: list of percents explained by each axis as given by the master
    coordinates i. e. the center around where the values revolve
    coords_low: coordinates representing the lower edges of an ellipse; None
    if no jackknifing is applied
    coords_high: coordinates representing the higher edges of an ellipse; None
    if no jackknifing is applied
    clones: total number of input files

    Raises:
    AssertionError if a comparison plot is requested but a list of data is not
    passed as input
    """
    # prevent obscure and obfuscated errors; raised explicitly instead of via
    # an assert statement so the check is not stripped when running with -O
    if is_comparison and not isinstance(coords_data, list):
        raise AssertionError("Cannot process a comparison with "
                             "the data from a single coordinates file")

    mapping_file = [mapping_header] + mapping_data
    coords_file = [coords_header, coords_data]

    # number PCoA files; zero for any case except for comparison plots
    clones = 0

    if custom_axes and isinstance(coords_data, ndarray):
        # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
        get_custom_coords(custom_axes, mapping_file, coords_file)
        remove_nans(coords_file)
        scale_custom_coords(custom_axes, coords_file)

    # jackknifed plots: several coordinate files summarized into one
    elif isinstance(coords_data, list) and not is_comparison:
        # take the first pcoa file as the master set of coordinates
        master_pcoa = [coords_header[0], coords_data[0],
                       coords_eigenvals[0], coords_pct[0]]

        # support pcoas must be a list of lists where each list contain
        # all the elements that compose a coordinates file
        support_pcoas = [[h, d, e, p] for h, d, e, p in
                         zip(coords_header, coords_data, coords_eigenvals,
                             coords_pct)]

        # do not apply procrustes, at least not for now
        coords_data, coords_low, coords_high, eigenvalues_average,\
            identifiers = summarize_pcoas(master_pcoa, support_pcoas,
                                          method=jackknifing_method,
                                          apply_procrustes=False)

        # custom axes and jackknifing is a tricky thing to do, you only have to
        # add the custom values to the master file which is represented as the
        # coords_data return value. Since there is really no variation in that
        # axis then you have to change the values of coords_high and of
        # coords_low to something really small so that WebGL work properly
        if custom_axes:
            # only this path needs hstack, hence the function-scope import
            from numpy import hstack

            coords_file = [master_pcoa[0], coords_data]
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

            # this opens support for as many custom axes as needed; the new
            # columns are prepended so the low/high bounds of the original
            # axes are kept instead of being overwritten
            axes = len(custom_axes)
            coords_low = hstack([zeros([coords_low.shape[0], axes]),
                                 coords_low])
            coords_high = hstack([ones([coords_high.shape[0], axes]) * 0.00001,
                                  coords_high])
            coords_data = coords_file[1]

        # return a value containing coords_low and coords_high
        return identifiers, coords_data, eigenvalues_average, master_pcoa[3],\
            coords_low, coords_high, clones

    # comparison plots are processed almost individually
    elif isinstance(coords_data, list) and is_comparison:
        # indicates the number of files that were totally processed so other
        # functions/APIs are aware of how many times to replicate the metadata
        clones = len(coords_data)
        out_headers, out_coords = [], []

        for index in range(clones):
            headers_i = coords_header[index]
            coords_i = coords_data[index]

            # tag each header with the number in which those coords came in
            out_headers.extend([element + '_%d' % index
                                for element in headers_i])

            if index == 0:
                # numpy can only stack things if they have the same shape
                out_coords = coords_i

                # the eigenvalues and percents explained are really the ones
                # belonging to the first set of coordinates that was passed
                coords_eigenvals = coords_eigenvals[index]
                coords_pct = coords_pct[index]
            else:
                out_coords = vstack((out_coords, coords_i))

        coords_file = [out_headers, out_coords]

        if custom_axes:
            # this condition deals with the fact that in order for the custom
            # axes to be added into the original coordinates, we have to add
            # the suffix for the sample identifiers that the coordinates have
            if clones:
                out_data = [[element[0] + '_%d' % index] + element[1:]
                            for index in range(clones)
                            for element in mapping_data]
                mapping_file = [mapping_header] + out_data

            # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

    # if no coords summary is applied, return None in the corresponding values
    # note that the value of clones will be != 0 for a comparison plot
    return coords_file[0], coords_file[1], coords_eigenvals, coords_pct, None,\
        None, clones
def preprocess_coords_file(coords_header, coords_data, coords_eigenvals,
                           coords_pct, mapping_header, mapping_data,
                           custom_axes=None, jackknifing_method=None,
                           is_comparison=False,
                           pct_variation_below_one=False):
    """Process a PCoA data and handle customizations in the contents

    This controller function handles any customization that has to be done to
    the PCoA data prior to the formatting. Note that the first element in each
    list (coords, headers, eigenvalues & percents) will be considered the
    master set of coordinates.

    Parameters
    ----------
    coords_header: 1d or 2d array of str
        If 1d array of str, the sample identifiers in the PCoA file
        If 2d array of str, the sample identifiers for each coordinate
        file (if jackknifing or comparing plots)
    coords_data: 2d array of float or list of 2d array of float
        If 2d array of float, matrix of coordinates in the PCoA file
        If list of 2d array of float, with coordinates for each file
        (if jackknifing or comparing plots)
    coords_eigenvals: 1d or 2d array of float
        If 1d array, eigenvalues for the coordinates file
        If 2d array, list of arrays with the eigenvalues
        (if jackknifing or comparing plots)
    coords_pct: 1d or 2d array of float
        If 1d array, the percent explained by each principal coordinates axis
        If 2d array, a list of lists with numpy arrays (if jackknifing or
        comparing plots)
    mapping_header: list of str
        mapping file headers names
    mapping_data: list of lists of str
        mapping file data
    custom_axes: list of str, optional
        names of the mapping data fields to add to coords_data. Default: None
    jackknifing_method: {'sdev', 'IRQ', None}, optional
        For more info see qiime.util.summarize_pcoas. Default: None
    is_comparison: bool, optional
        whether or not the inputs should be considered as the ones for a
        comparison plot. Default: false
    pct_variation_below_one: bool, optional
        boolean to allow percent variation of the axes be under one. When
        false and the first percent is below 1.0, the percents are multiplied
        by 100. Default: false

    Returns
    -------
    coords_header: list of str
        Sample identifiers in the PCoA file
    coords_data: 2d array of float
        matrix of coordinates in the PCoA file with custom_axes if provided
    coords_eigenvals: array of float
        either the eigenvalues of the input coordinates or the average
        eigenvalues of the multiple coords that were passed in
    coords_pct: array of float
        list of percents explained by each axis as given by the master
        coordinates i. e. the center around where the values revolve
    coords_low: 2d array of float
        coordinates representing the lower edges of an ellipse; None if no
        jackknifing is applied
    coords_high: 2d array of float
        coordinates representing the higher edges of an ellipse; None if no
        jackknifing is applied
    clones: int
        total number of input files

    Raises
    ------
    AssertionError
        if a comparison plot is requested but a list of data is not passed
        as input
    """
    # prevent obscure and obfuscated errors; raised explicitly instead of via
    # an assert statement so the check is not stripped when running with -O
    if is_comparison and not isinstance(coords_data, list):
        raise AssertionError("Cannot process a comparison with "
                             "the data from a single "
                             "coordinates file")

    mapping_file = [mapping_header] + mapping_data
    coords_file = [coords_header, coords_data]

    # number PCoA files; zero for any case except for comparison plots
    clones = 0

    if custom_axes and isinstance(coords_data, np.ndarray):
        # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
        get_custom_coords(custom_axes, mapping_file, coords_file)
        remove_nans(coords_file)
        scale_custom_coords(custom_axes, coords_file)

    # jackknifed plots: several coordinate files summarized into one
    elif isinstance(coords_data, list) and not is_comparison:
        # take the first pcoa file as the master set of coordinates
        master_pcoa = [coords_header[0], coords_data[0],
                       coords_eigenvals[0], coords_pct[0]]

        # support pcoas must be a list of lists where each list contain
        # all the elements that compose a coordinates file
        support_pcoas = [[h, d, e, p] for h, d, e, p in
                         zip(coords_header, coords_data, coords_eigenvals,
                             coords_pct)]

        # do not apply procrustes, at least not for now
        coords_data, coords_low, coords_high, eigenvalues_average,\
            identifiers = summarize_pcoas(master_pcoa, support_pcoas,
                                          method=jackknifing_method,
                                          apply_procrustes=False)

        # custom axes and jackknifing is a tricky thing to do, you only have to
        # add the custom values to the master file which is represented as the
        # coords_data return value. Since there is really no variation in that
        # axis then you have to change the values of coords_high and of
        # coords_low to something really small so that WebGL work properly
        if custom_axes:
            coords_file = [master_pcoa[0], coords_data]
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

            # this opens support for as many custom axes as needed; one
            # near-zero-width column is prepended per custom axis
            axes = len(custom_axes)
            n_rows = coords_low.shape[0]
            coords_low = np.hstack([np.zeros((n_rows, axes)), coords_low])
            coords_high = np.hstack([
                np.full((n_rows, axes), fill_value=0.00001), coords_high])
            coords_data = coords_file[1]

        # asarray keeps the scaling element-wise even for a plain list, where
        # "* 100" would repeat the list instead
        if master_pcoa[3][0] < 1.0 and not pct_variation_below_one:
            master_pcoa[3] = np.asarray(master_pcoa[3]) * 100

        # return a value containing coords_low and coords_high
        return identifiers, coords_data, eigenvalues_average, master_pcoa[3],\
            coords_low, coords_high, clones

    # comparison plots are processed almost individually
    elif isinstance(coords_data, list) and is_comparison:
        # indicates the number of files that were totally processed so other
        # functions/APIs are aware of how many times to replicate the metadata
        clones = len(coords_data)
        out_headers, out_coords = [], []

        for index in range(clones):
            headers_i = coords_header[index]
            coords_i = coords_data[index]

            # tag each header with the number in which those coords came in
            out_headers.extend([element + '_%d' % index
                                for element in headers_i])

            if index == 0:
                # numpy can only stack things if they have the same shape
                out_coords = coords_i

                # the eigenvalues and percents explained are really the ones
                # belonging to the first set of coordinates that was passed
                coords_eigenvals = coords_eigenvals[index]
                coords_pct = coords_pct[index]
            else:
                out_coords = np.vstack((out_coords, coords_i))

        coords_file = [out_headers, out_coords]

        if custom_axes:
            # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

    # asarray keeps the scaling element-wise even for a plain list, where
    # "* 100" would repeat the list instead
    if coords_pct[0] < 1.0 and not pct_variation_below_one:
        coords_pct = np.asarray(coords_pct) * 100

    # if no coords summary is applied, return None in the corresponding values
    # note that the value of clones will be != 0 for a comparison plot
    return coords_file[0], coords_file[1], coords_eigenvals, coords_pct, None,\
        None, clones
def preprocess_coords_file(coords_header, coords_data, coords_eigenvals,
                           coords_pct, mapping_header, mapping_data,
                           custom_axes=None, jackknifing_method=None,
                           is_comparison=False):
    """Process a PCoA data and handle customizations in the contents

    This controller function handles any customization that has to be done to
    the PCoA data prior to the formatting. Note that the first element in each
    list (coords, headers, eigenvalues & percents) will be considered the
    master set of coordinates.

    Inputs:
    coords_header: list of sample identifiers in the PCoA file _or_ list of
    lists with sample identifiers for each coordinate file (if jackknifing or
    comparing plots)
    coords_data: matrix of coordinates in the PCoA file _or_ list of numpy
    arrays with coordinates for each file (if jackknifing or comparing plots)
    coords_eigenvals: numpy array with eigenvalues for the coordinates file
    _or_ list of numpy arrays with the eigenvalues (if jackknifing or
    comparing plots)
    coords_pct: numpy array with the percent explained by each principal
    coordinates axis _or_ a list of lists with numpy arrays (if jackknifing or
    comparing plots)
    mapping_header: mapping file headers names
    mapping_data: mapping file data
    custom_axes: name of the mapping data fields to add to coords_data
    jackknifing_method: one of 'sdev' or 'IRQ', defaults to None, for more
    info see qiime.util.summarize_pcoas
    is_comparison: whether or not the inputs should be considered as the ones
    for a comparison plot

    Outputs:
    coords_header: list of sample identifiers in the PCoA file
    coords_data: matrix of coordinates in the PCoA file with custom_axes if
    provided
    coords_eigenvalues: either the eigenvalues of the input coordinates or the
    average eigenvalues of the multiple coords that were passed in
    coords_pct: list of percents explained by each axis as given by the master
    coordinates i. e. the center around where the values revolve
    coords_low: coordinates representing the lower edges of an ellipse; None
    if no jackknifing is applied
    coords_high: coordinates representing the higher edges of an ellipse; None
    if no jackknifing is applied
    clones: total number of input files

    Raises:
    AssertionError if a comparison plot is requested but a list of data is not
    passed as input
    """
    # prevent obscure and obfuscated errors. An explicit raise is used because
    # assert statements are removed when Python runs with -O
    if is_comparison and not isinstance(coords_data, list):
        raise AssertionError("Cannot process a comparison with "
                             "the data from a single coordinates file")

    mapping_file = [mapping_header] + mapping_data
    coords_file = [coords_header, coords_data]

    # number PCoA files; zero for any case except for comparison plots
    clones = 0

    if custom_axes and isinstance(coords_data, ndarray):
        # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
        get_custom_coords(custom_axes, mapping_file, coords_file)
        remove_nans(coords_file)
        scale_custom_coords(custom_axes, coords_file)

    # a list of coordinate files that is not a comparison gets summarized
    elif isinstance(coords_data, list) and not is_comparison:
        # take the first pcoa file as the master set of coordinates
        master_pcoa = [
            coords_header[0], coords_data[0], coords_eigenvals[0],
            coords_pct[0]
        ]

        # support pcoas must be a list of lists where each list contain
        # all the elements that compose a coordinates file
        support_pcoas = [[h, d, e, p] for h, d, e, p in zip(
            coords_header, coords_data, coords_eigenvals, coords_pct)]

        # do not apply procrustes, at least not for now
        coords_data, coords_low, coords_high, eigenvalues_average,\
            identifiers = summarize_pcoas(master_pcoa, support_pcoas,
                                          method=jackknifing_method,
                                          apply_procrustes=False)

        # custom axes and jackknifing is a tricky thing to do, you only have to
        # add the custom values to the master file which is represented as the
        # coords_data return value. Since there is really no variation in that
        # axis then you have to change the values of coords_high and of
        # coords_low to something really small so that WebGL work properly
        if custom_axes:
            # hstack is needed on this path only, so it is imported locally
            from numpy import hstack

            coords_file = [master_pcoa[0], coords_data]
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

            # this opens support for as many custom axes as needed. The
            # columns for the custom axes are prepended to the bounds rather
            # than written over their leading columns
            axes = len(custom_axes)
            low_padding = zeros([coords_low.shape[0], axes])
            high_padding = ones([coords_high.shape[0], axes]) * 0.00001
            coords_low = hstack([low_padding, coords_low])
            coords_high = hstack([high_padding, coords_high])
            coords_data = coords_file[1]

        # return a value containing coords_low and coords_high
        return identifiers, coords_data, eigenvalues_average, master_pcoa[3],\
            coords_low, coords_high, clones

    # comparison plots are processed almost individually
    elif isinstance(coords_data, list) and is_comparison:
        # indicates the number of files that were totally processed so other
        # functions/APIs are aware of how many times to replicate the metadata
        clones = len(coords_data)
        out_headers, out_coords = [], []

        for index in range(clones):
            headers_i = coords_header[index]
            coords_i = coords_data[index]

            # tag each header with the number in which those coords came in
            out_headers.extend(
                [element + '_%d' % index for element in headers_i])

            if index == 0:
                # numpy can only stack things if they have the same shape
                out_coords = coords_i

                # the eigenvalues and percents explained are really the ones
                # belonging to the first set of coordinates that was passed
                coords_eigenvals = coords_eigenvals[index]
                coords_pct = coords_pct[index]
            else:
                out_coords = vstack((out_coords, coords_i))

        coords_file = [out_headers, out_coords]

        if custom_axes:
            # this condition deals with the fact that in order for the custom
            # axes to be added into the original coordinates, we have to add
            # the suffix for the sample identifiers that the coordinates have
            if clones:
                out_data = [[element[0] + '_%d' % index] + element[1:]
                            for index in range(clones)
                            for element in mapping_data]
                mapping_file = [mapping_header] + out_data

            # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

    # if no coords summary is applied, return None in the corresponding values
    # note that the value of clones will be != 0 for a comparison plot
    return coords_file[0], coords_file[1], coords_eigenvals, coords_pct, None,\
        None, clones