def test_summarize_pcoas(self):
    """summarize_pcoas works

    Runs the 'ideal_fourths', 'IQR' and 'sdev' summaries over one master
    PCoA result and four support results. The 'sdev' run enables procrustes
    and is compared against procrustes fits computed directly in the test.
    """
    def build_pcoa(coords, eigvals):
        # each result gets its own header list and its own arrays
        return [['1', '2', '3'], array(coords), array(eigvals)]

    master_pcoa = build_pcoa([[-1.0, 0.0, 1.0], [2.0, 4.0, -4.0]],
                             [.76, .24])
    support_pcoas = [
        build_pcoa([[1.2, 0.1, -1.2], [-2.5, -4.0, 4.5]], [0.80, .20]),
        build_pcoa([[-1.4, 0.05, 1.3], [2.6, 4.1, -4.7]], [0.76, .24]),
        build_pcoa([[-1.5, 0.05, 1.6], [2.4, 4.0, -4.8]], [0.84, .16]),
        build_pcoa([[-1.5, 0.05, 1.6], [2.4, 4.0, -4.8]], [0.84, .16]),
    ]

    # ideal_fourths: the method name is passed positionally here
    average, low, high, eigvals, names = summarize_pcoas(
        master_pcoa, support_pcoas, 'ideal_fourths', apply_procrustes=False)
    self.assertEqual(names, ['1', '2', '3'])
    self.assertFloatEqual(average[0, 0], -1.4)
    self.assertFloatEqual(average[0, 1], 0.0125)
    self.assertFloatEqual(low[0, 0], -1.5)
    self.assertFloatEqual(high[0, 0], -1.28333333)
    self.assertFloatEqual(low[0, 1], -0.0375)
    self.assertFloatEqual(high[0, 1], 0.05)
    self.assertFloatEqual(eigvals[0], 0.81)
    self.assertFloatEqual(eigvals[1], 0.19)

    # IQR
    average, low, high, eigvals, names = summarize_pcoas(
        master_pcoa, support_pcoas, method='IQR', apply_procrustes=False)
    self.assertFloatEqual(low[0, 0], -1.5)
    self.assertFloatEqual(high[0, 0], -1.3)

    # sdev with procrustes: fit every support matrix to the master first,
    # keeping only the transformed support matrix (second return value)
    fitted = [procrustes(master_pcoa[1], support[1])[1]
              for support in support_pcoas]
    average, low, high, eigvals, names = summarize_pcoas(
        master_pcoa, support_pcoas, method='sdev', apply_procrustes=True)

    x = array([matrix[0, 0] for matrix in fitted])
    self.assertEqual(x.mean(), average[0, 0])
    self.assertEqual(-x.std(ddof=1) / 2, low[0, 0])
    self.assertEqual(x.std(ddof=1) / 2, high[0, 0])
def test_summarize_pcoas(self):
    """summarize_pcoas works

    Covers three summary methods: 'ideal_fourths' and 'IQR' without
    procrustes, then 'sdev' with procrustes applied.
    """
    master_pcoa = [
        ['1', '2', '3'],
        array([[-1.0, 0.0, 1.0], [2.0, 4.0, -4.0]]),
        array([.76, .24]),
    ]
    jn1 = [
        ['1', '2', '3'],
        array([[1.2, 0.1, -1.2], [-2.5, -4.0, 4.5]]),
        array([0.80, .20]),
    ]
    jn2 = [
        ['1', '2', '3'],
        array([[-1.4, 0.05, 1.3], [2.6, 4.1, -4.7]]),
        array([0.76, .24]),
    ]
    jn3 = [
        ['1', '2', '3'],
        array([[-1.5, 0.05, 1.6], [2.4, 4.0, -4.8]]),
        array([0.84, .16]),
    ]
    # jn4 deliberately repeats the values of jn3
    jn4 = [
        ['1', '2', '3'],
        array([[-1.5, 0.05, 1.6], [2.4, 4.0, -4.8]]),
        array([0.84, .16]),
    ]
    support_pcoas = [jn1, jn2, jn3, jn4]
    master_coords = master_pcoa[1]

    # test with the ideal_fourths option
    summary = summarize_pcoas(master_pcoa, support_pcoas, 'ideal_fourths',
                              apply_procrustes=False)
    mat_avg, mat_low, mat_high, eig_avg, sample_names = summary
    self.assertEqual(sample_names, ['1', '2', '3'])
    self.assertFloatEqual(mat_avg[0, 0], -1.4)
    self.assertFloatEqual(mat_avg[0, 1], 0.0125)
    self.assertFloatEqual(mat_low[0, 0], -1.5)
    self.assertFloatEqual(mat_high[0, 0], -1.28333333)
    self.assertFloatEqual(mat_low[0, 1], -0.0375)
    self.assertFloatEqual(mat_high[0, 1], 0.05)
    self.assertFloatEqual(eig_avg[0], 0.81)
    self.assertFloatEqual(eig_avg[1], 0.19)

    # test with the IQR option
    summary = summarize_pcoas(master_pcoa, support_pcoas, method='IQR',
                              apply_procrustes=False)
    mat_avg, mat_low, mat_high, eig_avg, sample_names = summary
    self.assertFloatEqual(mat_low[0, 0], -1.5)
    self.assertFloatEqual(mat_high[0, 0], -1.3)

    # test with procrustes option followed by sdev; only the second value
    # returned by procrustes (the fitted support matrix) is used
    fitted = []
    for jackknifed in support_pcoas:
        _, transformed, _ = procrustes(master_coords, jackknifed[1])
        fitted.append(transformed)

    summary = summarize_pcoas(master_pcoa, support_pcoas, method='sdev',
                              apply_procrustes=True)
    mat_avg, mat_low, mat_high, eig_avg, sample_names = summary

    x = array([fitted[0][0, 0], fitted[1][0, 0],
               fitted[2][0, 0], fitted[3][0, 0]])
    half_sdev = x.std(ddof=1) / 2
    self.assertEqual(x.mean(), mat_avg[0, 0])
    self.assertEqual(-half_sdev, mat_low[0, 0])
    self.assertEqual(half_sdev, mat_high[0, 0])
def get_coord(coord_fname, method="IQR"):
    """Opens and returns coords location matrix and metadata.

    Also two spread matrices (+/-) if passed a dir of coord files.
    If only a single coord file, spread matrices are returned as None.

    Inputs:
    coord_fname: path to a coordinates file, or to a directory that
    load_pcoa_files can read
    method: forwarded to summarize_pcoas as ``method``; only used when
    coord_fname is a directory

    Returns a six element list:
    [coord_header, coords, eigvals, pct_var, coords_low, coords_high]

    Raises:
    MissingFileError if coord_fname is not a directory and opening it fails
    with a TypeError or IOError
    """
    if not os.path.isdir(coord_fname):
        try:
            # NOTE: the 'U' mode flag is kept for universal newlines on
            # Python 2; it is no longer accepted by Python 3.11+
            coord_f = open(coord_fname, 'U')
        except (TypeError, IOError):
            raise MissingFileError('Coord file required for this analysis')

        # only the open() call is guarded above, errors raised while parsing
        # propagate unchanged; the handle is always released
        try:
            coord_header, coords, eigvals, pct_var = parse_coords(coord_f)
        finally:
            coord_f.close()
        return [coord_header, coords, eigvals, pct_var, None, None]

    master_pcoa, support_pcoas = load_pcoa_files(coord_fname)

    # get Summary statistics; the identifiers returned by summarize_pcoas are
    # not used, the header is taken from the master file instead
    coords, coords_low, coords_high, eigval_average, _ = \
        summarize_pcoas(master_pcoa, support_pcoas, method=method)

    pct_var = master_pcoa[3]  # should be getting this from an average

    # make_3d_plots expects coord_header to be a python list
    coord_header = list(master_pcoa[0])
    return [coord_header, coords, eigval_average, pct_var, coords_low,
            coords_high]
def get_coord(coord_fname, method="IQR"):
    """Opens and returns coords location matrix and metadata.

    Also two spread matrices (+/-) if passed a dir of coord files.
    If only a single coord file, spread matrices are returned as None.

    Inputs:
    coord_fname: path to a single coordinates file or to a directory of
    coordinate files (read with load_pcoa_files)
    method: summary method handed to summarize_pcoas; ignored for a single
    coordinates file

    Returns:
    list of [coord_header, coords, eigvals, pct_var, coords_low, coords_high]
    where the last two elements are None for a single coordinates file

    Raises:
    MissingFileError if the single coordinates file cannot be opened
    """
    if os.path.isdir(coord_fname):
        master_pcoa, support_pcoas = load_pcoa_files(coord_fname)

        # get Summary statistics; the last returned value (identifiers) is
        # discarded because the header comes from the master file
        coords, coords_low, coords_high, eigval_average, _ = \
            summarize_pcoas(master_pcoa, support_pcoas, method=method)

        pct_var = master_pcoa[3]  # should be getting this from an average

        # make_3d_plots expects coord_header to be a python list
        coord_header = list(master_pcoa[0])
        return [coord_header, coords, eigval_average, pct_var, coords_low,
                coords_high]

    try:
        # NOTE: 'U' (universal newlines) is a Python 2 era flag; Python 3.11+
        # rejects it
        coord_f = open(coord_fname, 'U')
    except (TypeError, IOError):
        raise MissingFileError('Coord file required for this analysis')

    # the with block sits outside the try above so that only a failure to
    # open is reported as a missing file; it closes the handle on exit
    with coord_f:
        coord_header, coords, eigvals, pct_var = parse_coords(coord_f)
    return [coord_header, coords, eigvals, pct_var, None, None]
def generate_2d_plots(prefs, data, html_dir_path, data_dir_path, filename,
                      background_color, label_color, generate_scree):
    """Generate interactive 2D scatterplots

    For every color group yielded by iter_color_groups(data['map'], prefs)
    the PC1 vs PC2, PC3 vs PC2 and PC1 vs PC3 plots are drawn with
    draw_pcoa_graph and a radio button is added for the group. Only the
    plots of the first group are embedded as a table. The resulting HTML is
    written to create_html_filename(filename, '.html') inside html_dir_path.

    Inputs:
    prefs: preferences passed to iter_color_groups and draw_pcoa_graph
    data: dict read for the keys 'map' and 'coord' and, optionally,
    'support_pcoas' together with 'ellipsoid_method'. When 'support_pcoas'
    is present data['coord'] is replaced in place by the summarized
    coordinates
    html_dir_path: directory where the HTML file is written
    data_dir_path: directory under which one output directory per group is
    created
    filename: name used to build the HTML file name
    background_color, label_color: forwarded to the drawing functions
    generate_scree: if true a scree plot is appended to the page
    """
    coord_tups = [("1", "2"), ("3", "2"), ("1", "3")]
    mapping = data['map']
    out_table = ''

    # Iterate through prefs and generate html files for each colorby option
    # Sort by the column name first
    sample_location = {}
    radiobuttons = []

    groups_and_colors = list(iter_color_groups(mapping, prefs))
    for group_index, group_entry in enumerate(groups_and_colors):
        labelname = group_entry[0]          # e.g. 'EnvoID'
        groups = group_entry[1]             # group name -> sample ids
        colors = group_entry[2]             # group name -> color name
        data_colors = group_entry[3]        # color name -> Color object
        data_color_order = group_entry[4]   # list of color names

        data_file_dir_path = get_random_directory_name(
            output_dir=data_dir_path)

        # link relative to the parent of the directory holding the plots
        new_link = os.path.split(data_file_dir_path)
        data_file_link = os.path.join('.', os.path.split(new_link[-2])[-1],
                                      new_link[-1])

        img_data = {}

        if 'support_pcoas' in data:
            matrix_average, matrix_low, matrix_high, _, m_names = \
                summarize_pcoas(data['coord'], data['support_pcoas'],
                                method=data['ellipsoid_method'])
            data['coord'] = \
                (m_names, matrix_average, data['coord'][2], data['coord'][3])

            # a dedicated loop variable is used here so the group index
            # checked further down is not overwritten
            for position, sample_id in enumerate(m_names):
                sample_location[sample_id] = position
        else:
            matrix_average = None
            matrix_low = None
            matrix_high = None

        # change, if you want more than one PCoA plot! (i.e involving PC3)
        for coord_tup in coord_tups:
            if isarray(matrix_low) and isarray(matrix_high) and \
                    isarray(matrix_average):
                coord_1r = asarray(matrix_low)
                coord_2r = asarray(matrix_high)
                mat_ave = asarray(matrix_average)
            else:
                coord_1r = None
                coord_2r = None
                mat_ave = None
                sample_location = None

            coord_1, coord_2 = coord_tup
            img_data[coord_tup] = draw_pcoa_graph(
                labelname, data_file_dir_path, data_file_link, coord_1,
                coord_2, coord_1r, coord_2r, mat_ave, sample_location, data,
                prefs, groups, colors, background_color, label_color,
                data_colors, data_color_order, generate_eps=True)

        radiobuttons.append(RADIO % (data_file_link, labelname))

        if group_index == 0:  # only create first table!
            out_table += TABLE_HTML % (labelname,
                                       "<br>".join(img_data[("1", "2")]),
                                       "<br>".join(img_data[("3", "2")]),
                                       "<br>".join(img_data[("1", "3")]))

    if generate_scree:
        data_file_dir_path = get_random_directory_name(
            output_dir=data_dir_path)
        new_link = os.path.split(data_file_dir_path)
        data_file_link = os.path.join('.', os.path.split(new_link[-2])[-1],
                                      new_link[-1])

        img_src, download_link = draw_scree_graph(
            data_file_dir_path, data_file_link, background_color,
            label_color, generate_eps=True, data=data)

        out_table += SCREE_TABLE_HTML % ("<br>".join((img_src,
                                                      download_link)))

    # radio buttons go first, followed by the plot tables
    out_table = "\n".join(radiobuttons) + out_table
    outfile = create_html_filename(filename, '.html')
    outfile = os.path.join(html_dir_path, outfile)

    write_html_file(out_table, outfile)
def generate_2d_plots(prefs, data, html_dir_path, data_dir_path, filename,
                      background_color, label_color, generate_scree):
    """Generate interactive 2D scatterplots

    For every color group yielded by iter_color_groups(data['map'], prefs)
    the PC1 vs PC2, PC3 vs PC2 and PC1 vs PC3 plots are drawn with
    draw_pcoa_graph and appended to the page as one table per group. The
    HTML is written to create_html_filename(filename, '.html') inside
    html_dir_path.

    Inputs:
    prefs: preferences passed to iter_color_groups and draw_pcoa_graph
    data: dict read for the keys 'map' and 'coord' and, optionally,
    'support_pcoas' together with 'ellipsoid_method'. When 'support_pcoas'
    is present data['coord'] is replaced in place by the summarized
    coordinates
    html_dir_path: directory where the HTML file is written
    data_dir_path: directory under which one output directory per group is
    created
    filename: name used to build the HTML file name
    background_color, label_color: forwarded to the drawing functions
    generate_scree: if true a scree plot is appended to the page
    """
    coord_tups = [("1", "2"), ("3", "2"), ("1", "3")]
    mapping = data['map']
    out_table = ''

    # Iterate through prefs and generate html files for each colorby option
    # Sort by the column name first
    sample_location = {}

    groups_and_colors = list(iter_color_groups(mapping, prefs))
    for group_entry in groups_and_colors:
        labelname = group_entry[0]
        groups = group_entry[1]
        colors = group_entry[2]
        data_colors = group_entry[3]
        data_color_order = group_entry[4]

        data_file_dir_path = get_random_directory_name(
            output_dir=data_dir_path)

        # link relative to the parent of the directory holding the plots
        new_link = os.path.split(data_file_dir_path)
        data_file_link = os.path.join('.', os.path.split(new_link[-2])[-1],
                                      new_link[-1])

        img_data = {}

        # `in` instead of dict.has_key, which does not exist on Python 3
        if 'support_pcoas' in data:
            matrix_average, matrix_low, matrix_high, _, m_names = \
                summarize_pcoas(data['coord'], data['support_pcoas'],
                                method=data['ellipsoid_method'])
            data['coord'] = \
                (m_names, matrix_average, data['coord'][2], data['coord'][3])

            # map every sample identifier to its row position
            for position, sample_id in enumerate(m_names):
                sample_location[sample_id] = position
        else:
            matrix_average = None
            matrix_low = None
            matrix_high = None

        for coord_tup in coord_tups:
            if isarray(matrix_low) and isarray(matrix_high) and \
                    isarray(matrix_average):
                coord_1r = asarray(matrix_low)
                coord_2r = asarray(matrix_high)
                mat_ave = asarray(matrix_average)
            else:
                coord_1r = None
                coord_2r = None
                mat_ave = None
                sample_location = None

            coord_1, coord_2 = coord_tup
            img_data[coord_tup] = draw_pcoa_graph(
                labelname, data_file_dir_path, data_file_link, coord_1,
                coord_2, coord_1r, coord_2r, mat_ave, sample_location, data,
                prefs, groups, colors, background_color, label_color,
                data_colors, data_color_order, generate_eps=True)

        out_table += TABLE_HTML % (labelname,
                                   "<br>".join(img_data[("1", "2")]),
                                   "<br>".join(img_data[("3", "2")]),
                                   "<br>".join(img_data[("1", "3")]))

    if generate_scree:
        data_file_dir_path = get_random_directory_name(
            output_dir=data_dir_path)
        new_link = os.path.split(data_file_dir_path)
        data_file_link = os.path.join('.', os.path.split(new_link[-2])[-1],
                                      new_link[-1])

        img_src, download_link = draw_scree_graph(
            data_file_dir_path, data_file_link, background_color,
            label_color, generate_eps=True, data=data)

        out_table += SCREE_TABLE_HTML % ("<br>".join((img_src,
                                                      download_link)))

    outfile = create_html_filename(filename, '.html')
    outfile = os.path.join(html_dir_path, outfile)

    write_html_file(out_table, outfile)
def generate_2d_plots(prefs, data, html_dir_path, data_dir_path, filename,
                      background_color, label_color, generate_scree):
    """Generate interactive 2D scatterplots

    Draws three axis combinations for each color group obtained from
    iter_color_groups, adds one table of plots per group and, if
    generate_scree is true, a scree plot. The page is written with
    write_html_file into html_dir_path.
    """
    axis_pairs = [("1", "2"), ("3", "2"), ("1", "3")]
    mapping = data['map']
    html_parts = []

    # Iterate through prefs and generate html files for each colorby option
    # Sort by the column name first
    sample_location = {}

    for entry in list(iter_color_groups(mapping, prefs)):
        labelname = entry[0]
        groups = entry[1]
        colors = entry[2]
        data_colors = entry[3]
        data_color_order = entry[4]

        # fresh output directory for this group, linked relative to the
        # parent of data_dir_path's last component
        data_file_dir_path = mkdtemp(dir=data_dir_path)
        parent_path, dir_name = os.path.split(data_file_dir_path)
        data_file_link = os.path.join('.', os.path.split(parent_path)[-1],
                                      dir_name)

        matrix_average = matrix_low = matrix_high = None
        if 'support_pcoas' in data:
            matrix_average, matrix_low, matrix_high, _, m_names = \
                summarize_pcoas(data['coord'], data['support_pcoas'],
                                method=data['ellipsoid_method'])
            previous = data['coord']
            data['coord'] = (m_names, matrix_average, previous[2],
                             previous[3])
            for position, sample_id in enumerate(m_names):
                sample_location[sample_id] = position

        img_data = {}
        for coord_1, coord_2 in axis_pairs:
            has_spread = (isarray(matrix_low) and isarray(matrix_high) and
                          isarray(matrix_average))
            if has_spread:
                coord_1r = asarray(matrix_low)
                coord_2r = asarray(matrix_high)
                mat_ave = asarray(matrix_average)
            else:
                coord_1r = coord_2r = mat_ave = None
                # without spread matrices no sample locations are passed on
                sample_location = None

            img_data[(coord_1, coord_2)] = draw_pcoa_graph(
                labelname, data_file_dir_path, data_file_link,
                coord_1, coord_2, coord_1r, coord_2r, mat_ave,
                sample_location, data, prefs, groups, colors,
                background_color, label_color, data_colors,
                data_color_order, generate_eps=True)

        cells = ["<br>".join(img_data[pair]) for pair in axis_pairs]
        html_parts.append(TABLE_HTML % (labelname, cells[0], cells[1],
                                        cells[2]))

    if generate_scree:
        data_file_dir_path = mkdtemp(dir=data_dir_path)
        parent_path, dir_name = os.path.split(data_file_dir_path)
        data_file_link = os.path.join('.', os.path.split(parent_path)[-1],
                                      dir_name)

        img_src, download_link = draw_scree_graph(
            data_file_dir_path, data_file_link, background_color,
            label_color, generate_eps=True, data=data)

        html_parts.append(
            SCREE_TABLE_HTML % ("<br>".join((img_src, download_link))))

    out_table = ''.join(html_parts)
    outfile = os.path.join(html_dir_path,
                           create_html_filename(filename, '.html'))

    write_html_file(out_table, outfile)
def preprocess_coords_file(coords_header, coords_data, coords_eigenvals,
                           coords_pct, mapping_header, mapping_data,
                           custom_axes=None, jackknifing_method=None,
                           is_comparison=False):
    """Process a PCoA data and handle customizations in the contents

    Inputs:
    coords_header: list of sample identifiers in the PCoA file _or_ list of
    lists with sample identifiers for each coordinate file (if jackknifing or
    comparing plots)
    coords_data: matrix of coordinates in the PCoA file _or_ list of numpy
    arrays with coordinates for each file (if jackknifing or comparing plots)
    coords_eigenvals: numpy array with eigenvalues for the coordinates file
    _or_ list of numpy arrays with the eigenvalues (if jackknifing or comparing
    plots)
    coords_pct: numpy array with the percent explained by each principal
    coordinates axis _or_ a list of lists with numpy arrays (if jackknifing or
    comparing plots)
    mapping_header: mapping file headers names
    mapping_data: mapping file data
    custom_axes: name of the mapping data fields to add to coords_data
    jackknifing_method: one of 'sdev' or 'IQR', defaults to None, for more info
    see qiime.util.summarize_pcoas
    is_comparison: whether or not the inputs should be considered as the ones
    for a comparison plot

    Outputs:
    coords_header: list of sample identifiers in the PCoA file
    coords_data: matrix of coordinates in the PCoA file with custom_axes if
    provided
    coords_eigenvalues: either the eigenvalues of the input coordinates or the
    average eigenvalues of the multiple coords that were passed in
    coords_pct: list of percents explained by each axis as given by the master
    coordinates i. e. the center around where the values revolve
    coords_low: coordinates representing the lower edges of an ellipse; None if
    no jackknifing is applied
    coords_high: coordinates representing the higher edges of an ellipse; None
    if no jackknifing is applied
    clones: total number of input files

    This controller function handles any customization that has to be done to
    the PCoA data prior to the formatting. Note that the first element in each
    list (coords, headers, eigenvalues & percents) will be considered the
    master set of coordinates.

    Raises: AssertionError if a comparison plot is requested but a list of data
    is not passed as input
    """
    multiple_inputs = isinstance(coords_data, list)

    # prevent obscure and obfuscated errors; raised explicitly instead of with
    # an assert statement so the check also runs under `python -O`
    if is_comparison and not multiple_inputs:
        raise AssertionError("Cannot process a comparison with " +
                             "the data from a single coordinates file")

    mapping_file = [mapping_header] + mapping_data
    coords_file = [coords_header, coords_data]

    # number PCoA files; zero for any case except for comparison plots
    clones = 0

    if custom_axes and isinstance(coords_data, ndarray):
        # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
        get_custom_coords(custom_axes, mapping_file, coords_file)
        remove_nans(coords_file)
        scale_custom_coords(custom_axes, coords_file)

    elif multiple_inputs and not is_comparison:
        # take the first pcoa file as the master set of coordinates
        master_pcoa = [coords_header[0], coords_data[0],
                       coords_eigenvals[0], coords_pct[0]]

        # support pcoas must be a list of lists where each list contain
        # all the elements that compose a coordinates file
        support_pcoas = [[h, d, e, p] for h, d, e, p in zip(
            coords_header, coords_data, coords_eigenvals, coords_pct)]

        # do not apply procrustes, at least not for now
        coords_data, coords_low, coords_high, eigenvalues_average,\
            identifiers = summarize_pcoas(master_pcoa, support_pcoas,
                                          method=jackknifing_method,
                                          apply_procrustes=False)

        # custom axes and jackknifing is a tricky thing to do, you only have
        # to add the custom values to the master file which is represented as
        # the coords_data return value. Since there is really no variation in
        # that axis then you have to change the values of coords_high and of
        # coords_low to something really small so that WebGL work properly
        if custom_axes:
            coords_file = [master_pcoa[0], coords_data]
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

            # this opens support for as many custom axes as needed
            axes = len(custom_axes)
            coords_low[:, 0:axes] = zeros([coords_low.shape[0], axes])
            coords_high[:, 0:axes] = ones([coords_high.shape[0], axes]) \
                * 0.00001
            coords_data = coords_file[1]

        # return a value containing coords_low and coords_high
        return identifiers, coords_data, eigenvalues_average, \
            master_pcoa[3], coords_low, coords_high, clones

    # comparison plots are processed almost individually
    elif multiple_inputs:
        # indicates the number of files that were totally processed so other
        # functions/APIs are aware of how many times to replicate the metadata
        clones = len(coords_data)
        out_headers, out_coords = [], []

        for index in range(clones):
            headers_i = coords_header[index]
            coords_i = coords_data[index]

            # tag each header with the number in which those coords came in
            out_headers.extend(
                [element + '_%d' % index for element in headers_i])

            if index == 0:
                # numpy can only stack things if they have the same shape
                out_coords = coords_i

                # the eigenvalues and percents explained are really the ones
                # belonging to the first set of coordinates that was passed
                coords_eigenvals = coords_eigenvals[index]
                coords_pct = coords_pct[index]
            else:
                out_coords = vstack((out_coords, coords_i))

        coords_file = [out_headers, out_coords]

        if custom_axes:
            # this condition deals with the fact that in order for the custom
            # axes to be added into the original coordinates, we have to add
            # the suffix for the sample identifiers that the coordinates have
            if clones:
                out_data = []
                for index in range(clones):
                    out_data.extend(
                        [[element[0] + '_%d' % index] + element[1::]
                         for element in mapping_data])
                mapping_file = [mapping_header] + out_data

            # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

    # if no coords summary is applied, return None in the corresponding values
    # note that the value of clones will be != 0 for a comparison plot
    return coords_file[0], coords_file[1], coords_eigenvals, coords_pct, \
        None, None, clones
def preprocess_coords_file(
    coords_header,
    coords_data,
    coords_eigenvals,
    coords_pct,
    mapping_header,
    mapping_data,
    custom_axes=None,
    jackknifing_method=None,
    is_comparison=False,
):
    """Process a PCoA data and handle customizations in the contents

    Inputs:
    coords_header: list of sample identifiers in the PCoA file _or_ list of
    lists with sample identifiers for each coordinate file (if jackknifing or
    comparing plots)
    coords_data: matrix of coordinates in the PCoA file _or_ list of numpy
    arrays with coordinates for each file (if jackknifing or comparing plots)
    coords_eigenvals: numpy array with eigenvalues for the coordinates file
    _or_ list of numpy arrays with the eigenvalues (if jackknifing or comparing
    plots)
    coords_pct: numpy array with the percent explained by each principal
    coordinates axis _or_ a list of lists with numpy arrays (if jackknifing or
    comparing plots)
    mapping_header: mapping file headers names
    mapping_data: mapping file data
    custom_axes: name of the mapping data fields to add to coords_data
    jackknifing_method: one of 'sdev' or 'IQR', defaults to None, for more info
    see qiime.util.summarize_pcoas
    is_comparison: whether or not the inputs should be considered as the ones
    for a comparison plot

    Outputs:
    coords_header: list of sample identifiers in the PCoA file
    coords_data: matrix of coordinates in the PCoA file with custom_axes if
    provided
    coords_eigenvalues: either the eigenvalues of the input coordinates or the
    average eigenvalues of the multiple coords that were passed in
    coords_pct: list of percents explained by each axis as given by the master
    coordinates i. e. the center around where the values revolve
    coords_low: coordinates representing the lower edges of an ellipse; None if
    no jackknifing is applied
    coords_high: coordinates representing the higher edges of an ellipse; None
    if no jackknifing is applied
    clones: total number of input files

    This controller function handles any customization that has to be done to
    the PCoA data prior to the formatting. Note that the first element in each
    list (coords, headers, eigenvalues & percents) will be considered the
    master set of coordinates.

    Raises: AssertionError if a comparison plot is requested but a list of data
    is not passed as input
    """
    several_files = isinstance(coords_data, list)

    # prevent obscure and obfuscated errors; an explicit raise keeps the
    # documented AssertionError even when assert statements are disabled
    if is_comparison and not several_files:
        raise AssertionError(
            "Cannot process a comparison with " + "the data from a single coordinates file"
        )

    mapping_file = [mapping_header] + mapping_data
    coords_file = [coords_header, coords_data]

    # number PCoA files; zero for any case except for comparison plots
    clones = 0

    if custom_axes and isinstance(coords_data, ndarray):
        _apply_custom_axes(custom_axes, mapping_file, coords_file)

    elif several_files and not is_comparison:
        # take the first pcoa file as the master set of coordinates
        master_pcoa = [coords_header[0], coords_data[0], coords_eigenvals[0], coords_pct[0]]

        # support pcoas must be a list of lists where each list contain
        # all the elements that compose a coordinates file
        support_pcoas = [
            [h, d, e, p]
            for h, d, e, p in zip(coords_header, coords_data, coords_eigenvals, coords_pct)
        ]

        # do not apply procrustes, at least not for now
        coords_data, coords_low, coords_high, eigenvalues_average, identifiers = summarize_pcoas(
            master_pcoa, support_pcoas, method=jackknifing_method, apply_procrustes=False
        )

        # custom axes and jackknifing is a tricky thing to do, you only have
        # to add the custom values to the master file which is represented as
        # the coords_data return value. Since there is really no variation in
        # that axis then you have to change the values of coords_high and of
        # coords_low to something really small so that WebGL work properly
        if custom_axes:
            coords_file = [master_pcoa[0], coords_data]
            _apply_custom_axes(custom_axes, mapping_file, coords_file)

            # this opens support for as many custom axes as needed
            axes = len(custom_axes)
            coords_low[:, 0:axes] = zeros([coords_low.shape[0], axes])
            coords_high[:, 0:axes] = ones([coords_high.shape[0], axes]) * 0.00001
            coords_data = coords_file[1]

        # return a value containing coords_low and coords_high
        return (
            identifiers,
            coords_data,
            eigenvalues_average,
            master_pcoa[3],
            coords_low,
            coords_high,
            clones,
        )

    # comparison plots are processed almost individually
    elif several_files:
        # indicates the number of files that were totally processed so other
        # functions/APIs are aware of how many times to replicate the metadata
        clones = len(coords_data)
        out_headers, out_coords = [], []

        for index in range(clones):
            headers_i = coords_header[index]
            coords_i = coords_data[index]

            # tag each header with the number in which those coords came in
            out_headers.extend([element + "_%d" % index for element in headers_i])

            if index == 0:
                # numpy can only stack things if they have the same shape
                out_coords = coords_i

                # the eigenvalues and percents explained are really the ones
                # belonging to the first set of coordinates that was passed
                coords_eigenvals = coords_eigenvals[index]
                coords_pct = coords_pct[index]
            else:
                out_coords = vstack((out_coords, coords_i))

        coords_file = [out_headers, out_coords]

        if custom_axes:
            # this condition deals with the fact that in order for the custom
            # axes to be added into the original coordinates, we have to add
            # the suffix for the sample identifiers that the coordinates have
            if clones:
                out_data = []
                for index in range(clones):
                    out_data.extend(
                        [[element[0] + "_%d" % index] + element[1::] for element in mapping_data]
                    )
                mapping_file = [mapping_header] + out_data

            _apply_custom_axes(custom_axes, mapping_file, coords_file)

    # if no coords summary is applied, return None in the corresponding values
    # note that the value of clones will be != 0 for a comparison plot
    return coords_file[0], coords_file[1], coords_eigenvals, coords_pct, None, None, clones


def _apply_custom_axes(custom_axes, mapping_file, coords_file):
    """Run the custom axes sequence on coords_file (a [headers, data] list)"""
    # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
    get_custom_coords(custom_axes, mapping_file, coords_file)
    remove_nans(coords_file)
    scale_custom_coords(custom_axes, coords_file)