def test_remove_nans(self):
    """remove_nans: drops every sample that has a NaN among its coordinates"""
    clean_first = [10, 60, -0.219044992, 0.079674486, 0.09233683]
    with_nan = [20, 55, -0.042258081, nan, 0.024837603]
    clean_last = [30, 50, 0.080504323, -0.212014503, -0.088353435]

    # [header, data] pair; the call below returns nothing that is used, the
    # pair itself is inspected afterwards
    observed = [self.coord_header, array([clean_first, with_nan, clean_last])]
    remove_nans(observed)

    # only the rows without a NaN (and their identifiers) are expected
    self.assertEqual(observed[0], ["Sample1", "Sample3"])
    self.assertEqual(observed[1], array([clean_first, clean_last]))
def test_remove_nans(self):
    """remove_nans: samples whose coordinates hold a NaN are deleted"""
    rows = [
        [10, 60, -0.219044992, 0.079674486, 0.09233683],
        [20, 55, -0.042258081, nan, 0.024837603],
        [30, 50, 0.080504323, -0.212014503, -0.088353435],
    ]
    # the middle row is the only one that contains a NaN
    expected_header = ["Sample1", "Sample3"]
    expected_data = array([rows[0], rows[2]])

    coords = [self.coord_header, array(rows)]
    remove_nans(coords)

    obs_header = coords[0]
    obs_data = coords[1]
    self.assertEqual(obs_header, expected_header)
    self.assertEqual(obs_data, expected_data)
def main():
    """Command-line entry point that plots several coordinate files together.

    Parses the options described by ``script_info``, loads and validates the
    coordinate files, makes the mapping data match the suffixed sample
    identifiers, optionally adds custom axes, prepares the output directories
    (including a copy of ``king.jar``) and finally calls
    ``generate_3d_plots``.

    Raises:
        ValueError: if ``data['coord'][1]`` is empty once the samples that are
            missing from the mapping file have been removed.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    prefs, data, background_color, label_color, ball_scale, arrow_colors = \
        sample_color_prefs_and_map_data_from_options(opts)

    # a single coordinate file is only acceptable with a custom edges file
    if len(opts.coord_fnames.split(',')) < 2 and opts.edges_file is None:
        option_parser.error('Please provide at least two '
                            'coordinate files or a custom edges file')

    # Open and get coord data (for multiple coords files)
    coord_files = process_coord_filenames(opts.coord_fnames)
    if not validate_coord_files(coord_files):
        option_parser.error('Every line of every coord file must '
                            'have the same number of columns.')
    data['edges'], data['coord'] = \
        get_multiple_coords(coord_files, opts.edges_file, opts.serial)

    # if the edges file wasn't supplied, we appended _i to each file's samples
    # therefore we now add duplicated samples with _0, _1,... to mapping file
    if opts.edges_file is None:
        newmap = [data['map'][0]]
        for file_index in range(len(coord_files)):
            for sample in data['map'][1:]:
                newmap.append(['%s_%d' % (sample[0], file_index)] +
                              list(sample[1:]))
        data['map'] = newmap

    # remove any samples not present in mapping file
    remove_unmapped_samples(data['map'], data['coord'], data['edges'])

    # len() rather than truthiness: the coordinates may be a numpy array
    if len(data['coord'][1]) == 0:
        raise ValueError('\n\nError: None of the sample IDs in the coordinates files were present in the mapping file.\n')

    # process custom axes, if present.
    custom_axes = None
    if opts.custom_axes:
        custom_axes = process_custom_axes(opts.custom_axes)
        get_custom_coords(custom_axes, data['map'], data['coord'])
        remove_nans(data['coord'])
        scale_custom_coords(custom_axes, data['coord'])

    # Generate random output file name and create directories
    if opts.output_dir:
        create_dir(opts.output_dir)
        dir_path = opts.output_dir
    else:
        dir_path = './'

    qiime_dir = get_qiime_project_dir()
    jar_path = os.path.join(qiime_dir, 'qiime/support_files/jar/')

    data_dir_path = get_random_directory_name(output_dir=dir_path,
                                              return_absolute_path=False)
    jar_dir_path = os.path.join(dir_path, 'jar')
    # best effort on purpose: an OSError from mkdir (e.g. the directory is
    # already there) is ignored for both directories
    for needed_dir in (data_dir_path, jar_dir_path):
        try:
            os.mkdir(needed_dir)
        except OSError:
            pass

    shutil.copyfile(os.path.join(jar_path, 'king.jar'),
                    os.path.join(jar_dir_path, 'king.jar'))

    # the name of the first coordinate file is passed on as the file name
    filepath = coord_files[0]
    filename = filepath.strip().split('/')[-1]

    try:
        action = generate_3d_plots
    except NameError:
        action = None
    # Place this outside try/except so we don't mask NameError in action
    if action:
        action(prefs, data, custom_axes, background_color, label_color,
               dir_path, data_dir_path, filename, ball_scale=ball_scale,
               arrow_colors=arrow_colors,
               user_supplied_edges=opts.edges_file is not None)
def main():
    """Command-line entry point that builds 3D plots from a coordinates file.

    After printing a deprecation warning the options described by
    ``script_info`` are parsed.  For the ``invue`` output format the
    coordinates are loaded and handed to ``generate_3d_plots_invue``; for any
    other format the ellipsoid, custom axes, vectors and taxa options are
    validated and processed, the output directories are prepared (including a
    copy of ``king.jar``) and ``generate_3d_plots`` is called.

    Raises:
        ValueError: if more than three columns are passed to add_vectors, if
            vectors_axes is out of range, if vectors_path contains a character
            outside of ``[_.a-zA-Z0-9]`` or if window_size is lower than one.
        TypeError: if window_size cannot be converted with ``int`` because of
            its type (e. g. it was not supplied).
    """
    print("\nWarning: make_3d_plots.py is being deprecated in favor of make_emperor.py, and will no longer be available in QIIME 1.8.0-dev.\n")
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    prefs, data, background_color, label_color, ball_scale, arrow_colors = \
        sample_color_prefs_and_map_data_from_options(opts)

    # NOTE(review): these membership tests assume opts.scaling_method is a
    # list of names; with a plain string 'scaled' would also match
    # 'unscaled' -- confirm the option type
    plot_scaled = 'scaled' in opts.scaling_method
    plot_unscaled = 'unscaled' in opts.scaling_method

    if opts.output_format == 'invue':
        # validating the number of points for interpolation
        if opts.interpolation_points < 0:
            option_parser.error('The --interpolation_points should be '
                                'greater or equal to 0.')

        # make sure that coord file has internally consistent # of columns
        if not validate_coord_files(opts.coord_fname):
            option_parser.error('Every line of every coord file must '
                                'have the same number of columns.')

        # Open and get coord data
        data['coord'] = get_coord(opts.coord_fname, opts.ellipsoid_method)

        # remove any samples not present in mapping file
        remove_unmapped_samples(data['map'], data['coord'])

        # if no samples overlapped between mapping file and otu table, exit
        if len(data['coord'][0]) == 0:
            print("\nError: OTU table and mapping file had no samples in common\n")
            exit(1)

        if opts.output_dir:
            create_dir(opts.output_dir, False)
            dir_path = opts.output_dir
        else:
            dir_path = './'

        # for a directory of coordinate files the first non-hidden entry
        # gives the file name
        filepath = opts.coord_fname
        if os.path.isdir(filepath):
            coord_files = [fname for fname in os.listdir(filepath)
                           if not fname.startswith('.')]
            filename = os.path.split(coord_files[0])[-1]
        else:
            filename = os.path.split(filepath)[-1]

        generate_3d_plots_invue(prefs, data, dir_path, filename,
                                opts.interpolation_points,
                                opts.polyhedron_points,
                                opts.polyhedron_offset)

        # finish script
        return

    # Potential conflicts
    if opts.custom_axes is not None and os.path.isdir(opts.coord_fname):
        # can't do averaged pcoa plots _and_ custom axes in the same plot
        option_parser.error("Please supply either custom axes or multiple "
                            "coordinate files, but not both.")

    # check that smoothness is an integer between 0 and 3; only conversion
    # failures are caught so that unrelated errors are not hidden
    try:
        ellipsoid_smoothness = int(opts.ellipsoid_smoothness)
    except (TypeError, ValueError):
        option_parser.error("Please supply an integer ellipsoid smoothness "
                            "value.")
    if ellipsoid_smoothness < 0 or ellipsoid_smoothness > 3:
        option_parser.error("Please supply an ellipsoid smoothness value "
                            "between 0 and 3.")

    # check that opacity is a float between 0 and 1
    try:
        ellipsoid_alpha = float(opts.ellipsoid_opacity)
    except (TypeError, ValueError):
        option_parser.error("Please supply a number for ellipsoid opacity.")
    if ellipsoid_alpha < 0 or ellipsoid_alpha > 1:
        option_parser.error("Please supply an ellipsoid opacity value "
                            "between 0 and 1.")

    # check that ellipsoid method is valid
    ellipsoid_methods = ['IQR', 'sdev']
    if opts.ellipsoid_method not in ellipsoid_methods:
        option_parser.error("Please supply a valid ellipsoid method. "
                            "Valid methods are: " +
                            ', '.join(ellipsoid_methods) + ".")

    # gather ellipsoid drawing preferences
    ellipsoid_prefs = {"smoothness": ellipsoid_smoothness,
                       "alpha": ellipsoid_alpha}

    # make sure that coord file has internally consistent # of columns
    if not validate_coord_files(opts.coord_fname):
        option_parser.error('Every line of every coord file must '
                            'have the same number of columns.')

    # Open and get coord data
    data['coord'] = get_coord(opts.coord_fname, opts.ellipsoid_method)

    # remove any samples not present in mapping file
    remove_unmapped_samples(data['map'], data['coord'])

    # if no samples overlapped between mapping file and otu table, exit
    if len(data['coord'][0]) == 0:
        print("\nError: OTU table and mapping file had no samples in common\n")
        exit(1)

    # process custom axes, if present.
    custom_axes = None
    if opts.custom_axes:
        custom_axes = process_custom_axes(opts.custom_axes)
        get_custom_coords(custom_axes, data['map'], data['coord'])
        remove_nans(data['coord'])
        scale_custom_coords(custom_axes, data['coord'])

    # process vectors if requested
    if opts.add_vectors:
        add_vectors = {}
        add_vectors['vectors'] = opts.add_vectors.split(',')
        add_vectors['weight_by_vector'] = opts.weight_by_vector
        # count the requested columns, not the keys of the dictionary (which
        # always has two keys at this point)
        if len(add_vectors['vectors']) > 3:
            raise ValueError('You must add maximum 3 columns but %s' %
                             opts.add_vectors)

        # Validating Vectors values
        if opts.vectors_algorithm:
            axes_number = len(data['coord'][1][1])
            if opts.vectors_axes < 0 or opts.vectors_axes > axes_number:
                raise ValueError('vectors_axes should be between 0 and the '
                                 'max number of samples/pcoa-axes: %d' %
                                 axes_number)
            # zero stands for "use every axis"
            if opts.vectors_axes == 0:
                opts.vectors_axes = axes_number
            add_vectors['vectors_axes'] = opts.vectors_axes

            valid_chars = '_.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
            for c in opts.vectors_path:
                if c not in valid_chars:
                    raise ValueError('vectors_path (%s) has invalid chars' %
                                     opts.vectors_path)

            add_vectors['vectors_output'] = {}
            add_vectors['vectors_algorithm'] = opts.vectors_algorithm
            add_vectors['eigvals'] = data['coord'][3]
            add_vectors['window_size'] = None

            # checks specific for the modified first difference algorithm
            if add_vectors['vectors_algorithm'] == 'wdiff':
                try:
                    add_vectors['window_size'] = int(opts.window_size)
                except TypeError:
                    raise TypeError('Specify --window_size as an integer')

                # sanity check as the value can only be greater or equal to one
                if add_vectors['window_size'] < 1:
                    raise ValueError('The value of window_size is invalid, '
                                     'the value must be greater than zero, '
                                     'not %d' % add_vectors['window_size'])
        else:
            add_vectors['vectors_algorithm'] = None
        add_vectors['vectors_path'] = opts.vectors_path
    else:
        add_vectors = None

    if opts.taxa_fname is not None:
        # get taxonomy counts
        # get list of sample_ids that haven't been removed
        sample_ids = data['coord'][0]

        # get taxa summaries for all sample_ids
        lineages, taxa_counts = get_taxa(opts.taxa_fname, sample_ids)
        data['taxa'] = {}
        data['taxa']['lineages'] = lineages
        data['taxa']['counts'] = taxa_counts

        # get average relative abundance of taxa
        data['taxa']['prevalence'] = \
            get_taxa_prevalence(data['taxa']['counts'])

        # get coordinates of taxa (weighted mean of sample scores)
        data['taxa']['coord'] = get_taxa_coords(data['taxa']['counts'],
                                                data['coord'][1])

        # trim results, do NOT change order
        # check: https://github.com/qiime/qiime/issues/677
        remove_rare_taxa(data['taxa'], nkeep=opts.n_taxa_keep)

        # write taxa coords if requested
        if opts.biplot_output_file is not None:
            output = make_biplot_scores_output(data['taxa'])
            # the with block closes the file even if the write fails
            with open(opts.biplot_output_file, 'w') as fout:
                fout.write('\n'.join(output))

    if opts.output_dir:
        create_dir(opts.output_dir, False)
        dir_path = opts.output_dir
    else:
        dir_path = './'

    qiime_dir = get_qiime_project_dir()
    jar_path = os.path.join(qiime_dir, 'qiime/support_files/jar/')

    data_dir_path = get_random_directory_name(output_dir=dir_path,
                                              return_absolute_path=False)
    # best effort on purpose: an OSError from mkdir is ignored
    try:
        os.mkdir(data_dir_path)
    except OSError:
        pass

    data_file_path = data_dir_path

    jar_dir_path = os.path.join(dir_path, 'jar')
    try:
        os.mkdir(jar_dir_path)
    except OSError:
        pass

    shutil.copyfile(os.path.join(jar_path, 'king.jar'),
                    os.path.join(jar_dir_path, 'king.jar'))

    filepath = opts.coord_fname
    if os.path.isdir(filepath):
        coord_files = [fname for fname in os.listdir(filepath)
                       if not fname.startswith('.')]
        filename = os.path.split(coord_files[0])[-1]
    else:
        filename = os.path.split(filepath)[-1]

    try:
        action = generate_3d_plots
    except NameError:
        action = None
    # Place this outside try/except so we don't mask NameError in action
    if action:
        action(prefs, data, custom_axes, background_color, label_color,
               dir_path, data_file_path, filename,
               ellipsoid_prefs=ellipsoid_prefs, add_vectors=add_vectors,
               plot_scaled=plot_scaled, plot_unscaled=plot_unscaled)
def main():
    """Command-line entry point that plots several coordinate files together.

    Parses the options described by ``script_info``, validates and loads the
    coordinate files listed in ``opts.coord_fnames``, makes the mapping data
    match the suffixed sample identifiers, optionally adds custom axes,
    prepares the output directories (including a copy of ``king.jar``) and
    finally calls ``generate_3d_plots``.

    Raises:
        ValueError: if ``data['coord'][1]`` is empty once the samples that are
            missing from the mapping file have been removed.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    prefs, data, background_color, label_color, ball_scale, arrow_colors = \
        sample_color_prefs_and_map_data_from_options(opts)

    coord_files = opts.coord_fnames
    user_supplied_edges = opts.edges_file is not None

    # a single coordinate file is only acceptable with a custom edges file
    if len(coord_files) < 2 and not user_supplied_edges:
        option_parser.error('Please provide at least two '
                            'coordinate files or a custom edges file')

    # Open and get coord data (for multiple coords files)
    if not validate_coord_files(coord_files):
        option_parser.error('Every line of every coord file must '
                            'have the same number of columns.')
    data['edges'], data['coord'] = \
        get_multiple_coords(coord_files, opts.edges_file, opts.serial)

    # if the edges file wasn't supplied, we appended _i to each file's samples
    # therefore we now add duplicated samples with _0, _1,... to mapping file
    if not user_supplied_edges:
        map_header = data['map'][0]
        map_rows = data['map'][1:]
        suffixed_rows = []
        for file_index in range(len(coord_files)):
            suffixed_rows.extend(
                ['%s_%d' % (row[0], file_index)] + list(row[1:])
                for row in map_rows)
        data['map'] = [map_header] + suffixed_rows

    # remove any samples not present in mapping file
    remove_unmapped_samples(data['map'], data['coord'], data['edges'])

    # len() rather than truthiness: the coordinates may be a numpy array
    if len(data['coord'][1]) == 0:
        raise ValueError('\n\nError: None of the sample IDs in the coordinates files were present in the mapping file.\n')

    # process custom axes, if present.
    custom_axes = None
    if opts.custom_axes:
        custom_axes = process_custom_axes(opts.custom_axes)
        get_custom_coords(custom_axes, data['map'], data['coord'])
        remove_nans(data['coord'])
        scale_custom_coords(custom_axes, data['coord'])

    # Generate random output file name and create directories
    if opts.output_dir:
        create_dir(opts.output_dir)
        dir_path = opts.output_dir
    else:
        dir_path = './'

    qiime_dir = get_qiime_project_dir()
    jar_path = os.path.join(qiime_dir, 'qiime/support_files/jar/')

    data_dir_path = get_random_directory_name(output_dir=dir_path,
                                              return_absolute_path=False)
    # best effort on purpose: an OSError from mkdir is ignored
    try:
        os.mkdir(data_dir_path)
    except OSError:
        pass

    jar_dir_path = os.path.join(dir_path, 'jar')
    try:
        os.mkdir(jar_dir_path)
    except OSError:
        pass

    shutil.copyfile(os.path.join(jar_path, 'king.jar'),
                    os.path.join(jar_dir_path, 'king.jar'))

    # the name of the first coordinate file is passed on as the file name
    filename = coord_files[0].strip().split('/')[-1]

    try:
        action = generate_3d_plots
    except NameError:
        action = None
    # Place this outside try/except so we don't mask NameError in action
    if action:
        action(prefs, data, custom_axes, background_color, label_color,
               dir_path, data_dir_path, filename, ball_scale=ball_scale,
               arrow_colors=arrow_colors,
               user_supplied_edges=user_supplied_edges)
def main():
    """Command-line entry point that plots several coordinate files together.

    Prints a deprecation warning, parses the options described by
    ``script_info``, validates and loads the coordinate files listed in
    ``opts.coord_fnames``, makes the mapping data match the suffixed sample
    identifiers, optionally adds custom axes, prepares the output directories
    (including a copy of ``king.jar``) and finally calls
    ``generate_3d_plots``.

    Raises:
        ValueError: if ``data["coord"][1]`` is empty once the samples that are
            missing from the mapping file have been removed.
    """
    print("\nWarning: compare_3d_plots.py is being deprecated in favor of make_emperor.py, and will no longer be available in QIIME 1.8.0-dev.\n")
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    prefs, data, background_color, label_color, ball_scale, arrow_colors = \
        sample_color_prefs_and_map_data_from_options(opts)

    # a single coordinate file is only acceptable with a custom edges file
    if len(opts.coord_fnames) < 2 and opts.edges_file is None:
        option_parser.error("Please provide at least two "
                            "coordinate files or a custom edges file")

    # Open and get coord data (for multiple coords files)
    coord_files = opts.coord_fnames
    if not validate_coord_files(coord_files):
        option_parser.error("Every line of every coord file must "
                            "have the same number of columns.")
    data["edges"], data["coord"] = get_multiple_coords(
        coord_files, opts.edges_file, opts.serial)

    # if the edges file wasn't supplied, we appended _i to each file's samples
    # therefore we now add duplicated samples with _0, _1,... to mapping file
    if opts.edges_file is None:
        newmap = [data["map"][0]]
        for file_index in range(len(coord_files)):
            for sample in data["map"][1:]:
                newsample = ["%s_%d" % (sample[0], file_index)]
                newsample.extend(sample[1:])
                newmap.append(newsample)
        data["map"] = newmap

    # remove any samples not present in mapping file
    remove_unmapped_samples(data["map"], data["coord"], data["edges"])

    # len() rather than truthiness: the coordinates may be a numpy array
    if len(data["coord"][1]) == 0:
        raise ValueError("\n\nError: None of the sample IDs in the coordinates files were present in the mapping file.\n")

    # process custom axes, if present.
    custom_axes = None
    if opts.custom_axes:
        custom_axes = process_custom_axes(opts.custom_axes)
        get_custom_coords(custom_axes, data["map"], data["coord"])
        remove_nans(data["coord"])
        scale_custom_coords(custom_axes, data["coord"])

    # Generate random output file name and create directories
    if opts.output_dir:
        create_dir(opts.output_dir)
        dir_path = opts.output_dir
    else:
        dir_path = "./"

    qiime_dir = get_qiime_project_dir()
    jar_path = os.path.join(qiime_dir, "qiime/support_files/jar/")

    data_dir_path = get_random_directory_name(output_dir=dir_path,
                                              return_absolute_path=False)
    jar_dir_path = os.path.join(dir_path, "jar")
    # best effort on purpose: an OSError from mkdir (e.g. the directory is
    # already there) is ignored for both directories
    for needed_dir in (data_dir_path, jar_dir_path):
        try:
            os.mkdir(needed_dir)
        except OSError:
            pass

    shutil.copyfile(os.path.join(jar_path, "king.jar"),
                    os.path.join(jar_dir_path, "king.jar"))

    # the name of the first coordinate file is passed on as the file name
    filepath = coord_files[0]
    filename = filepath.strip().split("/")[-1]

    try:
        action = generate_3d_plots
    except NameError:
        action = None
    # Place this outside try/except so we don't mask NameError in action
    if action:
        action(
            prefs,
            data,
            custom_axes,
            background_color,
            label_color,
            dir_path,
            data_dir_path,
            filename,
            ball_scale=ball_scale,
            arrow_colors=arrow_colors,
            user_supplied_edges=opts.edges_file is not None,
        )
def main():
    """Command-line entry point that builds 3D plots from a coordinates file.

    Parses the options described by ``script_info`` and works out which
    scaling methods were requested.  For the ``invue`` output format the
    coordinates are loaded and handed to ``generate_3d_plots_invue``; for any
    other format the ellipsoid, custom axes, vectors and taxa options are
    validated and processed, the output directories are prepared (including a
    copy of ``king.jar``) and ``generate_3d_plots`` is called.

    Raises:
        ValueError: if neither 'scaled' nor 'unscaled' is requested, if more
            than three columns are passed to add_vectors, if vectors_axes is
            out of range, if vectors_path contains a character outside of
            ``[_.a-zA-Z0-9]`` or if window_size is lower than one.
        TypeError: if window_size cannot be converted with ``int`` because of
            its type (e. g. it was not supplied).
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    prefs, data, background_color, label_color, ball_scale, arrow_colors = \
        sample_color_prefs_and_map_data_from_options(opts)

    # the option is a comma separated list that is matched case-insensitively
    requested_methods = set(
        method.lower() for method in opts.scaling_method.split(","))
    plot_scaled = "scaled" in requested_methods
    plot_unscaled = "unscaled" in requested_methods

    if not (plot_scaled or plot_unscaled):
        raise ValueError("You must choose a valid scaling method (scaled or unscaled)")

    if opts.output_format == "invue":
        # validating the number of points for interpolation
        if opts.interpolation_points < 0:
            option_parser.error("The --interpolation_points should be "
                                "greater or equal to 0.")

        # make sure that coord file has internally consistent # of columns
        if not validate_coord_files(opts.coord_fname):
            option_parser.error("Every line of every coord file must "
                                "have the same number of columns.")

        # Open and get coord data
        data["coord"] = get_coord(opts.coord_fname, opts.ellipsoid_method)

        # remove any samples not present in mapping file
        remove_unmapped_samples(data["map"], data["coord"])

        # if no samples overlapped between mapping file and otu table, exit
        if len(data["coord"][0]) == 0:
            print("\nError: OTU table and mapping file had no samples in common\n")
            exit(1)

        if opts.output_dir:
            create_dir(opts.output_dir, False)
            dir_path = opts.output_dir
        else:
            dir_path = "./"

        # for a directory of coordinate files the first non-hidden entry
        # gives the file name
        filepath = opts.coord_fname
        if os.path.isdir(filepath):
            coord_files = [fname for fname in os.listdir(filepath)
                           if not fname.startswith(".")]
            filename = os.path.split(coord_files[0])[-1]
        else:
            filename = os.path.split(filepath)[-1]

        generate_3d_plots_invue(
            prefs, data, dir_path, filename, opts.interpolation_points,
            opts.polyhedron_points, opts.polyhedron_offset)

        # finish script
        return

    # Potential conflicts
    if opts.custom_axes is not None and os.path.isdir(opts.coord_fname):
        # can't do averaged pcoa plots _and_ custom axes in the same plot
        option_parser.error("Please supply either custom axes or multiple "
                            "coordinate files, but not both.")

    # check that smoothness is an integer between 0 and 3; only conversion
    # failures are caught so that unrelated errors are not hidden
    try:
        ellipsoid_smoothness = int(opts.ellipsoid_smoothness)
    except (TypeError, ValueError):
        option_parser.error("Please supply an integer ellipsoid smoothness "
                            "value.")
    if ellipsoid_smoothness < 0 or ellipsoid_smoothness > 3:
        option_parser.error("Please supply an ellipsoid smoothness value "
                            "between 0 and 3.")

    # check that opacity is a float between 0 and 1
    try:
        ellipsoid_alpha = float(opts.ellipsoid_opacity)
    except (TypeError, ValueError):
        option_parser.error("Please supply a number for ellipsoid opacity.")
    if ellipsoid_alpha < 0 or ellipsoid_alpha > 1:
        option_parser.error("Please supply an ellipsoid opacity value "
                            "between 0 and 1.")

    # check that ellipsoid method is valid
    ellipsoid_methods = ["IQR", "sdev"]
    if opts.ellipsoid_method not in ellipsoid_methods:
        option_parser.error("Please supply a valid ellipsoid method. "
                            "Valid methods are: " +
                            ", ".join(ellipsoid_methods) + ".")

    # gather ellipsoid drawing preferences
    ellipsoid_prefs = {"smoothness": ellipsoid_smoothness,
                       "alpha": ellipsoid_alpha}

    # make sure that coord file has internally consistent # of columns
    if not validate_coord_files(opts.coord_fname):
        option_parser.error("Every line of every coord file must "
                            "have the same number of columns.")

    # Open and get coord data
    data["coord"] = get_coord(opts.coord_fname, opts.ellipsoid_method)

    # remove any samples not present in mapping file
    remove_unmapped_samples(data["map"], data["coord"])

    # if no samples overlapped between mapping file and otu table, exit
    if len(data["coord"][0]) == 0:
        print("\nError: OTU table and mapping file had no samples in common\n")
        exit(1)

    # process custom axes, if present.
    custom_axes = None
    if opts.custom_axes:
        custom_axes = process_custom_axes(opts.custom_axes)
        get_custom_coords(custom_axes, data["map"], data["coord"])
        remove_nans(data["coord"])
        scale_custom_coords(custom_axes, data["coord"])

    # process vectors if requested
    if opts.add_vectors:
        add_vectors = {}
        add_vectors["vectors"] = opts.add_vectors.split(",")
        add_vectors["weight_by_vector"] = opts.weight_by_vector
        # count the requested columns, not the keys of the dictionary (which
        # always has two keys at this point)
        if len(add_vectors["vectors"]) > 3:
            raise ValueError("You must add maximum 3 columns but %s" %
                             opts.add_vectors)

        # Validating Vectors values
        if opts.vectors_algorithm:
            axes_number = len(data["coord"][1][1])
            if opts.vectors_axes < 0 or opts.vectors_axes > axes_number:
                raise ValueError("vectors_axes should be between 0 and the "
                                 "max number of samples/pcoa-axes: %d" %
                                 axes_number)
            # zero stands for "use every axis"
            if opts.vectors_axes == 0:
                opts.vectors_axes = axes_number
            add_vectors["vectors_axes"] = opts.vectors_axes

            valid_chars = "_.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
            for c in opts.vectors_path:
                if c not in valid_chars:
                    raise ValueError("vectors_path (%s) has invalid chars" %
                                     opts.vectors_path)

            add_vectors["vectors_output"] = {}
            add_vectors["vectors_algorithm"] = opts.vectors_algorithm
            add_vectors["eigvals"] = data["coord"][3]
            add_vectors["window_size"] = None

            # checks specific for the modified first difference algorithm
            if add_vectors["vectors_algorithm"] == "wdiff":
                try:
                    add_vectors["window_size"] = int(opts.window_size)
                except TypeError:
                    raise TypeError("Specify --window_size as an integer")

                # sanity check as the value can only be greater or equal to one
                if add_vectors["window_size"] < 1:
                    raise ValueError("The value of window_size is invalid, "
                                     "the value must be greater than zero, "
                                     "not %d" % add_vectors["window_size"])
        else:
            add_vectors["vectors_algorithm"] = None
        add_vectors["vectors_path"] = opts.vectors_path
    else:
        add_vectors = None

    if opts.taxa_fname is not None:
        # get taxonomy counts
        # get list of sample_ids that haven't been removed
        sample_ids = data["coord"][0]

        # get taxa summaries for all sample_ids
        lineages, taxa_counts = get_taxa(opts.taxa_fname, sample_ids)
        data["taxa"] = {}
        data["taxa"]["lineages"] = lineages
        data["taxa"]["counts"] = taxa_counts

        # get average relative abundance of taxa
        data["taxa"]["prevalence"] = \
            get_taxa_prevalence(data["taxa"]["counts"])

        # NOTE(review): the rare taxa are trimmed before the taxa coordinates
        # are computed; confirm remove_rare_taxa does not need
        # data["taxa"]["coord"] to be present
        remove_rare_taxa(data["taxa"], nkeep=opts.n_taxa_keep)

        # get coordinates of taxa (weighted mean of sample scores)
        data["taxa"]["coord"] = get_taxa_coords(data["taxa"]["counts"],
                                                data["coord"][1])

        # write taxa coords if requested
        if opts.biplot_output_file is not None:
            output = make_biplot_scores_output(data["taxa"])
            # the with block closes the file even if the write fails
            with open(opts.biplot_output_file, "w") as fout:
                fout.write("\n".join(output))

    if opts.output_dir:
        create_dir(opts.output_dir, False)
        dir_path = opts.output_dir
    else:
        dir_path = "./"

    qiime_dir = get_qiime_project_dir()
    jar_path = os.path.join(qiime_dir, "qiime/support_files/jar/")

    data_dir_path = get_random_directory_name(output_dir=dir_path,
                                              return_absolute_path=False)
    # best effort on purpose: an OSError from mkdir is ignored
    try:
        os.mkdir(data_dir_path)
    except OSError:
        pass

    data_file_path = data_dir_path

    jar_dir_path = os.path.join(dir_path, "jar")
    try:
        os.mkdir(jar_dir_path)
    except OSError:
        pass

    shutil.copyfile(os.path.join(jar_path, "king.jar"),
                    os.path.join(jar_dir_path, "king.jar"))

    filepath = opts.coord_fname
    if os.path.isdir(filepath):
        coord_files = [fname for fname in os.listdir(filepath)
                       if not fname.startswith(".")]
        filename = os.path.split(coord_files[0])[-1]
    else:
        filename = os.path.split(filepath)[-1]

    try:
        action = generate_3d_plots
    except NameError:
        action = None
    # Place this outside try/except so we don't mask NameError in action
    if action:
        action(
            prefs,
            data,
            custom_axes,
            background_color,
            label_color,
            dir_path,
            data_file_path,
            filename,
            ellipsoid_prefs=ellipsoid_prefs,
            add_vectors=add_vectors,
            plot_scaled=plot_scaled,
            plot_unscaled=plot_unscaled,
        )
def preprocess_coords_file(coords_header, coords_data, coords_eigenvals,
                           coords_pct, mapping_header, mapping_data,
                           custom_axes=None, jackknifing_method=None,
                           is_comparison=False):
    """Process a PCoA data and handle customizations in the contents

    Inputs:
    coords_header: list of sample identifiers in the PCoA file _or_ list of
    lists with sample identifiers for each coordinate file (if jackknifing or
    comparing plots)
    coords_data: matrix of coordinates in the PCoA file _or_ list of numpy
    arrays with coordinates for each file (if jackknifing or comparing plots)
    coords_eigenvals: numpy array with eigenvalues for the coordinates file
    _or_ list of numpy arrays with the eigenvalues (if jackknifing or comparing
    plots)
    coords_pct: numpy array with the percent explained by each principal
    coordinates axis _or_ a list of lists with numpy arrays (if jackknifing or
    comparing plots)
    mapping_header: mapping file headers names
    mapping_data: mapping file data
    custom_axes: name of the mapping data fields to add to coords_data
    jackknifing_method: one of 'sdev' or 'IQR', defaults to None, it is passed
    to summarize_pcoas as its method argument
    is_comparison: whether or not the inputs should be considered as the ones
    for a comparison plot

    Outputs:
    coords_header: list of sample identifiers in the PCoA file
    coords_data: matrix of coordinates in the PCoA file with custom_axes if
    provided
    coords_eigenvalues: either the eigenvalues of the input coordinates or the
    average eigenvalues of the multiple coords that were passed in
    coords_pct: list of percents explained by each axis as given by the master
    coordinates i. e. the center around where the values revolve
    coords_low: coordinates representing the lower edges of an ellipse; None if
    no jackknifing is applied
    coords_high: coordinates representing the higher edges of an ellipse; None
    if no jackknifing is applied
    clones: total number of input files for a comparison plot, zero otherwise

    This controller function handles any customization that has to be done to
    the PCoA data prior to the formatting. Note that the first element in each
    list (coords, headers, eigenvalues & percents) will be considered the
    master set of coordinates.

    Raises: AssertionError if a comparison plot is requested but a list of data
    is not passed as input
    """
    multiple_inputs = isinstance(coords_data, list)

    # prevent obscure and obfuscated errors; raised explicitly (instead of
    # through an assert statement) so the check survives python -O
    if is_comparison and not multiple_inputs:
        raise AssertionError("Cannot process a comparison with "
                             "the data from a single coordinates file")

    mapping_file = [mapping_header] + mapping_data
    coords_file = [coords_header, coords_data]

    # number PCoA files; zero for any case except for comparison plots
    clones = 0

    if custom_axes and isinstance(coords_data, ndarray):
        # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
        get_custom_coords(custom_axes, mapping_file, coords_file)
        remove_nans(coords_file)
        scale_custom_coords(custom_axes, coords_file)

    elif multiple_inputs and not is_comparison:
        # take the first pcoa file as the master set of coordinates
        master_pcoa = [coords_header[0], coords_data[0],
                       coords_eigenvals[0], coords_pct[0]]

        # support pcoas must be a list of lists where each list contain
        # all the elements that compose a coordinates file
        support_pcoas = [list(pcoa) for pcoa in zip(
            coords_header, coords_data, coords_eigenvals, coords_pct)]

        # do not apply procrustes, at least not for now
        coords_data, coords_low, coords_high, eigenvalues_average, \
            identifiers = summarize_pcoas(master_pcoa, support_pcoas,
                                          method=jackknifing_method,
                                          apply_procrustes=False)

        # custom axes and jackknifing is a tricky thing to do, you only have to
        # add the custom values to the master file which is represented as the
        # coords_data return value. Since there is really no variation in that
        # axis then you have to change the values of coords_high and of
        # coords_low to something really small so that WebGL work properly
        if custom_axes:
            coords_file = [master_pcoa[0], coords_data]
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

            # this opens support for as many custom axes as needed
            axes = len(custom_axes)
            coords_low[:, 0:axes] = zeros([coords_low.shape[0], axes])
            coords_high[:, 0:axes] = \
                ones([coords_high.shape[0], axes]) * 0.00001
            coords_data = coords_file[1]

        # return a value containing coords_low and coords_high
        return identifiers, coords_data, eigenvalues_average, \
            master_pcoa[3], coords_low, coords_high, clones

    # comparison plots are processed almost individually
    elif multiple_inputs and is_comparison:
        # indicates the number of files that were totally processed so other
        # functions/APIs are aware of how many times to replicate the metadata
        clones = len(coords_data)
        out_headers, out_coords = [], []

        for index in range(clones):
            # tag each header with the number in which those coords came in
            out_headers.extend(
                [element + '_%d' % index for element in coords_header[index]])

            if index == 0:
                # numpy can only stack things if they have the same shape
                out_coords = coords_data[index]

                # the eigenvalues and percents explained are really the ones
                # belonging to the first set of coordinates that was passed
                coords_eigenvals = coords_eigenvals[index]
                coords_pct = coords_pct[index]
            else:
                out_coords = vstack((out_coords, coords_data[index]))

        coords_file = [out_headers, out_coords]

        if custom_axes:
            # this condition deals with the fact that in order for the custom
            # axes to be added into the original coordinates, we have to add
            # the suffix for the sample identifiers that the coordinates have
            if clones:
                out_data = []
                for index in range(clones):
                    out_data.extend([[element[0] + '_%d' % index] + element[1:]
                                     for element in mapping_data])
                mapping_file = [mapping_header] + out_data

            # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
            get_custom_coords(custom_axes, mapping_file, coords_file)
            remove_nans(coords_file)
            scale_custom_coords(custom_axes, coords_file)

    # if no coords summary is applied, return None in the corresponding values
    # note that the value of clones will be != 0 for a comparison plot
    return coords_file[0], coords_file[1], coords_eigenvals, coords_pct, \
        None, None, clones
def preprocess_coords_file(
    coords_header,
    coords_data,
    coords_eigenvals,
    coords_pct,
    mapping_header,
    mapping_data,
    custom_axes=None,
    jackknifing_method=None,
    is_comparison=False,
):
    """Prepare PCoA data for formatting, applying any requested customization

    Inputs:
    coords_header: list of sample identifiers in the PCoA file _or_ a list
    with one such list per coordinates file (jackknifed/comparison plots)
    coords_data: matrix of coordinates in the PCoA file _or_ a list of numpy
    arrays, one per coordinates file (jackknifed/comparison plots)
    coords_eigenvals: numpy array with the eigenvalues of the coordinates
    file _or_ a list of those arrays (jackknifed/comparison plots)
    coords_pct: numpy array with the percent explained by each principal
    coordinates axis _or_ a list of those arrays (jackknifed/comparison
    plots)
    mapping_header: names of the mapping file headers
    mapping_data: mapping file data, a list of rows
    custom_axes: names of the mapping data fields to add to coords_data
    jackknifing_method: forwarded as `method` to summarize_pcoas, defaults to
    None; presumably one of 'sdev' or 'IQR', see qiime.util.summarize_pcoas
    is_comparison: whether the inputs are the ones for a comparison plot

    Outputs:
    coords_header: list of sample identifiers; for comparison plots each
    identifier carries a '_<file index>' suffix
    coords_data: matrix of coordinates, with the custom axes when requested
    coords_eigenvalues: eigenvalues of the input coordinates, the average
    eigenvalues reported by summarize_pcoas (jackknifing) or the eigenvalues
    of the first file (comparison)
    coords_pct: percents explained by each axis, as given by the master
    (first) set of coordinates when several files are passed
    coords_low: coordinates for the lower edges of an ellipse; None if no
    jackknifing is applied
    coords_high: coordinates for the higher edges of an ellipse; None if no
    jackknifing is applied
    clones: number of input files for a comparison plot, zero otherwise

    The first element of every list input (coords, headers, eigenvalues and
    percents) is treated as the master set of coordinates.

    Raises: AssertionError if a comparison plot is requested but coords_data
    is not a list
    """

    def _add_custom_axes(metadata, coordinates):
        # sequence ported from qiime/scripts/make_3d_plots.py @ 9115351
        get_custom_coords(custom_axes, metadata, coordinates)
        remove_nans(coordinates)
        scale_custom_coords(custom_axes, coordinates)

    # exact type check on purpose: subclasses of list are not considered
    got_list = type(coords_data) == list

    # fail early with a readable message instead of an obscure error later on
    if is_comparison:
        assert got_list, (
            "Cannot process a comparison with "
            "the data from a single coordinates file"
        )

    mapping_file = [mapping_header] + mapping_data
    coords_file = [coords_header, coords_data]

    # number of PCoA files; remains zero unless this is a comparison plot
    clones = 0

    if not got_list:
        # a single coordinates file only needs work when custom axes are asked
        if custom_axes and type(coords_data) == ndarray:
            _add_custom_axes(mapping_file, coords_file)

    # the literal comparison against False is kept so that values such as
    # None are routed to neither of the multi-file branches
    elif is_comparison == False:
        # jackknifed plot: the first pcoa file is the master set
        master_pcoa = [
            coords_header[0],
            coords_data[0],
            coords_eigenvals[0],
            coords_pct[0],
        ]

        # one [header, data, eigenvalues, percents] list per coordinates file
        support_pcoas = [
            list(pcoa_parts)
            for pcoa_parts in zip(
                coords_header, coords_data, coords_eigenvals, coords_pct
            )
        ]

        # procrustes is deliberately not applied, at least for now
        summary = summarize_pcoas(
            master_pcoa,
            support_pcoas,
            method=jackknifing_method,
            apply_procrustes=False,
        )
        (coords_data, coords_low, coords_high, eigenvalues_average,
         identifiers) = summary

        # the custom values are only added to the master coordinates; as
        # there is no variation along those axes, the matching columns of
        # coords_low/coords_high are overwritten with zeros and a very small
        # value respectively so that WebGL works properly
        if custom_axes:
            coords_file = [master_pcoa[0], coords_data]
            _add_custom_axes(mapping_file, coords_file)

            # works for as many custom axes as requested
            axes = len(custom_axes)
            low_rows = coords_low.shape[0]
            high_rows = coords_high.shape[0]
            coords_low[:, 0:axes] = zeros([low_rows, axes])
            coords_high[:, 0:axes] = ones([high_rows, axes]) * 0.00001
            coords_data = coords_file[1]

        # the only exit that carries coords_low and coords_high
        return (
            identifiers,
            coords_data,
            eigenvalues_average,
            master_pcoa[3],
            coords_low,
            coords_high,
            clones,
        )

    elif is_comparison:
        # comparison plot: every file is processed almost individually;
        # clones tells other functions/APIs how many times the metadata has
        # to be replicated
        clones = len(coords_data)
        out_headers, out_coords = [], []

        for index in range(clones):
            headers_i = coords_header[index]
            coords_i = coords_data[index]

            # tag each identifier with the position of its coordinates file
            suffix = "_%d" % index
            out_headers.extend([sample_id + suffix for sample_id in headers_i])

            if index:
                out_coords = vstack((out_coords, coords_i))
            else:
                # numpy can only stack things of the same shape, so the
                # first array is taken as is
                out_coords = coords_i

                # eigenvalues and percents explained are the ones of the
                # first set of coordinates that was passed
                coords_eigenvals = coords_eigenvals[index]
                coords_pct = coords_pct[index]

        coords_file = [out_headers, out_coords]

        if custom_axes:
            # the mapping file identifiers need the same suffixes that were
            # added to the coordinates, otherwise they would not match
            if clones:
                tagged_rows = [
                    [row[0] + "_%d" % index] + row[1:]
                    for index in range(clones)
                    for row in mapping_data
                ]
                mapping_file = [mapping_header] + tagged_rows

            _add_custom_axes(mapping_file, coords_file)

    # no coords summary was applied: coords_low and coords_high are None;
    # note that clones will be != 0 for a comparison plot
    return (
        coords_file[0],
        coords_file[1],
        coords_eigenvals,
        coords_pct,
        None,
        None,
        clones,
    )
def _load_mapped_coords(option_parser, opts, data):
    """Validate and load the coordinates into data['coord']

    Reports a parser error when validate_coord_files rejects
    opts.coord_fname, and exits with status 1 when no sample is left after
    remove_unmapped_samples has been applied.
    """
    # make sure that coord file has internally consistent # of columns
    if not validate_coord_files(opts.coord_fname):
        option_parser.error('Every line of every coord file must '
                            'have the same number of columns.')

    # open and get coord data
    data['coord'] = get_coord(opts.coord_fname, opts.ellipsoid_method)

    # remove any samples not present in mapping file
    remove_unmapped_samples(data['map'], data['coord'])

    # if no samples overlapped between mapping file and otu table, exit
    if len(data['coord'][0]) == 0:
        # single-argument parenthesised form prints the same on Python 2
        print("\nError: OTU table and mapping file had no samples in common\n")
        exit(1)


def _resolve_output_dir(opts):
    """Return the output directory, creating opts.output_dir if it was given"""
    if opts.output_dir:
        create_dir(opts.output_dir, False)
        return opts.output_dir
    return './'


def _coords_base_name(coord_path):
    """Return the file name used to label the plots for coord_path

    For a directory this is the first non-hidden entry reported by
    os.listdir, otherwise it is the last component of the path.
    """
    if os.path.isdir(coord_path):
        coord_files = [fname for fname in os.listdir(coord_path)
                       if not fname.startswith('.')]
        return os.path.split(coord_files[0])[-1]
    return os.path.split(coord_path)[-1]


def _make_dir_if_missing(path):
    """Create path, ignoring the OSError raised by os.mkdir (best effort)"""
    try:
        os.mkdir(path)
    except OSError:
        pass


def main():
    """Entry point of the script: parse the options and write the 3D plots

    With --output_format 'invue' the coordinates are loaded and handed to
    generate_3d_plots_invue. Otherwise the ellipsoid options are validated,
    the optional taxa (biplot) and custom axes data are added, king.jar is
    copied next to the output and generate_3d_plots is called.

    Invalid options are reported through option_parser.error (presumably an
    optparse parser, whose error() terminates the script -- confirm).
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    prefs, data, background_color, label_color, ball_scale, arrow_colors = \
        sample_color_prefs_and_map_data_from_options(opts)

    if opts.output_format == 'invue':
        # validating the number of points for interpolation
        if opts.interpolation_points < 0:
            option_parser.error('The --interpolation_points should be '
                                'greater or equal to 0.')

        # the column consistency check only needs to run once
        _load_mapped_coords(option_parser, opts, data)

        dir_path = _resolve_output_dir(opts)
        filename = _coords_base_name(opts.coord_fname)

        generate_3d_plots_invue(prefs, data, dir_path, filename,
                                opts.interpolation_points,
                                opts.polyhedron_points,
                                opts.polyhedron_offset)

        # finish script
        return

    # Potential conflicts
    if opts.custom_axes is not None and os.path.isdir(opts.coord_fname):
        # can't do averaged pcoa plots _and_ custom axes in the same plot
        option_parser.error("Please supply either custom axes or multiple "
                            "coordinate files, but not both.")

    # check that smoothness is an integer between 0 and 3; only conversion
    # failures are caught so that interrupts and exits are not swallowed
    try:
        ellipsoid_smoothness = int(opts.ellipsoid_smoothness)
    except (TypeError, ValueError):
        option_parser.error("Please supply an integer ellipsoid smoothness "
                            "value.")
    if ellipsoid_smoothness < 0 or ellipsoid_smoothness > 3:
        option_parser.error("Please supply an ellipsoid smoothness value "
                            "between 0 and 3.")

    # check that opacity is a float between 0 and 1
    try:
        ellipsoid_alpha = float(opts.ellipsoid_opacity)
    except (TypeError, ValueError):
        option_parser.error("Please supply a number for ellipsoid opacity.")
    if ellipsoid_alpha < 0 or ellipsoid_alpha > 1:
        option_parser.error("Please supply an ellipsoid opacity value "
                            "between 0 and 1.")

    # check that ellipsoid method is valid
    ellipsoid_methods = ['IQR', 'sdev']
    if opts.ellipsoid_method not in ellipsoid_methods:
        option_parser.error("Please supply a valid ellipsoid method. "
                            "Valid methods are: " +
                            ', '.join(ellipsoid_methods) + ".")

    # gather ellipsoid drawing preferences
    ellipsoid_prefs = {"smoothness": ellipsoid_smoothness,
                       "alpha": ellipsoid_alpha}

    _load_mapped_coords(option_parser, opts, data)

    if opts.taxa_fname is not None:
        # get taxa summaries for the sample_ids that haven't been removed
        sample_ids = data['coord'][0]
        lineages, taxa_counts = get_taxa(opts.taxa_fname, sample_ids)
        data['taxa'] = {}
        data['taxa']['lineages'] = lineages
        data['taxa']['counts'] = taxa_counts

        # get average relative abundance of taxa
        data['taxa']['prevalence'] = \
            get_taxa_prevalence(data['taxa']['counts'])
        remove_rare_taxa(data['taxa'], nkeep=opts.n_taxa_keep)

        # get coordinates of taxa (weighted mean of sample scores)
        data['taxa']['coord'] = get_taxa_coords(data['taxa']['counts'],
                                                data['coord'][1])

        # write taxa coords if requested; the context manager closes the
        # file even if the write fails
        if opts.biplot_output_file is not None:
            output = make_biplot_scores_output(data['taxa'])
            with open(opts.biplot_output_file, 'w') as fout:
                fout.write('\n'.join(output))

    # process custom axes, if present.
    custom_axes = None
    if opts.custom_axes:
        custom_axes = process_custom_axes(opts.custom_axes)
        get_custom_coords(custom_axes, data['map'], data['coord'])
        remove_nans(data['coord'])
        scale_custom_coords(custom_axes, data['coord'])

    dir_path = _resolve_output_dir(opts)

    qiime_dir = get_qiime_project_dir()
    jar_path = os.path.join(qiime_dir, 'qiime/support_files/jar/')

    data_dir_path = get_random_directory_name(output_dir=dir_path,
                                              return_absolute_path=False)
    _make_dir_if_missing(data_dir_path)
    data_file_path = data_dir_path

    jar_dir_path = os.path.join(dir_path, 'jar')
    _make_dir_if_missing(jar_dir_path)

    shutil.copyfile(os.path.join(jar_path, 'king.jar'),
                    os.path.join(jar_dir_path, 'king.jar'))

    filename = _coords_base_name(opts.coord_fname)

    try:
        action = generate_3d_plots
    except NameError:
        action = None

    # Place this outside try/except so we don't mask NameError in action
    if action:
        action(prefs, data, custom_axes, background_color, label_color,
               dir_path, data_file_path, filename,
               ellipsoid_prefs=ellipsoid_prefs)