def get_map(options, data):
    """Open the mapping file, parse it and store the result in ``data``.

    Parameters
    ----------
    options : object
        Must expose ``map_fname``, the path of the mapping file.
    data : dict
        Receives the parsed mapping under the ``'map'`` key.

    Returns
    -------
    Whatever ``parse_mapping_file`` returns for the file's lines (the same
    object that is stored in ``data['map']``).

    Raises
    ------
    MissingFileError
        If opening or reading the file raises ``TypeError`` (e.g. the path
        is ``None``) or ``IOError``.
    """
    try:
        # The context manager guarantees the handle is closed even when
        # readlines() fails; the original left it to the garbage collector.
        with open(options.map_fname, 'U') as map_file:
            map_lines = map_file.readlines()
    except (TypeError, IOError):
        raise MissingFileError('Mapping file required for this analysis')

    data['map'] = parse_mapping_file(map_lines)
    return data['map']
def get_otu_counts(fpath):
    """Read the OTU table at ``fpath`` and verify that it carries lineages.

    Parameters
    ----------
    fpath : str
        Path of the OTU table file handed to ``parse_biom_table``.

    Returns
    -------
    The table object produced by ``parse_biom_table``.

    Raises
    ------
    MissingFileError
        If opening or parsing the file raises ``TypeError`` or ``IOError``.
    ValueError
        If the table has no observation metadata, or the first
        observation's ``'taxonomy'`` entry is ``None``.
    """
    try:
        # Parsing stays inside the try block so the same errors as before are
        # translated; the with statement closes the handle afterwards.
        # NOTE(review): assumes parse_biom_table reads the whole file before
        # returning -- confirm against the parser in use.
        with open(fpath, 'U') as table_file:
            otu_table = parse_biom_table(table_file)
    except (TypeError, IOError):
        raise MissingFileError('OTU table file required for this analysis')

    # Only the first observation is inspected for a lineage.
    if (otu_table.ObservationMetadata is None or
            otu_table.ObservationMetadata[0]['taxonomy'] is None):
        raise ValueError(
            '\n\nThe lineages are missing from the OTU table. Make sure you '
            'included the lineages for the OTUs in your OTU table. \n')

    return otu_table
def taxonomy_color_prefs_and_map_data_from_options(options):
    """Return color prefs, counts data and plot colors based on options.

    Each path in ``options.counts_fname`` is read and parsed with
    ``parse_taxa_summary_table``.

    Parameters
    ----------
    options : object
        Must expose ``counts_fname`` (iterable of file paths),
        ``prefs_path`` (path of a prefs file, or a falsy value) and
        ``background_color`` (color name, or a falsy value).

    Returns
    -------
    tuple
        ``(color_prefs, data, background_color, label_color)`` where
        ``data['counts']`` maps each counts path to its
        ``(sample_ids, otu_ids, otu_table)`` tuple, and ``label_color`` is
        ``'white'`` on a ``'black'`` background and ``'black'`` otherwise.

    Raises
    ------
    MissingFileError
        If a counts file cannot be opened or read (``TypeError`` or
        ``IOError``).
    """
    data = {'counts': {}}
    taxonomy_levels = []

    for counts_path in options.counts_fname:
        try:
            # Close each counts file as soon as its lines are in memory.
            with open(counts_path, 'U') as counts_file:
                counts_lines = counts_file.readlines()
        except (TypeError, IOError):
            raise MissingFileError('Counts file required for this analysis')

        sample_ids, otu_ids, otu_table = \
            parse_taxa_summary_table(counts_lines)
        data['counts'][counts_path] = (sample_ids, otu_ids, otu_table)

        # Deepest lineage in the file: number of ';' separators in the
        # longest taxon string.
        level = max([len(t.split(';')) - 1 for t in otu_ids])
        taxonomy_levels.append(str(level))

    background_color = 'black'
    if options.prefs_path:
        with open(options.prefs_path, 'U') as prefs_file:
            prefs_text = prefs_file.read()
        # SECURITY: eval() executes arbitrary Python found in the prefs file.
        # Kept for backward compatibility with existing prefs files; only
        # point this at trusted input (ast.literal_eval would be safer if
        # prefs files are plain literals -- TODO confirm).
        prefs = eval(prefs_text)
        color_prefs = taxonomy_process_prefs(taxonomy_levels,
                                             prefs['taxonomy_coloring'])
        if 'background_color' in prefs:
            background_color = prefs['background_color']
    else:
        color_prefs = taxonomy_process_prefs(taxonomy_levels, None)

    # An explicit command-line background color wins over the prefs file.
    if options.background_color:
        background_color = options.background_color

    # Pick a label color that contrasts with the background.
    label_color = 'white' if background_color == 'black' else 'black'

    return color_prefs, data, background_color, label_color
def main():
    """Script entry point: load an OTU table, order it and plot a heatmap.

    Steps, in order:
      1. Parse command-line options and load the OTU table.
      2. Build one label per observation from the requested metadata
         category and level.
      3. Optionally normalize (``absolute_abundance``) and log-transform
         (``no_log_transform``) the table.
      4. Choose a sample order: sample tree, else mapping file (clustered
         within a category when one is given), else clustering, else the
         table's own order.
      5. Choose an observation order: OTU tree, else clustering, else the
         table's own order.
      6. Re-order the table and labels and call ``plot_heatmap``.

    Raises
    ------
    MissingFileError
        If the OTU tree file cannot be opened.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table = load_table(opts.otu_table_fp)
    obs_md_category = opts.obs_md_category
    obs_md_level = opts.obs_md_level
    if obs_md_level is None:
        # grab the last level if the user didn't specify a level
        obs_md_level = -1
    else:
        # convert to 0-based indexing
        obs_md_level -= 1

    obs_md = otu_table.metadata(axis='observation')
    obs_md_labels = []
    if (obs_md is None or obs_md_category not in obs_md[0]):
        # No usable metadata: every observation gets an empty label.
        obs_md_labels = [['']] * len(otu_table.ids(axis='observation'))
    else:
        for _, _, md in otu_table.iter(axis='observation'):
            current_md = md[obs_md_category]
            # Bound the index on both sides: the level may be negative
            # (-1 means "last level"), and a bare upper-bound check is
            # always true for negative indices, which raised IndexError on
            # empty or too-short metadata.
            if -len(current_md) <= obs_md_level < len(current_md):
                current_md_at_level = current_md[obs_md_level]
            else:
                current_md_at_level = ''
            obs_md_labels.append([current_md_at_level])

    otu_labels = make_otu_labels(otu_table.ids(axis='observation'),
                                 obs_md_labels)

    # Convert to relative abundance if requested
    if not opts.absolute_abundance:
        otu_table = otu_table.norm(axis='observation')

    # Get log transform if requested
    if not opts.no_log_transform:
        otu_table = get_log_transform(otu_table)

    # Re-order samples by tree if provided
    if opts.sample_tree is not None:
        # Close the tree file once the order has been derived (the OTU-tree
        # branch below already closes its handle right after the same call).
        with open(opts.sample_tree, 'U') as sample_tree_file:
            sample_order = get_order_from_tree(otu_table.ids(),
                                               sample_tree_file)

    # if there's no sample tree, sort samples by mapping file
    elif opts.map_fname is not None:
        with open(opts.map_fname, 'U') as map_file:
            lines = map_file.readlines()
        metadata = list(parse_mapping_file(lines))
        new_map, otu_table = get_overlapping_samples(metadata[0], otu_table)
        metadata[0] = new_map

        # if there's a category, do clustering within each category
        if opts.category is not None:
            category_labels = extract_metadata_column(otu_table.ids(),
                                                      metadata,
                                                      opts.category)
            sample_order = get_order_from_categories(otu_table,
                                                     category_labels)
        # else: just use the mapping file order
        else:
            # First column of every mapping row is the sample id; a
            # comprehension (rather than zip(*rows)[0]) also works when
            # zip() returns an iterator and when the map is empty.
            map_sample_ids = [row[0] for row in metadata[0]]
            ordered_sample_ids = [sample_id for sample_id in map_sample_ids
                                  if otu_table.exists(sample_id)]
            sample_order = names_to_indices(otu_table.ids(),
                                            ordered_sample_ids)

    # if no tree or mapping file, cluster the columns with get_clusters
    # (UPGMA on euclidean distances, per the original author's comment)
    elif not opts.suppress_column_clustering:
        data = np.asarray(list(otu_table.iter_data(axis='observation')))
        sample_order = get_clusters(data, axis='column')

    # else just use OTU table ordering
    else:
        sample_order = np.arange(len(otu_table.ids()))

    # re-order OTUs by tree (if provided), or clustering
    if opts.otu_tree is not None:
        # open tree file
        try:
            otu_tree_file = open(opts.otu_tree, 'U')
        except (TypeError, IOError):
            raise MissingFileError("Couldn't read tree file at path: %s" %
                                   opts.otu_tree)
        # try/finally so the handle is released even if ordering fails.
        try:
            otu_order = get_order_from_tree(
                otu_table.ids(axis='observation'), otu_tree_file)
        finally:
            otu_tree_file.close()

    # if no OTU tree, cluster the rows with get_clusters
    elif not opts.suppress_row_clustering:
        data = np.asarray(list(otu_table.iter_data(axis='observation')))
        otu_order = get_clusters(data, axis='row')

    # else just use OTU table ordering
    else:
        otu_order = np.arange(len(otu_table.ids(axis='observation')))

    # otu_order and sample_order hold indices; sort_order expects ids, so
    # translate them against the table's current (pre-sort) id arrays.
    otu_id_order = [otu_table.ids(axis='observation')[i] for i in otu_order]
    sample_id_order = [otu_table.ids()[i] for i in sample_order]

    # Re-order otu table, sampleids, etc. as necessary
    otu_table = otu_table.sort_order(otu_id_order, axis='observation')
    otu_labels = np.array(otu_labels)[otu_order]
    otu_table = otu_table.sort_order(sample_id_order)
    sample_labels = otu_table.ids()

    plot_heatmap(otu_table, otu_labels, sample_labels, opts.output_fp,
                 imagetype=opts.imagetype, width=opts.width,
                 height=opts.height, dpi=opts.dpi,
                 color_scheme=opts.color_scheme)