def test_sample_rare_unique(self):
    """Check sample_rare_unique both without and with an abundance table."""
    sample_ids = ['a', 'b', 'c']
    tree = update_tree(None, tax_strings_by_sample)
    tax_by_sample = {sample_id: tax_strings_by_sample[pos]
                     for pos, sample_id in enumerate(sample_ids)}

    # Expected third and fourth tuple members shared by several samples
    exp_rare = [['k__1', 'p__x', 'c__'], ['k__1', 'p__y', 'c__3']]
    exp_unique = [['k__1', 'p__x', 'c__1'], ['k__1', 'p__x', 'c__2']]

    # Case 1: no table is supplied, so the second tuple member is None
    exp = [
        ('a', None, exp_rare, exp_unique),
        ('b', None, exp_rare, []),
        ('c', None, [], []),
    ]
    obs = sample_rare_unique(tree, None, tax_by_sample, 0.7)
    self.assertEqual(sorted(obs), exp)

    # Case 2: a table is supplied, so each sample gets its own table back
    table_a = Table(
        array([[14, 15, 16]]),
        ['k__1; p__y; c__'],
        list(sample_ids),
    )
    table_b = Table(
        array([[1, 2, 3],
               [4, 5, 6],
               [14, 15, 16]]),
        ['k__1; p__x; c__1',
         'k__1; p__x; c__2',
         'k__1; p__y; c__'],
        list(sample_ids),
    )
    table_c = Table(
        array([[1, 2, 3],
               [4, 5, 6],
               [7, 8, 9],
               [10, 11, 12],
               [14, 15, 16]]),
        ['k__1; p__x; c__1',
         'k__1; p__x; c__2',
         'k__1; p__x; c__',
         'k__1; p__y; c__3',
         'k__1; p__y; c__'],
        list(sample_ids),
    )
    exp = [
        ('a', table_a, exp_rare, exp_unique),
        ('b', table_b, exp_rare, []),
        ('c', table_c, [], []),
    ]
    obs = sample_rare_unique(tree, table, tax_by_sample, 0.7)

    # Compared member by member rather than as whole tuples
    for observed, expected in zip(sorted(obs), exp):
        for position in range(4):
            self.assertEqual(observed[position], expected[position])
def test_sample_rare_unique(self):
    """Exercise sample_rare_unique with table=None and with a real table."""
    t = update_tree(None, tax_strings_by_sample)
    tax_by_sample = {
        'a': tax_strings_by_sample[0],
        'b': tax_strings_by_sample[1],
        'c': tax_strings_by_sample[2],
    }

    # (rare, unique) pairs expected for samples a, b and c, in that order
    rare = [['k__1', 'p__x', 'c__'], ['k__1', 'p__y', 'c__3']]
    unique = [['k__1', 'p__x', 'c__1'], ['k__1', 'p__x', 'c__2']]
    rare_unique = [(rare, unique), (rare, []), ([], [])]

    # Without a table the second member of every tuple is None
    exp = [(samp, None, r, u) for samp, (r, u) in zip('abc', rare_unique)]
    obs = sample_rare_unique(t, None, tax_by_sample, 0.7)
    self.assertEqual(sorted(obs), exp)

    # With a table each sample is paired with its own expected table
    table_a = Table(array([[14, 15, 16]]),
                    ['k__1; p__y; c__'],
                    ['a', 'b', 'c'])
    table_b = Table(array([[1, 2, 3],
                           [4, 5, 6],
                           [14, 15, 16]]),
                    ['k__1; p__x; c__1',
                     'k__1; p__x; c__2',
                     'k__1; p__y; c__'],
                    ['a', 'b', 'c'])
    table_c = Table(array([[1, 2, 3],
                           [4, 5, 6],
                           [7, 8, 9],
                           [10, 11, 12],
                           [14, 15, 16]]),
                    ['k__1; p__x; c__1',
                     'k__1; p__x; c__2',
                     'k__1; p__x; c__',
                     'k__1; p__y; c__3',
                     'k__1; p__y; c__'],
                    ['a', 'b', 'c'])
    expected_tables = [table_a, table_b, table_c]
    exp = [(samp, tab, r, u)
           for samp, tab, (r, u) in zip('abc', expected_tables, rare_unique)]
    obs = sample_rare_unique(t, table, tax_by_sample, 0.7)

    # Each of the four tuple members is asserted separately
    for o, e in zip(sorted(obs), exp):
        for idx in (0, 1, 2, 3):
            self.assertEqual(o[idx], e[idx])
def main(tax_table, output_dir, samples_to_analyze=None):
    """Draws a pie chart of the most abundant taxa for each sample

    INPUTS:
        tax_table -- a biom formatted taxonomy table at the desired level of
                    resolution

        output_dir -- the directory where the output files are written.

        samples_to_analyze -- a list of sample ids to plot. When None, every
                    sample returned by build_tree_from_taxontable is plotted.

    OUTPUTS:
        One pdf per sample is written to output_dir, following the naming
        convention piechart_<SAMPLEID>.pdf. Each chart shows at most eleven
        taxa plus an "Other" wedge holding the remaining frequency.

    RAISES:
        ValueError -- when there are no samples to plot.
    """
    # Pieces of the output file name wrapped around the sample id
    name_prefix = 'piechart_'
    name_suffix = '.pdf'

    # Options for cleaning the taxonomy strings
    render_mode = 'LATEX'
    keep_unclassified = False

    # Thresholds handed to the rare/unique walk and the abundance ranking
    rare_thresh = 0.0
    sum_min = 1

    # Axis geometry
    axis_length = 7.25
    axis_border = 0.01
    axis_title = 0
    axis_legend = 7
    axis_limits = [-1.05, 1.05]

    # Colormap settings; one extra colour row is stacked under the colormap
    map_name = 'BrBG'
    num_show = 12
    channel = 85/255
    extra_color = array([[channel, channel, channel]])

    # Legend and edge display options
    show_legend = True
    show_edge = False
    legend_frame = False
    legend_offset = [0.95, 0.025, 1.0, 0.95]

    # Legend font
    legend_font = FontProperties()
    legend_font.set_size(28)
    legend_font.set_family('sans-serif')

    # General font properties
    use_latex = True
    rc_font_family = 'sans-serif'
    rc_font = ['Helvetica', 'Arial']

    # Builds the colormap: (num_show - 1) mapped colours plus the extra row
    colormap = vstack((translate_colors((num_show - 1), map_name),
                       extra_color))

    # Axis and figure dimensions
    axis_dims, fig_dims = calculate_dimensions_rectangle(
        axis_width=axis_length,
        axis_height=axis_length,
        border=axis_border,
        title=axis_title,
        legend=axis_legend)

    # Taxonomy tree and the per-sample taxa
    tree, all_taxa = build_tree_from_taxontable(tax_table)

    # Picks the samples to plot
    if samples_to_analyze is None:
        samples_to_test = all_taxa.keys()
    else:
        samples_to_test = samples_to_analyze

    # Restricts the per-sample taxa to the chosen samples
    if samples_to_test:
        samples_to_test = set(samples_to_test)
        all_taxa = {samp_id: taxa for samp_id, taxa in all_taxa.items()
                    if samp_id in samples_to_test}

    if not samples_to_test:
        raise ValueError("No samples!")

    def has_counts(values, obs_id, metadata):
        """Keep observations whose values sum to more than zero."""
        return values.sum() > 0

    for samp, _table, _rare, _unique in sample_rare_unique(tree,
                                                           tax_table,
                                                           all_taxa,
                                                           rare_thresh):
        # The table yielded by the walk is not used; the chart is drawn
        # from the full table with the empty observations removed.
        nonzero_table = tax_table.filterObservations(has_counts)
        sample_data = nonzero_table.sampleData(samp)
        taxa = nonzero_table.ObservationIds

        # Ranks the taxa and keeps the leading (num_show - 1) entries
        ranked = calculate_abundance(sample=sample_data,
                                     taxa=taxa,
                                     sum_min=sum_min)
        ranked = ranked[:(num_show - 1)]

        # Splits the ranking into names and frequencies
        sample_tax, sample_freq = [list(column) for column in zip(*ranked)]

        # Cleans the names and adds the "Other" category for what is left
        clean_tax = []
        for tax in sample_tax:
            clean_tax.append(clean_greengenes_string(
                tax, render_mode, unclassified=keep_unclassified))
        clean_tax.append('Other')
        sample_freq.append(1 - sum(sample_freq))

        # Output file for this sample
        filename = pjoin(output_dir,
                         '%s%s%s' % (name_prefix, samp, name_suffix))

        # Draws and saves the chart
        render_single_pie(data_vec=sample_freq,
                          group_names=clean_tax,
                          axis_dims=axis_dims,
                          fig_dims=fig_dims,
                          file_out=filename,
                          legend=show_legend,
                          colors=colormap,
                          show_edge=show_edge,
                          legend_frame=legend_frame,
                          rc_font=rc_font,
                          legend_offset=legend_offset,
                          rc_fam=rc_font_family,
                          legend_font=legend_font,
                          use_latex=use_latex,
                          x_lims=axis_limits,
                          y_lims=axis_limits)
def main(tax_table, output_dir, samples_to_analyze=None):
    """Generates a pie chart pdf of the most abundant taxa per sample

    INPUTS:
        tax_table -- a biom formatted taxonomy table at the desired level of
                    resolution

        output_dir -- the location of the directory where output files
                    should be stored.

        samples_to_analyze -- a list of sample ids to plot. If None is
                    passed, all samples returned by
                    build_tree_from_taxontable are analyzed.

    OUTPUTS:
        A pdf of the piechart is saved to the output directory for every
        sample, named piechart_<SAMPLEID>.pdf. A chart holds up to
        NUM_SHOW - 1 taxa followed by an "Other" category.

    RAISES:
        ValueError -- if there are no samples to analyze.
    """
    # Text placed around the sample id in the file name
    FILENAME_BEFORE = 'piechart_'
    FILENAME_AFTER = '.pdf'

    # String cleaning
    RENDER = 'LATEX'
    UNCLASSIFIED = False

    # Thresholds
    RARE_THRESH = 0.0
    SUM_MIN = 1

    # Axis parameters and limits
    AXIS_LENGTH = 7.25
    AXIS_BORDER = 0.01
    AXIS_TITLE = 0
    AXIS_LEGEND = 7
    AX_LIMS = [-1.05, 1.05]

    # Colormap constants
    MAP_NAME = 'BrBG'
    NUM_SHOW = 12
    OTHER_COLOR = array([[85 / 255, 85 / 255, 85 / 255]])

    # Legend font
    LEG_FONT = FontProperties()
    LEG_FONT.set_size(28)
    LEG_FONT.set_family('sans-serif')

    # Colormap with the extra colour row stacked at the end
    base_colors = translate_colors((NUM_SHOW - 1), MAP_NAME)
    colormap = vstack((base_colors, OTHER_COLOR))

    # Axis and figure dimensions
    dimensions = calculate_dimensions_rectangle(axis_width=AXIS_LENGTH,
                                                axis_height=AXIS_LENGTH,
                                                border=AXIS_BORDER,
                                                title=AXIS_TITLE,
                                                legend=AXIS_LEGEND)
    (axis_dims, fig_dims) = dimensions

    # Rendering options that are identical for every sample
    shared_render_options = dict(
        axis_dims=axis_dims,
        fig_dims=fig_dims,
        legend=True,
        colors=colormap,
        show_edge=False,
        legend_frame=False,
        rc_font=['Helvetica', 'Arial'],
        legend_offset=[0.95, 0.025, 1.0, 0.95],
        rc_fam='sans-serif',
        legend_font=LEG_FONT,
        use_latex=True,
        x_lims=AX_LIMS,
        y_lims=AX_LIMS,
    )

    # Walks over a taxa tree and prioritizes based on taxonomy
    (tree, all_taxa) = build_tree_from_taxontable(tax_table)

    # Samples for which charts are generated
    samples_to_test = (samples_to_analyze
                       if samples_to_analyze is not None
                       else all_taxa.keys())

    # Keeps only the taxa of the requested samples
    if samples_to_test:
        samples_to_test = set(samples_to_test)
        all_taxa = dict((key, value) for key, value in all_taxa.items()
                        if key in samples_to_test)

    if not samples_to_test:
        raise ValueError("No samples!")

    def filt_fun(v, i, md):
        """Select observations with a positive total."""
        return v.sum() > 0

    # Walks over the samples
    walker = sample_rare_unique(tree, tax_table, all_taxa, RARE_THRESH)
    for samp, filtered_table, rare, unique in walker:
        # The yielded table is replaced by the full table without its
        # all-zero observations before any data is read from it.
        filtered_table = tax_table.filterObservations(filt_fun)

        # Abundance ranking, limited to the first NUM_SHOW - 1 entries
        abund_rank = calculate_abundance(
            sample=filtered_table.sampleData(samp),
            taxa=filtered_table.ObservationIds,
            sum_min=SUM_MIN)
        abund_rank = abund_rank[:(NUM_SHOW - 1)]

        # Separates names from frequencies, cleans the greengenes strings
        # and adds an "Other" category for the frequency not shown
        [names, freqs] = [list(part) for part in zip(*abund_rank)]
        labels = [clean_greengenes_string(name, RENDER,
                                          unclassified=UNCLASSIFIED)
                  for name in names]
        labels.append('Other')
        freqs.append(1 - sum(freqs))

        # Sample file name
        name_parts = (FILENAME_BEFORE, samp, FILENAME_AFTER)
        filename = pjoin(output_dir, '%s%s%s' % name_parts)

        # Creates the pie chart
        render_single_pie(data_vec=freqs,
                          group_names=labels,
                          file_out=filename,
                          **shared_render_options)
def main(taxa_table, output_dir, mapping=None, samples_to_analyze=None):
    """Creates LaTeX formatted significant OTU lists

    INPUTS:
        taxa_table -- the taxonomy table to summarize. It is accessed through
                    filter/data/ids/index calls, so it is presumably a biom
                    Table of relative frequencies with taxa as observations
                    -- TODO confirm.

        output_dir -- a directory where the final file should be saved.

        mapping -- a 2D dictionary of mapping data where the sample id is
                    keyed to a dictionary of metadata. When supplied, the
                    COLLECTION_DATE, COLLECTION_TIME and BODY_HABITAT fields
                    are read for each sample. DEFAULT: None

        samples_to_analyze -- a list of sample ids which should be used to
                    generate data. If None, all the samples will be used.
                    DEFAULT: None

    OUTPUTS:
        Writes <output_dir>/macros.tex, a set of LaTeX macro definitions
        holding the barcode, sample type, sample date and time, the table of
        most abundant taxa, the table of most enriched taxa and a sentence
        listing rare and unique taxa. RARE_THRESH (0.1) is the rarity cut-off
        handed to sample_rare_unique. Unique taxa are tagged with the 'COLOR'
        format in the list.

    RAISES:
        ValueError -- when there are no samples to process.
    """
    # Sets up the way samples should be converted
    SAMPLE_CONVERTER = {'feces': 'fecal',
                        'oral_cavity': 'oral',
                        'oral cavity': 'oral',
                        'skin': 'skin'}

    # Placeholder row used to pad the enrichment table
    DUMMY = ['', '', '', '']

    # Sets table constants
    RENDERING = "LATEX"
    RARE_THRESH = 0.1
    SUM_MIN = 1

    FORMAT_SIGNIFIGANCE = ['%1.2f', "%1.2f", "%i", "SKIP"]
    SIGNIFIGANCE_HUNDRED = [True, True, False, False]
    MACRO_CATS_SIGNIFICANCE = ['enrichTaxon', 'enrichSampl', 'enrichPopul',
                               'enrichFold']
    MACRO_FORM_SIGNIFICANCE = [
        lambda x: clean_greengenes_string(x, render_mode='LATEX'),
        lambda x: x,
        lambda x: x,
        lambda x: x]

    FORMAT_ABUNDANCE = ["%1.1f"]
    ABUNDANCE_HUNDRED = [True]
    MACRO_CATS_ABUNDANCE = ['abundTaxon', 'abundSampl']
    MACRO_FORM_ABUNDANCE = [
        lambda x: clean_greengenes_string(x, render_mode='LATEX'),
        lambda x: x]

    DATE_FIELD = 'COLLECTION_DATE'
    DATE_FORMAT_SHORT = '%m/%d/%y'
    DATE_FORMAT_LONG = '%m/%d/%Y'
    DATE_OUT = '%B %d, %Y'
    TIME_FIELD = 'COLLECTION_TIME'

    UNKNOWNS = {'None', 'NONE', 'none', 'NA', 'na', 'UNKNOWN', 'unknown'}

    # Upper bound (slice end) on the taxa listed in each table or list
    NUM_TAXA_SHOW = 5

    # Builds the the taxonomy tree for the table and identifies the
    # rare/unique taxa in each sample
    tree, all_taxa = build_tree_from_taxontable(taxa_table)

    # Sets up samples for which tables are being generated
    if samples_to_analyze is not None:
        samples_to_test = samples_to_analyze
    else:
        samples_to_test = all_taxa.keys()

    if samples_to_test:
        samples_to_test = set(samples_to_test)
        all_taxa = {k: v for k, v in all_taxa.items()
                    if k in samples_to_test}

    if not samples_to_test:
        raise ValueError("No samples!")

    def filt_fun(v, i, md):
        """Keep observations whose values sum to more than zero."""
        return v.sum() > 0

    # NOTE(review): every sample is written to this same path, so when more
    # than one sample is processed only the last one survives on disk --
    # confirm this is intended.
    file_name = pjoin(output_dir, 'macros.tex')

    # Generates lists and tables for each sample.
    # The parameter taxa_table is used throughout; the body previously read
    # an undefined name `tax_table` here and in the abundance filter below.
    for samp, filtered_table, rare, unique in sample_rare_unique(
            tree, taxa_table, all_taxa, RARE_THRESH):

        filtered_table = filtered_table.filter(filt_fun,
                                               axis='observation',
                                               inplace=False)
        abund_table = taxa_table.filter(filt_fun,
                                        axis='observation',
                                        inplace=False)

        # Gets sample information for the whole table
        abund_sample = abund_table.data(samp)
        abund_taxa = abund_table.ids(axis='observation')

        # Gets sample information for other filtered samples
        filt_taxa = filtered_table.ids(axis='observation')
        population = array([filtered_table.data(i, axis='observation')
                            for i in filtered_table.ids(axis='observation')])

        sample_position = filtered_table.index(samp, axis='sample')
        filt_sample = filtered_table.data(samp)

        # Removes this sample's column so it is compared to everyone else
        population = delete(population, sample_position, 1)

        # Converts the lists into greengenes strings for later processing
        greengenes_rare = [';'.join(taxon) for taxon in rare]
        greengenes_unique = [';'.join(taxon) for taxon in unique]

        # Formats the rare and unique lists: unique taxa come first
        rare_combined = greengenes_unique + greengenes_rare
        rare_format = (['COLOR'] * len(greengenes_unique) +
                       ['REG'] * len(greengenes_rare))

        number_rare_tax = len(rare_combined)
        num_rare = len(rare)
        num_unique = len(unique)

        rare_formatted = \
            convert_taxa_to_list(rare_combined[0:NUM_TAXA_SHOW],
                                 tax_format=rare_format,
                                 render_mode=RENDERING,
                                 comma=True)

        if num_unique > 0:
            unique_string = ' and \\textcolor{red}{%i unique}' % num_unique
        else:
            unique_string = ''

        if number_rare_tax == 0:
            rare_formatted = "There were no rare or unique taxa found in "\
                "your sample."
        elif 0 < number_rare_tax <= NUM_TAXA_SHOW:
            rare_formatted = 'Your sample contained the following rare%s '\
                'taxa: %s.' % (unique_string, rare_formatted)
        else:
            rare_formatted = 'Your sample contained %i rare%s taxa, '\
                'including the following: %s.' \
                % (num_rare, unique_string, rare_formatted)

        # Calculates abundance rank
        abundance = calculate_abundance(abund_sample, abund_taxa,
                                        sum_min=SUM_MIN)

        # Generates formatted abundance table
        formatted_abundance = convert_taxa(abundance[0:NUM_TAXA_SHOW],
                                           formatting_keys=FORMAT_ABUNDANCE,
                                           hundredx=ABUNDANCE_HUNDRED)

        abundance_formatted = \
            build_latex_macro(formatted_abundance,
                              categories=MACRO_CATS_ABUNDANCE,
                              format=MACRO_FORM_ABUNDANCE)

        # Only the enriched ("high") taxa are reported
        (high, _low) = calculate_tax_rank_1(sample=filt_sample,
                                            population=population,
                                            taxa=filt_taxa,
                                            critical_value=0.05)

        if len(high) == 0:
            formatted_high = [['', '', '', '']] * NUM_TAXA_SHOW
        else:
            formatted_high = \
                convert_taxa(high[0:NUM_TAXA_SHOW],
                             formatting_keys=FORMAT_SIGNIFIGANCE,
                             hundredx=SIGNIFIGANCE_HUNDRED)
            # Pads short tables with placeholder rows up to NUM_TAXA_SHOW
            missing_rows = NUM_TAXA_SHOW - len(high)
            if missing_rows > 0:
                formatted_high.extend([DUMMY] * missing_rows)

        high_formatted = build_latex_macro(formatted_high,
                                           categories=MACRO_CATS_SIGNIFICANCE,
                                           format=MACRO_FORM_SIGNIFICANCE)

        # Handles date parsing
        sample_date = 'unknown'
        if mapping is not None and mapping[samp][DATE_FIELD] not in UNKNOWNS:
            try:
                sample_date = format_date(mapping[samp],
                                          date_field=DATE_FIELD,
                                          d_form_in=DATE_FORMAT_SHORT,
                                          format_out=DATE_OUT)
            except Exception:
                # Falls back to the four digit year format; a failure here
                # is allowed to propagate.
                sample_date = format_date(mapping[samp],
                                          date_field=DATE_FIELD,
                                          d_form_in=DATE_FORMAT_LONG,
                                          format_out=DATE_OUT)

            # Removes the leading zero of a single digit day ("05," ->
            # "5,"). A date without such a zero is kept as it is; it used
            # to be overwritten with 'unknown'.
            comma_pos = sample_date.find(',')
            if comma_pos >= 2 and sample_date[comma_pos - 2] == '0':
                zero_pos = comma_pos - 2
                sample_date = ''.join([sample_date[:zero_pos],
                                       sample_date[zero_pos + 1:]])

        # Handles time parsing
        if mapping is not None and mapping[samp][TIME_FIELD] not in UNKNOWNS:
            sample_time = mapping[samp][TIME_FIELD].lower()
        else:
            sample_time = 'unknown'

        # Handles sample type parsing
        if mapping is not None:
            habitat_parts = mapping[samp]['BODY_HABITAT'].split(':')
            # The value after the first colon is the habitat; a value with
            # no colon at all is used whole instead of raising IndexError.
            if len(habitat_parts) > 1:
                sample_type_prelim = habitat_parts[1]
            else:
                sample_type_prelim = habitat_parts[0]

            if sample_type_prelim in SAMPLE_CONVERTER:
                sample_type = SAMPLE_CONVERTER[sample_type_prelim]
            elif sample_type_prelim in UNKNOWNS:
                sample_type = 'unknown'
            else:
                sample_type = sample_type_prelim.lower()
        else:
            sample_type = 'unknown'

        # Saves the file; the context manager closes it even if a write fails
        with open(file_name, 'w') as file_for_editing:
            file_for_editing.write('%% Barcode\n\\def\\barcode{%s}\n\n'
                                   % samp.split('.')[0])
            file_for_editing.write('%% Sample Type\n\\def\\sampletype{%s}\n\n'
                                   % sample_type)
            file_for_editing.write('%% Sample Date\n\\def\\sampledate{%s}\n'
                                   '\\def\\sampletime{%s}\n\n\n'
                                   % (sample_date, sample_time))
            file_for_editing.write('%% Abundance Table\n%s\n\n\n'
                                   % abundance_formatted)
            file_for_editing.write('%% Enrichment Table\n%s\n\n\n'
                                   % high_formatted)
            file_for_editing.write('%% Rare List\n\\def\\rareList{%s}\n'
                                   % rare_formatted)
def main(taxa_table, output_dir, samples_to_analyze=None):
    """Creates LaTeX formatted significant OTU lists

    INPUTS:
        taxa_table -- the taxonomy table to summarize. It is accessed through
                    filterObservations/sampleData/observationData calls, so
                    it is presumably a biom table of relative frequencies
                    with taxa as observations -- TODO confirm.

        output_dir -- a directory where the final files should be saved.

        samples_to_analyze -- a list of sample ids which should be used to
                    generate data. If None, all the samples in the table
                    will be used. DEFAULT: None

    OUTPUTS:
        Writes one file per sample, <output_dir>/macros_<SAMPLEID>.tex,
        holding LaTeX macro definitions for the table of most abundant taxa,
        the table of most enriched taxa and a sentence listing rare and
        unique taxa. RARE_THRESHHOLD (0.1) is the rarity cut-off handed to
        sample_rare_unique. Unique taxa are tagged with the 'COLOR' format
        in the list. The sample id and its count of non-zero taxa are also
        printed for every sample.

    RAISES:
        ValueError -- when there are no samples to process.
    """
    # Sets table constants
    RARE_THRESHHOLD = 0.1
    RENDERING = "LATEX"
    FORMAT_SIGNIFIGANCE = ['%1.1f', "%1.2f", "%i", "SKIP"]
    SIGNIFIGANCE_HUNDRED = [True, True, False, False]
    # Placeholder row used to pad the enrichment table
    DUMMY = ['', '', '', '']
    FORMAT_ABUNDANCE = ["%1.1f"]
    ABUNDANCE_HUNDRED = [True]
    # NOTE(review): 'enrichFoldd' looks like a typo for 'enrichFold'; it is
    # left untouched because the LaTeX side may depend on it -- confirm.
    MACRO_CATS_SIGNIFICANCE = ['enrichTaxon', 'enrichSampl', 'enrichPopul',
                               'enrichFoldd']
    MACRO_CATS_ABUNDANCE = ['abundTaxon', 'abundSampl']
    FILE_PRECURSER = 'macros_'
    FILE_EXTENSION = '.tex'

    # Upper bound (slice end) on the taxa listed in each table or list
    NUMBER_OF_TAXA_SHOWN = 5

    # Builds the the taxonomy tree for the table and identifies the
    # rare/unique taxa in each sample
    tree, all_taxa = build_tree_from_taxontable(taxa_table)

    # Sets up samples for which tables are being generated. An identity
    # test is used because `== None` is not a reliable None check for
    # array-like arguments.
    if samples_to_analyze is not None:
        samples_to_test = samples_to_analyze
    else:
        samples_to_test = all_taxa.keys()

    if samples_to_test:
        samples_to_test = set(samples_to_test)
        all_taxa = {k: v for k, v in all_taxa.items()
                    if k in samples_to_test}

    if not samples_to_test:
        raise ValueError("No samples!")

    def has_counts(v, i, md):
        """Keep observations whose values sum to more than zero."""
        return v.sum() > 0

    # Generates lists and tables for each sample
    for samp, filtered_table, rare, unique in sample_rare_unique(
            tree, taxa_table, all_taxa, RARE_THRESHHOLD):

        filtered_table = filtered_table.filterObservations(has_counts)

        # Gets sample information for other samples
        taxa = filtered_table.ObservationIds
        population = array([filtered_table.observationData(i)
                            for i in filtered_table.ObservationIds])

        sample_position = filtered_table.getSampleIndex(samp)
        sample = filtered_table.sampleData(samp)
        # Single-argument form prints the same text under Python 2 and 3
        print('%s %s' % (samp, sum(sample > 0)))

        # Removes this sample's column so it is compared to everyone else
        population = delete(population, sample_position, 1)

        # Converts the lists into greengenes strings for later processing
        greengenes_rare = ['; '.join(taxon) for taxon in rare]
        greengenes_unique = ['; '.join(taxon) for taxon in unique]

        # Formats the rare and unique lists: unique taxa come first
        rare_combined = greengenes_unique + greengenes_rare
        rare_format = (['COLOR'] * len(greengenes_unique) +
                       ['REG'] * len(greengenes_rare))

        number_rare_tax = len(rare_combined)
        num_rare = len(rare)
        num_unique = len(unique)

        rare_formatted = \
            convert_taxa_to_list(rare_combined[0:NUMBER_OF_TAXA_SHOWN],
                                 tax_format=rare_format,
                                 render_mode=RENDERING,
                                 comma=True)

        if num_unique > 0:
            unique_string = ' and \\textcolor{red}{%i unique}' % num_unique
        else:
            unique_string = ''

        # unique_string already carries its own leading " and ", so the
        # templates must not add another "and" or an extra space around it.
        if number_rare_tax == 0:
            rare_formatted = "There were no rare or unique taxa found in "\
                "your sample."
        elif 0 < number_rare_tax <= NUMBER_OF_TAXA_SHOWN:
            rare_formatted = 'Your sample contained the following rare%s '\
                'taxa: %s.' % (unique_string, rare_formatted)
        else:
            rare_formatted = 'Your sample contained %i rare%s taxa, '\
                'including the following: %s.' \
                % (num_rare, unique_string, rare_formatted)

        # Calculates abundance rank
        abundance = calculate_abundance(sample, taxa,
                                        abundance_threshhold=1)

        # Generates formatted abundance table
        formatted_abundance = \
            convert_taxa(abundance[0:NUMBER_OF_TAXA_SHOWN],
                         formatting_keys=FORMAT_ABUNDANCE,
                         hundredx=ABUNDANCE_HUNDRED)

        abundance_formatted = \
            generate_latex_macro(formatted_abundance,
                                 categories=MACRO_CATS_ABUNDANCE)

        # Only the enriched ("high") taxa are reported
        (high, _low) = calculate_tax_rank_1(sample=sample,
                                            population=population,
                                            taxa=taxa,
                                            critical_value=0.05)

        formatted_high = convert_taxa(high[0:NUMBER_OF_TAXA_SHOWN],
                                      formatting_keys=FORMAT_SIGNIFIGANCE,
                                      hundredx=SIGNIFIGANCE_HUNDRED)

        # Pads short tables with placeholder rows up to the shown count
        missing_rows = NUMBER_OF_TAXA_SHOWN - len(high)
        if missing_rows > 0:
            formatted_high.extend([DUMMY] * missing_rows)

        high_formatted = \
            generate_latex_macro(formatted_high,
                                 categories=MACRO_CATS_SIGNIFICANCE)

        file_name = pjoin(output_dir, '%s%s%s' % (FILE_PRECURSER, samp,
                                                  FILE_EXTENSION))

        # Saves the file; the context manager closes it even if a write fails
        with open(file_name, 'w') as file_for_editing:
            file_for_editing.write('%% Abundance Table\n%s\n\n\n'
                                   % abundance_formatted)
            file_for_editing.write('%% Enrichment Table\n%s\n\n\n'
                                   % high_formatted)
            file_for_editing.write('%% Rare List\n\\def\\rareList{%s}\n'
                                   % rare_formatted)