コード例 #1
0
    def test_render_latex_macro(self):
        # Sets up variable for testing        
        known_macro = '\\def\\TaxonomyA{Genus \\textit{Escherichia}}\n'\
                      '\\def\\DoctorA{10.00%}\n'\
                      '\\def\\HumansA{1.00%}\n'\
                      '\\def\\FoldA{10}\n'\
                      '\\def\\TaxonomyB{\\textit{Escherichia coli}}\n'\
                      '\\def\\DoctorB{0.10%}\n'\
                      '\\def\\HumansB{1.00%}\n'\
                      '\\def\\FoldB{0.10}'

        # Calculates test variable
        test_macro = generate_latex_macro(self.test_table, self.header)

        # Checks the outputs match
        self.assertEqual(known_macro, test_macro)
コード例 #2
0
def main(taxa_table, output_dir, samples_to_analyze = None):

    """Creates LaTeX formatted significant OTU lists

    INPUTS:
        taxa -- a numpy vector with greengenes taxonomy strings

        tax_table -- a numpy array with the relative frequencies of taxonomies
            (rows) for each give sample (column)

        samples_to_analyze -- a numpy vector of sample ids associated with the 
            tax_table values

        output_dir -- a directory where the final files should be saved.

        samples_to_analyze -- a list of samples_to_analyze which should be used 
                    to generate data. If this is left empty, all the samples in 
                    the table will be used.

    OUTPUTS:
        Generates text files containing LaTex encoded strings which creates a 
        LaTeX macro dictionary with the information for creating a table of most
        abundant taxa, most enriched taxa, and rare and unique taxa. Rare 
        defined as present in less than 10% of the total population. The unique 
        taxa are bolded in the lists. 
    """

    # Sets table constants
    RARE_THRESHHOLD = 0.1
    RENDERING = "LATEX"
    FORMAT_SIGNIFIGANCE = ['%1.1f', "%1.2f", "%i", "SKIP"]
    SIGNIFIGANCE_HUNDRED = [True, True, False, False]
    DUMMY  = ['','','','']    
    COUNT = [0, 1, 2, 3, 4, 5, 6, 7]
    FORMAT_ABUNDANCE = ["%1.1f"]
    ABUNDANCE_HUNDRED = [True]
    MACRO_CATS_SIGNIFICANCE = ['enrichTaxon','enrichSampl', 'enrichPopul', 
        'enrichFoldd']
    MACRO_CATS_ABUNDANCE = ['abundTaxon', 'abundSampl']

    FILE_PRECURSER = 'macros_'
    FILE_EXTENSION = '.tex'

    # Number of taxa shown is an indexing value, it is one less than what is 
    # actually shown.
    NUMBER_OF_TAXA_SHOWN = 5

    # Builds the the taxomnomy tree for the table and identifies the 
    # rare/unique taxa in each sample
    tree, all_taxa = build_tree_from_taxontable(taxa_table)

    # Sets up samples for which tables are being generated    
    if not samples_to_analyze == None:
        samples_to_test = samples_to_analyze
    else:
        samples_to_test = all_taxa.keys()

    if samples_to_test:
        samples_to_test = set(samples_to_test)
        tmp = {k:v for k,v in all_taxa.items() if k in samples_to_test}
        all_taxa = tmp
        if not samples_to_test:
            raise ValueError, "No samples!"

    # Generates lists and tables for each sample
    for samp, filtered_table, rare, unique in sample_rare_unique(tree, \
        taxa_table, all_taxa, RARE_THRESHHOLD):
        filtered_table = filtered_table.filterObservations(lambda v,i,md:\
        v.sum() > 0)

        # Gets sample information for other samples
        taxa = filtered_table.ObservationIds       
        population = array([filtered_table.observationData(i) for i in \
            filtered_table.ObservationIds])
        
        sample_position = filtered_table.getSampleIndex(samp)
        sample = filtered_table.sampleData(samp)
        print samp, sum(sample > 0)
                
        population = delete(population, sample_position, 1)
        
        # Converts the lists into greengenes strings for later processing
        greengenes_rare = []
        greengenes_unique = []
        for taxon in rare:
            greengenes_rare.append('; '.join(taxon))
        for taxon in unique:
            greengenes_unique.append('; '.join(taxon))

        # Formats the rare and unique lists          
        rare_format = []
        rare_combined = []
        for taxon in greengenes_unique:
            rare_combined.append(taxon)
            rare_format.append('COLOR')
        for taxon in greengenes_rare:
            rare_combined.append(taxon)
            rare_format.append('REG')

        number_rare_tax = len(rare_combined)
        num_rare = len(rare)
        num_unique = len(unique)

        rare_formatted = convert_taxa_to_list(rare_combined[0:NUMBER_OF_TAXA_SHOWN], 
                                               tax_format = rare_format,
                                               render_mode = RENDERING, 
                                               comma = True)        
     
        if num_unique > 0:
            unique_string = ' and \\textcolor{red}{%i unique}' % num_unique
        else:
            unique_string = ''

        if number_rare_tax == 0:
            rare_formatted = "There were no rare or unique taxa found in "\
                "your sample." 

        elif 0 < number_rare_tax <= NUMBER_OF_TAXA_SHOWN:
            rare_formatted = 'Your sample contained the following rare %s '\
                'taxa: %s.' % (unique_string, rare_formatted)

        else:
            rare_formatted = 'Your sample contained %i rare and %s taxa, '\
                'including the following: %s.' \
                % (num_rare, unique_string, 
                   rare_formatted)


        # Calculates abundance rank
        (abundance) = calculate_abundance(sample, taxa, 
                                          abundance_threshhold = 1)

        # Generates formatted abundance table
        formatted_abundance = convert_taxa(abundance[0:NUMBER_OF_TAXA_SHOWN],                                        
                                        formatting_keys = FORMAT_ABUNDANCE,
                                        hundredx = ABUNDANCE_HUNDRED)

        abundance_formatted = generate_latex_macro(formatted_abundance, \
            categories = MACRO_CATS_ABUNDANCE)

        (high, low) = calculate_tax_rank_1(sample = sample, 
                                           population = population, 
                                           taxa = taxa,
                                           critical_value = 0.05)

        if len(high) < NUMBER_OF_TAXA_SHOWN:
            # Formats the known high taxa
            formatted_high = convert_taxa(high[0:NUMBER_OF_TAXA_SHOWN],
                                          formatting_keys = FORMAT_SIGNIFIGANCE,
                                          hundredx = SIGNIFIGANCE_HUNDRED)
            # Adds the dummy list to the end
            for idx in COUNT:
                if idx == (NUMBER_OF_TAXA_SHOWN - len(high)):
                    break
                formatted_high.append(DUMMY)           

            high_formatted = generate_latex_macro(formatted_high, \
                categories = MACRO_CATS_SIGNIFICANCE)

        else:
            formatted_high = convert_taxa(high[0:NUMBER_OF_TAXA_SHOWN],
                                          formatting_keys = FORMAT_SIGNIFIGANCE,
                                          hundredx = SIGNIFIGANCE_HUNDRED)

            high_formatted = generate_latex_macro(formatted_high, \
                categories = MACRO_CATS_SIGNIFICANCE)


    
        file_name = pjoin(output_dir, '%s%s%s' % (FILE_PRECURSER, samp, 
            FILE_EXTENSION))

        # Saves the file
        file_for_editing = open(file_name, 'w')
        # file_for_editing.write('% Participant Name\n\\def\\yourname'\
        #     '{Michael Pollan or longer name}\n\n')
        file_for_editing.write('%% Abundance Table\n%s\n\n\n' \
            % abundance_formatted)
        file_for_editing.write('%% Enrichment Table\n%s\n\n\n' \
            % high_formatted)
        file_for_editing.write('%% Rare List\n\\def\\rareList{%s}\n' \
            % rare_formatted)
        file_for_editing.close()