def test_longitudinal_otu_table_conversion_wrapper(self):
    """longitudinal_otu_table_conversion_wrapper converts a small table

    Two individuals (A, B) with three timepoints each are converted relative
    to the sample flagged in the 'timepoint_zero' column, and the first data
    row of the rendered table is checked.
    """
    biom_json = """{"rows": [{"id": "0", "metadata": null}, {"id": "1", "metadata": null}, {"id": "2", "metadata": null}, {"id": "3", "metadata": null}, {"id": "4", "metadata": null}], "format": "Biological Observation Matrix 1.0.0", "data": [[0, 0, 1.0], [0, 1, 2.0], [0, 2, 3.0], [1, 3, 1.0], [1, 4, 2.0], [1, 5, 3.0], [2, 0, 1.0], [2, 1, 2.0], [2, 2, 3.0], [2, 4, 1.0], [2, 5, 2.0], [3, 0, 2.0], [3, 1, 4.0], [3, 2, 6.0], [3, 4, 1.0], [3, 5, 2.0], [4, 0, 3.0], [4, 1, 2.0], [4, 2, 1.0], [4, 3, 6.0], [4, 4, 4.0], [4, 5, 2.0]], "columns": [{"id": "AT0", "metadata": null}, {"id": "AT1", "metadata": null}, {"id": "AT2", "metadata": null}, {"id": "BT0", "metadata": null}, {"id": "BT1", "metadata": null}, {"id": "BT2", "metadata": null}], "generated_by": "BIOM-Format 1.0.0-dev", "matrix_type": "sparse", "shape": [5, 6], "format_url": "http://biom-format.org", "date": "2012-08-01T09:14:03.574451", "type": "OTU table", "id": null, "matrix_element_type": "float"}"""
    mapping_lines = """#SampleID\tindividual\ttimepoint_zero\ttimepoint
AT0\tA\t1\t0
AT1\tA\t0\t1
AT2\tA\t0\t2
BT0\tB\t1\t0
BT1\tB\t0\t1
BT2\tB\t0\t2
""".split('\n')

    category_mapping = parse_mapping_file(mapping_lines)
    parsed_table = parse_biom_table_str(biom_json)

    converted = longitudinal_otu_table_conversion_wrapper(
        parsed_table, category_mapping, 'individual', 'timepoint_zero')
    rendered_lines = str(converted).split('\n')

    self.assertEqual(rendered_lines[0], "# Constructed from biom file")

    first_row = rendered_lines[2].split('\t')
    expected_by_column = (
        (0, 0.0),
        # sets the reference to 0
        (1, 0.0),
        # subtracts values from same individual from the reference
        (2, 0.05714286),
        # sets to ignore number when not observed across a person
        (4, 999999999.0),
    )
    for column, expected in expected_by_column:
        self.assertFloatEqual(float(first_row[column]), expected)
def main(): option_parser, opts, args = parse_command_line_parameters(**script_info) otu_table_fp = opts.otu_table_fp otu_include_fp = opts.otu_include_fp output_fp = opts.output_fp verbose = opts.verbose category_mapping_fp = opts.category_mapping_fp individual_column = opts.individual_column reference_sample_column = opts.reference_sample_column conv_output_fp = opts.converted_otu_table_output_fp relative_abundance = opts.relative_abundance filter = opts.filter test = opts.test category = opts.category threshold = opts.threshold collate_results = opts.collate_results # check mapping file category_mapping = open(category_mapping_fp, 'U') category_mapping = parse_mapping_file(category_mapping) if not category: if test != 'paired_T': option_parser.error( 'a category in the category mapping file must be' ' specified with the -c option for this test') # set up threshold value for filtering, if any if threshold and threshold != 'None': threshold = float(threshold) # if specifying a list of OTUs to look at specifically if otu_include_fp and otu_include_fp != 'None': otu_include = open(otu_include_fp) else: otu_include = None # if only passing in a single OTU table if isdir(otu_table_fp) is False: # raise error if collate option is being passed to single table if collate_results is True: option_parser.error( 'Cannot collate the results of only one table.' 
' Please rerun the command without passing the -w option') else: #open and parse the biom table fp otu_table = parse_biom_table(open(otu_table_fp, 'U')) # run the statistical test output = test_wrapper( test, otu_table, category_mapping, category, threshold, filter, otu_include, otu_table_relative_abundance=relative_abundance) # write output output_file = open(output_fp, 'w') output_file.write('\n'.join(output)) output_file.close() # if the user has passed in a directory if isdir(otu_table_fp) is True: # negate_collate to return an results file on a per table basis if collate_results is False: # build list of otu tables otu_table_paths = glob('%s/*biom' % otu_table_fp) # if output dir doesn't exist, then make it if exists(output_fp): pass else: makedirs(output_fp) for otu_table_fp in otu_table_paths: #open and parse the biom table fp otu_table = parse_biom_table(open(otu_table_fp, 'U')) #synchronize the mapping file with the otu table category_mapping, removed_samples = \ sync_mapping_to_otu_table(otu_table, category_mapping) if removed_samples: print "Warning, the following samples were in the category mapping file " +\ "but not the OTU table and will be ignored: " for i in removed_samples: print i + '\n' # create naming convention for output file # will look like: otu_table_ANOVA_Treatment.txt output_basename = basename(otu_table_fp) output_basename = output_basename.replace(".biom", "") output_fp_sweep = "%s_%s_%s.txt" % \ (output_basename,test,category) # if the convert_otu_table_fp is passed, save the converted table if test == 'longitudinal_correlation' or test == 'paired_T': converted_otu_table = longitudinal_otu_table_conversion_wrapper( table, category_mapping, individual_column, reference_sample_column) if conv_output_fp: of = open(conv_output_fp, 'w') of.write(format_biom_table(converted_otu_table)) of.close() if test == 'longitudinal_correlation': #set the otu_include list to all of the OTUs, this effectively #deactivates the filter for correlation, 
because the filtered OTU_list is #rewritten with the otu_include list in the test_wrapper if not otu_include: otu_include = set(otu_table.ObservationIds) output = test_wrapper('correlation', converted_otu_table, category_mapping, category, threshold, filter, otu_include, 999999999.0, True) elif test == 'paired_T': output = test_wrapper('paired_T', converted_otu_table, category_mapping, category, threshold, filter, otu_include, 999999999.0, True, individual_column, reference_sample_column) # run test single input table from the directory else: output = test_wrapper( test, otu_table, category_mapping, category, threshold, filter, otu_include, otu_table_relative_abundance=relative_abundance) # write output file with new naming convention output_file = open(join(output_fp, output_fp_sweep), 'w') output_file.write('\n'.join(output)) output_file.close() # Use when the input dir contains rarefied OTU tables, and you want # to collate the p-values & results into one results file if collate_results is True: if test != 'longitudinal_correlation' and test != 'paired_T': # get biom tables otu_table_paths = glob('%s/*biom' % otu_table_fp) #get aggregated tables parsed_otu_tables = [] for otu_table_fp in otu_table_paths: otu_table = open(otu_table_fp, 'U') otu_table = parse_biom_table(otu_table) parsed_otu_tables.append(otu_table) #synchronize the mapping file with the otu table #checks with just the first OTU table and assumes that all otu tables #have the same collection of samples category_mapping, removed_samples = \ sync_mapping_to_otu_table(parsed_otu_tables[0],category_mapping) if removed_samples: print "Warning, the following samples were in the category mapping file " +\ "but not the OTU table and will be ignored: " for i in removed_samples: print i + '\n' # get output from statistical test output = test_wrapper_multiple( test, parsed_otu_tables, category_mapping, category, threshold, filter, otu_include, otu_table_relative_abundance=relative_abundance) #write out 
aggregated results output_file = open(output_fp, 'w') output_file.write('\n'.join(output)) output_file.close() else: option_parser.error( "You cannot collate the results obtained from " "using the longitudinal_correlation and paired_T options.")
def main():
    """Run an OTU category significance test from the command line.

    Options are read with parse_command_line_parameters(**script_info).

    * If opts.otu_table_fp is a single file, the table is parsed, the mapping
      is synchronised to it, and the requested test is run through
      test_wrapper. 'longitudinal_correlation' and 'paired_T' first convert
      the table with longitudinal_otu_table_conversion_wrapper (optionally
      saving it to opts.converted_otu_table_output_fp).
    * If it is a directory, every path matching '<dir>/*biom' is parsed and
      the tables are passed together to test_wrapper_multiple.

    The result lines are joined with newlines and written to opts.output_fp.

    Raises:
        ValueError: if no category is given for a test other than 'paired_T',
            or if 'longitudinal_correlation' / 'paired_T' is requested on a
            directory.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_fp = opts.output_fp
    category_mapping_fp = opts.category_mapping_fp
    with open(category_mapping_fp, 'U') as mapping_f:
        category_mapping = parse_mapping_file(mapping_f)

    individual_column = opts.individual_column
    reference_sample_column = opts.reference_sample_column
    conv_output_fp = opts.converted_otu_table_output_fp
    relative_abundance = opts.relative_abundance
    otu_filter = opts.filter
    test = opts.test
    category = opts.category
    longitudinal_tests = ('longitudinal_correlation', 'paired_T')

    # every test except paired_T needs a mapping-file category
    if not category and test != 'paired_T':
        raise ValueError('a category in the category mapping file must be' +
                         ' specified with the -c option for this test')

    threshold = opts.threshold
    if threshold and threshold != 'None':
        threshold = float(threshold)

    # the open handle itself is what gets passed to the test wrappers, so it
    # is only closed after the results have been written
    otu_include_fp = opts.otu_include_fp
    otu_include_f = None
    if otu_include_fp and otu_include_fp != 'None':
        otu_include_f = open(otu_include_fp)
    otu_include = otu_include_f

    otu_table_fp = opts.otu_table_fp

    try:
        if not isdir(otu_table_fp):
            # if single file, process normally
            with open(otu_table_fp, 'U') as table_f:
                try:
                    otu_table = parse_biom_table(table_f)
                except AttributeError:
                    # NOTE(review): fallback kept from the original code; it
                    # hands the same file object to the string parser.
                    otu_table = parse_biom_table_str(table_f)

            # synchronize the mapping file with the otu table
            category_mapping, removed_samples = \
                sync_mapping_to_otu_table(otu_table, category_mapping)
            if removed_samples:
                print("Warning, the following samples were in the category mapping file "
                      "but not the OTU table and will be ignored: ")
                for sample_id in removed_samples:
                    print(sample_id + '\n')

            if test in longitudinal_tests:
                converted_otu_table = \
                    longitudinal_otu_table_conversion_wrapper(
                        otu_table, category_mapping, individual_column,
                        reference_sample_column)
                if conv_output_fp:
                    with open(conv_output_fp, 'w') as conv_f:
                        conv_f.write(format_biom_table(converted_otu_table))

            if test == 'longitudinal_correlation':
                # set the otu_include list to all of the OTUs, this
                # effectively deactivates the filter for correlation, because
                # the filtered OTU_list is rewritten with the otu_include
                # list in the test_wrapper
                if not otu_include:
                    otu_include = set(otu_table.ObservationIds)
                output = test_wrapper(
                    'correlation', converted_otu_table, category_mapping,
                    category, threshold, otu_filter, otu_include,
                    999999999.0, True)
            elif test == 'paired_T':
                output = test_wrapper(
                    'paired_T', converted_otu_table, category_mapping,
                    category, threshold, otu_filter, otu_include,
                    999999999.0, True, individual_column,
                    reference_sample_column)
            else:
                output = test_wrapper(
                    test, otu_table, category_mapping, category, threshold,
                    otu_filter, otu_include,
                    otu_table_relative_abundance=relative_abundance)
        else:
            if test in longitudinal_tests:
                raise ValueError(
                    "the longitudinal_correlation and paired_T options cannot be run on a directory")

            # if directory, get aggregated results
            parsed_otu_tables = []
            for path in glob('%s/*biom' % otu_table_fp):
                with open(path, 'U') as table_f:
                    parsed_otu_tables.append(parse_biom_table(table_f))

            # synchronize the mapping file with the otu table
            # checks with just the first OTU table and assumes that all otu
            # tables have the same collection of samples
            category_mapping, removed_samples = \
                sync_mapping_to_otu_table(parsed_otu_tables[0],
                                          category_mapping)
            if removed_samples:
                print("Warning, the following samples were in the category mapping file "
                      "but not the OTU table and will be ignored: ")
                for sample_id in removed_samples:
                    print(sample_id + '\n')

            output = test_wrapper_multiple(
                test, parsed_otu_tables, category_mapping, category,
                threshold, otu_filter, otu_include,
                otu_table_relative_abundance=relative_abundance)

        with open(output_fp, 'w') as output_file:
            output_file.write('\n'.join(output))
    finally:
        if otu_include_f is not None:
            otu_include_f.close()
def main(): option_parser, opts, args = parse_command_line_parameters(**script_info) otu_table_fp = opts.otu_table_fp otu_include_fp = opts.otu_include_fp output_fp = opts.output_fp verbose = opts.verbose category_mapping_fp = opts.category_mapping_fp individual_column = opts.individual_column reference_sample_column = opts.reference_sample_column conv_output_fp = opts.converted_otu_table_output_fp relative_abundance = opts.relative_abundance filter = opts.filter test = opts.test category = opts.category threshold = opts.threshold collate_results = opts.collate_results # check mapping file category_mapping = open(category_mapping_fp,'U') category_mapping = parse_mapping_file(category_mapping) if not category: if test != 'paired_T': option_parser.error('a category in the category mapping file must be' ' specified with the -c option for this test') # set up threshold value for filtering, if any if threshold and threshold != 'None': threshold = float(threshold) # if specifying a list of OTUs to look at specifically if otu_include_fp and otu_include_fp != 'None': otu_include = open(otu_include_fp) else: otu_include = None # if only passing in a single OTU table if isdir(otu_table_fp) is False: # raise error if collate option is being passed to single table if collate_results is True: option_parser.error('Cannot collate the results of only one table.' 
' Please rerun the command without passing the -w option') else: #open and parse the biom table fp otu_table = parse_biom_table(open(otu_table_fp, 'U')) # run the statistical test output = test_wrapper(test, otu_table, category_mapping, category, threshold, filter, otu_include, otu_table_relative_abundance=relative_abundance) # write output output_file = open(output_fp, 'w') output_file.write('\n'.join(output)) output_file.close() # if the user has passed in a directory if isdir(otu_table_fp) is True: # negate_collate to return an results file on a per table basis if collate_results is False: # build list of otu tables otu_table_paths = glob('%s/*biom' % otu_table_fp) # if output dir doesn't exist, then make it if exists(output_fp): pass else: makedirs(output_fp) for otu_table_fp in otu_table_paths: #open and parse the biom table fp otu_table = parse_biom_table(open(otu_table_fp, 'U')) #synchronize the mapping file with the otu table category_mapping, removed_samples = \ sync_mapping_to_otu_table(otu_table, category_mapping) if removed_samples: print "Warning, the following samples were in the category mapping file " +\ "but not the OTU table and will be ignored: " for i in removed_samples: print i + '\n' # create naming convention for output file # will look like: otu_table_ANOVA_Treatment.txt output_basename = basename(otu_table_fp) output_basename = output_basename.replace(".biom","") output_fp_sweep = "%s_%s_%s.txt" % \ (output_basename,test,category) # if the convert_otu_table_fp is passed, save the converted table if test == 'longitudinal_correlation' or test == 'paired_T': converted_otu_table = longitudinal_otu_table_conversion_wrapper(table, category_mapping, individual_column, reference_sample_column) if conv_output_fp: of = open(conv_output_fp, 'w') of.write(format_biom_table(converted_otu_table)) of.close() if test == 'longitudinal_correlation': #set the otu_include list to all of the OTUs, this effectively #deactivates the filter for correlation, 
because the filtered OTU_list is #rewritten with the otu_include list in the test_wrapper if not otu_include: otu_include = set(otu_table.ObservationIds) output = test_wrapper('correlation', converted_otu_table, category_mapping, category, threshold, filter, otu_include, 999999999.0, True) elif test == 'paired_T': output = test_wrapper('paired_T', converted_otu_table, category_mapping, category, threshold, filter, otu_include, 999999999.0, True, individual_column, reference_sample_column) # run test single input table from the directory else: output = test_wrapper(test, otu_table, category_mapping, category, threshold, filter, otu_include, otu_table_relative_abundance=relative_abundance) # write output file with new naming convention output_file = open(join(output_fp,output_fp_sweep), 'w') output_file.write('\n'.join(output)) output_file.close() # Use when the input dir contains rarefied OTU tables, and you want # to collate the p-values & results into one results file if collate_results is True: if test != 'longitudinal_correlation' and test != 'paired_T': # get biom tables otu_table_paths = glob('%s/*biom' % otu_table_fp) #get aggregated tables parsed_otu_tables= [] for otu_table_fp in otu_table_paths: otu_table = open(otu_table_fp, 'U') otu_table = parse_biom_table(otu_table) parsed_otu_tables.append(otu_table) #synchronize the mapping file with the otu table #checks with just the first OTU table and assumes that all otu tables #have the same collection of samples category_mapping, removed_samples = \ sync_mapping_to_otu_table(parsed_otu_tables[0],category_mapping) if removed_samples: print "Warning, the following samples were in the category mapping file " +\ "but not the OTU table and will be ignored: " for i in removed_samples: print i + '\n' # get output from statistical test output = test_wrapper_multiple(test, parsed_otu_tables, category_mapping, category, threshold, filter, otu_include, otu_table_relative_abundance=relative_abundance) #write out 
aggregated results output_file = open(output_fp, 'w') output_file.write('\n'.join(output)) output_file.close() else: option_parser.error("You cannot collate the results obtained from " "using the longitudinal_correlation and paired_T options.")
def main():
    """Run an OTU category significance test from the command line.

    Options are read with parse_command_line_parameters(**script_info).
    In this version the mapping file and OTU table are handed to the test
    wrappers as open file handles rather than parsed objects.

    * Single OTU table file: the requested test is run through test_wrapper.
      'longitudinal_correlation' and 'paired_T' first convert the table with
      longitudinal_otu_table_conversion_wrapper (optionally saving the
      converted text to opts.conv_output_fp).
    * Directory: every entry of the directory is passed, as a path, to
      test_wrapper_multiple.

    The result lines are joined with newlines and written to opts.output_fp.
    Every handle opened here is closed before returning, including on error.

    Raises:
        ValueError: if no category is given for a test other than 'paired_T',
            or if 'longitudinal_correlation' / 'paired_T' is requested on a
            directory.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_fp = opts.output_fp
    category_mapping_fp = opts.category_mapping_fp
    individual_column = opts.individual_column
    reference_sample_column = opts.reference_sample_column
    conv_output_fp = opts.conv_output_fp
    otu_filter = opts.filter
    test = opts.test
    category = opts.category
    threshold = opts.threshold
    otu_include_fp = opts.otu_include_fp
    otu_table_fp = opts.otu_table_fp
    longitudinal_tests = ('longitudinal_correlation', 'paired_T')

    # handles that stay open while the wrappers read from them
    open_handles = []
    try:
        category_mapping = open(category_mapping_fp, 'U')
        open_handles.append(category_mapping)

        # every test except paired_T needs a mapping-file category
        if not category and test != 'paired_T':
            raise ValueError('a category in the category mapping file must be' +
                             ' specified with the -c option for this test')

        if threshold and threshold != 'None':
            threshold = float(threshold)

        otu_include = None
        if otu_include_fp and otu_include_fp != 'None':
            otu_include = open(otu_include_fp)
            open_handles.append(otu_include)

        if not isdir(otu_table_fp):
            # if single file, process normally
            otu_table = open(otu_table_fp, 'U')
            open_handles.append(otu_table)

            if test in longitudinal_tests:
                converted_otu_table = \
                    longitudinal_otu_table_conversion_wrapper(
                        otu_table, category_mapping, individual_column,
                        reference_sample_column)
                if conv_output_fp:
                    with open(conv_output_fp, 'w') as conv_f:
                        conv_f.write(converted_otu_table)
                converted_otu_table = converted_otu_table.split('\n')
                # a fresh mapping handle is opened for the test itself,
                # presumably because the conversion already read the first
                # one; the first handle stays tracked so it is still closed
                category_mapping = open(category_mapping_fp, 'U')
                open_handles.append(category_mapping)

            if test == 'longitudinal_correlation':
                output = test_wrapper(
                    'correlation', converted_otu_table, category_mapping,
                    category, threshold, otu_filter, otu_include,
                    999999999.0, True)
            elif test == 'paired_T':
                output = test_wrapper(
                    'paired_T', converted_otu_table, category_mapping,
                    category, threshold, otu_filter, otu_include,
                    999999999.0, True, individual_column,
                    reference_sample_column)
            else:
                output = test_wrapper(
                    test, otu_table, category_mapping, category, threshold,
                    otu_filter, otu_include)
        else:
            if test in longitudinal_tests:
                raise ValueError(
                    "the longitudinal_correlation and paired_T options cannot be run on a directory")
            # if directory, get aggregated results
            otu_table_paths = [join(otu_table_fp, fp)
                               for fp in listdir(otu_table_fp)]
            output = test_wrapper_multiple(
                test, otu_table_paths, category_mapping, category,
                threshold, otu_filter, otu_include)

        with open(output_fp, 'w') as output_file:
            output_file.write('\n'.join(output))
    finally:
        for handle in open_handles:
            handle.close()
def main():
    """Run an OTU category significance test from the command line.

    Options are read with parse_command_line_parameters(**script_info).

    * If opts.otu_table_fp is a single file, the table is parsed, the mapping
      is synchronised to it, and the requested test is run through
      test_wrapper. 'longitudinal_correlation' and 'paired_T' first convert
      the table with longitudinal_otu_table_conversion_wrapper (optionally
      saving it to opts.converted_otu_table_output_fp).
    * If it is a directory, every path matching '<dir>/*biom' is parsed and
      the tables are passed together to test_wrapper_multiple.

    The result lines are joined with newlines and written to opts.output_fp.

    Raises:
        ValueError: if no category is given for a test other than 'paired_T',
            or if 'longitudinal_correlation' / 'paired_T' is requested on a
            directory.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_fp = opts.output_fp
    category_mapping_fp = opts.category_mapping_fp
    with open(category_mapping_fp, 'U') as mapping_f:
        category_mapping = parse_mapping_file(mapping_f)

    individual_column = opts.individual_column
    reference_sample_column = opts.reference_sample_column
    conv_output_fp = opts.converted_otu_table_output_fp
    relative_abundance = opts.relative_abundance
    otu_filter = opts.filter
    test = opts.test
    category = opts.category
    longitudinal_tests = ('longitudinal_correlation', 'paired_T')

    # every test except paired_T needs a mapping-file category
    if not category and test != 'paired_T':
        raise ValueError('a category in the category mapping file must be' +
                         ' specified with the -c option for this test')

    threshold = opts.threshold
    if threshold and threshold != 'None':
        threshold = float(threshold)

    # the open handle itself is what gets passed to the test wrappers, so it
    # is only closed after the results have been written
    otu_include_fp = opts.otu_include_fp
    otu_include_f = None
    if otu_include_fp and otu_include_fp != 'None':
        otu_include_f = open(otu_include_fp)
    otu_include = otu_include_f

    otu_table_fp = opts.otu_table_fp

    try:
        if not isdir(otu_table_fp):
            # if single file, process normally
            with open(otu_table_fp, 'U') as table_f:
                try:
                    otu_table = parse_biom_table(table_f)
                except AttributeError:
                    # NOTE(review): fallback kept from the original code; it
                    # hands the same file object to the string parser.
                    otu_table = parse_biom_table_str(table_f)

            # synchronize the mapping file with the otu table
            category_mapping, removed_samples = \
                sync_mapping_to_otu_table(otu_table, category_mapping)
            if removed_samples:
                print("Warning, the following samples were in the category mapping file "
                      "but not the OTU table and will be ignored: ")
                for sample_id in removed_samples:
                    print(sample_id + '\n')

            if test in longitudinal_tests:
                converted_otu_table = \
                    longitudinal_otu_table_conversion_wrapper(
                        otu_table, category_mapping, individual_column,
                        reference_sample_column)
                if conv_output_fp:
                    with open(conv_output_fp, 'w') as conv_f:
                        conv_f.write(format_biom_table(converted_otu_table))

            if test == 'longitudinal_correlation':
                # set the otu_include list to all of the OTUs, this
                # effectively deactivates the filter for correlation, because
                # the filtered OTU_list is rewritten with the otu_include
                # list in the test_wrapper
                if not otu_include:
                    otu_include = set(otu_table.ObservationIds)
                output = test_wrapper(
                    'correlation', converted_otu_table, category_mapping,
                    category, threshold, otu_filter, otu_include,
                    999999999.0, True)
            elif test == 'paired_T':
                output = test_wrapper(
                    'paired_T', converted_otu_table, category_mapping,
                    category, threshold, otu_filter, otu_include,
                    999999999.0, True, individual_column,
                    reference_sample_column)
            else:
                output = test_wrapper(
                    test, otu_table, category_mapping, category, threshold,
                    otu_filter, otu_include,
                    otu_table_relative_abundance=relative_abundance)
        else:
            if test in longitudinal_tests:
                raise ValueError(
                    "the longitudinal_correlation and paired_T options cannot be run on a directory")

            # if directory, get aggregated results
            parsed_otu_tables = []
            for path in glob('%s/*biom' % otu_table_fp):
                with open(path, 'U') as table_f:
                    parsed_otu_tables.append(parse_biom_table(table_f))

            # synchronize the mapping file with the otu table
            # checks with just the first OTU table and assumes that all otu
            # tables have the same collection of samples
            category_mapping, removed_samples = \
                sync_mapping_to_otu_table(parsed_otu_tables[0],
                                          category_mapping)
            if removed_samples:
                print("Warning, the following samples were in the category mapping file "
                      "but not the OTU table and will be ignored: ")
                for sample_id in removed_samples:
                    print(sample_id + '\n')

            output = test_wrapper_multiple(
                test, parsed_otu_tables, category_mapping, category,
                threshold, otu_filter, otu_include,
                otu_table_relative_abundance=relative_abundance)

        with open(output_fp, 'w') as output_file:
            output_file.write('\n'.join(output))
    finally:
        if otu_include_f is not None:
            otu_include_f.close()