def main():
    """Build an OTU table from an OTU map and write it with write_biom_table.

    Options read from the parsed command line:
      * ``taxonomy_fname``  - optional taxonomy file, parsed with
        ``parse_taxonomy``; ``None`` is passed on when it is not given.
      * ``exclude_otus_fp`` - optional file naming OTU ids to exclude. A
        ``.fasta``/``.fna`` extension selects the fasta id reader, anything
        else the plain seq-id reader.
      * ``mapping_fp``      - optional mapping file turned into sample
        metadata via ``mapping_file_to_dict``.
      * ``otu_map_fp``      - the OTU map handed to ``make_otu_table``.
      * ``output_biom_fp``  - destination passed to ``write_biom_table``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    exclude_otus_fp = opts.exclude_otus_fp

    if not opts.taxonomy_fname:
        otu_to_taxonomy = None
    else:
        # Use a context manager so the handle is closed once parsing is done
        # (it used to be left open for the life of the process).
        with open(opts.taxonomy_fname, 'U') as taxonomy_f:
            otu_to_taxonomy = parse_taxonomy(taxonomy_f)

    ids_to_exclude = []
    if exclude_otus_fp:
        # The file extension decides which id reader is used.
        if splitext(exclude_otus_fp)[1] in ('.fasta', '.fna'):
            read_ids = get_seq_ids_from_fasta_file
        else:
            read_ids = get_seq_ids_from_seq_id_file
        # NOTE(review): assumes the id readers fully consume the handle
        # before returning - confirm.
        with open(exclude_otus_fp, 'U') as exclude_f:
            ids_to_exclude = read_ids(exclude_f)

    sample_metadata = None
    if opts.mapping_fp is not None:
        with open(opts.mapping_fp, 'U') as map_f:
            mapping_data, mapping_header, mapping_comments = \
                parse_mapping_file(map_f)
        sample_metadata = mapping_file_to_dict(mapping_data, mapping_header)

    with open(opts.otu_map_fp, 'U') as otu_map_f:
        biom_otu_table = make_otu_table(otu_map_f,
                                        otu_to_taxonomy=otu_to_taxonomy,
                                        otu_ids_to_exclude=ids_to_exclude,
                                        sample_metadata=sample_metadata)

    write_biom_table(biom_otu_table, opts.output_biom_fp)
def main():
    """Build an OTU table from an OTU map and write it to ``output_biom_fp``.

    Options read from the parsed command line:
      * ``output_biom_fp``  - file the result of ``make_otu_table`` is
        written to.
      * ``taxonomy_fname``  - optional taxonomy file, parsed with
        ``parse_taxonomy``; ``None`` is passed on when it is not given.
      * ``exclude_otus_fp`` - optional file naming OTU ids to exclude. A
        ``.fasta``/``.fna`` extension selects the fasta id reader, anything
        else the plain seq-id reader.
      * ``otu_map_fp``      - the OTU map handed to ``make_otu_table``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    exclude_otus_fp = opts.exclude_otus_fp

    # The output is still opened before any input is read, as before, but is
    # now guaranteed to be flushed and closed even if a later step raises.
    with open(opts.output_biom_fp, 'w') as outfile:
        if not opts.taxonomy_fname:
            otu_to_taxonomy = None
        else:
            with open(opts.taxonomy_fname, 'U') as taxonomy_f:
                otu_to_taxonomy = parse_taxonomy(taxonomy_f)

        ids_to_exclude = []
        if exclude_otus_fp:
            # The file extension decides which id reader is used.
            if splitext(exclude_otus_fp)[1] in ('.fasta', '.fna'):
                read_ids = get_seq_ids_from_fasta_file
            else:
                read_ids = get_seq_ids_from_seq_id_file
            # NOTE(review): assumes the id readers fully consume the handle
            # before returning - confirm.
            with open(exclude_otus_fp, 'U') as exclude_f:
                ids_to_exclude = read_ids(exclude_f)

        with open(opts.otu_map_fp, 'U') as otu_map_f:
            biom_otu_table = make_otu_table(otu_map_f,
                                            otu_to_taxonomy,
                                            ids_to_exclude)
        outfile.write(biom_otu_table)
def main():
    """Build an OTU table from an OTU map and write it with write_biom_table.

    Options read from the parsed command line:
      * ``taxonomy_fname``  - optional taxonomy file, parsed with
        ``parse_taxonomy``; ``None`` is passed on when it is not given.
      * ``exclude_otus_fp`` - optional file naming OTU ids to exclude. A
        ``.fasta``/``.fna`` extension selects the fasta id reader, anything
        else the plain seq-id reader.
      * ``mapping_fp``      - optional mapping file; its parsed data, header
        and comments are combined by ``assemble_sample_metadata``.
      * ``otu_map_fp``      - the OTU map handed to ``make_otu_table``.
      * ``output_biom_fp``  - destination passed to ``write_biom_table``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    exclude_otus_fp = opts.exclude_otus_fp

    # Every input below is read inside a ``with`` block so its handle is
    # closed promptly; previously all four were opened inline and leaked.
    # NOTE(review): assumes each parser fully consumes its handle before
    # returning - confirm.
    if not opts.taxonomy_fname:
        otu_to_taxonomy = None
    else:
        with open(opts.taxonomy_fname, 'U') as taxonomy_f:
            otu_to_taxonomy = parse_taxonomy(taxonomy_f)

    ids_to_exclude = []
    if exclude_otus_fp:
        # The file extension decides which id reader is used.
        if splitext(exclude_otus_fp)[1] in ('.fasta', '.fna'):
            read_ids = get_seq_ids_from_fasta_file
        else:
            read_ids = get_seq_ids_from_seq_id_file
        with open(exclude_otus_fp, 'U') as exclude_f:
            ids_to_exclude = read_ids(exclude_f)

    sample_metadata = None
    if opts.mapping_fp is not None:
        with open(opts.mapping_fp, 'U') as map_f:
            mapping_data, mapping_header, mapping_comments = \
                parse_mapping_file(map_f)
        sample_metadata = assemble_sample_metadata(mapping_data,
                                                   mapping_header,
                                                   mapping_comments)

    with open(opts.otu_map_fp, 'U') as otu_map_f:
        biom_otu_table = make_otu_table(otu_map_f,
                                        otu_to_taxonomy=otu_to_taxonomy,
                                        otu_ids_to_exclude=ids_to_exclude,
                                        sample_metadata=sample_metadata)

    write_biom_table(biom_otu_table, opts.output_biom_fp)
def _generate_biom_output(self, observation_map_fp, output_biom_fp,
                          observation_metadata_fp):
    """Build a table from an observation map and write it out.

    observation_map_fp: path of the observation map given to
     ``make_otu_table``.
    output_biom_fp: destination path given to ``write_biom_table``.
    observation_metadata_fp: optional path parsed with ``parse_taxonomy``;
     when ``None`` no observation metadata is passed on.
    """
    if observation_metadata_fp is not None:
        # Context managers close the input handles, which used to be leaked.
        with open(observation_metadata_fp, "U") as metadata_f:
            observation_metadata = parse_taxonomy(metadata_f)
    else:
        observation_metadata = None

    with open(observation_map_fp, "U") as observation_map_f:
        biom_table = make_otu_table(observation_map_f, observation_metadata)
    write_biom_table(biom_table, output_biom_fp)
def _generate_biom_output(self, observation_map_fp, output_biom_fp,
                          observation_metadata_fp):
    """Build a table from an observation map and write it to a file.

    observation_map_fp: path of the observation map given to
     ``make_otu_table``.
    output_biom_fp: path the value returned by ``make_otu_table`` is
     written to.
    observation_metadata_fp: optional path parsed with ``parse_taxonomy``;
     when ``None`` no observation metadata is passed on.
    """
    if observation_metadata_fp is not None:
        with open(observation_metadata_fp, "U") as metadata_f:
            observation_metadata = parse_taxonomy(metadata_f)
    else:
        observation_metadata = None

    # The output is opened before the table is built, as before; the
    # ``with`` blocks additionally guarantee both handles are closed even
    # when make_otu_table raises.
    with open(output_biom_fp, "w") as biom_table_f:
        with open(observation_map_fp, "U") as observation_map_f:
            biom_table_f.write(
                make_otu_table(observation_map_f, observation_metadata))
def _generate_biom_output(self, observation_map_fp, output_biom_fp,
                          observation_metadata_fp):
    """Create a table from an observation map and hand it to write_biom_table.

    observation_map_fp: path of the observation map read by
     ``make_otu_table``.
    output_biom_fp: destination path forwarded to ``write_biom_table``.
    observation_metadata_fp: path of a file parsed by ``parse_taxonomy``, or
     ``None`` to build the table without observation metadata.
    """
    observation_metadata = None
    if observation_metadata_fp is not None:
        # Close the metadata handle as soon as it has been parsed instead of
        # leaving it open.
        with open(observation_metadata_fp, 'U') as metadata_f:
            observation_metadata = parse_taxonomy(metadata_f)

    with open(observation_map_fp, 'U') as observation_map_f:
        biom_table = make_otu_table(observation_map_f, observation_metadata)

    write_biom_table(biom_table, output_biom_fp)
def _generate_biom_output(self, observation_map_fp, output_biom_fp,
                          observation_metadata_fp):
    """Create a table from an observation map and write it to a file.

    observation_map_fp: path of the observation map read by
     ``make_otu_table``.
    output_biom_fp: path the value returned by ``make_otu_table`` is
     written to.
    observation_metadata_fp: path of a file parsed by ``parse_taxonomy``, or
     ``None`` to build the table without observation metadata.
    """
    observation_metadata = None
    # Identity test: ``!= None`` would invoke a custom ``__ne__`` if the
    # argument ever defined one.
    if observation_metadata_fp is not None:
        with open(observation_metadata_fp, 'U') as metadata_f:
            observation_metadata = parse_taxonomy(metadata_f)

    # Output is still opened first; ``with`` ensures it is closed even when
    # building the table fails, and that the map handle is not leaked.
    with open(output_biom_fp, 'w') as biom_table_f:
        with open(observation_map_fp, 'U') as observation_map_f:
            biom_table_f.write(
                make_otu_table(observation_map_f, observation_metadata))
def test_parse_taxonomy(self):
    """parse_taxonomy should parse the taxonomy example, keeping otu id only.

    The result is indexed by the bare ids '412' and '338' and each must map
    to the taxonomy string of its record.
    """
    # NOTE(review): the fixture below appears on a single line with
    # space-separated fields, yet it is split on '\n' before parsing. It
    # presumably had one record per line (and possibly tab-separated
    # fields) - confirm the literal's whitespace against the canonical
    # copy of this test.
    example_tax = \
        """412 PC.635_647 Root;Bacteria;Firmicutes;"Clostridia";Clostridiales 0.930 319 PC.355_281 Root;Bacteria;Bacteroidetes 0.970 353 PC.634_154 Root;Bacteria;Bacteroidetes 0.830 17 PC.607_302 Root;Bacteria;Bacteroidetes 0.960 13 PC.481_1214 Root;Bacteria;Firmicutes;"Clostridia";Clostridiales 0.870 338 PC.593_1314 Root;Bacteria 0.990 42556 Additional fields ignored"""
    res = parse_taxonomy(example_tax.split('\n'))
    # Embedded double quotes in the taxonomy must be kept in the value.
    self.assertEqual(res['412'], "Root;Bacteria;Firmicutes;\"Clostridia\";Clostridiales")
    self.assertEqual(res['338'], "Root;Bacteria")
def test_parse_taxonomy(self):
    """parse_taxonomy should parse the taxonomy example, keeping otu id only.

    Checks the values stored under the bare ids '412' and '338'.
    """
    # NOTE(review): this fixture is a single line of space-separated fields
    # as written here, but the test splits it on '\n'. The whitespace inside
    # the literal was presumably one record per line originally - confirm
    # against the canonical copy of this test before relying on it.
    example_tax = \
        """412 PC.635_647 Root;Bacteria;Firmicutes;"Clostridia";Clostridiales 0.930 319 PC.355_281 Root;Bacteria;Bacteroidetes 0.970 353 PC.634_154 Root;Bacteria;Bacteroidetes 0.830 17 PC.607_302 Root;Bacteria;Bacteroidetes 0.960 13 PC.481_1214 Root;Bacteria;Firmicutes;"Clostridia";Clostridiales 0.870 338 PC.593_1314 Root;Bacteria 0.990 42556 Additional fields ignored"""
    res = parse_taxonomy(example_tax.split('\n'))
    # The quoted rank name must survive parsing with its quotes intact.
    self.assertEqual(
        res['412'],
        "Root;Bacteria;Firmicutes;\"Clostridia\";Clostridiales")
    self.assertEqual(res['338'], "Root;Bacteria")
def _generate_biom_output(self, observation_map_fp, output_biom_fp,
                          observation_metadata_fp):
    """Create a table from an observation map and write it to a file.

    observation_map_fp: path of the observation map read by
     ``make_otu_table``.
    output_biom_fp: path the value returned by ``make_otu_table`` is
     written to.
    observation_metadata_fp: path of a file parsed by ``parse_taxonomy``
     with ``parse_all_fields=True``, or ``None`` to build the table without
     observation metadata.
    """
    observation_metadata = None
    # ``is not None`` rather than ``!= None``: identity is the correct test
    # for the None singleton.
    if observation_metadata_fp is not None:
        with open(observation_metadata_fp, 'U') as metadata_f:
            observation_metadata = parse_taxonomy(metadata_f,
                                                  parse_all_fields=True)

    # Output is still opened before the table is built; the context managers
    # make sure neither handle is left open, including on error.
    with open(output_biom_fp, 'w') as biom_table_f:
        with open(observation_map_fp, 'U') as observation_map_f:
            biom_table_f.write(
                make_otu_table(observation_map_f, observation_metadata))
def main():
    """Build an OTU table (optionally with sequence counts) and write it out.

    Options read from the parsed command line:
      * ``taxonomy_fname``  - optional taxonomy file, parsed with
        ``parse_taxonomy``; ``None`` is passed on when it is not given.
      * ``counts_fname``    - optional file of whitespace-separated
        ``key value`` lines collected into the ``seq_counts`` dict (values
        are kept as strings); ``None`` is passed on when it is not given.
      * ``exclude_otus_fp`` - optional file naming OTU ids to exclude. A
        ``.fasta``/``.fna`` extension selects the fasta id reader, anything
        else the plain seq-id reader.
      * ``mapping_fp``      - optional mapping file turned into sample
        metadata via ``mapping_file_to_dict``.
      * ``otu_map_fp``      - the OTU map handed to ``make_otu_table``.
      * ``output_biom_fp``  - destination passed to ``write_biom_table``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    exclude_otus_fp = opts.exclude_otus_fp

    if not opts.taxonomy_fname:
        otu_to_taxonomy = None
    else:
        # Closed via ``with`` like the other inputs in this function; the
        # handle used to be left open.
        with open(opts.taxonomy_fname, 'U') as taxonomy_f:
            otu_to_taxonomy = parse_taxonomy(taxonomy_f)

    if not opts.counts_fname:
        seq_counts = None
    else:
        seq_counts = {}
        with open(opts.counts_fname, 'U') as counts_f:
            for line in counts_f:
                # Each line must hold exactly two whitespace-separated
                # fields; anything else raises ValueError on unpacking.
                key, val = line.split()
                seq_counts[key] = val

    ids_to_exclude = []
    if exclude_otus_fp:
        # The file extension decides which id reader is used.
        if splitext(exclude_otus_fp)[1] in ('.fasta', '.fna'):
            read_ids = get_seq_ids_from_fasta_file
        else:
            read_ids = get_seq_ids_from_seq_id_file
        # NOTE(review): assumes the id readers fully consume the handle
        # before returning - confirm.
        with open(exclude_otus_fp, 'U') as exclude_f:
            ids_to_exclude = read_ids(exclude_f)

    sample_metadata = None
    if opts.mapping_fp is not None:
        with open(opts.mapping_fp, 'U') as map_f:
            mapping_data, mapping_header, mapping_comments = \
                parse_mapping_file(map_f)
        sample_metadata = mapping_file_to_dict(mapping_data, mapping_header)

    with open(opts.otu_map_fp, 'U') as otu_map_f:
        biom_otu_table = make_otu_table(otu_map_f,
                                        otu_to_taxonomy=otu_to_taxonomy,
                                        otu_ids_to_exclude=ids_to_exclude,
                                        sample_metadata=sample_metadata,
                                        seq_counts=seq_counts)

    write_biom_table(biom_otu_table, opts.output_biom_fp)
def main():
    """Build an OTU table from an OTU map and write it to a file or stdout.

    Options read from the parsed command line:
      * ``output_fp``       - output path; when empty the table is written
        to ``stdout``.
      * ``taxonomy_fname``  - optional taxonomy file, parsed with
        ``parse_taxonomy``; ``None`` is passed on when it is not given.
      * ``otu_map_fp``      - OTU map loaded with ``fields_to_dict``.
      * ``exclude_otus_fp`` - optional file handed to ``remove_otus`` to
        filter the loaded OTU map.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    exclude_otus_fp = opts.exclude_otus_fp

    if opts.output_fp:
        outfile = open(opts.output_fp, 'w')
    else:
        outfile = stdout

    try:
        # NOTE(review): the ``with`` blocks below assume each helper fully
        # consumes its handle before returning - confirm.
        if not opts.taxonomy_fname:
            otu_to_taxonomy = None
        else:
            with open(opts.taxonomy_fname, 'U') as taxonomy_f:
                otu_to_taxonomy = parse_taxonomy(taxonomy_f)

        with open(opts.otu_map_fp, 'U') as otu_map_f:
            otu_to_seqid = fields_to_dict(otu_map_f)

        if exclude_otus_fp:
            with open(exclude_otus_fp, 'U') as exclude_f:
                otu_to_seqid = remove_otus(otu_to_seqid, exclude_f)

        outfile.write(make_otu_table(otu_to_seqid, otu_to_taxonomy))
    finally:
        # Close a file we opened ourselves, but never the process's stdout.
        if outfile is not stdout:
            outfile.close()
def main():
    """Create an OTU table from an OTU map and write it to ``output_biom_fp``.

    Options read from the parsed command line:
      * ``output_biom_fp``  - file that receives the value returned by
        ``make_otu_table``.
      * ``taxonomy_fname``  - optional taxonomy file for ``parse_taxonomy``;
        without it ``None`` is passed as the taxonomy mapping.
      * ``exclude_otus_fp`` - optional file of ids to exclude; read as fasta
        when its extension is ``.fasta`` or ``.fna``, otherwise as a seq-id
        file.
      * ``otu_map_fp``      - the OTU map given to ``make_otu_table``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    exclude_otus_fp = opts.exclude_otus_fp

    # Opening the output first keeps the original ordering; the context
    # manager adds the missing close so buffered data is always flushed.
    with open(opts.output_biom_fp, 'w') as outfile:
        otu_to_taxonomy = None
        if opts.taxonomy_fname:
            with open(opts.taxonomy_fname, 'U') as taxonomy_f:
                otu_to_taxonomy = parse_taxonomy(taxonomy_f)

        ids_to_exclude = []
        if exclude_otus_fp:
            is_fasta = splitext(exclude_otus_fp)[1] in ('.fasta', '.fna')
            # NOTE(review): assumes the id readers fully consume the handle
            # before returning - confirm.
            with open(exclude_otus_fp, 'U') as exclude_f:
                if is_fasta:
                    ids_to_exclude = get_seq_ids_from_fasta_file(exclude_f)
                else:
                    ids_to_exclude = get_seq_ids_from_seq_id_file(exclude_f)

        with open(opts.otu_map_fp, 'U') as otu_map_f:
            biom_otu_table = make_otu_table(otu_map_f,
                                            otu_to_taxonomy,
                                            ids_to_exclude)
        outfile.write(biom_otu_table)
def _cospeciation_summary_line(cotu_basename, test, host_tree, host_dist,
                               permutations, significance_level, output_dir,
                               otu_to_taxonomy):
    """Run the chosen test for one pOTU and return its summary row.

    All input files are looked up relative to the current working directory
    using ``cotu_basename`` as a prefix. Returns ``None`` when the pOTU is
    skipped: its cOTU table cannot be opened, or the reconciled table has
    fewer than three hosts or cOTUs. Exceptions raised while testing or
    writing results are printed and re-raised.
    """
    basename = cotu_basename + "_" + test

    # Read in cOTU file
    try:
        cotu_file = open(cotu_basename + '_seqs_otu_table.txt', 'Ur')
    except IOError:
        # Skip this pOTU. The old bare ``except`` only printed and then
        # carried on with an undefined (or stale, already closed) handle.
        print("is this a real file?")
        return None

    # Reconcile hosts in host DM and cOTU table
    try:
        filtered_cotu_file, host_dist_filtered = reconcile_hosts_symbionts(
            cotu_file, host_dist)
    finally:
        cotu_file.close()

    # Read in reconciled cOTU table
    sample_names, taxon_names, data, lineages = parse_otu_table(
        filtered_cotu_file)
    filtered_cotu_file.close()

    # Skip this pOTU if less than three hosts or cOTUs
    if len(sample_names) < 3 or len(taxon_names) < 3:
        print("Less than 3 hosts or cOTUs in cOTU table!")
        return None

    # Import, filter, and root cOTU tree
    with open(cotu_basename + "_seqs_rep_set.tre", 'r') as otu_tree_file:
        otu_tree_unrooted = DndParser(otu_tree_file, PhyloNode)
    otu_subtree_unrooted = otu_tree_unrooted.getSubTree(taxon_names)
    # Root at midpoint.
    # Consider alternate step to go through and find closest DB seq to root?
    otu_subtree = otu_subtree_unrooted.rootAtMidpoint()

    # Filter host tree
    host_subtree = host_tree.getSubTree(sample_names)

    # Load up and filter cOTU sequences
    aligned_otu_seqs = LoadSeqs(
        cotu_basename + '_seqs_rep_set_aligned.fasta',
        moltype=DNA,
        label_to_name=lambda x: x.split()[0])
    filtered_seqs = aligned_otu_seqs.takeSeqs(taxon_names)

    # Run the selected test on this pOTU
    try:
        if test == 'unifrac':
            print('calling unifrac test')
            results_dict, acc_dict = unifrac_recursive_test(
                host_subtree, otu_subtree, sample_names, taxon_names, data,
                permutations)
        elif test == 'hommola_recursive':
            results_dict, acc_dict = recursive_hommola(
                filtered_seqs, host_subtree, host_dist_filtered, otu_subtree,
                sample_names, taxon_names, data, permutations, recurse=True)
        elif test == 'hommola':
            results_dict, acc_dict = recursive_hommola(
                filtered_seqs, host_subtree, host_dist_filtered, otu_subtree,
                sample_names, taxon_names, data, permutations, recurse=False)
        else:
            # Previously an unknown test name surfaced as a NameError on the
            # never-assigned results variables.
            raise ValueError("Unknown cospeciation test: %r" % (test,))

        # Count number of significant nodes
        sig_nodes = sum(1 for pval in results_dict['p_vals']
                        if pval < significance_level)

        num_nodes = write_results(
            results_dict, acc_dict, output_dir, basename, host_tree)
    except Exception as e:
        print(e)
        raise

    return "{0}\t{1}\t{2}\t{3}".format(
        sig_nodes, num_nodes, cotu_basename,
        otu_to_taxonomy[cotu_basename]) + "\n"


def test_cospeciation(potu_table_fp, cotu_table_fp, host_tree_fp, mapping_fp,
                      mapping_category, output_dir, significance_level, test,
                      permutations, taxonomy_fp, force):
    """Run a cospeciation test for every pOTU listed in a pOTU table.

    potu_table_fp: parent OTU table; the first tab-delimited field of each
     non-comment line names a pOTU whose files are looked up in the cOTU
     directory.
    cotu_table_fp: directory holding, per pOTU, ``<id>_seqs_otu_table.txt``,
     ``<id>_seqs_rep_set.tre`` and ``<id>_seqs_rep_set_aligned.fasta``. The
     process changes its working directory to it.
    host_tree_fp: host input handed to ``make_dists_and_tree``.
    mapping_fp, mapping_category: accepted but not used by this function.
    output_dir: created here; the process exits with status 1 if it already
     exists and ``force`` is false.
    significance_level: p-values below this are counted as significant.
    test: one of 'unifrac', 'hommola_recursive' or 'hommola'.
    permutations: forwarded to the selected test.
    taxonomy_fp: taxonomy file parsed with ``parse_taxonomy``; used to
     annotate each summary row.
    force: allow reuse of an existing output directory.

    Writes ``cospeciation_results_summary.txt`` into ``output_dir`` with one
    row per analysed pOTU.
    """
    # Convert inputs to absolute paths so the later os.chdir() cannot
    # invalidate them.
    output_dir = os.path.abspath(output_dir)
    host_tree_fp = os.path.abspath(host_tree_fp)
    mapping_fp = os.path.abspath(mapping_fp)
    potu_table_fp = os.path.abspath(potu_table_fp)
    cotu_table_fp = os.path.abspath(cotu_table_fp)

    # Check Host Tree
    try:
        with open(host_tree_fp):
            pass
    except IOError:
        print('Host Data could not be opened! Are you sure it is located at ' +
              host_tree_fp + ' ?')
        exit(1)

    # Check pOTU table
    try:
        with open(potu_table_fp):
            pass
    except IOError:
        print('parent OTU table could not be opened! Are you sure it is located at ' +
              potu_table_fp + ' ?')
        exit(1)

    try:
        os.makedirs(output_dir)
    except OSError:
        if not force:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            print("Output directory already exists. Please choose " +
                  "a different directory, or force overwrite with -f.")
            exit(1)

    # Get sample names present in pOTU table
    with open(potu_table_fp, 'Ur') as potu_table_f:
        sample_names, taxon_names, data, lineages = parse_otu_table(
            potu_table_f)

    # Process host input (tree/alignment/matrix) and take subtree of host
    # supertree. At this point, the host tree and host dist matrix have the
    # intersect of the samples in the pOTU table and the input host tree/dm.
    host_tree, host_dist = make_dists_and_tree(sample_names, host_tree_fp)

    summary_file = open(
        output_dir + '/' + 'cospeciation_results_summary.txt', 'w')
    try:
        # NOTE(review): this header names three columns while each data row
        # carries four (taxonomy is appended) - confirm whether intended.
        summary_file.write("sig_nodes\tnum_nodes\tfile\n")

        # Load taxonomic assignments for the pOTUs
        with open(taxonomy_fp, 'Ur') as taxonomy_f:
            otu_to_taxonomy = parse_taxonomy(taxonomy_f)

        # Test that you have a directory, otherwise report it.
        if os.path.isdir(cotu_table_fp):
            os.chdir(cotu_table_fp)
            print(os.getcwd())

            # Run test on cOTU tables in directory.
            # Use pOTU table to choose which cOTUs to use.
            with open(potu_table_fp, 'r') as potu_lines:
                for line in potu_lines:
                    # Ignore comment lines
                    if line.startswith('#'):
                        continue

                    # First element in OTU table tab-delimited row
                    cotu_basename = line.split('\t')[0]
                    print("Analyzing pOTU # " + cotu_basename)

                    outline = _cospeciation_summary_line(
                        cotu_basename, test, host_tree, host_dist,
                        permutations, significance_level, output_dir,
                        otu_to_taxonomy)
                    if outline is None:
                        continue

                    print(outline)
                    summary_file.write(outline)
        else:
            print('Not a directory.')
    finally:
        # Always flush and close the summary, even when a test re-raises.
        summary_file.close()
def main():
    """Apply multiple-test corrections to per-node results and write reports.

    Options read from the parsed command line:
      * ``cOTUs_dir``          - must be an existing directory.
      * ``results_dir``        - directory scanned for files whose names end
        in ``results.txt``; each is loaded with ``read_results_file``.
      * ``output_dir``         - created here; the process exits with status
        1 if it already exists and ``force`` is not set.
      * ``significance_level`` - converted to float and forwarded to
        ``print_sig_lists``.
      * ``taxonomy_fp``        - optional taxonomy file parsed with
        ``parse_taxonomy``; ``None`` is forwarded when it is not given.
      * ``force``              - allow reuse of an existing output directory.

    The process changes its working directory to ``results_dir`` while
    reading and to ``output_dir`` before writing.

    Raises IOError if the taxonomy file cannot be read.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    cOTUs_dir = opts.cOTUs_dir
    results_dir = opts.results_dir
    output_dir = opts.output_dir
    significance_level = float(opts.significance_level)
    taxonomy_fp = opts.taxonomy_fp
    force = opts.force

    from qiime.otu_category_significance import add_fdr_correction_to_results, \
        add_bonferroni_to_results, \
        fdr_correction

    # Test input and output dirs
    if taxonomy_fp:
        try:
            # Opened once and closed by the context manager; previously the
            # file was opened twice and neither handle was closed.
            with open(taxonomy_fp, 'Ur') as taxonomy_file:
                otu_to_taxonomy = parse_taxonomy(taxonomy_file)
        except IOError:
            # Call form of ``raise`` instead of ``raise IOError, msg``.
            raise IOError(
                "Can't open taxonomy file (%s). Does it exist? Do you have read access?"
                % taxonomy_fp)
    else:
        otu_to_taxonomy = None

    if not os.path.isdir(cOTUs_dir):
        print("cOTUs_directory not a directory. Please try again.")
        exit(1)
    cOTUs_dir = os.path.abspath(cOTUs_dir)

    if not os.path.isdir(results_dir):
        print("results_directory not a directory. Please try again.")
        exit(1)
    results_dir = os.path.abspath(results_dir)

    try:
        os.makedirs(output_dir)
    except OSError:
        if not force:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            print("Output directory already exists. Please choose " +
                  "a different directory, or force overwrite with -f.")
            exit(1)
    # Resolved before any chdir so a relative path keeps its meaning.
    output_dir = os.path.abspath(output_dir)

    # Get results dict.
    # Results dict is 2D, with key a sequential per-node UID and fields for:
    # pOTU, uncorrected pval, taxonomy, plus other results values
    results_list = []
    results_keys = []
    os.chdir(results_dir)
    # ``results_fname`` avoids shadowing the Python 2 builtin ``file``.
    for results_fname in os.listdir('.'):
        if results_fname.endswith("results.txt"):
            results_list += read_results_file(results_fname)
            # Keys are taken from the first results file that provides any.
            if results_keys == []:
                results_keys = read_results_keys(results_fname)

    # Do FDR correction; afterwards the dict has FDR and Bonferroni values.
    # We're making a dict here for all the nodes that have been tested, with
    # the key corresponding to the position in the results_list array of
    # dicts.
    p_dict = {}
    for node, node_result in enumerate(results_list):
        pval = float(node_result['p_vals'])
        p_dict[node] = [pval, pval]

    add_fdr_correction_to_results(p_dict)
    add_bonferroni_to_results(p_dict)

    # A previous iteration of the permutation test allowed 0.0 p_vals, which
    # gave 'NA' results for highly significant nodes. Retained for legacy
    # purposes.
    p_dict = de_NA(p_dict)

    os.chdir(output_dir)
    print_corrected_results_files(results_list, results_keys, p_dict)
    print_sig_lists(
        results_list, results_keys, p_dict, significance_level,
        otu_to_taxonomy)