def test_load_consensus_map(self):
    """correctly returns a consensus map

    Each input line is an id and a "; "-separated lineage, joined by a
    tab. The map is loaded twice from the same lines: once with the
    second argument False (bare names expected, missing ranks as None)
    and once with it True (rank-prefixed names expected, missing ranks
    as the bare prefix).
    """
    # Named ``data`` rather than ``input`` so the builtin is not shadowed.
    data = ["foo\ta; b; c; d; e; f; g",
            "bar\th; i; j; k; l; m; n",
            "foobar\th; i; j; None; l; ; foo uncultured bar"]

    # In the "foobar" line the literal "None", the empty field and the
    # "foo uncultured bar" field are all expected to come back as missing.
    exp_noappend = {'foo': ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
                    'bar': ['h', 'i', 'j', 'k', 'l', 'm', 'n'],
                    'foobar': ['h', 'i', 'j', None, 'l', None, None]}
    exp_append = {
        'foo': ['k__a', 'p__b', 'c__c', 'o__d', 'f__e', 'g__f', 's__g'],
        'bar': ['k__h', 'p__i', 'c__j', 'o__k', 'f__l', 'g__m', 's__n'],
        'foobar': ['k__h', 'p__i', 'c__j', 'o__', 'f__l', 'g__', 's__']}

    obs_noappend = load_consensus_map(data, False)
    obs_append = load_consensus_map(data, True)

    self.assertEqual(obs_noappend, exp_noappend)
    self.assertEqual(obs_append, exp_append)
def _format_parent(taxlookup, parent):
    """Render a parent as "<id>: <rank>; <rank>; ..." for a note column."""
    # load_consensus_map(..., False) can leave None in a lineage for
    # missing ranks; joining those directly raises TypeError. They are
    # written as "None" here.
    # NOTE(review): "None" mirrors the spelling the taxonomy input uses for
    # a missing rank -- confirm this is the desired rendering.
    lineage = '; '.join('None' if rank is None else rank
                        for rank in taxlookup[parent])
    return "%s: %s" % (parent, lineage)


def _write_conflicts(output, taxlookup, hits, overlap, reason):
    """Write one row for each hit outside overlap whose parents conflict.

    hits yields (id, parent_a, parent_b) tuples; reason is the text of
    the second column.
    """
    for id_, parent_a, parent_b in hits:
        if id_ in overlap:
            # already reported as found by both tools
            continue
        if determine_taxon_conflict(taxlookup, parent_a, parent_b):
            row = [id_, reason,
                   _format_parent(taxlookup, parent_a),
                   _format_parent(taxlookup, parent_b)]
            output.write('\t'.join(row))
            output.write('\n')


def _write_report(output, taxlookup, b3_results, cs_results):
    """Write the header, the ids found by both tools, then the conflicts."""
    output.write("#accession\treason\tnote\tnote\n")

    overlap = get_overlap(b3_results, cs_results)
    for id_ in overlap:
        output.write("%s\tFound by both Bellerophon and ChimeraSlayer\n" % id_)

    # Bellerophon records carry a score that is not written to the report.
    b3_hits = ((id_, parent_a, parent_b)
               for id_, _score, parent_a, parent_b in b3_results)
    _write_conflicts(output, taxlookup, b3_hits, overlap,
                     "Class conflict found by Bellerophon")
    _write_conflicts(output, taxlookup, cs_results, overlap,
                     "Class conflict found by ChimeraSlayer")


def main():
    """Write a tab-separated report of Bellerophon/ChimeraSlayer chimeras.

    Reads opts.ref_taxonomy_map, opts.input_cs and opts.input_bellerophon
    and writes to opts.output. Ids reported by both tools are listed
    first; ids reported by only one tool are listed when
    determine_taxon_conflict is true for their two parents.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    with open(opts.ref_taxonomy_map) as tax_f:
        taxlookup = load_consensus_map(tax_f, False)

    # The parser inputs stay open until the report is written, so the
    # results remain usable even if a parser reads its file lazily.
    with open(opts.input_cs) as cs_f:
        cs_results = parse_cs_chimeras(cs_f)
        with open(opts.input_bellerophon) as b3_f:
            b3_results = parse_b3_chimeras(b3_f)
            with open(opts.output, 'w') as output:
                _write_report(output, taxlookup, b3_results, cs_results)
def test_load_consensus_map(self):
    """correctly returns a consensus map

    The same three lines are loaded with the second argument False and
    then True, and each result is compared to its expected dict.
    """
    lines = [
        "foo\ta; b; c; d; e; f; g",
        "bar\th; i; j; k; l; m; n",
        "foobar\th; i; j; None; l; ; foo uncultured bar",
    ]

    # expected result when the second argument is False
    want_plain = {
        'foo': list('abcdefg'),
        'bar': list('hijklmn'),
        'foobar': ['h', 'i', 'j', None, 'l', None, None],
    }
    # expected result when the second argument is True
    want_prefixed = {
        'foo': ['d__a', 'p__b', 'c__c', 'o__d', 'f__e', 'g__f', 's__g'],
        'bar': ['d__h', 'p__i', 'c__j', 'o__k', 'f__l', 'g__m', 's__n'],
        'foobar': ['d__h', 'p__i', 'c__j', 'o__', 'f__l', 'g__', 's__'],
    }

    got_plain = load_consensus_map(lines, False)
    got_prefixed = load_consensus_map(lines, True)

    self.assertEqual(got_plain, want_plain)
    self.assertEqual(got_prefixed, want_prefixed)
def _format_parent(taxlookup, parent):
    """Render a parent as "<id>: <rank>; <rank>; ..." for a note column."""
    # load_consensus_map(..., False) can leave None in a lineage for
    # missing ranks; joining those directly raises TypeError. They are
    # written as "None" here.
    # NOTE(review): "None" mirrors the spelling the taxonomy input uses for
    # a missing rank -- confirm this is the desired rendering.
    lineage = '; '.join('None' if rank is None else rank
                        for rank in taxlookup[parent])
    return "%s: %s" % (parent, lineage)


def _write_conflicts(output, taxlookup, hits, overlap, reason):
    """Write one row for each hit outside overlap whose parents conflict.

    hits yields (id, parent_a, parent_b) tuples; reason is the text of
    the second column.
    """
    for id_, parent_a, parent_b in hits:
        if id_ in overlap:
            # already reported as found by both tools
            continue
        if determine_taxon_conflict(taxlookup, parent_a, parent_b):
            row = [id_, reason,
                   _format_parent(taxlookup, parent_a),
                   _format_parent(taxlookup, parent_b)]
            output.write('\t'.join(row))
            output.write('\n')


def _write_report(output, taxlookup, b3_results, cs_results):
    """Write the header, the ids found by both tools, then the conflicts."""
    output.write("#accession\treason\tnote\tnote\n")

    overlap = get_overlap(b3_results, cs_results)
    for id_ in overlap:
        output.write("%s\tFound by both Bellerophon and ChimeraSlayer\n" % id_)

    # Bellerophon records carry a score that is not written to the report.
    b3_hits = ((id_, parent_a, parent_b)
               for id_, _score, parent_a, parent_b in b3_results)
    _write_conflicts(output, taxlookup, b3_hits, overlap,
                     "Class conflict found by Bellerophon")
    _write_conflicts(output, taxlookup, cs_results, overlap,
                     "Class conflict found by ChimeraSlayer")


def main():
    """Write a tab-separated report of Bellerophon/ChimeraSlayer chimeras.

    Reads opts.ref_taxonomy_map, opts.input_cs and opts.input_bellerophon
    and writes to opts.output. Ids reported by both tools are listed
    first; ids reported by only one tool are listed when
    determine_taxon_conflict is true for their two parents.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    with open(opts.ref_taxonomy_map) as tax_f:
        taxlookup = load_consensus_map(tax_f, False)

    # The parser inputs stay open until the report is written, so the
    # results remain usable even if a parser reads its file lazily.
    with open(opts.input_cs) as cs_f:
        cs_results = parse_cs_chimeras(cs_f)
        with open(opts.input_bellerophon) as b3_f:
            b3_results = parse_b3_chimeras(b3_f)
            with open(opts.output, 'w') as output:
                _write_report(output, taxlookup, b3_results, cs_results)
def test_generate_constrings_valid_input(self):
    """Tests generate_constrings with standard valid input.

    Checks that our output mirrors nlevel (tax2tree's interface).
    """
    # The lineage column of the first consensus line is handed to
    # determine_rank_order before the consensus map is loaded.
    first_lineage = test_cons[0].split('\t')[1]
    determine_rank_order(first_lineage)

    consensus = load_consensus_map(test_cons, False)
    annotated_tree = load_tree(test_tree, consensus)
    observed = generate_constrings(annotated_tree, consensus)

    expected = test_results
    self.assertEqual(observed, expected)
def hierarchy_errors(tax_lines):
    """Get errors in the taxonomy hierarchy

    Loads a consensus map from tax_lines and passes it to
    get_polyphyletic. Returns a list with one dict (keys 'Taxon', 'Rank'
    and 'Parents') for every (name, rank) entry that has more than one
    parent.
    """
    consensus = load_consensus_map(tax_lines, False)
    parents_by_taxon = get_polyphyletic(consensus)
    return [
        {'Taxon': taxon, 'Rank': level, 'Parents': found_parents}
        for (taxon, level), found_parents in parents_by_taxon.iteritems()
        if len(found_parents) > 1
    ]
def __call__(self, seq_path=None, result_path=None, log_path=None):
    """Returns a dict mapping {seq_id:(taxonomy, confidence)} for each seq

    Keep in mind, "confidence" is only done for consistency and in fact
    all assignments will have a score of 0 because a method for determining
    confidence is not currently implemented.

    Parameters:
    seq_path: path to file of sequences. The sequences themselves are
        never actually used, but they are needed for their ids.
    result_path: path to file of results. If specified, the results are
        also written to this path, one tab-separated
        "seq_id, lineage, confidence" line per sequence. The results
        are returned whether or not a path is given.
    log_path: path to log, which should include dump of params.
    """
    # initialize the logger
    logger = self._get_logger(log_path)
    logger.info(str(self))

    with open(seq_path, 'U') as f:
        seqs = dict(MinimalFastaParser(f))

    # Open the taxonomy file in a with-block so the handle is closed once
    # the consensus lines have been prepared.
    with open(self.Params['id_to_taxonomy_fp']) as tax_f:
        consensus_map = tax2tree.prep_consensus(tax_f, seqs.keys())

    # The lineage column of the first consensus line is handed to
    # determine_rank_order before the tip-name map is loaded.
    seed_con = consensus_map[0].strip().split('\t')[1]
    determine_rank_order(seed_con)

    tipnames_map = load_consensus_map(consensus_map, False)

    # Same for the tree file: closed as soon as the tree is loaded.
    with open(self.Params['tree_fp']) as tree_f:
        tree = load_tree(tree_f, tipnames_map)

    results = tax2tree.generate_constrings(tree, tipnames_map)
    results = tax2tree.clean_output(results, seqs.keys())

    if result_path:
        # if the user provided a result_path, write the
        # results to file
        with open(result_path, 'w') as f:
            for seq_id, (lineage, confidence) in results.iteritems():
                f.write('%s\t%s\t%s\n' % (seq_id, lineage, confidence))
        logger.info('Result path: %s' % result_path)

    return results
def __call__(self, seq_path=None, result_path=None, log_path=None):
    """Returns a dict mapping {seq_id:(taxonomy, confidence)} for each seq

    Keep in mind, "confidence" is only done for consistency and in fact
    all assignments will have a score of 0 because a method for determining
    confidence is not currently implemented.

    Parameters:
    seq_path: path to file of sequences. The sequences themselves are
        never actually used, but they are needed for their ids.
    result_path: path to file of results. If specified, the results are
        also written to this path, one tab-separated
        "seq_id, lineage, confidence" line per sequence. The results
        are returned whether or not a path is given.
    log_path: path to log, which should include dump of params.
    """
    # initialize the logger
    logger = self._get_logger(log_path)
    logger.info(str(self))

    with open(seq_path, 'U') as f:
        seqs = dict(parse_fasta(f))

    # Open the taxonomy file in a with-block so the handle is closed once
    # the consensus lines have been prepared.
    with open(self.Params['id_to_taxonomy_fp']) as tax_f:
        consensus_map = tax2tree.prep_consensus(tax_f, seqs.keys())

    # The lineage column of the first consensus line is handed to
    # determine_rank_order before the tip-name map is loaded.
    seed_con = consensus_map[0].strip().split('\t')[1]
    determine_rank_order(seed_con)

    tipnames_map = load_consensus_map(consensus_map, False)

    # Same for the tree file: closed as soon as the tree is loaded.
    with open(self.Params['tree_fp']) as tree_f:
        tree = load_tree(tree_f, tipnames_map)

    results = tax2tree.generate_constrings(tree, tipnames_map)
    results = tax2tree.clean_output(results, seqs.keys())

    if result_path:
        # if the user provided a result_path, write the
        # results to file
        with open(result_path, 'w') as f:
            for seq_id, (lineage, confidence) in results.iteritems():
                f.write('%s\t%s\t%s\n' % (seq_id, lineage, confidence))
        logger.info('Result path: %s' % result_path)

    return results