Beispiel #1
0
 def test_load_consensus_map(self):
     """correctly returns a consensus map

     Loads the same lines with and without rank prefixes and compares both
     results against the expected id -> lineage dicts.
     """
     # named `data` rather than `input` so the builtin is not shadowed
     data = ["foo\ta; b; c; d; e; f; g",
             "bar\th; i; j; k; l; m; n",
             # edge cases: a literal "None", an empty name and an
             # "uncultured" name are all expected to come back unnamed
             "foobar\th; i; j; None; l; ; foo uncultured bar"]
     exp_noappend = {'foo': ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
                     'bar': ['h', 'i', 'j', 'k', 'l', 'm', 'n'],
                     'foobar': ['h', 'i', 'j', None, 'l', None, None]}
     exp_append = {
         'foo': ['k__a', 'p__b', 'c__c', 'o__d', 'f__e', 'g__f', 's__g'],
         'bar': ['k__h', 'p__i', 'c__j', 'o__k', 'f__l', 'g__m', 's__n'],
         'foobar': ['k__h', 'p__i', 'c__j', 'o__', 'f__l', 'g__', 's__']}
     obs_noappend = load_consensus_map(data, False)
     obs_append = load_consensus_map(data, True)
     self.assertEqual(obs_noappend, exp_noappend)
     self.assertEqual(obs_append, exp_append)
Beispiel #2
0
def _format_conflict(taxlookup, id_, reason, parent_a, parent_b):
    """Return one tab-delimited report line for a pair of conflicting parents

    The line holds the id, the reason, and one "parent: lineage" note per
    parent, with the lineage names joined by "; ".
    """
    fields = [id_, reason]
    for parent in (parent_a, parent_b):
        # a lineage loaded without rank prefixes may hold None for unnamed
        # ranks; write those as empty names instead of failing in join()
        lineage = '; '.join([name or '' for name in taxlookup[parent]])
        fields.append("%s: %s" % (parent, lineage))
    return '\t'.join(fields) + '\n'


def _write_report(output, taxlookup, b3_results, cs_results):
    """Write the header, the overlap rows and the per-tool conflict rows"""
    output.write("#accession\treason\tnote\tnote\n")
    overlap = get_overlap(b3_results, cs_results)
    for id_ in overlap:
        output.write("%s\tFound by both Bellerophon and ChimeraSlayer\n" % id_)

    # ids already reported as overlap are not reported a second time
    for id_, score, parent_a, parent_b in b3_results:
        if id_ in overlap:
            continue
        if determine_taxon_conflict(taxlookup, parent_a, parent_b):
            output.write(_format_conflict(
                taxlookup, id_, "Class conflict found by Bellerophon",
                parent_a, parent_b))

    for id_, parent_a, parent_b in cs_results:
        if id_ in overlap:
            continue
        if determine_taxon_conflict(taxlookup, parent_a, parent_b):
            output.write(_format_conflict(
                taxlookup, id_, "Class conflict found by ChimeraSlayer",
                parent_a, parent_b))


def main():
    """Report chimeras flagged by Bellerophon and/or ChimeraSlayer

    Reads the reference taxonomy map, the ChimeraSlayer results and the
    Bellerophon results from the paths given on the command line and writes
    a tab-delimited report to opts.output:

    - every id returned by get_overlap for the two result sets is reported
      as found by both tools
    - every other record whose two parents make determine_taxon_conflict
      return true is reported together with both parents' lineages

    All files are closed when the report is done, also on error.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    with open(opts.ref_taxonomy_map) as tax_f:
        taxlookup = load_consensus_map(tax_f, False)

    # the result files stay open until the report is written, so this also
    # works if the parsers read their input lazily
    with open(opts.input_cs) as cs_f:
        cs_results = parse_cs_chimeras(cs_f)
        with open(opts.input_bellerophon) as b3_f:
            b3_results = parse_b3_chimeras(b3_f)
            with open(opts.output, 'w') as output:
                _write_report(output, taxlookup, b3_results, cs_results)
Beispiel #3
0
 def test_load_consensus_map(self):
     """load_consensus_map yields the expected id -> lineage dicts

     The same lines are loaded once without and once with rank prefixes.
     """
     lines = [
         "foo\ta; b; c; d; e; f; g",
         "bar\th; i; j; k; l; m; n",
         # literal "None", an empty name and an "uncultured" name
         "foobar\th; i; j; None; l; ; foo uncultured bar",
     ]

     expected_plain = {
         'foo': ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
         'bar': ['h', 'i', 'j', 'k', 'l', 'm', 'n'],
         'foobar': ['h', 'i', 'j', None, 'l', None, None],
     }
     expected_prefixed = {
         'foo': ['d__a', 'p__b', 'c__c', 'o__d', 'f__e', 'g__f', 's__g'],
         'bar': ['d__h', 'p__i', 'c__j', 'o__k', 'f__l', 'g__m', 's__n'],
         'foobar': ['d__h', 'p__i', 'c__j', 'o__', 'f__l', 'g__', 's__'],
     }

     observed_plain = load_consensus_map(lines, False)
     observed_prefixed = load_consensus_map(lines, True)

     self.assertEqual(observed_plain, expected_plain)
     self.assertEqual(observed_prefixed, expected_prefixed)
Beispiel #4
0
def _format_conflict(taxlookup, id_, reason, parent_a, parent_b):
    """Return one tab-delimited report line for a pair of conflicting parents

    The line holds the id, the reason, and one "parent: lineage" note per
    parent, with the lineage names joined by "; ".
    """
    fields = [id_, reason]
    for parent in (parent_a, parent_b):
        # a lineage loaded without rank prefixes may hold None for unnamed
        # ranks; write those as empty names instead of failing in join()
        lineage = '; '.join([name or '' for name in taxlookup[parent]])
        fields.append("%s: %s" % (parent, lineage))
    return '\t'.join(fields) + '\n'


def _write_report(output, taxlookup, b3_results, cs_results):
    """Write the header, the overlap rows and the per-tool conflict rows"""
    output.write("#accession\treason\tnote\tnote\n")
    overlap = get_overlap(b3_results, cs_results)
    for id_ in overlap:
        output.write("%s\tFound by both Bellerophon and ChimeraSlayer\n" % id_)

    # ids already reported as overlap are not reported a second time
    for id_, score, parent_a, parent_b in b3_results:
        if id_ in overlap:
            continue
        if determine_taxon_conflict(taxlookup, parent_a, parent_b):
            output.write(_format_conflict(
                taxlookup, id_, "Class conflict found by Bellerophon",
                parent_a, parent_b))

    for id_, parent_a, parent_b in cs_results:
        if id_ in overlap:
            continue
        if determine_taxon_conflict(taxlookup, parent_a, parent_b):
            output.write(_format_conflict(
                taxlookup, id_, "Class conflict found by ChimeraSlayer",
                parent_a, parent_b))


def main():
    """Report chimeras flagged by Bellerophon and/or ChimeraSlayer

    Reads the reference taxonomy map, the ChimeraSlayer results and the
    Bellerophon results from the paths given on the command line and writes
    a tab-delimited report to opts.output:

    - every id returned by get_overlap for the two result sets is reported
      as found by both tools
    - every other record whose two parents make determine_taxon_conflict
      return true is reported together with both parents' lineages

    All files are closed when the report is done, also on error.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    with open(opts.ref_taxonomy_map) as tax_f:
        taxlookup = load_consensus_map(tax_f, False)

    # the result files stay open until the report is written, so this also
    # works if the parsers read their input lazily
    with open(opts.input_cs) as cs_f:
        cs_results = parse_cs_chimeras(cs_f)
        with open(opts.input_bellerophon) as b3_f:
            b3_results = parse_b3_chimeras(b3_f)
            with open(opts.output, 'w') as output:
                _write_report(output, taxlookup, b3_results, cs_results)
Beispiel #5
0
    def test_generate_constrings_valid_input(self):
        """generate_constrings reproduces the stored result for valid input.

        The expected value is meant to mirror nlevel (tax2tree's interface).
        """
        # the lineage column of the first consensus line is handed to
        # determine_rank_order before the map and the tree are loaded
        first_lineage = test_cons[0].split('\t')[1]
        determine_rank_order(first_lineage)

        consensus = load_consensus_map(test_cons, False)
        loaded_tree = load_tree(test_tree, consensus)
        observed = generate_constrings(loaded_tree, consensus)

        self.assertEqual(observed, test_results)
Beispiel #6
0
    def test_generate_constrings_valid_input(self):
        """generate_constrings reproduces the stored result for valid input.

        The expected value is meant to mirror nlevel (tax2tree's interface).
        """
        # the lineage column of the first consensus line is handed to
        # determine_rank_order before the map and the tree are loaded
        first_lineage = test_cons[0].split('\t')[1]
        determine_rank_order(first_lineage)

        consensus = load_consensus_map(test_cons, False)
        loaded_tree = load_tree(test_tree, consensus)
        observed = generate_constrings(loaded_tree, consensus)

        self.assertEqual(observed, test_results)
Beispiel #7
0
def hierarchy_errors(tax_lines):
    """Collect the taxa that appear under more than one parent

    tax_lines is loaded with load_consensus_map (no rank prefixes) and
    handed to get_polyphyletic. Every (name, rank) entry whose parents
    collection has more than one member becomes a dict with the keys
    'Taxon', 'Rank' and 'Parents'. Returns the list of those dicts.
    """
    polyphyletic = get_polyphyletic(load_consensus_map(tax_lines, False))

    return [{'Taxon': taxon, 'Rank': level, 'Parents': parent_names}
            for (taxon, level), parent_names in polyphyletic.iteritems()
            if len(parent_names) > 1]
Beispiel #8
0
def hierarchy_errors(tax_lines):
    """Collect the taxa that appear under more than one parent

    tax_lines is loaded with load_consensus_map (no rank prefixes) and
    handed to get_polyphyletic. Every (name, rank) entry whose parents
    collection has more than one member becomes a dict with the keys
    'Taxon', 'Rank' and 'Parents'. Returns the list of those dicts.
    """
    polyphyletic = get_polyphyletic(load_consensus_map(tax_lines, False))

    return [{'Taxon': taxon, 'Rank': level, 'Parents': parent_names}
            for (taxon, level), parent_names in polyphyletic.iteritems()
            if len(parent_names) > 1]
Beispiel #9
0
    def __call__(self, seq_path=None, result_path=None, log_path=None):
        """Returns a dict mapping {seq_id:(taxonomy, confidence)} for each seq

        Keep in mind, "confidence" is only done for consistency and in fact
        all assignments will have a score of 0 because a method for determining
        confidence is not currently implemented.

        Parameters:
        seq_path: path to file of sequences. The sequences themselves are
            never actually used, but they are needed for their ids.
        result_path: path to file of results. If specified, the results are
            additionally written to this path as tab-delimited
            "seq_id, lineage, confidence" lines; they are returned either
            way.
        log_path: path to log, which should include dump of params.
        """

        # initialize the logger
        logger = self._get_logger(log_path)
        logger.info(str(self))

        with open(seq_path, 'U') as f:
            seqs = dict(MinimalFastaParser(f))

        # the consensus is indexed right below, so it is fully built by the
        # time the taxonomy file is closed
        with open(self.Params['id_to_taxonomy_fp']) as tax_f:
            consensus_map = tax2tree.prep_consensus(tax_f, seqs.keys())
        seed_con = consensus_map[0].strip().split('\t')[1]
        determine_rank_order(seed_con)

        tipnames_map = load_consensus_map(consensus_map, False)

        # keep the tree file open until the results exist, in case the tree
        # is read lazily
        with open(self.Params['tree_fp']) as tree_f:
            tree = load_tree(tree_f, tipnames_map)

            results = tax2tree.generate_constrings(tree, tipnames_map)
            results = tax2tree.clean_output(results, seqs.keys())

        if result_path:
            # if the user provided a result_path, write the
            # results to file
            with open(result_path, 'w') as f:
                for seq_id, (lineage, confidence) in results.iteritems():
                    f.write('%s\t%s\t%s\n' % (seq_id, lineage, confidence))
            logger.info('Result path: %s' % result_path)

        return results
Beispiel #10
0
    def __call__(self, seq_path=None, result_path=None, log_path=None):
        """Returns a dict mapping {seq_id:(taxonomy, confidence)} for each seq

        Keep in mind, "confidence" is only done for consistency and in fact
        all assignments will have a score of 0 because a method for determining
        confidence is not currently implemented.

        Parameters:
        seq_path: path to file of sequences. The sequences themselves are
            never actually used, but they are needed for their ids.
        result_path: path to file of results. If specified, the results are
            additionally written to this path as tab-delimited
            "seq_id, lineage, confidence" lines; they are returned either
            way.
        log_path: path to log, which should include dump of params.
        """

        # initialize the logger
        logger = self._get_logger(log_path)
        logger.info(str(self))

        with open(seq_path, 'U') as f:
            seqs = dict(parse_fasta(f))

        # the consensus is indexed right below, so it is fully built by the
        # time the taxonomy file is closed
        with open(self.Params['id_to_taxonomy_fp']) as tax_f:
            consensus_map = tax2tree.prep_consensus(tax_f, seqs.keys())
        seed_con = consensus_map[0].strip().split('\t')[1]
        determine_rank_order(seed_con)

        tipnames_map = load_consensus_map(consensus_map, False)

        # keep the tree file open until the results exist, in case the tree
        # is read lazily
        with open(self.Params['tree_fp']) as tree_f:
            tree = load_tree(tree_f, tipnames_map)

            results = tax2tree.generate_constrings(tree, tipnames_map)
            results = tax2tree.clean_output(results, seqs.keys())

        if result_path:
            # if the user provided a result_path, write the
            # results to file
            with open(result_path, 'w') as f:
                for seq_id, (lineage, confidence) in results.iteritems():
                    f.write('%s\t%s\t%s\n' % (seq_id, lineage, confidence))
            logger.info('Result path: %s' % result_path)

        return results