Exemple #1
0
def convert_tree_taxon_id2accession(biodb_name,
                                    input_tree,
                                    output_tree,
                                    sqlite=False):
    """Relabel the leaves of a tree from taxon ids to accessions.

    The taxon id -> accession mapping comes from
    ``manipulate_biosqldb.taxon_id2accession_chromosome``.  Every entry is
    additionally registered under the ``str()`` form of its key, presumably
    because leaf names read from the tree are strings
    (NOTE(review): confirm against ``convert_terminal_node_names``).

    Args:
        biodb_name: database name passed to ``load_db`` and to the mapping
            query.
        input_tree: tree handed to
            ``parse_newick_tree.convert_terminal_node_names``.
        output_tree: destination given to ``Phylo.write``; the tree is
            written in newick format.
        sqlite: forwarded unchanged to ``manipulate_biosqldb.load_db``.
    """
    server, _ = manipulate_biosqldb.load_db(biodb_name, sqlite=sqlite)
    taxon_id2accession = manipulate_biosqldb.taxon_id2accession_chromosome(
        server, biodb_name)
    # Iterate over a snapshot of the keys: inserting the str() aliases while
    # iterating the dict itself raises "dictionary changed size during
    # iteration" as soon as one key is not already a string.
    for taxon_id in list(taxon_id2accession):
        taxon_id2accession[str(taxon_id)] = taxon_id2accession[taxon_id]
    # Single-argument form produces the same output on Python 2 and 3.
    print("taxon_id2accession %s" % (taxon_id2accession,))
    new_tree = parse_newick_tree.convert_terminal_node_names(
        input_tree, taxon_id2accession)

    Phylo.write(new_tree, output_tree, 'newick')
Exemple #2
0
def convert_tree_accession2taxon_id(biodb_name,
                                    input_tree,
                                    output_tree,
                                    sqlite=False):
    """Relabel the leaves of a tree from accessions to taxon ids.

    The accession -> taxon id mapping comes from
    ``manipulate_biosqldb.accession2taxon_id``; the taxon ids are converted
    to strings before being used as replacement leaf names.

    Args:
        biodb_name: database name passed to ``load_db`` and to the mapping
            query.
        input_tree: tree handed to
            ``parse_newick_tree.convert_terminal_node_names``.
        output_tree: destination given to ``Phylo.write``; the tree is
            written in newick format.
        sqlite: forwarded unchanged to ``manipulate_biosqldb.load_db``.
    """
    server, _ = manipulate_biosqldb.load_db(biodb_name, sqlite=sqlite)
    raw_mapping = manipulate_biosqldb.accession2taxon_id(server, biodb_name)
    # Build the stringified mapping as a new dict instead of rewriting the
    # object returned by the database helper.
    accession2taxon_id = {
        accession: str(taxon_id)
        for accession, taxon_id in raw_mapping.items()
    }
    # Single-argument form produces the same output on Python 2 and 3.
    print("accession2taxon_id %s" % (accession2taxon_id,))
    new_tree = parse_newick_tree.convert_terminal_node_names(
        input_tree, accession2taxon_id)

    Phylo.write(new_tree, output_tree, 'newick')
Exemple #3
0
# Ordered (regex, replacement) pairs used to shorten genome descriptions.
# The order is significant: e.g. ", complete genome." must be tried before
# the shorter " complete genome." variant.
_GENOME_DESCRIPTION_SUBSTITUTIONS = (
    (r" subsp\. aureus", ""),
    (r", complete genome\.", ""),
    (r", complete sequence\.", ""),
    (r"strain ", ""),
    (r"str\. ", ""),
    (r" complete genome sequence\.", ""),
    (r" complete genome\.", ""),
    (r" chromosome", ""),
    (r"Staphylococcus", "S."),
    # The original replaced " DNA" with "S." (copy-paste from the line
    # above), yielding labels such as "S. aureusS."; the token is removed
    # like every other piece of boilerplate.
    (r" DNA", ""),
)


def _shorten_genome_description(description):
    """Return *description* with all cleanup substitutions applied in order."""
    import re
    for pattern, replacement in _GENOME_DESCRIPTION_SUBSTITUTIONS:
        description = re.sub(pattern, replacement, description)
    return description


def convert_tree_taxon2genome(biodb_name,
                              input_tree,
                              output_tree,
                              sqlite=False):
    """Relabel the leaves of a tree from taxon ids to genome descriptions.

    Descriptions come from
    ``manipulate_biosqldb.taxon_id2genome_description`` and are shortened
    (boilerplate such as ", complete genome." removed, "Staphylococcus"
    abbreviated to "S.") before being used as leaf names.

    Args:
        biodb_name: database name passed to ``load_db`` and to the
            description query.
        input_tree: tree handed to
            ``parse_newick_tree.convert_terminal_node_names``.
        output_tree: destination given to ``Phylo.write``; the tree is
            written in newick format.
        sqlite: forwarded unchanged to ``manipulate_biosqldb.load_db``.
    """
    server, _ = manipulate_biosqldb.load_db(biodb_name, sqlite=sqlite)
    print(biodb_name)
    taxon_id2genome_description = manipulate_biosqldb.taxon_id2genome_description(
        server, biodb_name)

    print(taxon_id2genome_description)

    # Only values are rewritten, but iterate over a key snapshot so the loop
    # never depends on the mapping's iteration behaviour under mutation.
    for taxon_id in list(taxon_id2genome_description):
        print(taxon_id)
        taxon_id2genome_description[taxon_id] = _shorten_genome_description(
            taxon_id2genome_description[taxon_id])

    print(taxon_id2genome_description)
    new_tree = parse_newick_tree.convert_terminal_node_names(
        input_tree, taxon_id2genome_description)
    print("writing converted tree...")
    print(output_tree)
    Phylo.write(new_tree, output_tree, 'newick')
def convert_leaf_labels_from_genbank(input_tree,
                                     input_gbk_list,
                                     show_rank=False,
                                     use_gbk_file_names=False,
                                     use_source_organism=False):
    """Return *input_tree* with leaf names replaced by GenBank descriptions.

    Args:
        input_tree: tree handed to
            ``parse_newick_tree.convert_terminal_node_names``.
        input_gbk_list: GenBank inputs passed to the
            ``gbk2accessiontodefinition`` lookup functions.
        show_rank: when true, append a taxonomic rank to every description:
            `` (<phylum>)`` if the phylum lookup succeeds, otherwise
            `` (order: <order>)``, otherwise `` (?)``.
        use_gbk_file_names: forwarded to ``get_coressp``; ignored when
            ``use_source_organism`` is true.
        use_source_organism: when true, build the mapping with
            ``get_corresp_organism`` instead of ``get_coressp``.

    Returns:
        The tree returned by ``convert_terminal_node_names``.
    """
    import gbk2accessiontodefinition
    import parse_newick_tree

    if use_source_organism:
        id2description = gbk2accessiontodefinition.get_corresp_organism(
            input_gbk_list)
    else:
        id2description = gbk2accessiontodefinition.get_coressp(
            input_gbk_list, use_gbk_file_names=use_gbk_file_names)

    if show_rank:
        for accession in id2description:
            print('searching rank for %s...' % accession)
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt / SystemExit still abort a long lookup loop.
            try:
                suffix = ' (%s)' % accession2taxon_rank(accession, 'phylum')
            except Exception:
                print('no phylum for %s' % accession)
                try:
                    suffix = ' (order: %s)' % accession2taxon_rank(
                        accession, 'order')
                except Exception:
                    print('no order for %s' % accession)
                    suffix = ' (?)'
            id2description[accession] = id2description[accession] + suffix

    # NOTE(review): the meaning of the third positional argument (1) is
    # defined by convert_terminal_node_names, which is not visible here.
    new_tree = parse_newick_tree.convert_terminal_node_names(
        input_tree, id2description, 1)

    return new_tree
Exemple #5
0
    return name2description


if __name__ == '__main__':
    # Command-line entry point: rename the leaves of a tree using descriptions
    # extracted from GenBank files and write the result as newick.
    #
    # NOTE: these imports bind module-level names; functions defined earlier
    # in this file may rely on them (e.g. SeqIO, re), so none is removed.
    import argparse
    from Bio import SeqIO
    import re
    import parse_newick_tree
    from Bio import Phylo
    parser = argparse.ArgumentParser()
    parser.add_argument("-g",
                        '--input_gbk',
                        type=str,
                        help="input gbk files",
                        nargs='+')
    parser.add_argument("-m",
                        '--molis_table',
                        type=str,
                        help="input molis number table")
    parser.add_argument("-t", '--tree', type=str, help="input tree")
    # The output path used to be hard-coded; the default keeps the old name.
    parser.add_argument("-o",
                        '--output',
                        type=str,
                        default="parsnp_renames.nwk",
                        help="output tree (default: parsnp_renames.nwk)")

    args = parser.parse_args()
    id2description = get_coressp(args.input_gbk, args.molis_table)
    new_tree = parse_newick_tree.convert_terminal_node_names(
        args.tree, id2description)
    # Parenthesised single-argument print works on both Python 2 and 3.
    print("writing converted tree...")

    with open(args.output, 'w') as output_tree:
        Phylo.write(new_tree, output_tree, 'newick')