コード例 #1
0
def locus2inference_table(biodb):
    """Create and fill the ``locus_tag2uniprot_hit_<biodb>`` table.

    For every locus tag returned by
    ``manipulate_biosqldb.locus_tag2seqfeature_id_dict``, the
    ``seqfeature_qualifier_value`` rows of the matching seqfeature whose value
    contains ``UniProtKB`` are queried. The third ``:``-separated field of the
    first returned value is stored as the UniProt identifier of the locus.

    Loci without such a row, or whose value has fewer than three
    ``:``-separated fields, are skipped. A failing lookup skips the locus; a
    failing insert prints the offending SQL statement and the loop goes on.

    :param biodb: biodatabase name, also used as suffix of the created table.
    """

    server, _db = manipulate_biosqldb.load_db(biodb)

    sql = 'CREATE TABLE locus_tag2uniprot_hit_%s (locus_tag varchar(400),' \
          ' uniprot_id varchar(400), index locus_tag(locus_tag))' % biodb

    server.adaptor.execute(sql, )

    locus2seqfeature_id = manipulate_biosqldb.locus_tag2seqfeature_id_dict(
        server, biodb)

    for locus in locus2seqfeature_id:
        # "%%%%" becomes "%%" after this formatting step.
        # NOTE(review): presumably the DB driver unescapes it once more into
        # a single LIKE wildcard -- confirm against the adaptor in use.
        sql = 'select value from seqfeature_qualifier_value where seqfeature_id=%s and value like "%%%%UniProtKB%%%%"' % (
            locus2seqfeature_id[locus])
        try:
            rows = server.adaptor.execute_and_fetchall(sql, )
        except Exception:
            # Best effort (as before): a failing lookup only skips this locus,
            # but KeyboardInterrupt/SystemExit are no longer swallowed.
            continue

        try:
            uniprot_id = rows[0][0].split(':')[2]
        except (IndexError, TypeError, AttributeError):
            # No UniProtKB qualifier for this locus, or unexpected value shape.
            continue

        sql2 = 'insert into locus_tag2uniprot_hit_%s values ("%s", "%s")' % (
            biodb, locus, uniprot_id)
        try:
            server.adaptor.execute(sql2, )
            server.commit()
        except Exception:
            # Report the statement that failed and continue with the next locus.
            # The parenthesised form prints the same text on Python 2 and 3.
            print(sql2)
コード例 #2
0
def locus_list2identity_in_other_genomes(locus_list, biodb,
                                         reference_locus='wcw_1594',
                                         reference_orthogroup='group_417'):
    """Build a tab-separated identity table for a list of loci.

    The header row is ``orthogroup`` followed by one cleaned genome
    description per taxon. The taxon columns are the keys of the dictionary
    returned by ``locus_tag2identity_best_hit_all_genomes`` for the reference
    locus/orthogroup. Each following row holds the orthogroup of one locus and
    the value reported for every header taxon. Every line ends with a tab
    followed by a newline.

    Rows are written in header column order so that values always sit under
    the matching genome. A taxon missing from a row's dictionary yields an
    empty cell.

    :param locus_list: iterable of locus tags; each must be a key of the
        locus_tag -> seqfeature_id mapping of *biodb* (``KeyError`` otherwise).
    :param biodb: biodatabase name.
    :param reference_locus: locus tag used only to obtain the taxon columns;
        defaults to the previously hard-coded value.
    :param reference_orthogroup: orthogroup of *reference_locus*; defaults to
        the previously hard-coded value.
    :return: the table as a single string.
    """
    import re

    server, _db = manipulate_biosqldb.load_db(biodb)

    locus_tag2seqfeature_id = manipulate_biosqldb.locus_tag2seqfeature_id_dict(
        server, biodb)

    taxon_id2description = manipulate_biosqldb.taxon_id2genome_description(
        server, biodb)

    # Substrings removed from the genome descriptions, applied in this order.
    # Raw strings keep "\." a regex escape instead of an invalid string escape.
    noise_patterns = (
        r" subsp\. aureus",
        r", complete genome\.",
        r", complete sequence\.",
        r"strain ",
        r"str\. ",
        r" complete genome sequence\.",
        r" complete genome\.",
        r" chromosome",
        r"Staphylococcus aureus ",
    )
    for taxon_id in list(taxon_id2description.keys()):
        description = taxon_id2description[taxon_id]
        for pattern in noise_patterns:
            description = re.sub(pattern, "", description)
        taxon_id2description[taxon_id] = description

    # The reference hit dictionary only serves to fix the set and order of
    # the taxon columns.
    reference_hits = locus_tag2identity_best_hit_all_genomes(
        biodb, reference_locus, reference_orthogroup)
    taxon_ids = list(reference_hits.keys())

    header_cells = ['orthogroup']
    header_cells.extend(taxon_id2description[taxon_id]
                        for taxon_id in taxon_ids)
    lines = ['\t'.join(header_cells) + '\t']

    for locus in locus_list:
        seqfeature_id = locus_tag2seqfeature_id[locus]
        orthogroup = manipulate_biosqldb.seqfeature_id2orthogroup(
            server, seqfeature_id, biodb)
        identities = locus_tag2identity_best_hit_all_genomes(biodb, locus,
                                                             orthogroup)
        cells = ['%s' % orthogroup]
        for taxon_id in taxon_ids:
            # Follow the header order; leave the cell empty when this locus
            # has no entry for the taxon.
            if taxon_id in identities:
                cells.append('%s' % identities[taxon_id])
            else:
                cells.append('')
        lines.append('\t'.join(cells) + '\t')

    return '\n'.join(lines) + '\n'
コード例 #3
0
def locus_list2presence_absence_all_genomes(locus_list, biodb_name):
    """Build a tab-separated presence/absence table for a list of loci.

    The header row is ``orthogroup`` followed by the cleaned description of
    every taxon returned by ``manipulate_biosqldb.get_genome_taxons_list``.
    Each following row holds the orthogroup of one locus and, for every taxon
    in that same order, the value found in the dictionary returned by
    ``heatmap_presence_absence`` for that orthogroup. Every line ends with a
    tab followed by a newline.

    :param locus_list: iterable of locus tags; each must be a key of the
        locus_tag -> seqfeature_id mapping of *biodb_name* (``KeyError``
        otherwise).
    :param biodb_name: biodatabase name.
    :return: the table as a single string.
    """
    import re

    server, _db = manipulate_biosqldb.load_db(biodb_name)

    locus_tag2seqfeature_id = manipulate_biosqldb.locus_tag2seqfeature_id_dict(
        server, biodb_name)

    taxon_id2description = manipulate_biosqldb.taxon_id2genome_description(
        server, biodb_name)

    # Substrings removed from the genome descriptions, applied in this order.
    # Raw strings keep "\." a regex escape instead of an invalid string escape.
    noise_patterns = (
        r" subsp\. aureus",
        r", complete genome\.",
        r", complete sequence\.",
        r"strain ",
        r"str\. ",
        r" complete genome sequence\.",
        r" complete genome\.",
        r" chromosome",
        r"Staphylococcus aureus ",
    )
    for taxon_id in list(taxon_id2description.keys()):
        description = taxon_id2description[taxon_id]
        for pattern in noise_patterns:
            description = re.sub(pattern, "", description)
        taxon_id2description[taxon_id] = description

    genomes = manipulate_biosqldb.get_genome_taxons_list(server, biodb_name)

    header_cells = ['orthogroup']
    header_cells.extend(taxon_id2description[taxon_id] for taxon_id in genomes)
    lines = ['\t'.join(header_cells) + '\t']

    for locus in locus_list:
        seqfeature_id = locus_tag2seqfeature_id[locus]
        orthogroup = manipulate_biosqldb.seqfeature_id2orthogroup(
            server, seqfeature_id, biodb_name)
        presence_by_taxon = heatmap_presence_absence(biodb_name, orthogroup)

        # Distinct loop names: the original reused ``i`` for both the locus
        # and the taxon loop.
        cells = ['%s' % orthogroup]
        cells.extend('%s' % presence_by_taxon[taxon_id]
                     for taxon_id in genomes)
        lines.append('\t'.join(cells) + '\t')

    return '\n'.join(lines) + '\n'
コード例 #4
0
def create_locus_tag2seqfeature_table(biodb,
                                      locus2seqfeature_id=False,
                                      locus2taxon_id=False):
    """Create and fill ``custom_tables.locus2seqfeature_id_<biodb>``.

    The table maps each locus tag to its seqfeature id and genome taxon id.
    It is created only if it does not exist yet; rows are inserted (and
    committed) one locus at a time.

    :param biodb: biodatabase name, also used as suffix of the table name.
    :param locus2seqfeature_id: optional locus_tag -> seqfeature_id mapping;
        when falsy it is loaded with
        ``manipulate_biosqldb.locus_tag2seqfeature_id_dict``.
    :param locus2taxon_id: optional locus_tag -> taxon_id mapping; when falsy
        it is loaded with ``manipulate_biosqldb.locus_tag2genome_taxon_id``.
        Loci without a taxon id are stored with a NULL ``taxon_id``.
    """

    from chlamdb.biosqldb import manipulate_biosqldb

    server, _db = manipulate_biosqldb.load_db(biodb)

    sql = 'CREATE TABLE IF NOT EXISTS custom_tables.locus2seqfeature_id_%s (locus_tag varchar(400), ' \
          ' seqfeature_id INT, ' \
          ' taxon_id INT,' \
          ' index locus_tag (locus_tag), ' \
          ' index seqfeature_id(seqfeature_id), ' \
          ' index taxon_id (taxon_id))' % biodb

    server.adaptor.execute(sql)
    server.commit()
    if not locus2seqfeature_id:
        locus2seqfeature_id = manipulate_biosqldb.locus_tag2seqfeature_id_dict(
            server, biodb)
    if not locus2taxon_id:
        locus2taxon_id = manipulate_biosqldb.locus_tag2genome_taxon_id(
            server, biodb)

    for locus in locus2seqfeature_id:
        # Only the taxon lookup is guarded, so a genuine database error on
        # the insert is no longer masked by a blind retry with NULL.
        try:
            taxon_id = locus2taxon_id[locus]
        except KeyError:
            # Locus without a taxon id (pseudogenes, per the original comment).
            taxon_id = None
        if taxon_id is None:
            taxon_id = "NULL"

        sql = 'insert into custom_tables.locus2seqfeature_id_%s values ("%s", %s, %s)' % (
            biodb, locus, locus2seqfeature_id[locus], taxon_id)
        server.adaptor.execute(sql)
        server.commit()
コード例 #5
0
    # Optional step: create the blastnr SQL tables, requesting both the main
    # blastnr table and the alternate tables.
    if args.create_tables:

        create_sql_blastnr_tables(args.mysql_database,
                                  mysql_host,
                                  mysql_user,
                                  mysql_pwd,
                                  mysql_db,
                                  main_blastnr_table=True,
                                  alternate_tables=True)

    # Optional step: build the lookup mappings that are later handed to
    # blastnr2biosql.
    if args.load_tables:

        server, db = manipulate_biosqldb.load_db(biodb)

        # Progress messages are written without a trailing newline.
        sys.stdout.write("creating locus_tag2seqfeature_id")
        locus_tag2seqfeature_id = manipulate_biosqldb.locus_tag2seqfeature_id_dict(
            server, biodb)

        sys.stdout.write("creating protein_id2seqfeature_id")
        protein_id2seqfeature_id = manipulate_biosqldb.protein_id2seqfeature_id_dict(
            server, biodb)

        sys.stdout.write("getting seqfeature_id2locus_tag")
        seqfeature_id2locus_tag = manipulate_biosqldb.seqfeature_id2locus_tag_dico(
            server, biodb)

        # NOTE(review): this lookup is keyed on args.mysql_database while the
        # ones above use biodb -- confirm both name the same database.
        sys.stdout.write("getting locus_tag2accession")
        locus_tag2accession = manipulate_biosqldb.locus_tag2accession(
            server, args.mysql_database)

        blastnr2biosql(seqfeature_id2locus_tag, locus_tag2seqfeature_id,
                       protein_id2seqfeature_id, locus_tag2accession, biodb,
コード例 #6
0
    parser.add_argument("-d", '--db_name', type=str, help="db name")

    args = parser.parse_args()

    server, db = manipulate_biosqldb.load_db(args.db_name)
    # NOTE(review): hard-coded, user-specific path; not used in the visible
    # part of this script.
    asset_path = "/home/trestan/work/dev/django/chlamydia/assets"

    # Parse the clustering output given by args.mcl; the call returns four
    # values, unpacked here in order.
    print("parsing orthofinder file")
    locus_tag2orthogroup_id, \
    orthomcl_groups2locus_tag_list, \
    genome_orthomcl_code2proteins, \
    protein_id2genome_ortho_mcl_code = parse_orthomcl_output(args.mcl,
                                                             True)

    # Two locus_tag -> seqfeature_id mappings: the default one and one built
    # with all=False.
    # NOTE(review): presumably all=False restricts the mapping to CDS
    # features, as the variable name suggests -- confirm in manipulate_biosqldb.
    print("get locus_tag2seqfeature_id")
    locus_tag2seqfeature_id = manipulate_biosqldb.locus_tag2seqfeature_id_dict(
        server, args.db_name)
    locus_tag2seqfeature_id_CDS = manipulate_biosqldb.locus_tag2seqfeature_id_dict(
        server, args.db_name, all=False)

    # Report the size of each mapping as a sanity check before loading.
    print("number of groups:", len(orthomcl_groups2locus_tag_list))
    print("number of locus in locus_tag2orthogroup_id:",
          len(locus_tag2orthogroup_id))
    print("number of locus in locus_tag2seqfeature_id:",
          len(locus_tag2seqfeature_id))
    print("number of locus in locus_tag2seqfeature_id_CDS:",
          len(locus_tag2seqfeature_id_CDS))

    # The default mapping (not the all=False one) is passed on here.
    print("adding orthogroup to seqfeature_qualifier_values")
    add_orthogroup_to_seq(server, locus_tag2orthogroup_id,
                          locus_tag2seqfeature_id)
コード例 #7
0
        help=
        "Path to save core orthogroup fasta. Taxon id as header for concatenation.",
        default=None)
    # Flag: the input file comes from orthofinder rather than orthomcl.
    parser.add_argument("-o",
                        '--orthofinder',
                        action="store_true",
                        help="orthofinder input file (and not orthomcl)")
    args = parser.parse_args()

    server, db = manipulate_biosqldb.load_db(args.db_name)
    # NOTE(review): hard-coded, user-specific path; not used in the visible
    # part of this script.
    asset_path = "/home/trestan/work/dev/django/chlamydia/assets"

    # Build the lookup mappings only when neither args.get_sequences nor
    # args.core_groups_path is set.
    # NOTE: the print statements below use Python 2 syntax.
    if not args.get_sequences and not args.core_groups_path:

        print "creating locus_tag2seqfeature_id"
        locus_tag2seqfeature_id = manipulate_biosqldb.locus_tag2seqfeature_id_dict(
            server, args.db_name)

        print "creating protein_id2seqfeature_id"
        protein_id2seqfeature_id = manipulate_biosqldb.protein_id2seqfeature_id_dict(
            server, args.db_name)

        print "creating locus_tag2taxon_id dictionnary..."
        locus_tag2genome_taxon_id = manipulate_biosqldb.locus_tag2genome_taxon_id(
            server, args.db_name)
        print "creating protein_id2taxon_id dictionnary..."
        protein_id2genome_taxon_id = manipulate_biosqldb.protein_id2genome_taxon_id(
            server, args.db_name)

        print "creating locus_tag2accession dictionnary..."
        locus_tag2accession = manipulate_biosqldb.locus_tag2accession(
            server, args.db_name)