Ejemplo n.º 1
0
def get_theme_ids(cursor, ensembl_db_name, config_reader, theme_name):
    """Return the gene_ids of the genes listed in a theme's resource file.

    The file read is ``<resources path>/<theme_name>.txt``.  The first
    whitespace-separated token of each line is looked up as a stable_id in
    the ``gene`` table of the "homo_sapiens" database.

    Args:
        cursor: database cursor passed to switch_to_db() and search_db().
        ensembl_db_name: mapping indexed by species name; only the
            "homo_sapiens" entry is used here.
        config_reader: object whose get_path("resources") gives the
            directory that holds the theme files.
        theme_name: name of the theme file, without the ".txt" extension.

    Returns:
        A list of integer gene_ids in file order.  Stable ids for which the
        query returns no rows are skipped, as are blank lines.

    Exits the process with status 1 if the file is missing or empty, or if
    the first row returned by a query contains "ERROR".
    """
    path = config_reader.get_path("resources")
    fnm = path + "/" + theme_name + ".txt"
    if not os.path.exists(fnm):
        # Single-argument form: same output under the Python 2 print
        # statement and the Python 3 print function.
        print("%s not found" % fnm)
        exit(1)

    if not os.path.getsize(fnm) > 0:
        print("%s empty" % fnm)
        exit(1)

    switch_to_db(cursor, ensembl_db_name["homo_sapiens"])

    gene_ids = []
    inf = erropen(fnm, "r")
    try:
        for line in inf:
            fields = line.split()
            if not fields:
                # Blank or whitespace-only line: nothing to look up.
                continue
            stable_id = fields[0]
            # NOTE(review): the id is interpolated into the SQL text as-is;
            # this assumes the resource files are trusted input.
            qry = "select gene_id, description from gene where stable_id='%s'" % stable_id
            rows = search_db(cursor, qry)
            if not rows:
                continue
            if "ERROR" in rows[0]:
                print(rows[0])
                exit(1)
            gene_ids.append(int(rows[0][0]))
    finally:
        # Runs on the exit(1) path too, so the handle is always released.
        inf.close()

    return gene_ids
Ejemplo n.º 2
0
def get_complement_ids(cursor, ensembl_db_name, config_reader):
    """Return the gene_ids of human genes that belong to none of the themes.

    Every theme file ``<resources path>/<theme_name>.txt`` in the built-in
    theme list is read; the first whitespace-separated token of each line is
    looked up as a stable_id in the ``gene`` table of the "homo_sapiens"
    database.  The gene_ids found are subtracted from the set returned by
    get_gene_ids(cursor, biotype="protein_coding", is_known=1).

    Args:
        cursor: database cursor passed to switch_to_db(), search_db() and
            get_gene_ids().
        ensembl_db_name: mapping indexed by species name; only the
            "homo_sapiens" entry is used here.
        config_reader: object whose get_path("resources") gives the
            directory that holds the theme files.

    Returns:
        A list (in no particular order) of the gene_ids that are in the
        get_gene_ids() result but in none of the theme files.

    Exits the process with status 1 if any theme file is missing or empty,
    or if the first row returned by a query contains "ERROR".
    """
    path = config_reader.get_path("resources")

    # Each theme is listed once; the ids end up in a set, so repeating a
    # theme would only repeat the same queries.
    theme_names = [
        "telomere_maintenance",
        "nonhom_end_joining",
        "egfr_signaling",
        "cell_cycle_checkpoints",
        "genecards_top500",
        "wnt_pathway",
        "enzymes",
        "cell_junction",
        "actin_binding",
        "transcription",
        "translation",
        "meiosis",
    ]

    # The database selection does not depend on the theme, so it is made
    # once here rather than once per file.
    switch_to_db(cursor, ensembl_db_name["homo_sapiens"])

    theme_id_set = set()
    for theme_name in theme_names:
        fnm = path + "/" + theme_name + ".txt"
        if not os.path.exists(fnm):
            # Single-argument form: same output under the Python 2 print
            # statement and the Python 3 print function.
            print("%s not found" % fnm)
            exit(1)

        if not os.path.getsize(fnm) > 0:
            print("%s empty" % fnm)
            exit(1)

        inf = erropen(fnm, "r")
        try:
            for line in inf:
                fields = line.split()
                if not fields:
                    # Blank or whitespace-only line: nothing to look up.
                    continue
                stable_id = fields[0]
                # NOTE(review): the id is interpolated into the SQL text
                # as-is; this assumes the resource files are trusted input.
                qry = "select gene_id, description from gene where stable_id='%s'" % stable_id
                rows = search_db(cursor, qry)
                if not rows:
                    continue
                if "ERROR" in rows[0]:
                    print(rows[0])
                    exit(1)
                theme_id_set.add(int(rows[0][0]))
        finally:
            # Runs on the exit(1) path too, so the handle is always released.
            inf.close()

    human_gene_set = set(get_gene_ids(cursor, biotype="protein_coding", is_known=1))

    return list(human_gene_set - theme_id_set)