Esempio n. 1
0
def fetch_string_ppi_edges():
    """Return GO-term edge weights aggregated from STRING PPI scores.

    When ``constants.USE_CACHE`` is set and ``GO_edges_ppi_total.txt`` exists
    in ``constants.DICTIONARIES_DIR``, the edges are loaded from that file via
    ``infra.load_phenotype_data`` (first column is the edge key, second the
    integer weight).

    Otherwise the edges are rebuilt: for every STRING edge ``"a=b"`` whose two
    endpoints are both present in the ENSP dictionary, the score is added to
    every pair of GO terms ``(src_term, dst_term)`` taken from the endpoints'
    ``"GO Terms"`` entries.  A pair is treated as undirected: if
    ``"dst=src"`` was stored first, that key is the one updated.  The result
    is written as tab-separated lines to ``GO_edges_ppi_total.txt`` in
    ``constants.OUTPUT_GLOBAL_DIR``.

    Returns:
        dict mapping ``"<go term>=<go term>"`` to the summed integer score.
    """
    cache_name = "GO_edges_ppi_total.txt"

    if constants.USE_CACHE and os.path.isfile(
            os.path.join(constants.DICTIONARIES_DIR, cache_name)):
        print("about to load ppi")
        GO_edges_ppi_grid = infra.load_phenotype_data(
            cache_name, phenotype_list_path=constants.DICTIONARIES_DIR)
        print("done load ppi ({} lines). about to load to dict".format(
            len(GO_edges_ppi_grid)))
        go_edges = {}
        for cur in GO_edges_ppi_grid:
            go_edges[cur[0]] = int(cur[1])
        print("done load to dict")
        return go_edges

    print("fetching ensg")
    # The result is not used below; the call is kept in case it has side
    # effects (e.g. warming a cache) -- TODO confirm and drop if not needed.
    get_ensg_dict()
    print("fetching ensp")
    ensp_dict = get_ensp_dict()
    print("fetching string ppi")
    string_ppi_dict = get_string_ppi_dict()

    go_edges = {}
    for count, cur_edge in enumerate(string_ppi_dict, 1):
        print(count)
        vertices = cur_edge.split("=")
        src, dst = vertices[0], vertices[1]
        if src not in ensp_dict or dst not in ensp_dict:
            continue

        # Convert once per STRING edge rather than once per GO-term pair.
        score = int(string_ppi_dict[cur_edge])
        go_src = ensp_dict[src]["GO Terms"]
        go_dst = ensp_dict[dst]["GO Terms"]

        for cur_src in go_src:
            for cur_dst in go_dst:
                edge = "{}={}".format(cur_src, cur_dst)
                if edge not in go_edges:
                    # Fall back to the reversed key if that orientation was
                    # recorded first, so each unordered pair has one entry.
                    edge_alt = "{}={}".format(cur_dst, cur_src)
                    if edge_alt in go_edges:
                        edge = edge_alt
                go_edges[edge] = go_edges.get(edge, 0) + score

    # NOTE(review): the cache is looked up in DICTIONARIES_DIR but written to
    # OUTPUT_GLOBAL_DIR -- confirm this is intentional.
    total = len(go_edges)
    with open(os.path.join(constants.OUTPUT_GLOBAL_DIR, cache_name),
              "w+") as f:
        for count, edge in enumerate(go_edges, 1):
            # Progress is reported against the number of edges being written
            # (the previous denominator was always 0 on this path).
            print("{}/{}".format(count, total))
            f.write("{}\t{}\n".format(edge, go_edges[edge]))

    return go_edges
Esempio n. 2
0
def fetch_ppi_edges():
    """Return GO-term edge weights aggregated from STRING PPI scores.

    For every STRING edge ``"a=b"`` whose two endpoints are both present in
    the ENSP dictionary, the edge's score is added to every directed pair
    ``"<src term>=<dst term>"`` built from the endpoints' ``"GO Terms"``
    entries.

    Returns:
        dict mapping ``"<go term>=<go term>"`` to the summed integer score;
        empty when no STRING edge has both endpoints in the ENSP dictionary.
    """
    # The result is not used below; the call is kept in case it has side
    # effects (e.g. warming a cache) -- TODO confirm and drop if not needed.
    get_ensg_dict()
    ensp_dict = get_ensp_dict()
    string_ppi_dict = get_string_ppi_dict()

    # Created once, outside the loop, so totals accumulate across all STRING
    # edges and the name is always bound at the return.
    string_ppi_go_edges = {}
    for k in string_ppi_dict:
        cur_vertices = k.split("=")
        src, dst = cur_vertices[0], cur_vertices[1]
        if src not in ensp_dict or dst not in ensp_dict:
            continue

        # Scores are summed as integers, matching fetch_string_ppi_edges.
        score = int(string_ppi_dict[k])
        go_src = ensp_dict[src]["GO Terms"]
        go_dest = ensp_dict[dst]["GO Terms"]

        for cur_go_src in go_src:
            for cur_go_dest in go_dest:
                cur_edge = "{}={}".format(cur_go_src, cur_go_dest)
                string_ppi_go_edges[cur_edge] = (
                    string_ppi_go_edges.get(cur_edge, 0) + score)
    return string_ppi_go_edges
Esempio n. 3
0
def fetch_string_ppi_edges():
    go_edges = {}
            if constants.USE_CACHE:
        if os.path.isfile(os.path.join(constants.DICTIONARIES_DIR,"GO_edges_ppi_total.txt")):
            GO_edges_ppi_grid = infra.load_phenotype_data("GO_edges_ppi_total.txt",phenotype_list_path=constants.DICTIONARIES_DIR)
            for cur in GO_edges_ppi_grid:
                go_edges[cur[0]] = int(cur[1])
            return go_edges

    print "fetching ensg"
    ensg_dict = get_ensg_dict()
    print "fetching ensp"
    ensp_dict = get_ensp_dict()
    print "fetching string ppi"
    string_ppi_dict = get_string_ppi_dict()
    go_edges = {}
    count = 0
    for cur_edge, cur_score in string_ppi_dict.iteritems():
        count +=1
        print count
        vertices = cur_edge.split("=")
        if not ensp_dict.has_key(vertices[0]) or not ensp_dict.has_key(vertices[1]): continue

        go_src = ensp_dict[vertices[0]]["GO Terms"]
        go_dst = ensp_dict[vertices[1]]["GO Terms"]

        for cur_src in go_src:
            for cur_dst in go_dst:
                edge = "{}={}".format(cur_src,cur_dst)
                edge_alt = "{}={}".format(cur_dst, cur_src)