"35": "Artery - Tibial", "34": "Brain - Cortex", "33": "Heart - Left Ventricle", "32": "Brain - Hippocampus", "50": "Brain - Substantia nigra", } graph = nx.read_gpickle(sys.argv[1]) graph_nodes = graph.nodes() start_nodes = load_input_list(sys.argv[2]) graph_choice = "intact" print "starting", len(start_nodes) fishertest.load( list(set(start_nodes)), 0.05, ["C", "P", "F", "R", "K", "O", "KDr", "KDi", "DB", "Or", "VH"], graph_choice, path_def="results_old/", single="0", ) nodes = list(set(start_nodes).intersection(set(graph_nodes))) folder = "tissue_expr_norm/" for i in range(0, 48, 1): print str(i) + " " + pa_basal[i] val = raw_input() if "," not in val: val = map(int, str(val).split()) else: print val val = map(int, val.split(","))
def mcn(nodes, graph_nodes, graph, expression, graph_choice, start_nodes):
    """Select one stored path per node pair and run the enrichment test on the result.

    Steps, in order:

    1. Build every unordered pair of nodes present in both ``nodes`` and
       ``graph_nodes`` and look up, in the per-node index file, the byte
       offset of the pair's record inside the per-node path file.
    2. Read every path of each record and score it as
       ``sqrt(gmean(expression of inner nodes) * gmean(edge "capacity"))``.
    3. For pairs with a single scored path keep that path.  Otherwise, among
       the paths whose score is at or above the mean score, keep the one
       with the lowest annotation value (sum of ``fisher_one_occurence``
       values over the distinct terms, divided by the total number of terms
       collected from ``graph_go_term``), provided that value is below 10.
    4. Call ``fishertest.load`` on the union of the kept paths and
       ``start_nodes``.

    Args:
        nodes: node identifiers to connect.
        graph_nodes: identifiers of the nodes present in ``graph``.
        graph: mapping indexed as ``graph[u][v]["capacity"]``.
        expression: not used by this function; expression values are read
            from the module-level ``tissue_expr``.
        graph_choice: name of the data sub-directory, also forwarded to
            ``load_go_term`` and ``fishertest.load``.
        start_nodes: iterable of node identifiers always added to the result.

    Returns:
        None.  Results are written to the module-level file ``f5``, printed,
        and handed to ``fishertest.load``.

    Relies on module-level names: ``load_go_term``, ``tissue_expr``, ``f5``,
    ``fishertest`` and ``sys.argv[3]``.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation had been lost; confirm against the original layout.
    """
    graph_go_term, fisher_one_occurence = load_go_term(graph_choice)
    path_dir = (
        "../../web2py_test_new_version/applications/magneto/data/"
        + graph_choice
        + "/path/"
    )
    categories = ("P", "C", "R", "F", "K")

    shared = list(set(graph_nodes).intersection(set(nodes)))
    combination = list(itertools.combinations(shared, 2))

    seek = {}
    for node in nodes:
        seek[node] = {}
    path_count = {}
    nodes_ic_value = {}
    nodes_ic_path = {}

    # --- 1. locate the record of every pair --------------------------------
    # combinations() yields all pairs of one source node consecutively, so
    # remembering the last parsed index file is enough to read each file once
    # instead of once per pair.
    cached_source = None
    cached_rows = {}
    for pair in combination:
        source, target = pair
        path_count[pair] = {}
        nodes_ic_value[pair] = 10
        nodes_ic_path[pair] = []
        if source != cached_source:
            cached_rows = {}
            with open(path_dir + "index/" + source + ".txt", "r") as index_file:
                for line in index_file:
                    fields = line.strip().split("\t")
                    # A later line for the same target overrides an earlier one.
                    cached_rows[fields[0]] = fields
            cached_source = source
        fields = cached_rows.get(target)
        if fields is not None:
            seek[source][target] = int(fields[1])

    # --- 2. read and score the stored paths ---------------------------------
    for source in seek:
        offsets = seek[source]
        if not offsets:
            continue
        with open(path_dir + source + ".txt") as path_file:
            for target in offsets:
                path_file.seek(offsets[target])
                header = path_file.readline().split("|")
                # Header layout: one leading character, start id, "|", end id.
                key = (header[0][1:].strip(), header[1].strip())
                line = path_file.readline()
                # Stop at the next record header *or* at end of file; the
                # empty-string check avoids an IndexError on the last record.
                while line and line[0] != ">":
                    # The last two whitespace-separated fields are not nodes.
                    node_list = line.strip().split()[0:-2]
                    path_coex = []
                    path_exp = []
                    for k in range(len(node_list) - 1):
                        path_coex.append(
                            graph[node_list[k]][node_list[k + 1]]["capacity"]
                        )
                        if k != 0:
                            # Inner nodes only; unknown expression -> 0.0001.
                            if node_list[k] in tissue_expr:
                                path_exp.append(tissue_expr[node_list[k]])
                            else:
                                path_exp.append(0.0001)
                    if path_exp:
                        val1 = scipy.stats.mstats.gmean(path_exp)
                    else:
                        val1 = 1.00000
                    val2 = scipy.stats.mstats.gmean(path_coex)
                    total_prob = math.sqrt(val1 * val2)
                    # NOTE: keyed by score, so two paths with exactly the same
                    # score collapse into the one read last.
                    path_count[key][total_prob] = node_list
                    line = path_file.readline()

    # --- 3. choose one path per pair ----------------------------------------
    flow = []
    for pair in path_count:
        scored = path_count[pair]
        if not scored:
            continue
        if len(scored) == 1:
            nodes_ic_path[pair] = list(scored.values())[0]
            continue

        mean = np.mean(list(scored.keys()))
        ranked = sorted(scored.items(), key=itemgetter(0))[::-1]
        top_recorded = False
        for score, members in ranked:
            if not score >= mean:
                continue
            # Only the first (highest scoring) candidate contributes to flow.
            # NOTE(review): the original used a counter whose increment
            # position is ambiguous in the collapsed source; this is the
            # "first candidate" reading.
            if not top_recorded:
                flow.append(members)
                top_recorded = True

            terms = dict((category, []) for category in categories)
            for node in members:
                for category in categories:
                    if node in graph_go_term[category]:
                        terms[category].extend(graph_go_term[category][node])
            term_count = sum(len(terms[category]) for category in categories)
            print("%s %s" % (members, term_count))

            subtotal = {}
            for category in categories:
                acc = 0.0
                for term in set(terms[category]):
                    if term in fisher_one_occurence[category]:
                        acc = acc + fisher_one_occurence[category][term]
                subtotal[category] = acc
            # Same addition order as before so the printed floats are unchanged.
            fisher_sum = (
                subtotal["P"] + subtotal["C"] + subtotal["R"]
                + subtotal["F"] + subtotal["K"]
            )

            if term_count == 0:
                # No annotation at all: the ratio is undefined (this used to
                # raise ZeroDivisionError), so the path cannot be ranked.
                continue
            go_value = fisher_sum / float(term_count)
            if go_value < nodes_ic_value[pair]:
                nodes_ic_value[pair] = go_value
                f5.write(
                    "\t".join(members)
                    + "\t" + str(go_value)
                    + "\t" + str(fisher_sum)
                    + "\t" + str(term_count)
                    + "\n"
                )
                nodes_ic_path[pair] = members

    # --- 4. enrichment on the selected nodes --------------------------------
    nodes_ic = list(
        set(
            itertools.chain(
                itertools.chain.from_iterable(nodes_ic_path.values()),
                start_nodes,
            )
        )
    )
    flow = list(set(itertools.chain.from_iterable(flow)))
    print("nodes_ic %d" % len(nodes_ic))
    print("flow %d" % len(flow))
    fishertest.load(
        nodes_ic,
        0.05,
        ["C", "P", "F", "R", "K", "O", "KDr", "KDi", "DB", "Or", "VH"],
        graph_choice,
        path_def="ic/",
        single=sys.argv[3],
    )
def mcn(nodes, graph_nodes, graph, expression, graph_choice, start_nodes, tissue, maxmin):
    """Select one stored path per node pair and run the enrichment test on the result.

    Steps, in order:

    1. Build every unordered pair of nodes present in both ``nodes`` and
       ``graph_nodes`` and look up, in the per-node index file, the byte
       offset of the pair's record inside the per-node path file.
    2. Read every path of each record and score it as
       ``sqrt(gmean(expression of inner nodes) * gmean(edge "coex"[1]))``.
       Inner nodes missing from ``tissue_expr`` get the smallest value found
       in ``tissue_expr``.
    3. For pairs with a single scored path keep that path.  Otherwise, among
       the paths whose score is at or above the mean score, compute the
       annotation value (sum of ``fisher_one_occurence`` values over the
       distinct terms, divided by the total number of terms collected from
       ``graph_go_term``) and keep the path with the lowest value when
       ``maxmin == "0"``, the highest value otherwise.
    4. Write the kept path of every pair to ``f6`` and call
       ``fishertest.load`` on the union of the kept paths and
       ``start_nodes``.

    Args:
        nodes: node identifiers to connect.
        graph_nodes: identifiers of the nodes present in ``graph``.
        graph: mapping indexed as ``graph[u][v]["coex"][1]``.
        expression: not used by this function; expression values are read
            from the module-level ``tissue_expr``.
        graph_choice: name of the data sub-directory, also forwarded to
            ``load_go_term`` and ``fishertest.load``.
        start_nodes: iterable of node identifiers always added to the result.
        tissue: forwarded unchanged to ``load_go_term``.
        maxmin: the string ``"0"`` selects the minimum annotation value; any
            other value selects the maximum.

    Returns:
        None.  Results are written to the module-level files ``f5`` and
        ``f6``, printed, and handed to ``fishertest.load``.

    Relies on module-level names: ``load_go_term``, ``tissue_expr``, ``f5``,
    ``f6``, ``fishertest`` and ``sys.argv[3]``.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation had been lost; confirm against the original layout.
    """
    # Only the first two results are used here; the full unpack is kept so a
    # change in the number of returned values is still detected.
    (graph_go_term, fisher_one_occurence, fisher_one_occurence2,
     trends, tissue_protein_prob) = load_go_term(graph_choice, tissue)
    path_dir = (
        "../../web2py_test_new_version/applications/magneto/data/"
        + graph_choice
        + "/path/"
    )
    categories = ("P", "C", "R", "F", "K")
    minimise = maxmin == "0"

    shared = list(set(graph_nodes).intersection(set(nodes)))
    combination = list(itertools.combinations(shared, 2))

    seek = {}
    for node in nodes:
        seek[node] = {}
    path_count = {}
    nodes_ic_value = {}
    nodes_ic_path = {}

    # --- 1. locate the record of every pair --------------------------------
    # combinations() yields all pairs of one source node consecutively, so
    # remembering the last parsed index file is enough to read each file once
    # instead of once per pair.
    cached_source = None
    cached_rows = {}
    for pair in combination:
        source, target = pair
        path_count[pair] = {}
        # Start from a bound that any real value is expected to beat.
        nodes_ic_value[pair] = 100000 if minimise else -1
        nodes_ic_path[pair] = []
        if source != cached_source:
            cached_rows = {}
            with open(path_dir + "index/" + source + ".txt", "r") as index_file:
                for line in index_file:
                    fields = line.strip().split("\t")
                    # A later line for the same target overrides an earlier one.
                    cached_rows[fields[0]] = fields
            cached_source = source
        fields = cached_rows.get(target)
        if fields is not None:
            seek[source][target] = int(fields[1])

    # --- 2. read and score the stored paths ---------------------------------
    minimum = min(tissue_expr.values())
    for source in seek:
        offsets = seek[source]
        if not offsets:
            continue
        with open(path_dir + source + ".txt") as path_file:
            for target in offsets:
                path_file.seek(offsets[target])
                header = path_file.readline().split("|")
                # Header layout: one leading character, start id, "|", end id.
                key = (header[0][1:].strip(), header[1].strip())
                line = path_file.readline()
                # Stop at the next record header *or* at end of file; the
                # empty-string check avoids an IndexError on the last record.
                while line and line[0] != ">":
                    # The last two whitespace-separated fields are not nodes.
                    node_list = line.strip().split()[0:-2]
                    path_coex = []
                    path_exp = []
                    for k in range(len(node_list) - 1):
                        path_coex.append(
                            graph[node_list[k]][node_list[k + 1]]["coex"][1]
                        )
                        if k != 0:
                            # Inner nodes only; unknown expression -> minimum.
                            if node_list[k] in tissue_expr:
                                path_exp.append(tissue_expr[node_list[k]])
                            else:
                                path_exp.append(minimum)
                    if path_exp:
                        val1 = scipy.stats.mstats.gmean(path_exp)
                    else:
                        val1 = 1.00000
                    val2 = scipy.stats.mstats.gmean(path_coex)
                    total_prob = math.sqrt(val1 * val2)
                    # NOTE: keyed by score, so two paths with exactly the same
                    # score collapse into the one read last.
                    path_count[key][total_prob] = node_list
                    line = path_file.readline()

    # --- 3. choose one path per pair ----------------------------------------
    # The third output column used to be str(np.mean(temp)) with ``temp``
    # never filled, i.e. always "nan" (plus a numpy warning).  The column is
    # kept so the layout of the f5 file does not change.
    empty_mean = "nan"
    for pair in path_count:
        scored = path_count[pair]
        if not scored:
            continue
        if len(scored) == 1:
            nodes_ic_path[pair] = list(scored.values())[0]
            continue

        mean = np.mean(list(scored.keys()))
        ranked = sorted(scored.items(), key=itemgetter(0))[::-1]
        for score, members in ranked:
            if not score >= mean:
                continue

            # The original also looked up tissue_protein_prob[node] here and
            # discarded the result; that dead lookup (which could only raise
            # KeyError) has been dropped.
            terms = dict((category, []) for category in categories)
            for node in members:
                for category in categories:
                    if node in graph_go_term[category]:
                        terms[category].extend(graph_go_term[category][node])
            term_count = sum(len(terms[category]) for category in categories)

            subtotal = {}
            for category in categories:
                acc = 0.0
                for term in set(terms[category]):
                    if term in fisher_one_occurence[category]:
                        acc = acc + fisher_one_occurence[category][term]
                subtotal[category] = acc
            # Same addition order as before so the written floats are unchanged.
            fisher_sum = (
                subtotal["P"] + subtotal["C"] + subtotal["R"]
                + subtotal["F"] + subtotal["K"]
            )

            if term_count == 0:
                # No annotation at all: the ratio is undefined (this used to
                # raise ZeroDivisionError), so the path cannot be ranked.
                continue
            go_value = fisher_sum / float(term_count)

            record = (
                "\t".join(members)
                + "\t" + str(go_value)
                + "\t" + empty_mean
                + "\t" + str(fisher_sum)
                + "\t" + str(term_count)
                + "\n"
            )
            f5.write(record)
            if minimise:
                improved = go_value < nodes_ic_value[pair]
            else:
                improved = go_value > nodes_ic_value[pair]
            if improved:
                nodes_ic_value[pair] = go_value
                # A ">"-prefixed copy marks the candidate that became the
                # current best for this pair.
                f5.write(">" + record)
                nodes_ic_path[pair] = members

    # --- 4. report and run the enrichment -----------------------------------
    for pair in nodes_ic_path:
        f6.write(
            pair[0] + "\t" + pair[1] + "\t" + "\t".join(nodes_ic_path[pair]) + "\n"
        )
    nodes_ic = list(
        set(
            itertools.chain(
                itertools.chain.from_iterable(nodes_ic_path.values()),
                start_nodes,
            )
        )
    )
    print("nodes_ic %d" % len(nodes_ic))
    fishertest.load(
        nodes_ic,
        0.05,
        ["C", "P", "F", "R", "K", "O", "KDr", "KDi", "DB", "Or", "VH"],
        graph_choice,
        path_def="ic3/",
        single=sys.argv[3],
    )