Esempio n. 1
0
    "35": "Artery - Tibial",
    "34": "Brain - Cortex",
    "33": "Heart - Left Ventricle",
    "32": "Brain - Hippocampus",
    "50": "Brain - Substantia nigra",
}

# Driver script: argv[1] is a pickled network, argv[2] is the file handed to
# load_input_list to obtain the starting nodes.
graph = nx.read_gpickle(sys.argv[1])
graph_nodes = graph.nodes()
start_nodes = load_input_list(sys.argv[2])
graph_choice = "intact"
print("starting %d" % len(start_nodes))

# Enrichment test on the de-duplicated starting nodes.
enrichment_categories = ["C", "P", "F", "R", "K", "O", "KDr", "KDi", "DB", "Or", "VH"]
fishertest.load(
    list(set(start_nodes)),
    0.05,
    enrichment_categories,
    graph_choice,
    path_def="results_old/",
    single="0",
)

# Only starting nodes that are also present in the network are kept.
nodes = list(set(start_nodes) & set(graph_nodes))

folder = "tissue_expr_norm/"

# Show the numbered menu (entries 0..47 of pa_basal), then read the selection.
for tissue_index in range(48):
    print(str(tissue_index) + " " + pa_basal[tissue_index])

val = raw_input()
if "," in val:
    # Comma-separated input is echoed back before being parsed.
    print(val)
    val = [int(token) for token in val.split(",")]
else:
    # Otherwise the input is treated as whitespace-separated integers.
    val = [int(token) for token in val.split()]
Esempio n. 2
0
def _go_annotation_score(node_path, graph_go_term, fisher_one_occurence):
    """Sum the annotation scores of a path and count its annotation terms.

    For each category "P", "C", "R", "F" and "K", the terms attached to every
    node of ``node_path`` are gathered from ``graph_go_term[category]``.
    Each distinct gathered term that has an entry in
    ``fisher_one_occurence[category]`` adds that entry to the category score.

    Returns:
        A ``(score_sum, term_count)`` tuple. ``score_sum`` is the sum of the
        five category scores; ``term_count`` is the number of gathered terms,
        repetitions included.
    """
    category_scores = []
    term_count = 0
    for category in ("P", "C", "R", "F", "K"):
        gathered = []
        for node in node_path:
            if node in graph_go_term[category]:
                gathered.extend(graph_go_term[category][node])
        term_count += len(gathered)

        category_score = 0.0
        for term in set(gathered):
            if term in fisher_one_occurence[category]:
                category_score = category_score + fisher_one_occurence[category][term]
        category_scores.append(category_score)
    # Added in P, C, R, F, K order, starting from a float zero.
    return sum(category_scores, 0.0), term_count


def mcn(nodes, graph_nodes, graph, expression, graph_choice, start_nodes):
    """Select one path per node pair and run the enrichment test on the result.

    Processing steps:

    1. Build every unordered pair of nodes found in both ``nodes`` and
       ``graph_nodes``.
    2. For each pair, scan ``<data>/<graph_choice>/path/index/<first>.txt``
       (tab-separated ``name<TAB>offset`` lines) for the offset of the second
       node's record.
    3. At that offset in ``<data>/<graph_choice>/path/<first>.txt`` read a
       ``>start|end`` header followed by path lines, up to the next line
       starting with ``>`` or the end of the file. The last two
       whitespace-separated fields of a path line are dropped; the remaining
       fields are the node names.
    4. Score each path as ``sqrt(gmean(expression) * gmean(capacity))``,
       where the expression values come from the module-level
       ``tissue_expr`` for interior nodes (``0.0001`` when absent, ``1.0``
       when there are no interior nodes) and the capacities from
       ``graph[a][b]["capacity"]`` for consecutive nodes.
    5. A pair with a single scored path keeps that path. Otherwise, among
       the paths scoring at least the mean, the one with the lowest average
       annotation score (see ``_go_annotation_score``) is kept, and every
       improvement is logged to the module-level ``f5`` handle.
    6. The nodes of all kept paths plus ``start_nodes`` are handed to
       ``fishertest.load`` with ``path_def="ic/"`` and ``single=sys.argv[3]``.

    Args:
        nodes: node names to connect; must be usable in file names.
        graph_nodes: node names present in ``graph``.
        graph: nested mapping queried as ``graph[a][b]["capacity"]``.
        expression: not used by this function; kept for interface stability.
        graph_choice: data sub-directory name, also forwarded to
            ``load_go_term`` and ``fishertest.load``.
        start_nodes: iterable of nodes always included in the final set.

    Returns:
        None. Results are written through ``f5``, stdout and
        ``fishertest.load``.

    NOTE(review): paths are stored in a dict keyed by their score, so two
    paths of the same pair with exactly the same score overwrite each other.
    """
    graph_go_term, fisher_one_occurence = load_go_term(graph_choice)
    path_dir = "../../web2py_test_new_version/applications/magneto/data/" + graph_choice + "/path/"

    path_count = {}
    seek = {}
    nodes_ic_value = {}
    nodes_ic_path = {}
    for node in nodes:
        seek[node] = {}

    combination = list(itertools.combinations(list(set(graph_nodes).intersection(set(nodes))), 2))
    for pair in combination:
        path_count[pair] = {}
        nodes_ic_value[pair] = 10
        nodes_ic_path[pair] = []
        # The context manager closes the index file; the original leaked one
        # handle per pair.
        with open(path_dir + "index/" + pair[0] + ".txt", "r") as index_file:
            for line in index_file:
                fields = line.strip().split("\t")
                if fields[0] == pair[1]:
                    seek[pair[0]][fields[0]] = int(fields[1])

    for source in seek:
        if not seek[source]:
            continue
        # One handle per source file, repositioned for every target.
        with open(path_dir + source + ".txt") as path_file:
            for target in seek[source]:
                path_file.seek(seek[source][target])
                header = path_file.readline()
                key_start = header.split("|")[0][1:].strip()
                key_end = header.split("|")[1].strip()
                key = (key_start, key_end)

                line = path_file.readline()
                # The emptiness check stops cleanly at end of file; indexing
                # an empty string used to raise IndexError on the last record.
                while line and line[0] != ">":
                    node_list = line.strip().split()[0:-2]
                    path_coex = [
                        graph[first][second]["capacity"]
                        for first, second in zip(node_list, node_list[1:])
                    ]
                    path_exp = [
                        tissue_expr[inner] if inner in tissue_expr else 0.0001
                        for inner in node_list[1:-1]
                    ]

                    if path_exp:
                        val1 = scipy.stats.mstats.gmean(path_exp)
                    else:
                        val1 = 1.00000
                    val2 = scipy.stats.mstats.gmean(path_coex)

                    total_prob = math.sqrt(val1 * val2)
                    path_count[key][total_prob] = node_list
                    line = path_file.readline()

    flow = []
    for pair in path_count:
        candidates = path_count[pair]
        if not candidates:
            # No path was recorded for this pair; nothing to rank.
            continue
        if len(candidates) == 1:
            nodes_ic_path[pair] = list(candidates.values())[0]
            continue

        mean = np.mean(list(candidates))
        ranked = sorted(candidates.items(), key=itemgetter(0), reverse=True)
        for rank, (total_prob, node_path) in enumerate(ranked):
            if not total_prob >= mean:
                # Scores are in descending order: nothing further qualifies.
                break
            if rank == 0:
                # The best-scoring path of every multi-path pair feeds "flow".
                flow.append(node_path)

            score_sum, term_count = _go_annotation_score(node_path, graph_go_term, fisher_one_occurence)
            print("%s %d" % (node_path, term_count))

            if term_count == 0:
                # Without any annotation term the average is undefined (it
                # used to raise ZeroDivisionError); the path cannot be ranked.
                continue
            go_value = score_sum / float(term_count)

            if go_value < nodes_ic_value[pair]:
                nodes_ic_value[pair] = go_value
                f5.write(
                    "\t".join(node_path)
                    + "\t"
                    + str(go_value)
                    + "\t"
                    + str(score_sum)
                    + "\t"
                    + str(term_count)
                    + "\n"
                )
                nodes_ic_path[pair] = node_path

    nodes_ic = list(set(itertools.chain(itertools.chain.from_iterable(nodes_ic_path.values()), start_nodes)))
    flow = list(set(itertools.chain.from_iterable(flow)))
    print("nodes_ic %d" % len(nodes_ic))
    print("flow %d" % len(flow))
    fishertest.load(
        nodes_ic,
        0.05,
        ["C", "P", "F", "R", "K", "O", "KDr", "KDi", "DB", "Or", "VH"],
        graph_choice,
        path_def="ic/",
        single=sys.argv[3],
    )
Esempio n. 3
0
def _go_annotation_score(node_path, graph_go_term, fisher_one_occurence):
    """Sum the annotation scores of a path and count its annotation terms.

    For each category "P", "C", "R", "F" and "K", the terms attached to every
    node of ``node_path`` are gathered from ``graph_go_term[category]``.
    Each distinct gathered term that has an entry in
    ``fisher_one_occurence[category]`` adds that entry to the category score.

    Returns:
        A ``(score_sum, term_count)`` tuple. ``score_sum`` is the sum of the
        five category scores; ``term_count`` is the number of gathered terms,
        repetitions included.
    """
    category_scores = []
    term_count = 0
    for category in ("P", "C", "R", "F", "K"):
        gathered = []
        for node in node_path:
            if node in graph_go_term[category]:
                gathered.extend(graph_go_term[category][node])
        term_count += len(gathered)

        category_score = 0.0
        for term in set(gathered):
            if term in fisher_one_occurence[category]:
                category_score = category_score + fisher_one_occurence[category][term]
        category_scores.append(category_score)
    # Added in P, C, R, F, K order, starting from a float zero.
    return sum(category_scores, 0.0), term_count


def mcn(nodes, graph_nodes, graph, expression, graph_choice, start_nodes, tissue, maxmin):
    """Select one path per node pair and run the enrichment test on the result.

    Processing steps:

    1. Build every unordered pair of nodes found in both ``nodes`` and
       ``graph_nodes``.
    2. For each pair, scan ``<data>/<graph_choice>/path/index/<first>.txt``
       (tab-separated ``name<TAB>offset`` lines) for the offset of the second
       node's record.
    3. At that offset in ``<data>/<graph_choice>/path/<first>.txt`` read a
       ``>start|end`` header followed by path lines, up to the next line
       starting with ``>`` or the end of the file. The last two
       whitespace-separated fields of a path line are dropped; the remaining
       fields are the node names.
    4. Score each path as ``sqrt(gmean(expression) * gmean(coex))``, where
       the expression values come from the module-level ``tissue_expr`` for
       interior nodes (its smallest value when a node is absent, ``1.0`` when
       there are no interior nodes) and the coex values from
       ``graph[a][b]["coex"][1]`` for consecutive nodes.
    5. A pair with a single scored path keeps that path. Otherwise every
       path scoring at least the mean is logged to the module-level ``f5``
       handle with its average annotation score (see
       ``_go_annotation_score``), and the path with the lowest
       (``maxmin == "0"``) or highest (any other value) average is kept;
       each improvement is logged again with a leading ``>``.
    6. The kept path of every pair is written to the module-level ``f6``
       handle, then the nodes of all kept paths plus ``start_nodes`` are
       handed to ``fishertest.load`` with ``path_def="ic3/"`` and
       ``single=sys.argv[3]``.

    Args:
        nodes: node names to connect; must be usable in file names.
        graph_nodes: node names present in ``graph``.
        graph: nested mapping queried as ``graph[a][b]["coex"][1]``.
        expression: not used by this function; kept for interface stability.
        graph_choice: data sub-directory name, also forwarded to
            ``load_go_term`` and ``fishertest.load``.
        start_nodes: iterable of nodes always included in the final set.
        tissue: forwarded to ``load_go_term``.
        maxmin: ``"0"`` keeps the lowest average annotation score per pair,
            anything else keeps the highest.

    Returns:
        None. Results are written through ``f5``, ``f6``, stdout and
        ``fishertest.load``.

    NOTE(review): paths are stored in a dict keyed by their score, so two
    paths of the same pair with exactly the same score overwrite each other.
    """
    # Only the first two returned values are used below; the remaining ones
    # are still unpacked so the expected five-value contract is enforced.
    graph_go_term, fisher_one_occurence, fisher_one_occurence2, trends, tissue_protein_prob = load_go_term(
        graph_choice, tissue
    )
    path_dir = "../../web2py_test_new_version/applications/magneto/data/" + graph_choice + "/path/"
    minimize = maxmin == "0"

    path_count = {}
    seek = {}
    nodes_ic_value = {}
    nodes_ic_path = {}
    for node in nodes:
        seek[node] = {}

    combination = list(itertools.combinations(list(set(graph_nodes).intersection(set(nodes))), 2))
    for pair in combination:
        path_count[pair] = {}
        # Start from a bound that the first scored candidate is expected to beat.
        nodes_ic_value[pair] = 100000 if minimize else -1
        nodes_ic_path[pair] = []
        # The context manager closes the index file; the original leaked one
        # handle per pair.
        with open(path_dir + "index/" + pair[0] + ".txt", "r") as index_file:
            for line in index_file:
                fields = line.strip().split("\t")
                if fields[0] == pair[1]:
                    seek[pair[0]][fields[0]] = int(fields[1])

    # Fallback expression for interior nodes missing from tissue_expr.
    minimum = min(tissue_expr.values())
    for source in seek:
        if not seek[source]:
            continue
        # One handle per source file, repositioned for every target.
        with open(path_dir + source + ".txt") as path_file:
            for target in seek[source]:
                path_file.seek(seek[source][target])
                header = path_file.readline()
                key_start = header.split("|")[0][1:].strip()
                key_end = header.split("|")[1].strip()
                key = (key_start, key_end)

                line = path_file.readline()
                # The emptiness check stops cleanly at end of file; indexing
                # an empty string used to raise IndexError on the last record.
                while line and line[0] != ">":
                    node_list = line.strip().split()[0:-2]
                    path_coex = [
                        graph[first][second]["coex"][1]
                        for first, second in zip(node_list, node_list[1:])
                    ]
                    path_exp = [
                        tissue_expr[inner] if inner in tissue_expr else minimum
                        for inner in node_list[1:-1]
                    ]

                    if path_exp:
                        val1 = scipy.stats.mstats.gmean(path_exp)
                    else:
                        val1 = 1.00000
                    val2 = scipy.stats.mstats.gmean(path_coex)

                    total_prob = math.sqrt(val1 * val2)
                    path_count[key][total_prob] = node_list
                    line = path_file.readline()

    for pair in path_count:
        candidates = path_count[pair]
        if not candidates:
            # No path was recorded for this pair; nothing to rank.
            continue
        if len(candidates) == 1:
            nodes_ic_path[pair] = list(candidates.values())[0]
            continue

        mean = np.mean(list(candidates))
        ranked = sorted(candidates.items(), key=itemgetter(0), reverse=True)
        for total_prob, node_path in ranked:
            if not total_prob >= mean:
                # Scores are in descending order: nothing further qualifies.
                break

            score_sum, term_count = _go_annotation_score(node_path, graph_go_term, fisher_one_occurence)
            if term_count == 0:
                # Without any annotation term the average is undefined (it
                # used to raise ZeroDivisionError); the path cannot be ranked.
                continue
            go_value = score_sum / float(term_count)

            # The third column used to be str(np.mean(temp)) with ``temp``
            # never populated, i.e. always "nan" plus a RuntimeWarning; the
            # literal keeps the file layout without the warning.
            record = (
                "\t".join(node_path)
                + "\t"
                + str(go_value)
                + "\t"
                + "nan"
                + "\t"
                + str(score_sum)
                + "\t"
                + str(term_count)
                + "\n"
            )
            f5.write(record)

            if minimize:
                improved = go_value < nodes_ic_value[pair]
            else:
                improved = go_value > nodes_ic_value[pair]
            if improved:
                nodes_ic_value[pair] = go_value
                f5.write(">" + record)
                nodes_ic_path[pair] = node_path

    for pair in nodes_ic_path:
        f6.write(pair[0] + "\t" + pair[1] + "\t" + "\t".join(nodes_ic_path[pair]) + "\n")

    nodes_ic = list(set(itertools.chain(itertools.chain.from_iterable(nodes_ic_path.values()), start_nodes)))
    print("nodes_ic %d" % len(nodes_ic))
    fishertest.load(
        nodes_ic,
        0.05,
        ["C", "P", "F", "R", "K", "O", "KDr", "KDi", "DB", "Or", "VH"],
        graph_choice,
        path_def="ic3/",
        single=sys.argv[3],
    )