Esempio n. 1
0
def makeModelRules(rules, sss, equal_sign='*='):
    """Build a signed graph, a model and a rule bit vector from text rules.

    Each rule is split at ``equal_sign`` into a target node (left side) and
    a Boolean expression (right side).  The expression is read as an ``or``
    of ``and`` terms.  An operand that starts with the word ``not`` adds an
    edge operand -> target with ``signal='i'``; any other operand adds an
    edge with ``signal='a'``.

    Args:
        rules: Iterable of rule strings, e.g. ``"C *= (A and B) or not D"``.
        sss: Passed unchanged to ``sim.modelClass`` as its second argument.
        equal_sign: Token that separates the target from its expression.

    Returns:
        A tuple ``(model, individual, graph)``:
        ``model`` is ``sim.modelClass(graph, sss, True)``;
        ``individual`` is a flat list of 0/1 flags, one per candidate
        ``and`` term ``model.andNodeList[i][j]`` (for ``j`` below
        ``individualParse[i + 1] - individualParse[i]``), set to 1 when the
        sorted node names of that term appear among the terms written in
        the rule for node ``i``;
        ``graph`` is the ``networkx.DiGraph`` built from the rules.

    Raises:
        IndexError: If a rule does not contain ``equal_sign``.
        ValueError: If a node in ``model.nodeList`` is never a rule target.
    """
    # Imported locally so the function does not depend on the module's
    # import block already providing ``re``.
    import re

    strip_chars = '( )\t'
    graph = nx.DiGraph()
    andNodeList = []   # per rule: one sorted operand list per "or" term
    nodeListTemp = []  # rule targets, in the order the rules were given
    for rule in rules:
        sides = rule.strip('( )\t\n').split(equal_sign)
        startNode = sides[0].strip(strip_chars)
        nodeListTemp.append(startNode)
        andNodeTemp = []
        # Split on whole-word operators only, so node names that merely
        # contain "or" / "and" / "not" (e.g. "factor", "ligand", "notch")
        # are kept intact instead of being cut apart.
        for orTerm in re.split(r'\bor\b', sides[1]):
            andNode = []
            for andRule in re.split(r'\band\b', orTerm):
                temprule = andRule.strip(strip_chars)
                if re.match(r'not\b', temprule):
                    source = temprule[3:].strip(strip_chars)
                    signal = 'i'
                else:
                    source = temprule
                    signal = 'a'
                # Keyword form sets the ``signal`` attribute on every
                # networkx version; ``attr_dict=`` only did so on 1.x.
                graph.add_edge(source, startNode, signal=signal)
                andNode.append(source)
            andNode.sort()
            andNodeTemp.append(andNode)
        andNodeList.append(andNodeTemp)

    model = sim.modelClass(graph, sss, True)
    individual = []
    for i, nodeName in enumerate(model.nodeList):
        # Terms written in the text rule whose target is this model node.
        writtenTerms = andNodeList[nodeListTemp.index(nodeName)]
        termCount = model.individualParse[i + 1] - model.individualParse[i]
        for j in range(termCount):
            # andNodeList holds node indices; translate them to names so the
            # term can be compared with the parsed (sorted) rule terms.
            candidate = sorted(
                model.nodeList[node] for node in model.andNodeList[i][j])
            individual.append(1 if candidate in writtenTerms else 0)
    return model, individual, graph
def PAtester(graph, name):
    """Write control and perturbed sample sets for ``graph`` to CSV files.

    Reads ``neg_binom_gen_1.csv`` .. ``neg_binom_gen_10.csv`` with
    ``readFpkmData2``.  Every graph node that is not a key of the first
    gene dictionary is given, in each data set, the values of a randomly
    chosen existing gene (stored under the upper-cased node name).  For each
    data set a model with random rule bits (``ga.genBits``) is simulated
    with ``sim.NPsync`` to produce the control samples.  Then, for each
    perturbation level in ``[0, 5, 10, 15, 20]``, the values of nodes
    without predecessors in samples 5..9 are scaled by
    ``2 ** (-0.1 * level)``, clamped to ``[0, 1]``, and the result is
    written with ``outputData``.

    Args:
        graph: networkx directed graph whose nodes are gene names.
        name: Prefix of the output files
            ``<name><level>_true_<dataset>.csv``.

    Returns:
        None.  Results are written through ``outputData``.
    """
    controls = 5       # samples 0..4 are simulated as controls
    experimentals = 5  # samples 5..9 are perturbed
    params = sim.paramClass()
    sampleLists, geneDicts = [], []
    # import starting points
    for i in range(1, 11):
        sampleList, geneDict = readFpkmData2(
            'neg_binom_gen_' + str(i) + '.csv', ',')  # read in data
        sampleLists.append(sampleList)
        geneDicts.append(geneDict)
    knockoutLists, knockinLists = setupEmptyKOKI(len(sampleList))
    updateBooler = cdll.LoadLibrary('./simulator.so')
    boolC = updateBooler.syncBool

    # list() keeps the keys indexable on Python 3 as well as Python 2.
    geneNames = list(geneDicts[0].keys())
    knownGenes = set(geneNames)
    for node in graph.nodes():
        print(node)
        if node in knownGenes:
            continue
        # Node has no data: borrow the values of a random gene per data set.
        nodeKey = node.upper()
        for k in range(10):
            q = randint(0, len(geneNames) - 1)
            donor = geneNames[q]
            geneDicts[k][nodeKey] = geneDicts[k][donor]
            for j in range(10):
                sampleLists[k][j][nodeKey] = sampleLists[k][j][donor.upper()]
            print([sampleLists[k][j][nodeKey] for j in range(10)])

    for j in range(10):  # iterate over imported starting points
        # generate Boolean model for this trial
        model = sim.modelClass(graph, sampleLists[j], True)
        rule = ga.genBits(model)
        newInitValueList = genInitValueList(sampleLists[j], model)
        model.initValueList = newInitValueList
        model.updateCpointers()
        output = [
            sim.NPsync(rule[1], model, params.cells, newInitValueList[k],
                       params, knockinLists[k], knockoutLists[k], boolC)
            for k in range(controls)
        ]
        controlSampleList = compileOuts(
            output, sampleLists[j], controls, model)

        genelist = list(geneDicts[j].keys())
        # in_degree works on old and new networkx; len(predecessors()) fails
        # once predecessors() returns an iterator.
        sourceKeys = [node.upper() for node in graph.nodes()
                      if graph.in_degree(node) == 0]
        originals = sampleLists[j][controls:controls + experimentals]
        for perturbation in [0, 5, 10, 15, 20]:
            perturbationSize = 2. ** (-.1 * perturbation)
            # Copy every sample: a shallow list copy would share the dicts
            # with sampleLists, so each level would be applied on top of the
            # previous one and the loaded data would be overwritten.
            tSampleList = [sample.copy() for sample in originals]
            for i in range(experimentals):
                # generate values across samples
                for key in sourceKeys:
                    tSampleList[i][key] = min(
                        max(0, originals[i][key] * perturbationSize), 1)
            outputData(controlSampleList, tSampleList, genelist,
                       name + str(perturbation) + '_true_' + str(j) + '.csv',
                       geneDicts[j])
Esempio n. 3
0
def transformTest(graph, name, fileName):
    """Infer rules for ``graph`` from the data in ``fileName`` and pickle them.

    Loads the sample dictionary with ``constructBinInput``, keeps only the
    entries whose key is a node of ``graph``, runs ``ga.GAsearchModel``
    followed by ``ga.localSearch`` and writes
    ``[[dev1], [dev2], [bruteOut], [model]]`` to ``<name>_output.pickle``.

    Args:
        graph: networkx graph describing the network to fit.
        name: Run name; passed to the GA search and used as the prefix of
            the output pickle file.
        fileName: Path handed to ``constructBinInput``.

    Returns:
        None.  Prints ``'not enough overlap'`` and returns early when the
        graph has fewer than two nodes.

    Raises:
        IndexError: If ``constructBinInput`` returns an empty mapping.
    """
    # can't fit a rule to only one node
    if len(graph.nodes()) < 2:
        print('not enough overlap')
        return

    # load in C function
    updateBooler = cdll.LoadLibrary('./testRun.so')
    boolC = updateBooler.syncBool

    # load data and parameters
    sampleDict = constructBinInput(fileName)
    params = sim.paramClass()

    # turn the dict of lists into a list of dicts, one dict per sample;
    # list() keeps the keys indexable on Python 3 as well as Python 2
    keyList = list(sampleDict)
    sampleCount = len(sampleDict[keyList[0]])
    sampleList = [
        {key: sampleDict[key][i] for key in keyList if key in graph}
        for i in range(sampleCount)
    ]

    # no knockouts or knockins in transform tests
    knockoutLists, knockinLists = setupEmptyKOKI(len(sampleList))

    # generate model encompassing graph and samples to do rule inference on
    model = sim.modelClass(graph, sampleList, False)
    model.updateCpointers()
    # copy data into correct order for simulation
    newInitValueList = genInitValueList(sampleList, model)
    model.initValueList = newInitValueList
    print('setup successful')

    # find the rules
    model, dev1, bruteOut = ga.GAsearchModel(model, sampleList, params,
                                             knockoutLists, knockinLists, name,
                                             boolC)
    bruteOut, equivalents, dev2 = ga.localSearch(model, bruteOut, sampleList,
                                                 params, knockoutLists,
                                                 knockinLists, boolC)
    # The context manager guarantees the file is flushed and closed.
    with open(name + "_output.pickle", "wb") as outFile:
        pickle.dump([[dev1], [dev2], [bruteOut], [model]], outFile)
Esempio n. 4
0
def runExperiment(graph, name, samples, noise, edgeNoise, params):
	"""Simulate data from a random rule set on ``graph``, then try to recover it.

	Steps visible in this block:
	  1. Load ``./testRun.so`` and take its ``syncBool`` symbol.
	  2. Build a model on ``graph`` with inputs from ``synthesizeInputs`` and
	     draw rule bits with ``ga.genBits``.
	  3. Simulate with ``ga.runProbabilityBooleanSims``; optionally add noise
	     to the simulated values (``noise > 0``) and add ``edgeNoise`` extra
	     random edges to a copy of the graph (``edgeNoise > 0``).
	  4. Pickle the simulated data to ``<name>_input.pickle``.
	  5. Run ``ga.GAsearchModel`` and ``ga.localSearch`` on a model built
	     from the (possibly noisy) graph and pickle the outcome to
	     ``<name>_local1.pickle``.

	Args:
	    graph: network to test.
	    name: prefix for the output pickle files.
	    samples: number of different initial conditions per trial; also
	        stored on ``params.sample``.
	    noise: when > 0, passed to ``findNoiseValue`` to obtain the noise
	        multiplier applied to the simulated values.
	    edgeNoise: number of extra edges to add; used as ``range(edgeNoise)``.
	    params: parameter object; ``params.sample`` is overwritten here.

	NOTE(review): the statements after the second ``pickle.dump`` look like
	the body of a separate command-line script (they re-read ``graph`` and
	``name`` from ``parser.parse_args()``) — confirm whether they belong to
	this function.  The block may also continue past the last line shown.
	"""
	#creates a model, runs simulations, then tests reverse engineering capabilities of models in a single function
	#samples is the number of different initial conditions to provide per trial
	#graph specifies the network we are testing.
	# does everything except params

	# load in C function
	#updateBooler=ctypes.cdll.LoadLibrary('./testRun.so')
	updateBooler=cdll.LoadLibrary('./testRun.so')
	boolC=updateBooler.syncBool 
	params.sample=samples

	sampleList=synthesizeInputs(graph,samples) # get empty list of inputs

	model=sim.modelClass(graph,sampleList, True) # generate empty model
	model.updateCpointers()
	individual=ga.genBits(model) #generate random set of logic rules to start with


	# With edge noise the random rule bits are replaced by a fixed 13-bit vector.
	# NOTE(review): presumably tied to one specific test network — confirm.
	if edgeNoise > 0:
		individual[1]=	[0,1,1,1,1,1,0,0,1,0,1,1,1]


	# snapshot of the generating model's fields, stored in the output pickle below
	initModel=[(model.size), list(model.nodeList), list(model.individualParse), list(model.andNodeList) , list(model.andNodeInvertList), list(model.andLenList),	list(model.nodeList), dict(model.nodeDict), list(model.initValueList)]
	knockoutLists, knockinLists= setupEmptyKOKI(samples)
		
	# generate some simulated samples
	output=ga.runProbabilityBooleanSims(individual[1], model, samples, params.cells, params, knockoutLists, knockinLists, boolC)
	
	# add noise in omics data
	if noise>0:
		multiplier=findNoiseValue(noise)
		for sample in output:
			for i in range(len(sample)):
				# shift by multiplier*(random()*2-1), then clamp to [0, 1]
				sample[i]=min(max(0,sample[i]+multiplier*(random()*2-1)),1)

	# add noise in RPKN
	if edgeNoise > 0:
		newgraph=graph.copy()
		edgelist=newgraph.edges()
		nodelist=newgraph.nodes()
		for newer in range(edgeNoise): # add edgeNoise FP edges
			rand1=randint(0,len(nodelist)-1)
			rand2=randint(0,len(nodelist)-1)
			edgeCandidate=(nodelist[rand1],nodelist[rand2])
			# redraw until the candidate is neither an existing edge nor a self-loop
			while edgeCandidate in edgelist or edgeCandidate[0]==edgeCandidate[1]:
				rand1=randint(0,len(nodelist)-1)
				rand2=randint(0,len(nodelist)-1)
				edgeCandidate=(nodelist[rand1],nodelist[rand2])
			# sign of the new edge: 'a' or 'i' with equal probability
			if random()<.5:
				activity1='a'
			else:
				activity1='i'
			print(edgeCandidate)
			newgraph.add_edge(nodelist[rand1],nodelist[rand2], signal=activity1)
			# NOTE(review): append() and the indexing above need edges()/nodes()
			# to return lists (networkx 1.x behaviour) — verify the installed version.
			edgelist.append((nodelist[rand1],nodelist[rand2]))

		print(edgelist)
		print(newgraph.edges())
	else:
		newgraph=graph
	
	# output the initial generated data
	# NOTE(review): the file object passed to pickle.dump is never closed explicitly.
	pickle.dump( output, open( name+"_input.pickle", "wb" ) )

	# copy simulated data into right format
	newSampleList=genSampleList(output, sampleList, samples, model)
	testModel=sim.modelClass(newgraph,newSampleList, False)
	testModel.updateCpointers()
	# put initial values into correct format, add to model
	newInitValueList=genInitValueList(newSampleList,testModel)
	testModel.initValueList=newInitValueList
	
	#find rules
	testModel, dev, bruteOut =ga.GAsearchModel(testModel, newSampleList, params, knockoutLists, knockinLists, name, boolC) # run GA
	bruteOut, equivalents, dev2 = ga.localSearch(testModel, bruteOut, newSampleList, params, knockoutLists, knockinLists, boolC) # run local search
	# snapshot of the fitted model's fields
	storeModel3=[(testModel.size), list(testModel.nodeList), list(testModel.individualParse), list(testModel.andNodeList) , list(testModel.andNodeInvertList), list(testModel.andLenList),	list(testModel.nodeList), dict(testModel.nodeDict), list(testModel.initValueList)]

	outputList=[individual[1],bruteOut,initModel, storeModel3, equivalents, dev2]
	pickle.dump( outputList, open( name+"_local1.pickle", "wb" ) )
	# NOTE(review): from here on the code reads its inputs from the command line
	# and overwrites the ``name`` and ``graph`` arguments; it also calls
	# paramClass/modelClass/GAsearchModel/localSearch without the ``sim.``/``ga.``
	# prefixes used above.  Looks like a separate script merged in — confirm.
	results = parser.parse_args()
	graphName=results.graph
	iterNum=int(results.iterNum)
	# graphName[:-8] drops the last 8 characters of the graph file name
	name=graphName[:-8]+'_'+results.iterNum
	graph = nx.read_gpickle(graphName)
	
	# read in C function to run simulations
	updateBooler=cdll.LoadLibrary('./simulator.so')
	boolC=updateBooler.syncBool 

	# load data
	sampleList=pickle.Unpickler(open( graphName[:-8]+'_sss.pickle', "rb" )).load()
	
	# set up parameters of run, model
	params=paramClass()
	model=modelClass(graph,sampleList, False)
	model.updateCpointers()

	storeModel=[(model.size), list(model.nodeList), list(model.individualParse), list(model.andNodeList) , list(model.andNodeInvertList), list(model.andLenList),	list(model.nodeList), dict(model.nodeDict), list(model.initValueList)]
	
	# put lack of KOs, initial values into correct format
	knockoutLists, knockinLists= setupEmptyKOKI(len(sampleList))
	newInitValueList=genInitValueList(sampleList,model)
	model.initValueList=newInitValueList

	# find rules by doing GA then local search
	model1, dev, bruteOut =GAsearchModel(model, sampleList, params, knockoutLists, knockinLists, name, boolC) # run GA
	bruteOut1, equivalents, dev2 = localSearch(model1, bruteOut, sampleList, params, knockoutLists, knockinLists, boolC) # run local search
	
	# output results
	# snapshot taken from ``model`` (not ``model1``); nothing in the visible code uses it afterwards
	storeModel3=[(model.size), list(model.nodeList), list(model.individualParse), list(model.andNodeList) , list(model.andNodeInvertList), list(model.andLenList),	list(model.nodeList), dict(model.nodeDict), list(model.initValueList)]