def loadFitProbData(fileNumString):
    """
    Load the saved fitProbData for the run labeled by fileNumString.

    The data is read using `load` from the file named
    fileNumString + '_fitProbData.dat'.

    Returns the loaded object.  If the file cannot be read (IOError)
    or ends prematurely (EOFError), a warning is printed and None is
    returned instead of raising.
    """
    filename = fileNumString + '_fitProbData.dat'
    try:
        return load(filename)
    except (IOError, EOFError):
        # Best-effort load: the failure is reported and signalled with
        # None rather than propagated.
        print("loadFitProbData: WARNING Unable to load fitProbData file. "
              "Returning None.")
        return None
def subsetsWithFits(fileNumString, onlyNew=False):
    """
    Find data subsets (N) that have models that have been fit to
    all conditions.

    onlyNew (False)         : Optionally include only subsets that have
                              fits that are not included in the current
                              combined fitProbs.

    Returns a list of the qualifying keys N of the loaded fitProbData,
    in sorted order.
    """
    # Local import: reduce is only a builtin in Python 2.
    from functools import reduce

    fpd = loadFitProbData(fileNumString)
    saveFilename = list(fpd.values())[0]['saveFilename']

    Nlist = []
    for N in scipy.sort(list(fpd.keys())):
        # find models that have been fit to all conditions
        modelLists = [list(fp['logLikelihoodDict'].keys())
                      for fp in fpd[N]['fitProbDataList']]
        if len(modelLists) == 0:
            # no conditions recorded, so no model is fit to all of them
            fitModels = []
        elif len(modelLists) == 1:
            fitModels = modelLists[0]
        else:
            # intersect1d takes exactly two arrays, so fold it pairwise
            # over the per-condition model lists
            fitModels = reduce(scipy.intersect1d, modelLists)

        if onlyNew:
            Nfilename = directoryPrefixNonly(fileNumString, N) \
                + '/' + saveFilename
            fileExists = os.path.exists(Nfilename)
            if not fileExists:
                # no combined file exists
                if len(fitModels) > 0:
                    Nlist.append(N)
            else:
                # check which fit models are currently included in the
                # saved file
                fpMultiple = load(Nfilename)
                fitModelsSaved = list(fpMultiple.logLikelihoodDict.keys())
                numSaved = len(scipy.intersect1d(fitModels, fitModelsSaved))
                if numSaved < len(fitModels):
                    # at least one fit model is missing from the saved file
                    Nlist.append(N)
        else:
            if len(fitModels) > 0:
                Nlist.append(N)

    return Nlist
singleTimepoint = True # single time point for each IC useYeast = True usePhosphorylation = False makeFittingData = True # use False if you're getting data from a previous run if useYeast: numVisibleSpecies = 3 numSpecies = 4 dataFilename = '120725_yeastSpeciesAndDerivsData_10_numSpecies' + str( numVisibleSpecies) + '.data' visibleSpeciesNames = ['S1', 'S2', 'S3', 'S4', 'N2', 'A3', 'S4ex'][:numVisibleSpecies] indepParamNames = [n + '_init' for n in visibleSpeciesNames] if loadData: speciesData, derivsData = load(dataFilename) elif not loadData: # set up list of random initial conditions # taken from runFittingProblem.yeastDataFunction # run MATLAB simulator speciesData = [] derivsData = [] i = 0 for ICs, time in zip(ICsList, randTimes): i += 1 print "runPowerLawDerivFit: Running yeast simulation", i, "of", len( ICsList) T = 288 # K initialConditions = defaultICs initialConditions[:numSpecies] = ICs[:numSpecies]
def yeastData(numPoints, timeInterval,
              numICs, useDerivs=True, includedIndices=range(7),
              timesSeed=3, noiseSeed=4, ICseed=5,
              multiplicativeErrorBar=0.1, upperRangeMultiple=1.,
              randomX=True, inputVars=None, nonzeroMin=True,
              yeastSavedDataFile=None, includeNoise=True):
    """
    Build yeast oscillator fitting data for numICs random initial
    conditions, using results stored in yeastSavedDataFile when present
    and calling simulateYeastOscillator otherwise.  The stored-data
    dictionary is written back to yeastSavedDataFile before returning.

    numPoints               : Number of timepoints per initial condition.
    timeInterval            : Indexable pair (start time, end time).
    numICs                  : Number of random initial conditions.
    useDerivs (True)        : If True, also return derivative data
                              (taken from rows 7 and up of the raw data).
    includedIndices (range(7)) : Indices selecting which of the 7 species
                              are included.
    timesSeed, noiseSeed, ICseed : Seeds for the random timepoints, the
                              added noise, and the initial conditions.
    multiplicativeErrorBar (0.1) : Each error bar is this factor times
                              the per-species standard deviation.
    upperRangeMultiple (1.) : Each range of initial conditions is
                              expanded by this factor by increasing the
                              upper limit.
    randomX (True)          : If True, use random timepoints within
                              timeInterval; otherwise evenly spaced ones.
    inputVars (None)        : Must be None; any other value raises.
    nonzeroMin (True)       : Use the lower limit for initial
                              conditions given in SchValJen11;
                              otherwise use 0. as lower limit.
    yeastSavedDataFile (None) : Name of the stored-data file; defaults to
                              "yeastExampleData.fittingData".
    includeNoise (True)     : If True, add Gaussian noise with standard
                              deviation equal to the error bar.

    Returns (fittingData, fittingDataDerivs, inputVars, inputList).
    fittingData is a list with one dict per initial condition mapping
    species name -> {time: (value, errorBar)}.  fittingDataDerivs has
    the same layout for derivatives, or is None if useDerivs is False.

    Raises Exception if inputVars is not None, or if stored initial
    conditions do not match the requested ones.
    """
    usePreloadedData = True #<<<<<<<*******************

    allNames = scipy.array(['S1', 'S2', 'S3', 'S4', 'N2', 'A3', 'S4ex'])
    names = allNames[includedIndices]
    varList = names

    defaultTemperature = 286.5 #changed 10.16.2013 (was 288 K before)
    # Table 2 of RuoChrWol03
    defaultICs = scipy.array(
        [1.187, 0.193, 0.050, 0.115, 0.077, 2.475, 0.077]) # mM

    # choose input variable(s)
    if False: # use temp. as input
        inputVars = ['Temperature']
        inputList = [[278], [288], [293]]
        inputDescriptors = [inputs[0] for inputs in inputList]
    elif False: # only a single temperature value
        inputVars = ['Temperature']
        inputList = [[defaultTemperature]]
        inputDescriptors = [inputs[0] for inputs in inputList]
    elif inputVars is None: # use varying initial conditions on all 7 species
        inputVars = [name + "_init" for name in names]
        print("inputVars = %s" % (inputVars,))
        # taken from SchValJen11 Table 2
        ICranges = scipy.array(
            [[0.15, 1.60], [0.19, 2.16],
             [0.04, 0.20], [0.10, 0.35],
             [0.08, 0.30], [0.14, 2.67], [0.05, 0.10]])[includedIndices] # mM

        # as I vary the number of ICs, I want the same sequence of random ICs
        scipy.random.seed(ICseed)
        randICmults = []
        for i in range(numICs):
            randICmults.append(scipy.rand(len(includedIndices)))
        randomICs = scipy.array(randICmults) * upperRangeMultiple * \
            (ICranges[:, 1] - nonzeroMin * ICranges[:, 0]) \
            + nonzeroMin * ICranges[:, 0]
        inputList = randomICs
        inputDescriptors = range(len(inputList))
    else:
        raise Exception("Changing inputVars not yet implemented")

    # as I vary the number of ICs, I want the same sequence of lists of times
    scipy.random.seed(timesSeed)
    timesRandList = []
    for i in range(numICs):
        timesRandList.append(scipy.rand(numPoints))

    # all other random stuff is done now except for the noise
    # as I vary the number of ICs, I want the same sequence of noise
    scipy.random.seed(noiseSeed)

    # set up preloaded data dict
    if yeastSavedDataFile is None:
        yeastSavedDataFile = "yeastExampleData.fittingData"
    yeastDict = {}
    if usePreloadedData:
        try:
            yeastDict = simplePickle.load(yeastSavedDataFile)
        except IOError:
            print("yeastData: Could not load " + yeastSavedDataFile)
            usePreloadedData = False

    yeastOscillatorData = {}
    yeastOscillatorDataDerivs = {}
    for inputs, inputDescriptor, timesRand in zip(inputList, inputDescriptors,
                                                  timesRandList):

        stopAfterRunningMATLAB = False

        # deal with independent parameters
        # if it's temperature (only)
        # (compared by value: an identity test against a new list literal
        #  can never be True)
        if inputVars == ['Temperature']:
            temperature = inputs[0]
        else:
            temperature = defaultTemperature
        # copy so that the defaults are not modified below
        initialConditions = copy.deepcopy(defaultICs)
        for inputVar, inputValue in zip(inputVars, inputs):
            # if it's initial conditions
            if inputVar[-5:] == "_init":
                index = pylab.find(allNames == inputVar[:-5])[0]
                initialConditions[index] = inputValue
        print("initialConditions = %s" % (initialConditions,))

        #1e-3 #1e-4 # (minutes) resolution of timepoints
        eps = (timeInterval[1] - timeInterval[0]) / 1000.
        integratorTimes = scipy.arange(timeInterval[0],
                                       timeInterval[1] + eps, eps)
        if randomX:
            desiredTimes = timesRand * (timeInterval[1] - timeInterval[0]) \
                + timeInterval[0]
        else:
            desiredTimes = scipy.linspace(timeInterval[0], timeInterval[1],
                                          numPoints)

        # hashable key identifying this data set in the stored dictionary
        yeastDataKey = copy.deepcopy(
            (tuple(initialConditions), inputDescriptor, tuple(desiredTimes)))

        runMATLAB = not usePreloadedData
        if usePreloadedData:
            if yeastDataKey in yeastDict:
                yeastTimes, loadedICs, yeastRawData = yeastDict[yeastDataKey]
                if not scipy.all(loadedICs == initialConditions):
                    print("loadedICs = %s" % (loadedICs,))
                    print("initialConditions = %s" % (initialConditions,))
                    raise Exception("loadedICs != initialConditions")
            else:
                print("yeastData: " + yeastSavedDataFile + " does not "
                      "contain the necessary data.")
                runMATLAB = True

        if runMATLAB:
            print("yeastData: Running simulateYeastOscillator.")
            yeastTimes, yeastRawData, yeastParams = \
                simulateYeastOscillator(integratorTimes, temperature,
                                        initialConditions=initialConditions)
            # the integrator returns more timepoints than I want
            desiredIndices = [pylab.find(abs(yeastTimes - t) < eps / 2)[0]
                              for t in desiredTimes]
            yeastRawData = yeastRawData[:, desiredIndices]
            yeastTimes = yeastTimes[:, desiredIndices]
            yeastDict[yeastDataKey] = \
                (yeastTimes, initialConditions, yeastRawData)

        if False: # OLD! use temp. as input, calculate v1 as output, mult. error bars
            # NOTE(review): disabled code; `times` is not defined in this
            # function and yeastParams only exists when the simulator ran.
            S1data = yeastRawData[0]
            A3data = yeastRawData[5]
            k1, K1, q = yeastParams[1], yeastParams[10], yeastParams[9]
            v1data = k1 * S1data * A3data / (1. + (A3data / K1)**q)
            yeastOscillatorData[inputDescriptor] = {}
            yeastOscillatorData[inputDescriptor]['v1'] = \
                dict(zip(times, zip(v1data, multiplicativeErrorBar * v1data)))
            varList = ['v1']

        if useDerivs: # use derivatives as output (7-dimensional), const. error bars
            # 2.22.2012 from Table 2 of SchValJen11
            # 11.29.2012 using these even though they are stddevs of values, not derivs
            stddevs = [0.4872, 0.6263, 0.0503, 0.0814, 0.0379, 0.7478, 0.0159] # mM
            stddevs = scipy.array(stddevs)[includedIndices]
            derivDataList = yeastRawData[7:][includedIndices]
            yeastOscillatorDataDerivs[inputDescriptor] = {}
            for derivName, derivData, stddev in zip(names, derivDataList,
                                                    stddevs):
                constErrorBar = multiplicativeErrorBar * stddev
                yeastOscillatorDataDerivs[inputDescriptor][derivName] = \
                    dict(zip(yeastTimes,
                             zip(derivData,
                                 scipy.ones_like(derivData) * constErrorBar)))

        # 10.3.2011
        if True: # use all species as output (7-dimensional), const. error bars
            # 2.22.2012 from Table 2 of SchValJen11
            stddevs = [0.4872, 0.6263, 0.0503, 0.0814, 0.0379, 0.7478, 0.0159] # mM
            verbose = True
            dataList = scipy.array(yeastRawData)[:7][includedIndices]
            stddevs = scipy.array(stddevs)[includedIndices]
            yeastOscillatorData[inputDescriptor] = {}
            for name, data, stddev in zip(names, dataList, stddevs):
                constErrorBar = multiplicativeErrorBar * stddev
                if includeNoise:
                    noise = scipy.random.normal(scale=constErrorBar,
                                                size=len(data))
                    data = data + noise
                yeastOscillatorData[inputDescriptor][name] = \
                    dict(zip(yeastTimes,
                             zip(data, scipy.ones_like(data) * constErrorBar)))

    # save data for future use via usePreloadedData
    simplePickle.save(yeastDict, yeastSavedDataFile)

    if (not usePreloadedData) and stopAfterRunningMATLAB:
        # `die` is not defined; evaluating it halts with a NameError.
        # Never reached while stopAfterRunningMATLAB is False.
        die

    fittingData = [yeastOscillatorData[d] for d in inputDescriptors]
    if useDerivs:
        fittingDataDerivs = [yeastOscillatorDataDerivs[d]
                             for d in inputDescriptors]
    else:
        fittingDataDerivs = None

    return fittingData, fittingDataDerivs, inputVars, inputList
def yeastData(numPoints, timeInterval,
              numICs, useDerivs=True, includedIndices=range(7),
              timesSeed=3, noiseSeed=4, ICseed=5,
              multiplicativeErrorBar=0.1, upperRangeMultiple=1.,
              randomX=True, inputVars=None, nonzeroMin=True,
              yeastSavedDataFile=None, includeNoise=True):
    """
    Build yeast oscillator fitting data for numICs random initial
    conditions, using results stored in yeastSavedDataFile when present
    and calling simulateYeastOscillator otherwise.  The stored-data
    dictionary is written back to yeastSavedDataFile before returning.

    numPoints               : Number of timepoints per initial condition.
    timeInterval            : Indexable pair (start time, end time).
    numICs                  : Number of random initial conditions.
    useDerivs (True)        : If True, also return derivative data
                              (taken from rows 7 and up of the raw data).
    includedIndices (range(7)) : Indices selecting which of the 7 species
                              are included.
    timesSeed, noiseSeed, ICseed : Seeds for the random timepoints, the
                              added noise, and the initial conditions.
    multiplicativeErrorBar (0.1) : Each error bar is this factor times
                              the per-species standard deviation.
    upperRangeMultiple (1.) : Each range of initial conditions is
                              expanded by this factor by increasing the
                              upper limit.
    randomX (True)          : If True, use random timepoints within
                              timeInterval; otherwise evenly spaced ones.
    inputVars (None)        : Must be None; any other value raises.
    nonzeroMin (True)       : Use the lower limit for initial
                              conditions given in SchValJen11;
                              otherwise use 0. as lower limit.
    yeastSavedDataFile (None) : Name of the stored-data file; defaults to
                              "yeastExampleData.fittingData".
    includeNoise (True)     : If True, add Gaussian noise with standard
                              deviation equal to the error bar.

    Returns (fittingData, fittingDataDerivs, inputVars, inputList).
    fittingData is a list with one dict per initial condition mapping
    species name -> {time: (value, errorBar)}.  fittingDataDerivs has
    the same layout for derivatives, or is None if useDerivs is False.

    Raises Exception if inputVars is not None, or if stored initial
    conditions do not match the requested ones.
    """
    usePreloadedData = True #<<<<<<<*******************

    allNames = scipy.array(['S1', 'S2', 'S3', 'S4', 'N2', 'A3', 'S4ex'])
    names = allNames[includedIndices]
    varList = names

    defaultTemperature = 286.5 #changed 10.16.2013 (was 288 K before)
    # Table 2 of RuoChrWol03
    defaultICs = scipy.array(
        [1.187, 0.193, 0.050, 0.115, 0.077, 2.475, 0.077]) # mM

    # choose input variable(s)
    if False: # use temp. as input
        inputVars = ['Temperature']
        inputList = [[278], [288], [293]]
        inputDescriptors = [inputs[0] for inputs in inputList]
    elif False: # only a single temperature value
        inputVars = ['Temperature']
        inputList = [[defaultTemperature]]
        inputDescriptors = [inputs[0] for inputs in inputList]
    elif inputVars is None: # use varying initial conditions on all 7 species
        inputVars = [name + "_init" for name in names]
        print("inputVars = %s" % (inputVars,))
        # taken from SchValJen11 Table 2
        ICranges = scipy.array(
            [[0.15, 1.60], [0.19, 2.16],
             [0.04, 0.20], [0.10, 0.35],
             [0.08, 0.30], [0.14, 2.67], [0.05, 0.10]])[includedIndices] # mM

        # as I vary the number of ICs, I want the same sequence of random ICs
        scipy.random.seed(ICseed)
        randICmults = []
        for i in range(numICs):
            randICmults.append(scipy.rand(len(includedIndices)))
        randomICs = scipy.array(randICmults) * upperRangeMultiple * \
            (ICranges[:, 1] - nonzeroMin * ICranges[:, 0]) \
            + nonzeroMin * ICranges[:, 0]
        inputList = randomICs
        inputDescriptors = range(len(inputList))
    else:
        raise Exception("Changing inputVars not yet implemented")

    # as I vary the number of ICs, I want the same sequence of lists of times
    scipy.random.seed(timesSeed)
    timesRandList = []
    for i in range(numICs):
        timesRandList.append(scipy.rand(numPoints))

    # all other random stuff is done now except for the noise
    # as I vary the number of ICs, I want the same sequence of noise
    scipy.random.seed(noiseSeed)

    # set up preloaded data dict
    if yeastSavedDataFile is None:
        yeastSavedDataFile = "yeastExampleData.fittingData"
    yeastDict = {}
    if usePreloadedData:
        try:
            yeastDict = simplePickle.load(yeastSavedDataFile)
        except IOError:
            print("yeastData: Could not load " + yeastSavedDataFile)
            usePreloadedData = False

    yeastOscillatorData = {}
    yeastOscillatorDataDerivs = {}
    for inputs, inputDescriptor, timesRand in zip(inputList, inputDescriptors,
                                                  timesRandList):

        stopAfterRunningMATLAB = False

        # deal with independent parameters
        # if it's temperature (only)
        # (compared by value: an identity test against a new list literal
        #  can never be True)
        if inputVars == ['Temperature']:
            temperature = inputs[0]
        else:
            temperature = defaultTemperature
        # copy so that the defaults are not modified below
        initialConditions = copy.deepcopy(defaultICs)
        for inputVar, inputValue in zip(inputVars, inputs):
            # if it's initial conditions
            if inputVar[-5:] == "_init":
                index = pylab.find(allNames == inputVar[:-5])[0]
                initialConditions[index] = inputValue
        print("initialConditions = %s" % (initialConditions,))

        #1e-3 #1e-4 # (minutes) resolution of timepoints
        eps = (timeInterval[1] - timeInterval[0]) / 1000.
        integratorTimes = scipy.arange(timeInterval[0],
                                       timeInterval[1] + eps, eps)
        if randomX:
            desiredTimes = timesRand * (timeInterval[1] - timeInterval[0]) \
                + timeInterval[0]
        else:
            desiredTimes = scipy.linspace(timeInterval[0], timeInterval[1],
                                          numPoints)

        # hashable key identifying this data set in the stored dictionary
        yeastDataKey = copy.deepcopy(
            (tuple(initialConditions), inputDescriptor, tuple(desiredTimes)))

        runMATLAB = not usePreloadedData
        if usePreloadedData:
            if yeastDataKey in yeastDict:
                yeastTimes, loadedICs, yeastRawData = yeastDict[yeastDataKey]
                if not scipy.all(loadedICs == initialConditions):
                    print("loadedICs = %s" % (loadedICs,))
                    print("initialConditions = %s" % (initialConditions,))
                    raise Exception("loadedICs != initialConditions")
            else:
                print("yeastData: " + yeastSavedDataFile + " does not "
                      "contain the necessary data.")
                runMATLAB = True

        if runMATLAB:
            print("yeastData: Running simulateYeastOscillator.")
            yeastTimes, yeastRawData, yeastParams = \
                simulateYeastOscillator(integratorTimes, temperature,
                                        initialConditions=initialConditions)
            # the integrator returns more timepoints than I want
            desiredIndices = [pylab.find(abs(yeastTimes - t) < eps / 2)[0]
                              for t in desiredTimes]
            yeastRawData = yeastRawData[:, desiredIndices]
            yeastTimes = yeastTimes[:, desiredIndices]
            yeastDict[yeastDataKey] = \
                (yeastTimes, initialConditions, yeastRawData)

        if False: # OLD! use temp. as input, calculate v1 as output, mult. error bars
            # NOTE(review): disabled code; `times` is not defined in this
            # function and yeastParams only exists when the simulator ran.
            S1data = yeastRawData[0]
            A3data = yeastRawData[5]
            k1, K1, q = yeastParams[1], yeastParams[10], yeastParams[9]
            v1data = k1 * S1data * A3data / (1. + (A3data / K1)**q)
            yeastOscillatorData[inputDescriptor] = {}
            yeastOscillatorData[inputDescriptor]['v1'] = \
                dict(zip(times, zip(v1data, multiplicativeErrorBar * v1data)))
            varList = ['v1']

        if useDerivs: # use derivatives as output (7-dimensional), const. error bars
            # 2.22.2012 from Table 2 of SchValJen11
            # 11.29.2012 using these even though they are stddevs of values, not derivs
            stddevs = [0.4872, 0.6263, 0.0503, 0.0814, 0.0379, 0.7478, 0.0159] # mM
            stddevs = scipy.array(stddevs)[includedIndices]
            derivDataList = yeastRawData[7:][includedIndices]
            yeastOscillatorDataDerivs[inputDescriptor] = {}
            for derivName, derivData, stddev in zip(names, derivDataList,
                                                    stddevs):
                constErrorBar = multiplicativeErrorBar * stddev
                yeastOscillatorDataDerivs[inputDescriptor][derivName] = \
                    dict(zip(yeastTimes,
                             zip(derivData,
                                 scipy.ones_like(derivData) * constErrorBar)))

        # 10.3.2011
        if True: # use all species as output (7-dimensional), const. error bars
            # 2.22.2012 from Table 2 of SchValJen11
            stddevs = [0.4872, 0.6263, 0.0503, 0.0814, 0.0379, 0.7478, 0.0159] # mM
            verbose = True
            dataList = scipy.array(yeastRawData)[:7][includedIndices]
            stddevs = scipy.array(stddevs)[includedIndices]
            yeastOscillatorData[inputDescriptor] = {}
            for name, data, stddev in zip(names, dataList, stddevs):
                constErrorBar = multiplicativeErrorBar * stddev
                if includeNoise:
                    noise = scipy.random.normal(scale=constErrorBar,
                                                size=len(data))
                    data = data + noise
                yeastOscillatorData[inputDescriptor][name] = \
                    dict(zip(yeastTimes,
                             zip(data, scipy.ones_like(data) * constErrorBar)))

    # save data for future use via usePreloadedData
    simplePickle.save(yeastDict, yeastSavedDataFile)

    if (not usePreloadedData) and stopAfterRunningMATLAB:
        # `die` is not defined; evaluating it halts with a NameError.
        # Never reached while stopAfterRunningMATLAB is False.
        die

    fittingData = [yeastOscillatorData[d] for d in inputDescriptors]
    if useDerivs:
        fittingDataDerivs = [yeastOscillatorDataDerivs[d]
                             for d in inputDescriptors]
    else:
        fittingDataDerivs = None

    return fittingData, fittingDataDerivs, inputVars, inputList
# Draw one random time per initial condition, scaled by maxTime; the
# generator is seeded first so the sequence is reproducible.
scipy.random.seed(timesSeed)
randTimes = scipy.rand(numICs) * maxTime

# Script-level configuration flags.
singleTimepoint = True # single time point for each IC
useYeast = True
usePhosphorylation = False
makeFittingData = True # use False if you're getting data from a previous run

if useYeast:
    numVisibleSpecies = 3
    numSpecies = 4
    # the data filename encodes the number of visible species
    dataFilename = "120725_yeastSpeciesAndDerivsData_10_numSpecies" + str(numVisibleSpecies) + ".data"
    # keep only the first numVisibleSpecies names
    visibleSpeciesNames = ["S1", "S2", "S3", "S4", "N2", "A3", "S4ex"][:numVisibleSpecies]
    indepParamNames = [n + "_init" for n in visibleSpeciesNames]

    if loadData:
        # reuse previously saved data
        speciesData, derivsData = load(dataFilename)
    elif not loadData:
        # set up list of random initial conditions
        # taken from runFittingProblem.yeastDataFunction

        # run MATLAB simulator
        speciesData = []
        derivsData = []
        i = 0 # 1-based counter used only for the progress message
        for ICs, time in zip(ICsList, randTimes):
            i += 1
            print "runPowerLawDerivFit: Running yeast simulation", i, "of", len(ICsList)
            T = 288 # K
            # NOTE(review): this binds initialConditions to the same object
            # as defaultICs (no copy), so the slice assignment below also
            # modifies defaultICs if it is a mutable array/list -- confirm
            # this is intended.
            initialConditions = defaultICs
            initialConditions[:numSpecies] = ICs[:numSpecies]
            if singleTimepoint: # single time point for each IC
def loadFitProb(saveFilename, fileNumString, conditioni, numTimepoints):
    """
    Return the entry for numTimepoints from the object saved as
    saveFilename under the directory prefix for the given run
    (fileNumString), condition index (conditioni), and numTimepoints.
    """
    savedPath = directoryPrefix(fileNumString, conditioni, numTimepoints) \
        + saveFilename
    savedFitProbs = load(savedPath)
    # the saved object is indexed by numTimepoints
    return savedFitProbs[numTimepoints]
def combineFitProbs(fileNumString, saveCombined=True, combinedLean=True,
                    reset=False):
    """
    Combine fittingProblems from multiple conditions saved in the
    parallel file structure into a single fittingProblemDict.

    Currently only includes data from models that have been fit to
    all conditions.

    saveCombined (True)     : Overwrites any current top-level fitProbDict
                              file with a combined fitProbDict containing
                              all numTimepoints.  Set to False to minimize
                              memory use.
    combinedLean (True)     : Combined fpd is saved without models to
                              save memory.
    reset (False)           : If True, overwrite or delete any existing
                              combined fitProbDicts.  This erases any
                              existing outOfSampleCost information.
    """
    allFitProbData = loadFitProbData(fileNumString)
    saveFilename = list(allFitProbData.values())[0]['saveFilename']

    if saveCombined:
        fpdMultiple = {}

    fitSubsets = subsetsWithFits(fileNumString)
    subsetsToCombine = subsetsWithFits(fileNumString, onlyNew=not reset)

    for numTimepoints in fitSubsets:
        Nfilename = '/'.join(
            (directoryPrefixNonly(fileNumString, numTimepoints), saveFilename))
        haveOldFile = os.path.exists(Nfilename)

        if reset and haveOldFile:
            # an old combined file exists -- erase it to reset
            os.remove(Nfilename)
            haveOldFile = False
            print("combineFitProbs: Reset removed file for numTimepoints ="
                  + " " + str(numTimepoints))

        if numTimepoints not in subsetsToCombine:
            # no new fits to combine; just load from file
            if saveCombined:
                fpdMultiple[numTimepoints] = load(Nfilename)
            print("combineFitProbs: Done with numTimepoints ="
                  + " " + str(numTimepoints))
            continue

        # --- combine ---
        # carry over any out-of-sample cost data from the old combined file
        oldOutOfSampleCostDict = {}
        if haveOldFile:
            fpMultiple = load(Nfilename)
            if hasattr(fpMultiple, 'outOfSampleCostDict'):
                oldOutOfSampleCostDict = fpMultiple.outOfSampleCostDict

        subsetData = allFitProbData[numTimepoints]
        fpList = []
        for conditioni in range(len(subsetData['fitProbDataList'])):
            fp = loadFitProb(saveFilename, fileNumString, conditioni,
                             numTimepoints)
            fpList.append(fp)

        # make new multiple condition fitting problem by starting
        # with an empty fitting problem and inserting the
        # fittingProblemList; the last loaded fp serves as fp0
        fp.stopFittingN = subsetData['stopFittingN']
        fpMultiple = FittingProblemMultipleCondition(
            [], [], saveFilename=None, saveKey=subsetData['saveKey'], fp0=fp)
        fpMultiple.fittingProblemList = fpList
        fpMultiple.outOfSampleCostDict = oldOutOfSampleCostDict

        # Populate the logLikelihoodDict, etc by running fitAll.
        fpMultiple.fitAll(onlyCombine=True)

        if saveCombined:
            fpdMultiple[numTimepoints] = fpMultiple
        save(fpMultiple, Nfilename)
        print("combineFitProbs: Done with numTimepoints ="
              + " " + str(numTimepoints))

    if saveCombined:
        if combinedLean:
            makeFpdLean(fpdMultiple)
        # the last four characters of saveFilename are replaced by
        # '_combined.dat'
        save(fpdMultiple, saveFilename[:-4] + '_combined.dat')
if __name__ == '__main__':
    # exactly one command-line argument (the input filename) is required
    if len(sys.argv) != 2:
        print("Usage: mpirun -np [numprocs] python mpi_test.py test_input_filename")
        exit()
    test_input_filename = sys.argv[1]

    if num_procs < 2:
        raise Exception("No worker processes detected.")

    if my_rank == 0:
        # master process
        file_data = load(test_input_filename)
        worker_ranks = range(1, num_procs)

        # send work: every worker receives the same (string, dict) pair.
        # NOTE(review): the string is presumably an expression the worker
        # evaluates using the dict -- the worker side is not shown here.
        work_message = ("file_data['test']", {
            'comp': {
                1: 2
            },
            'file_data': file_data
        })
        for worker in worker_ranks:
            comm.send(work_message, dest=worker)

        # get results, one reply per worker in rank order
        for worker in worker_ranks:
            msg = comm.recv(source=worker)
            print("mpi_test: Worker {} said {}".format(worker, msg))