def _ssd_over_first_points(observed, predicted, num_points):
    """Return the sum of squared differences over the first *num_points* values.

    Returns -1 when *predicted* holds fewer than *num_points* values; the
    caller treats that as "the model did not finish".
    """
    if len(predicted) < num_points:
        return -1
    total = 0
    for v in range(num_points):
        total += pow(observed[v] - predicted[v], 2)
    return total


def fitToModel(tPoints, ratios, times, turnover, tor_v, rFile, numToFit, BGD,
               numToSample, alpha):
    """Fit a turnover parameter to every site and write the fits to *rFile*.

    For each row of *turnover* the log2 values are linearised (2 ** value),
    then for every starting value in *tor_v* the model is evaluated once and,
    if that evaluation produced a positive sum of squared differences (SSD),
    *numToSample* further parameters are drawn by a random walk
    (each draw is uniform within +/- *alpha* * current value of the previous
    draw).  The sampled parameter with the smallest SSD over the first
    *numToFit* points is kept.

    Args:
        tPoints: sample time-points; multiplied by 60 before being handed to
            the model and written as column labels in the output header.
        ratios: relative protein levels passed to the model (copied).
        times: protein times passed to the model (copied).
        turnover: sequence of rows; element 0 is the site name, the remaining
            elements are log2 turnover values.
        tor_v: starting parameter values to sample around.
        rFile: path of the tab-separated results file to (over)write.
        numToFit: number of leading time-points used to compute the SSD.
        BGD: forwarded unchanged to ``model_v0``.
        numToSample: number of random draws per starting value.
        alpha: relative half-width of each uniform draw.

    Returns:
        ``[best_fits, linear_turnovers]`` - two lists with one entry per
        site: the model output for the best parameter, and the linearised
        turnover data.
    """
    # Parameter bundle handed to model_v0 (order matters).
    par_mod = [
        [x * 60 for x in tPoints],  # sample time-points (in seconds)
        times[:],                   # protein times from input file
        ratios[:],                  # relative protein level
        0,                          # initial concentration
        0.0,                        # background
    ]

    fit_summaries = []      # one [name, val_init, bestP, SSD] per site
    best_fits = []          # model output for the best parameter, per site
    linear_turnovers = []   # linearised turnover data, per site

    for site in turnover:
        name = str(site[0])
        print('Fitting model to data for site: ' + name)

        # linear form of the (log2) turnover data for this site
        lin_turn = [pow(2, value) for value in site[1:]]

        SSD = 1E15  # start with a really big sum of squares difference
        bestP = tor_v[0]
        val_init = bestP

        for p in tor_v:
            newP = p
            R_ccc = model_v0(newP, par_mod, lin_turn, BGD)
            newSSD = _ssd_over_first_points(lin_turn, R_ccc, numToFit)

            # Only sample around this starting value if the model solved and
            # gave a positive SSD.  The starting value's own SSD is not a
            # candidate for "best"; only sampled parameters are.
            if newSSD > 0:
                for _ in range(numToSample):
                    newP = random.uniform(newP - newP * alpha,
                                          newP + newP * alpha)
                    # NOTE(review): if newP is negative and alpha < 1 every
                    # redraw stays negative, so this loop would not terminate;
                    # assumes tor_v holds non-negative starting values.
                    while newP < 0:
                        newP = random.uniform(newP - newP * alpha,
                                              newP + newP * alpha)

                    R_ccc = model_v0(newP, par_mod, lin_turn, BGD)
                    newSSD = _ssd_over_first_points(lin_turn, R_ccc, numToFit)

                    if newSSD != -1 and newSSD < SSD:
                        bestP = newP
                        SSD = newSSD
                        val_init = p

        best_fits.append(model_v0(bestP, par_mod, lin_turn, BGD))
        linear_turnovers.append(lin_turn)
        fit_summaries.append([name, val_init, bestP, SSD])

    print('Writing model fit results to file....')
    # NOTE(review): the header names seven fixed columns but each row only
    # carries four leading fields (name, initial, predicted, SSD) before the
    # per-time-point values.  Layout kept as-is for existing readers.
    header_fields = ['Site_Chr', 'Site_Pos', 'Mapped_Chr', 'Mapped_Pos',
                     'initial turnover', 'predicted turnover', 'best SSD']
    header_fields.extend(str(t) for t in tPoints)

    # "with" guarantees the file is closed even if a write (or log) raises.
    with open(rFile, 'w') as out:
        out.write('\t'.join(header_fields) + '\n')
        for summary, fit in zip(fit_summaries, best_fits):
            row = [summary[0], str(summary[1]), str(summary[2]),
                   str(summary[3])]
            # model output is written log2-transformed
            row.extend(str(log(value, 2)) for value in fit)
            out.write('\t'.join(row) + '\n')

    return [best_fits, linear_turnovers]
def plot_multiple_fits(turnover, resultsFileArray, numToFitArray, ratios,
                       times, tPoints, figName):
    """Plot, for every site, the model fits stored in several results files.

    Two figures are produced and saved per site:

    * ``figName + '_Fits_' + name``: the linearised turnover data together
      with one model curve per results file;
    * ``figName + '_SSDvTO_' + name``: one point per results file at
      (1 / turnover rate, SSD).

    Args:
        turnover: sequence of rows; element 0 is the site name, the remaining
            elements are log2 turnover values.  Row ``s`` is matched with
            line ``s + 2`` of every results file (line 1 is a header).
        resultsFileArray: paths of tab-separated results files.
        numToFitArray: per-file label values, parallel to
            *resultsFileArray*.
        ratios: relative protein levels passed to the model (copied).
        times: protein times passed to the model (copied).
        tPoints: sample time-points used as the x axis; multiplied by 60
            before being handed to the model.
        figName: prefix for the saved figure file names.
    """
    plt.ioff()  # turn off interactive plotting

    time_axis = [x * 1 for x in tPoints]

    # Parameter bundle handed to model_v0 (order matters).
    par_mod = [
        [x * 60 for x in tPoints],  # sample time-points (in seconds)
        times[:],                   # protein times from input file
        ratios[:],                  # relative protein level
        0,                          # initial concentration
        0.0,                        # background
    ]

    BGD = 2  # number of background points to use

    # Plot styles: one for the experimental data, then one per results file.
    # Per-file styles are cycled so more than four files do not overrun them.
    exp_style = 'k-*'
    fit_styles = ['r', 'y', 'g', 'b']
    ssd_styles = ['ro', 'yo', 'go', 'bo']

    print('Processing the following files:')
    for path in resultsFileArray:
        print(path)

    for row_index, site in enumerate(turnover):
        line_num = row_index + 2  # line 1 of each results file is the header
        name = str(site[0])
        fits_label = 'Model Fits for ' + name
        ssd_label = 'Residence Time vs. SSD for ' + name

        # linear form of the (log2) turnover data for this site
        lin_turn = [pow(2, value) for value in site[1:]]

        fig1 = plt.figure(fits_label, figsize=(10, 10))
        plt.plot(time_axis, lin_turn, exp_style, linewidth=3, label='exp',
                 markersize=15)

        fig2 = plt.figure(ssd_label, figsize=(10, 10))

        for file_index, path in enumerate(resultsFileArray):
            style = file_index % len(fit_styles)

            # NOTE(review): columns 1 and 2 are read as turnover rate and
            # SSD; confirm this matches the layout of the files passed in.
            fields = linecache.getline(path, line_num).split('\t')
            turnoverRate = float(fields[1])
            SSD = float(fields[2])

            plt.figure(ssd_label)
            plt.plot(1 / turnoverRate, SSD, ssd_styles[style], markersize=15,
                     label='# points = ' + str(numToFitArray[file_index]))

            R_ccc = model_v0(turnoverRate, par_mod, lin_turn, BGD)

            plt.figure(fits_label)
            plt.plot(time_axis, R_ccc, fit_styles[style], linewidth=1.5,
                     label='# pts = ' + str(numToFitArray[file_index])
                     + ' RT = ' + "%.0f" % (1 / turnoverRate))

        # Select the model-fit figure explicitly so the labels land on it
        # even when there were no results files to plot.
        plt.figure(fits_label)
        plt.xlabel('Time (min)')
        plt.ylabel('Turnover ratio (linear)')
        plt.title(name)
        plt.ylim(min(lin_turn), max(lin_turn) * 1.5)
        plt.legend(loc=2, fontsize=10)
        plt.savefig(figName + '_Fits_' + name)
        plt.close(fig1)

        plt.figure(ssd_label)
        plt.xlabel('Predicted Residence Time (RT)')
        plt.ylabel('Sum of squares difference (SSD)')
        plt.title(name + ' SSD')
        plt.legend(loc=2, fontsize=10)
        plt.savefig(figName + '_SSDvTO_' + name)
        plt.close(fig2)