예제 #1
0
def _sum_sq_diff(model_vals, data_vals, n_points):
    # Sum of squared differences over the first n_points samples.
    # Returns -1 when the model output has fewer than n_points values,
    # which the caller treats as "model did not finish".
    if len(model_vals) < n_points:
        return -1
    total = 0
    # Explicit accumulation (not sum()) so the floating-point result is
    # the same plain left-to-right addition on every Python version.
    for v in range(n_points):
        total += pow(data_vals[v] - model_vals[v], 2)
    return total


def fitToModel(tPoints, ratios, times, turnover, tor_v, rFile, numToFit, BGD, numToSample, alpha):
    """Fit the single-parameter model ``model_v0`` to every site and write the results.

    For each row of ``turnover`` the log2 values are linearized (2**x), then
    for each initial guess in ``tor_v`` the model is run once; if that run
    yields a positive sum of squared differences (SSD), ``numToSample``
    further parameters are drawn by a random walk and the one with the
    lowest SSD seen so far is kept.

    Args:
        tPoints: Sample time points.  They are multiplied by 60 before being
            handed to the model (the original comments call the result
            "seconds", so these are presumably minutes - confirm).  Also
            used as the trailing column names of the output header.
        ratios: Relative protein levels; a shallow copy is passed to the model.
        times: Protein time points; a shallow copy is passed to the model.
        turnover: Sequence of rows.  ``row[0]`` is the site name and
            ``row[1:]`` are log2 turnover values.
        tor_v: Initial parameter guesses.  ``tor_v[0]`` is reported as the
            best parameter if no sampled value is ever accepted.
        rFile: Path of the tab-separated results file (overwritten).
        numToFit: Number of leading time points included in the SSD.
        BGD: Passed through unchanged to ``model_v0``.
        numToSample: Number of random-walk samples drawn per initial guess.
        alpha: Relative half-width of the uniform sampling window; each new
            sample is drawn from ``[P - P*alpha, P + P*alpha]`` around the
            previous sample ``P``.

    Returns:
        ``[R_ccc_all_best, lin_turn_best]``: per site, the model output for
        the best parameter and the linearized turnover data.
    """
    # Parameter bundle in the positional layout model_v0 is called with.
    par_mod = [
        [x * 60 for x in tPoints],  # turnover sample time points, scaled by 60
        times[:],                   # protein times from input file
        ratios[:],                  # relative protein level
        0,                          # initial concentration
        0.0,                        # background
    ]

    R_ccc_all = []        # [name, initial guess, best parameter, best SSD] per site
    R_ccc_all_best = []   # model output at the best parameter, per site
    lin_turn_best = []    # linearized turnover data, per site

    for site in turnover:
        name = str(site[0])
        print('Fitting model to data for site: ' + name)

        # Linear form of the (log2) turnover data for this site.
        lin_turn = [pow(2, site[c]) for c in range(1, len(site))]

        SSD = 1E15  # start with a really big sum of squares difference
        bestP = tor_v[0]
        val_init = bestP

        for p in tor_v:
            # Run the model at the initial guess.  Its SSD only decides
            # whether to sample from this guess; it is never itself a
            # candidate for the best fit.
            seed_ssd = _sum_sq_diff(model_v0(p, par_mod, lin_turn, BGD), lin_turn, numToFit)

            # Skips guesses where the model did not finish (-1) and also an
            # exact-zero SSD.
            if seed_ssd > 0:
                newP = p
                for _ in range(numToSample):
                    # Random walk: each draw is centered on the previous draw.
                    newP = random.uniform(newP - newP * alpha, newP + newP * alpha)
                    # NOTE(review): a negative draw is re-drawn around the
                    # negative value itself; for alpha <= 1 that window
                    # appears to stay non-positive, so a negative initial
                    # guess could loop forever here - confirm inputs are > 0.
                    while newP < 0:
                        newP = random.uniform(newP - newP * alpha, newP + newP * alpha)

                    newSSD = _sum_sq_diff(model_v0(newP, par_mod, lin_turn, BGD), lin_turn, numToFit)

                    if newSSD != -1 and newSSD < SSD:
                        bestP = newP   # set this as the new best parameter
                        SSD = newSSD
                        val_init = p

        R_ccc_all_best.append(model_v0(bestP, par_mod, lin_turn, BGD))
        lin_turn_best.append(lin_turn)
        R_ccc_all.append([name, val_init, bestP, SSD])

    # write results to file
    print ('Writing model fit results to file....')

    # NOTE(review): the header names seven leading columns, but each row
    # below writes four (name, initial, predicted, SSD) unless the site name
    # itself contains tab-separated fields - confirm the intended layout.
    header = 'Site_Chr' + '\t' + 'Site_Pos' + '\t' + 'Mapped_Chr' + '\t' + 'Mapped_Pos' + '\t' + 'initial turnover' + '\t' + 'predicted turnover' + '\t' + 'best SSD' + '\t'
    header = header + '\t'.join(str(t) for t in tPoints) + '\n'

    # The context manager closes the file even if log() or a write raises.
    with open(rFile, 'w') as out:
        out.write(header)
        for result, fitted in zip(R_ccc_all, R_ccc_all_best):
            row = [result[0], str(result[1]), str(result[2]), str(result[3])]
            # Model output is written log2-transformed.
            row.extend(str(log(value, 2)) for value in fitted)
            out.write('\t'.join(row) + '\n')

    return [R_ccc_all_best, lin_turn_best]
예제 #2
0
def plot_multiple_fits(turnover, resultsFileArray, numToFitArray, ratios, times, tPoints, figName, BGD=2):
    """Plot, for every site, the data against the model fits stored in several result files.

    Two figures are saved per site: the linearized turnover data overlaid
    with one model curve per results file (``figName + '_Fits_' + name``),
    and a scatter of 1/rate against SSD with one point per results file
    (``figName + '_SSDvTO_' + name``).

    Args:
        turnover: Sequence of rows.  ``row[0]`` is the site name and
            ``row[1:]`` are log2 turnover values.
        resultsFileArray: Paths of tab-separated results files.  Line 1 of
            each file is treated as a header; site ``i`` (0-based) is read
            from line ``i + 2``.
        numToFitArray: One entry per results file, used only in plot labels.
        ratios: Relative protein levels; a shallow copy is passed to the model.
        times: Protein time points; a shallow copy is passed to the model.
        tPoints: Sample time points, used as the x axis and, multiplied by
            60, passed to the model.
        figName: Prefix of the saved figure file names.
        BGD: Passed through unchanged to ``model_v0``.  Defaults to 2, the
            value this function previously hard-coded.

    Raises:
        IndexError: If a results file has no line for a site, the line has
            fewer than three tab-separated fields, or ``numToFitArray`` is
            shorter than ``resultsFileArray``.
    """
    plt.ioff()  # turn off interactive plotting

    # x axis for the plots: the time points as given.
    time = list(tPoints)

    # Parameter bundle in the positional layout model_v0 is called with.
    par_mod = [
        [x * 60 for x in tPoints],  # turnover sample time points, scaled by 60
        times[:],                   # protein times from input file
        ratios[:],                  # relative protein level
        0,                          # initial concentration
        0.0,                        # background
    ]

    # Plot styles: one for the experimental data, then one per results file.
    # The per-file styles are cycled so more than four files do not fail.
    data_style = 'k-*'
    fit_styles = ['r', 'y', 'g', 'b']
    ssd_styles = ['ro', 'yo', 'go', 'bo']

    print ('Processing the following files:')
    for path in resultsFileArray:
        print(path)

    for site_idx, site in enumerate(turnover):
        # Line number of this site in every results file (line 1 is a header).
        lineNum = site_idx + 2

        name = str(site[0])
        fits_label = 'Model Fits for ' + name
        ssd_label = 'Residence Time vs. SSD for ' + name

        # linearize turnover data
        lin_turn = [pow(2, site[c]) for c in range(1, len(site))]

        # Figure holding the turnover data and the model fits.
        fig1 = plt.figure(fits_label, figsize=(10,10))
        plt.plot(time, lin_turn, data_style, linewidth=3, label = 'exp', markersize=15)

        # Figure holding the SSD vs. 1/rate scatter.
        fig2 = plt.figure(ssd_label, figsize=(10,10))

        for file_idx, path in enumerate(resultsFileArray):
            style = file_idx % len(fit_styles)

            # linecache.getline returns '' when the line cannot be read.
            fields = linecache.getline(path, lineNum).split('\t')
            if len(fields) < 3:
                raise IndexError('line ' + str(lineNum) + ' of ' + str(path) + ' is missing or has fewer than 3 tab-separated fields')

            # NOTE(review): fitToModel in this file writes columns as
            # name, initial turnover, predicted turnover, SSD - so indices
            # 1 and 2 would be the initial and predicted rates, not rate and
            # SSD.  Confirm which file layout this reader is meant for.
            turnoverRate = float(fields[1])
            SSD = float(fields[2])
            n_pts = numToFitArray[file_idx]

            # One point per results file on the SSD figure.
            plt.figure(ssd_label)
            plt.plot(1/turnoverRate, SSD, ssd_styles[style], markersize=15, label='# points = ' + str(n_pts))

            # Re-run the model at the stored rate and overlay it on the fits figure.
            R_ccc = model_v0(turnoverRate, par_mod, lin_turn, BGD)
            plt.figure(fits_label)
            plt.plot(time, R_ccc, fit_styles[style], linewidth=1.5, label='# pts = ' + str(n_pts) + ' RT  = ' + "%.0f" % (1/turnoverRate))

        # Select the fits figure explicitly: when resultsFileArray is empty
        # the loop above never ran and the SSD figure would still be current.
        plt.figure(fits_label)
        plt.xlabel('Time (min)')
        plt.ylabel('Turnover ratio (linear)')
        plt.title(name)
        plt.ylim(min(lin_turn), max(lin_turn)*1.5)
        plt.legend(loc=2, fontsize=10)
        plt.savefig(figName + '_Fits_' + name)
        plt.close(fig1)

        # Label and save the SSD figure.
        plt.figure(ssd_label)
        plt.xlabel('Predicted Residence Time (RT)')
        plt.ylabel('Sum of squares difference (SSD)')
        plt.title(name + ' SSD')
        plt.legend(loc=2, fontsize=10)
        plt.savefig(figName + '_SSDvTO_' + name)
        plt.close(fig2)