Python makeHist 예제들, measure.makeHist Python 예제들

예제 #1

0

파일 보기

파일: utils.py 프로젝트: tanmoy7989/protein_model

def isConverged(TrajDir, PdbName, TempSet = 300, OutFile = None):
    """Plot RMSD-convergence and replica-walk diagnostics and save them to a file.

    Reads ``temps.txt`` from ``TrajDir``, picks the temperature closest to
    ``TempSet`` and analyses the trajectory
    ``prot_<PdbName>.<temp>.lammpstrj.gz`` found in the same directory.
    The figure has three panels:

    * per-frame RMSD, with its mean drawn as a dashed line and the ratio
      std/mean (in percent) shown in the legend,
    * the normalized RMSD histogram (50 bins, 4 blocks) with error bars,
    * for each column of the log file ``prot_<PdbName>lammps.log``, a
      histogram of the temperatures that column visited.

    Note that this overwrites the module-level settings ``NBins``,
    ``NBlocks`` and ``Normalize`` of the ``measure`` module.

    Args:
        TrajDir: directory holding ``temps.txt``, the trajectory and the log.
        PdbName: protein name used to build the file names and passed to
            ``parseNative``.
        TempSet: target temperature; the nearest entry of ``temps.txt`` is used.
        OutFile: path of the image to write; defaults to
            ``./rmsd_convergence.png`` (made absolute).

    Returns:
        None. The figure is written to ``OutFile``.
    """
    if OutFile is None: OutFile = os.path.abspath('./rmsd_convergence.png')
    import cgprotein as lib, measure as m
    TempFile = os.path.join(TrajDir, 'temps.txt')
    # loadtxt yields a 0-d array for a one-line file; keep Temps indexable
    Temps = np.atleast_1d(np.loadtxt(TempFile))
    Ind = np.argmin(abs(Temps - TempSet))
    TrajTemp = Temps[Ind]
    Traj = os.path.join(TrajDir, 'prot_%s.%3.2f.lammpstrj.gz' % (PdbName, TrajTemp))
    NativePdb = parseNative(PdbName)
    LammpsREMDLog = os.path.join(TrajDir, 'prot_%slammps.log' % PdbName)

    # per-frame RMSD and its relative fluctuation (std / mean, in percent)
    calc = lib.Compute(TrajFn = Traj, NativePdb = NativePdb, Temp = TrajTemp)
    rmsd = calc.RMSD_frame()
    avgrmsd = rmsd.mean()
    stdrmsd = np.std(rmsd, ddof = 1)
    avgerr = 100. * stdrmsd / avgrmsd
    m.NBins = 50 ; m.NBlocks = 4 ; m.Normalize = True
    rmsdhist = m.makeHist(rmsd)

    # skip the 3 header rows and drop the first column of the log; the
    # remaining entries are used below as indices into Temps
    Walk = np.loadtxt(LammpsREMDLog, skiprows = 3)[:, 1:]

    fig = plt.figure(facecolor = 'w', edgecolor = 'w', figsize = (8, 4))
    ax1 = fig.add_subplot(2,2,1)
    ax2 = fig.add_subplot(2,2,2)
    ax3 = fig.add_subplot(2,2,3)

    ax1.plot(rmsd, lw = 1, color = 'blue', label = 'fluct from mean = %3.2f %%' % avgerr)
    ax1.axhline(avgrmsd, ls = '--', color = 'black', lw = 2)
    ax1.legend(loc = 'best')

    ax2.errorbar(rmsdhist[0], rmsdhist[1], yerr = rmsdhist[2], color = 'black', lw = 2, marker = 'o', markersize = 5)
    ax2.set_xlabel(r'$RMSD (\AA)$') ; ax2.set_ylabel('distribution')

    for i in range(len(Temps)):
        # translate the temperature indices in column i into temperatures
        y = np.array([Temps[int(k)] for k in Walk[:, i]])
        # one bin per temperature, raw (un-normalized) counts
        m.NBins = len(Temps) ; m.NBlocks = 1; m.Normalize = False
        walkhist = m.makeHist(y)
        ax3.plot(walkhist[0], walkhist[1], lw = 2, label = 'Replica: %d' % i)
    # build the legend once, after every curve has been added
    ax3.legend(loc = 'best', fontsize = 5)
    ax3.set_xlabel('Temp (K)') ; ax3.set_ylabel('distribution')

    fig.tight_layout()
    fig.savefig(OutFile)
    return

예제 #2

0

파일 보기

파일: get_contact_hist.py 프로젝트: tanmoy7989/protein_model

def getData(pdbname, ax, of):
    """Histogram the native contact distances of one protein and plot them.

    Builds the protein from ``../native_struct/mapped_pseudoGLY/<pdbname>.pdb``
    (optionally mapped to a polymer when the module-level ``Map2Polymer`` flag
    is set), parses it with ``ps.ParsePdb`` and histograms the ``d_native``
    and ``d_ss_native`` distance arrays.

    Args:
        pdbname: protein name; used for the pdb path, the plot title and the
            table row.
        ax: matplotlib axes on which both histograms are drawn.
        of: writable file-like object; receives one row with the name and the
            min/max of each of the two distance arrays.

    Returns:
        None.
    """
    # single-argument parenthesised form prints identically on Python 2 and 3
    print('\nPdb: %s\n-----------' % pdbname)
    pdbfile = os.path.abspath('../native_struct/mapped_pseudoGLY/%s.pdb' % pdbname)
    p0 = topo.ProteinNCOS(Pdb = pdbfile, cfg = cfg, Prefix = pdbfile)
    p = p0.Map2Polymer(PolyName = PolyName) if Map2Polymer else p0
    cdict = ps.ParsePdb(p)
    d_native = cdict['d_native']
    d_ss = cdict['d_ss_native']
    h_native = measure.makeHist(d_native)
    h_ss = measure.makeHist(d_ss)
    of.write('%7s %14.3f %14.3f %14.3f %14.3f\n' % (pdbname, d_native.min(), d_native.max(), d_ss.min(), d_ss.max()))
    ax.plot(h_native[0], h_native[1], 'b-', lw = 2, label = 'COM-COM')
    ax.plot(h_ss[0], h_ss[1], 'g-', lw = 2, label = 'S-S')
    ax.legend()
    ax.set_title(pdbname)
    # remove the contact files left in the working directory
    # (presumably by-products of the parsing step above -- confirm)
    for fname in (pdbname + '_nativecontact.txt', pdbname + '_nonnativecontact.txt'):
        fpath = os.path.join(os.getcwd(), fname)
        if os.path.isfile(fpath): os.remove(fpath)
    return

예제 #3

0

파일 보기

파일: cgprotein.py 프로젝트: tanmoy7989/protein_model

    def FoldCurve(self, O='RMSD'):
        """Compute the folding fraction at every temperature and pickle it.

        The order-parameter samples stored in the data shelf for each entry
        of ``self.Temps`` are pooled, split into ``NBlocks`` blocks of frames
        and histogrammed with the configuration weights ``w_kn[(k, b)]`` of
        target temperature ``k`` and block ``b``. The folding fraction is the
        share of histogram weight in bins whose centers are at or below
        ``OCut[O]``. The block average and, when ``NBlocks > 1``, the sample
        standard deviation over blocks are stored.

        Nothing is done when the output pickle already exists.

        Args:
            O: name of the order parameter; used for the shelf keys, the
                cutoff lookup ``OCut[O]`` and the output file name.

        Returns:
            None. The tuple ``(self.Temps, foldfrac, err)`` is pickled to
            ``FMT['FOLDCURVE'] % (self.Prefix, O)``.
        """
        picklename = FMT['FOLDCURVE'] % (self.Prefix, O)
        # the result is cached on disk; an existing file is never recomputed
        if os.path.isfile(picklename): return
        # see if config weights need to be recalculated
        if self.ReInitWeights: self.GetConfigWeights()

        d = shelve.open(self.DataShelf)
        try:
            # retrieve all data for orderparam, one row per temperature
            x_kn = np.zeros([len(self.Temps), self.NFrames], np.float64)
            for k, t in enumerate(self.Temps):
                x_kn[k, :] = d[self.genShelfKey(O, t)]

            # get overall max and min and bin w.r.t them
            # NOTE(review): the padding here shrinks the range (max is scaled
            # down, min is scaled up), whereas PMF() widens it -- confirm
            # which direction is intended.
            x_max = x_kn.max() * (1. - measure.HistPadding)
            x_min = x_kn.min() * (1. + measure.HistPadding)
            dx = (x_max - x_min) / float(NBins)
            x_centers = x_min + dx * (0.5 + np.arange(NBins))
            cut_inds = (x_centers <= OCut[O])

            # every shelf access unpickles the stored value, so read it once
            w_kn = d['w_kn']

            # computing folding fraction block by block
            BlockSize = int(self.NFrames / NBlocks)
            foldfrac_block = np.zeros([len(self.Temps), NBlocks])
            for k, t in enumerate(self.Temps):
                print('Target Temp = %3.2f K' % t)
                for b in range(NBlocks):
                    if NBlocks > 1: print(' Block:  %d' % b)
                    start = b * BlockSize
                    # the last block also takes the leftover frames
                    stop = (b + 1) * BlockSize if b != NBlocks - 1 else self.NFrames
                    # get config weights
                    weights = w_kn[(k, b)].flatten()
                    print('Calculating histograms...')
                    x = x_kn[:, start:stop].flatten()
                    # a single histogram per block; block statistics are
                    # accumulated in foldfrac_block instead
                    measure.NBins = NBins
                    measure.NBlocks = 1
                    this_bin_centers, this_hist, this_err = measure.makeHist(
                        x, weights=weights, bintuple=(x_min, x_max, dx))
                    # folding fraction at (temp, block) = cumulative histogram
                    # within the cutoff, relative to the total
                    foldfrac_block[k, b] = this_hist[cut_inds].sum() / float(
                        this_hist.sum())
        finally:
            # release the shelf even if a lookup or the histogramming fails
            d.close()

        foldfrac = np.mean(foldfrac_block, axis=1)
        if NBlocks > 1: err = np.std(foldfrac_block, axis=1, ddof=1)
        else: err = np.zeros(len(self.Temps))
        # write to pickle (binary mode works on both Python 2 and 3)
        ret = (self.Temps, foldfrac, err)
        with open(picklename, 'wb') as of:
            pickle.dump(ret, of)
        return

예제 #4

0

파일 보기

파일: cgprotein.py 프로젝트: tanmoy7989/protein_model

    def PMF(self, O):
        """Compute the 1-D potential of mean force along ``O`` and pickle it.

        The order-parameter samples stored in the data shelf for each entry
        of ``self.Temps`` are pooled, split into ``NBlocks`` blocks of frames
        and histogrammed with the configuration weights of the temperature
        in ``self.Temps`` closest to ``self.TempSet``. Each block histogram
        is converted to ``-kB * TempSet * log(hist)``; the block mean is
        passed through ``TrimPMF`` and, when ``NBlocks > 1``, the sample
        standard deviation over blocks is stored as the error.

        Nothing is done when the output pickle already exists.

        Args:
            O: name of the order parameter; used for the shelf keys and the
                output file name.

        Returns:
            None. The tuple ``(bin_centers, pmf, err)`` is pickled to
            ``FMT['PMF1D'] % (self.Prefix, self.TempSet, O)``.
        """
        picklename = FMT['PMF1D'] % (self.Prefix, self.TempSet, O)
        # the result is cached on disk; an existing file is never recomputed
        if os.path.isfile(picklename): return
        # see if config weights need to be recalculated
        if self.ReInitWeights: self.GetConfigWeights()

        d = shelve.open(self.DataShelf)
        try:
            # extract order params, one row per temperature
            x_kn = np.zeros([len(self.Temps), self.NFrames], np.float64)
            for k, t in enumerate(self.Temps):
                x_kn[k, :] = d[self.genShelfKey(O, t)]

            # get overall max and min and bin w.r.t them
            x_min = x_kn.min() * (1.0 - measure.HistPadding)
            x_max = x_kn.max() * (1.0 + measure.HistPadding)
            dx = (x_max - x_min) / float(NBins)
            bintuple = (x_min, x_max, dx)

            # The weights must belong to the temperature the PMF is reported
            # at. The previous code reused the loop variable ``k`` left over
            # from the loop above, i.e. always the last temperature.
            TempInd = min(range(len(self.Temps)),
                          key=lambda j: abs(self.Temps[j] - self.TempSet))
            # every shelf access unpickles the stored value, so read it once
            w_kn = d['w_kn']

            # compute PMF block by block
            BlockSize = int(self.NFrames / NBlocks)
            pmf_block = np.zeros([NBlocks, NBins])
            for b in range(NBlocks):
                if NBlocks > 1: print('Block:  %d' % b)
                start = b * BlockSize
                # the last block also takes the leftover frames
                stop = (b + 1) * BlockSize if b != NBlocks - 1 else self.NFrames
                x = x_kn[:, start:stop].flatten()
                weights = w_kn[(TempInd, b)].flatten()
                # a single histogram per block; block statistics are
                # accumulated in pmf_block instead
                measure.NBins = NBins
                measure.NBlocks = 1
                bin_centers, this_hist, this_err = measure.makeHist(
                    x, weights=weights, bintuple=bintuple)
                pmf_block[b, :] = -(kB * self.TempSet) * np.log(this_hist)
        finally:
            # release the shelf even if a lookup or the histogramming fails
            d.close()

        # trim the pmf
        pmf = np.mean(pmf_block, axis=0)
        pmf = TrimPMF(pmf, Dim=1)
        if NBlocks > 1: err = np.std(pmf_block, axis=0, ddof=1)
        else: err = np.zeros(NBins)
        # write to pickle (binary mode works on both Python 2 and 3)
        ret = (bin_centers, pmf, err)
        with open(picklename, 'wb') as of:
            pickle.dump(ret, of)
        return

예제 #5

0

파일 보기

파일: cgprotein.py 프로젝트: tanmoy7989/protein_model

 def GetCO(self):
     """Return the histogram of the per-frame contact order.

     Calls ``self.ResContacts_frame()``, loads the residue contact maps from
     the ``RESCONTACTS`` pickle of this prefix and temperature, evaluates
     ``self.p.GetCO`` on every frame's contact map and histograms the finite
     values with ``measure.makeHist`` (normalized, using the module-level
     ``NBins`` and ``NBlocks``).

     Returns:
         Whatever ``measure.makeHist`` returns for the contact-order values.
     """
     # presumably (re)generates the pickle read below -- not visible here
     self.ResContacts_frame()
     pickle_path = FMT['RESCONTACTS'] % (self.Prefix, self.Temp)
     with open(pickle_path, 'r') as fh:
         ContactMap, _ = pickle.load(fh)

     n_frames = ContactMap.shape[0]
     CO = np.zeros(n_frames)
     for frame_idx in range(n_frames):
         CO[frame_idx] = self.p.GetCO(ContactMap=ContactMap[frame_idx, :, :])

     # remove nans and infs, if any
     CO = CO[~(np.isnan(CO) | np.isinf(CO))]

     # makeHist is configured through attributes of the measure module
     measure.NBins = NBins
     measure.NBlocks = NBlocks
     measure.Normalize = True
     return measure.makeHist(CO)

예제 #6

0

파일 보기

파일: cgprotein.py 프로젝트: tanmoy7989/protein_model

 def GetFracNativeContacts(self):
     """Return the histogram of the per-frame fraction of native contacts.

     The native contact map comes from ``self.pNative.GetResContacts()``.
     For every frame of the ``RESCONTACTS`` pickle, the contact-map entries
     at the native-contact positions are summed and divided by the sum of
     the native contact map. The fractions are histogrammed with
     ``measure.makeHist`` (normalized, using the module-level ``NBins`` and
     ``NBlocks``).

     Returns:
         Whatever ``measure.makeHist`` returns for the per-frame fractions.
     """
     # get AA contact map
     NativeContactMap, NativeContactDist = self.pNative.GetResContacts()
     ind = (NativeContactMap == 1)
     # Loop-invariant denominator. The float() forces true division: on
     # Python 2, integer-typed maps would otherwise floor every fraction.
     n_native = float(np.sum(NativeContactMap))
     # get CG contact map
     self.ResContacts_frame()
     # get per frame fraction of native contacts
     ContactPickle = FMT['RESCONTACTS'] % (self.Prefix, self.Temp)
     # NOTE(review): text mode only works for pickle on Python 2; confirm how
     # the file is written before switching this to 'rb'.
     with open(ContactPickle, 'r') as of:
         data = pickle.load(of)
     ContactMap, ContactDist = data
     NFrames = ContactMap.shape[0]
     frac = np.zeros(NFrames)
     # the 2-D boolean mask selects the native-contact entries of every frame
     # at once, giving one row of entries per frame
     frac[:] = ContactMap[:, ind].sum(axis=1)
     frac /= n_native
     # makeHist is configured through attributes of the measure module
     measure.NBins = NBins
     measure.NBlocks = NBlocks
     measure.Normalize = True
     hist = measure.makeHist(frac)
     return hist

예제 #7

0

파일 보기

파일: cgprotein.py 프로젝트: tanmoy7989/protein_model

 def QuickRMSD(self):
     """Return the histogram of the overall RMSD without writing any file.

     Meant to be fast and callable from replica and other external routines.
     For each frame in ``self.FrameRange`` the positions are pushed into
     ``self.p`` and ``self.p.QuickRMSD(self.pNative)`` is recorded; the
     values are then histogrammed with ``measure.makeHist``.

     Returns:
         Whatever ``measure.makeHist`` returns for the per-frame RMSD values.
     """
     banner = 'Calculating overall RMSD for %s at %3.2f K...' % (
         self.OutPrefix, self.Temp)
     pb = sim.utility.ProgressBar(banner, Steps=self.NFrames)

     # one RMSD value per frame
     rmsd_frame = np.zeros(self.NFrames)
     for slot, frame in enumerate(self.FrameRange):
         self.p.UpdatePos(self.Trj[frame])
         rmsd_frame[slot] = self.p.QuickRMSD(self.pNative)
         pb.Update(slot)

     # makeHist is configured through attributes of the measure module
     measure.NBins = NBins
     measure.NBlocks = NBlocks
     measure.NFrames = self.NFrames
     return measure.makeHist(rmsd_frame)

예제 #8

0

파일 보기

import os, numpy as np
import matplotlib; matplotlib.use('Agg')
import matplotlib.pyplot as plt
import measure

# Lennard-Jones parameters of the two reference force fields, keyed by name.
sigma_lj = {'ala_spc': 3.7811, 'leu15': 3.7845}
epsilon_lj = {'ala_spc': 2.6623, 'leu15': 2.4763}

# Distances between native contacts, loaded from a plain-text table.
nativedistfile = os.path.abspath('../cgff/leu15_protg/nativelj/nativecontactdist.txt')
d = np.loadtxt(nativedistfile)
# Scale the smallest distance by 2**(-1/6); presumably this treats it as the
# position of the LJ minimum (r_min = 2**(1/6) * sigma) -- confirm.
sigma0 = d.min() * (2**(-1/6.))

# makeHist is configured through attributes of the measure module:
# 25 bins, a single block, normalized output.
measure.NBins = 25
measure.NBlocks = 1
measure.Normalize = True
x, y, err = measure.makeHist(d)

plt.plot(x, y, 'b-', lw = 2)

# Mark each reference sigma; epsilon is divided by 0.6 for the k_B T label
# (presumably k_B T in kcal/mol near room temperature -- confirm).
l1 = 'ala_spc\n' + r'$\sigma = %2.2f \AA$' % sigma_lj['ala_spc'] + ' , ' + r'$\epsilon = %2.2f k_B T$' % (epsilon_lj['ala_spc'] / 0.6)
plt.axvline(sigma_lj['ala_spc'], ls = '--', lw = 2, color = 'red', label = l1)

l2 = 'leu15\n' + r'$\sigma = %2.2f \AA$' % sigma_lj['leu15'] + ' , ' + r'$\epsilon = %2.2f k_B T$' % (epsilon_lj['leu15'] / 0.6)
plt.axvline(sigma_lj['leu15'], ls = '--', lw = 2, color = 'black', label = l2)

# Raw string: '\s' and '\A' are not valid escape sequences, so the non-raw
# literal only worked by accident and warns on newer interpreters.
l3 = r'$\sigma_0 = %2.2f \AA$' % sigma0
plt.axvline(sigma0, ls = '--', lw = 2, color = 'blue', label = l3)

plt.legend()
plt.xlabel('res-res distance between native contacts ' + r'$ (\AA)$')
plt.ylabel('distribution')

예제 #9

0

파일 보기

파일: test_nonnative_ss_dist.py 프로젝트: tanmoy7989/protein_model

    # Fill d_ss_nonnative_traj[n, k] with the distance, in frame n, between
    # the two sites of the k-th pair listed in c_nonnative.
    # count is the running number of distances computed; it drives the
    # progress bar.
    count = 0
    for n in range(NFrames):
        Pos = trj[n]
        for k, (i, j) in enumerate(c_nonnative):
            # map the pair's entries to row indices of Pos
            si = SInds_nonnative[i]
            sj = SInds_nonnative[j]
            # Euclidean length of the displacement between the two sites
            d_si_sj = Pos[sj] - Pos[si]
            d = np.sqrt(np.sum(d_si_sj * d_si_sj))
            d_ss_nonnative_traj[n, k] = d
            pb.Update(count)
            count += 1
    # save the table as text; the code that follows loads this same file
    np.savetxt('protg_traj_nonnative_ss.txt', d_ss_nonnative_traj)

# Load the trajectory distances from disk and keep a handle on them in `ret`.
d_ss_nonnative_traj = np.loadtxt('protg_traj_nonnative_ss.txt')
ret = d_ss_nonnative_traj

# Histogram the distances of the pdb structure and of the trajectory.
h1 = measure.makeHist(d_ss_nonnative_pdb)
h2 = measure.makeHist(d_ss_nonnative_traj)

# Overlay both distributions on a single square panel.
fig = plt.figure(figsize=(5, 5), facecolor='w', edgecolor='w')
ax = fig.add_subplot(1, 1, 1)
for _hist, _fmt, _label in ((h1, 'b-', 'Native Struct'),
                            (h2, 'r-', 'AA Traj')):
    ax.plot(_hist[0], _hist[1], _fmt, lw=2, label=_label)
ax.legend()
ax.set_xlabel('s-s distance between nonnative contacts')
ax.set_ylabel('distribution')
fig.tight_layout()

# Write the image first, then open the interactive window.
fig.savefig('test_protg.png', bbox_inches='tight')
plt.show()