예제 #1
0
    def avgRmsd(self, cluster, aMask=None, threshold=0.):
        """
        Calculate the average pairwise rmsd (in Angstrom) for members
        of a cluster.

        @param cluster: cluster number
        @type  cluster: int       
        @param aMask: atom mask applied before calculation
        @type  aMask: [1|0]
        @param threshold: float 0-1, minimal cluster membership,
                          see L{memberTraj()}
        @type  threshold: float

        @return: average rmsd and the standard deviation 
        @rtype: float, float   
        """
        try:
            rms = self.memberTraj(cluster, threshold).pairwiseRmsd(aMask)
            rms = aboveDiagonal(rms)
        except Exception:
            ## best-effort: fall back to an empty result rather than fail;
            ## was a bare 'except:' which also swallowed KeyboardInterrupt
            rms = []

        ## number of pairwise distances actually collected
        n_rms = len(N.ravel(rms))

        if n_rms == 1:
            ## a single distance has no spread to report
            ## was: return N.average(rms)[0], 0.0
            return N.average(rms), 0.0
        if n_rms == 0:
            return 0.0, 0.0

        return N.average(rms), SD(rms)
예제 #2
0
    def avgRmsd( self, cluster, aMask=None, threshold=0. ):
        """
        Calculate the average pairwise rmsd (in Angstrom) for members
        of a cluster.

        @param cluster: cluster number
        @type  cluster: int       
        @param aMask: atom mask applied before calculation
        @type  aMask: [1|0]
        @param threshold: float 0-1, minimal cluster membership,
                          see L{memberTraj()}
        @type  threshold: float

        @return: average rmsd and the standard deviation 
        @rtype: float, float   
        """
        try:
            rms = self.memberTraj(cluster,threshold).pairwiseRmsd( aMask )
            rms = aboveDiagonal( rms )
        except:
            ## best-effort: any failure yields an empty rms list below
            rms = []

        ## a single pairwise distance has no spread to report
        if len(N.ravel(rms)) == 1:
            ## was: return N.average(rms)[0], 0.0
            return N.average(rms), 0.0
        if len(N.ravel(rms)) == 0:
            return 0.0, 0.0

        return N.average( rms ), SD( rms )
예제 #3
0
    def calcClusterNumber(self,
                          min_clst=5,
                          max_clst=30,
                          rmsLimit=1.0,
                          weight=1.13,
                          converged=1e-11,
                          aMask=None,
                          force=0):
        """
        Calculate the approximate number of clusters needed to pass
        the average intra-cluster rmsd limit, by bisecting between
        min_clst and max_clst.

        @param min_clst: lower limit for clusters (default: 5)
        @type  min_clst: int
        @param max_clst: upper limit for clusters (default: 30 )
        @type  max_clst: int
        @param rmsLimit: rmsd criteria that the average of all clusters
                         must meet in Angstrom (default: 1.0)
        @type  rmsLimit: float
        @param weight: fuzziness weigth (default: 1.13)
        @type  weight: float
        @param converged: stop iteration if min dist changes less than
                          converged (default: 1e-11)
        @type  converged: float
        @param force: re-calculate even if parameters haven't changed
                      (default: 0)
        @type  force: 1|0

        @return: number of clusters
        @rtype: int

        @raise ClusterError: if can't determining number of clusters
        """
        ## current bisection interval [lower, upper]
        pos = [min_clst, max_clst]

        while 1:
            ## probe the midpoint of the interval
            clst = int(N.average(pos))
            self.cluster(clst, weight, converged, aMask, force=force)
            rmsLst = [self.avgRmsd(i, aMask)[0] for i in range(clst)]

            ## too coarse -> need more clusters; fine enough -> try fewer
            if N.average(rmsLst) > rmsLimit:
                pos[0] = clst
            else:
                pos[1] = clst

            ## interval collapsed to one step -> converged on pos[1]
            if pos[1] - pos[0] == 1:
                if self.verbose:
                    T.flushPrint(
                        'Converged at %i clusters, current average cluster rmsd %.2f\n'
                        % (clst, N.average(rmsLst)))
                return pos[1]

            if pos[1] - pos[0] != 1:
                if self.verbose:
                    T.flushPrint(
                        'Current cluster setting %i, current average cluster rmsd %.2f\n'
                        % (clst, N.average(rmsLst)))

            ## interval inverted or out of bounds -> bisection failed
            if pos[1] - pos[0] <= 0 or pos[0] < min_clst or pos[1] > max_clst:
                raise ClusterError, "Error determining number of clusters"
예제 #4
0
def linfit( x, y ):
    """
    Calculate linear least-square fit to the points given by x and y.
    see U{http://mathworld.wolfram.com/LeastSquaresFitting.html}

    @param x: x-data
    @type  x: [ float ]
    @param y: y-data
    @type  y: [ float ]

    @return: m, n, r^2 (slope, intersection, corr. coefficient)
    @rtype: float, float, float

    @raise BiskitError: if x and y have different number of elements
    """
    x, y = N.array( x, N.Float64), N.array( y, N.Float64)
    if len( x ) != len( y ):
        raise Exception, 'linfit: x and y must have same length'

    av_x = N.average( x )
    av_y = N.average( y )
    n = len( x )

    ss_xy = N.sum( x * y ) - n * av_x * av_y
    ss_xx = N.sum( x * x ) - n * av_x * av_x
    ss_yy = N.sum( y * y ) - n * av_y * av_y

    slope = ss_xy / ss_xx

    inter = av_y - slope * av_x

    corr  = ss_xy**2 / ( ss_xx * ss_yy )

    return slope, inter, corr
예제 #5
0
def linfit(x, y):
    """
    Calculate linear least-square fit to the points given by x and y.
    see U{http://mathworld.wolfram.com/LeastSquaresFitting.html}

    @param x: x-data
    @type  x: [ float ]
    @param y: y-data
    @type  y: [ float ]

    @return: m, n, r^2 (slope, intersection, corr. coefficient)
    @rtype: float, float, float

    @raise BiskitError: if x and y have different number of elements
    """
    x, y = N.array(x, N.Float64), N.array(y, N.Float64)
    if len(x) != len(y):
        raise Exception, 'linfit: x and y must have same length'

    av_x = N.average(x)
    av_y = N.average(y)
    n = len(x)

    ## corrected sums of squares / cross-products
    ss_xy = N.sum(x * y) - n * av_x * av_y
    ss_xx = N.sum(x * x) - n * av_x * av_x
    ss_yy = N.sum(y * y) - n * av_y * av_y

    slope = ss_xy / ss_xx

    ## intercept: line passes through the mean point
    inter = av_y - slope * av_x

    ## squared correlation coefficient
    corr = ss_xy**2 / (ss_xx * ss_yy)

    return slope, inter, corr
예제 #6
0
파일: rmsFit.py 프로젝트: ostrokach/biskit
def findTransformation(x, y):
    """
    Match two arrays by rotation and translation. Returns the
    rotation matrix and the translation vector.

    @param x: first set of coordinates
    @type  x: array('f')
    @param y: second set of coordinates
    @type  y: array('f')

    @return: rotation matrix (3x3) and translation vector (1x3)
    @rtype:  array, array
    """
    ## shift both coordinate sets to their geometric centers
    centroid_x = N.average(x)
    centroid_y = N.average(y)

    xc = x - centroid_x
    yc = y - centroid_y

    ## singular value decomposition of the correlation matrix
    v, l, u = svd(N.dot(N.transpose(xc), yc))

    ## rotation from the left/right singular vectors
    rot = N.dot(v, u)

    ## translation mapping the center of y onto the center of x
    trans = centroid_x - N.dot(rot, centroid_y)

    return rot, trans
예제 #7
0
파일: rmsFit.py 프로젝트: ostrokach/biskit
def findTransformation(x, y):
    """
    Match two arrays by rotation and translation. Returns the
    rotation matrix and the translation vector.

    @param x: first set of coordinates
    @type  x: array('f')
    @param y: second set of coordinates
    @type  y: array('f')

    @return: rotation matrix (3x3) and translation vector (1x3)
    @rtype:  array, array
    """
    ## center configurations
    x_av = N.average(x)
    y_av = N.average(y)

    x = x - x_av
    y = y - y_av

    ## svd of correlation matrix (singular values 'l' are not needed)
    v, l, u = svd(N.dot(N.transpose(x), y))

    ## build rotation matrix and translation vector
    r = N.dot(v, u)

    ## translation maps the center of y onto the center of x
    t = x_av - N.dot(r, y_av)

    return r, t
예제 #8
0
def runningAverage(x, interval=2, preserve_boundaries=0):
    """
    Running average (smoothing) over a given data window.

    @param x: data
    @type  x: list of int/float
    @param interval: window size C{ (-(interval-1)/2 to +(interval-1)/2) }
                     (default: 2)
    @type  interval: int
    @param preserve_boundaries: shrink window at edges to keep original
                                start and end value (default: 0)
    @type  preserve_boundaries: 0|1

    @return: list of floats
    @rtype: [ float ]
    """
    if interval == 0:
        return x

    l = []

    ## half-width of the averaging window
    interval = int((interval - 1) / 2)

    if not preserve_boundaries:

        for i in range(len(x)):
            ## clip the window at the edges of the data
            left = max(0, i - interval)
            right = min(len(x), i + interval + 1)

            ## window was previously stored in a local named 'slice',
            ## shadowing the builtin; inlined here instead
            l.append(N.average(x[left:right]))
    else:

        for i in range(len(x)):

            left = i - interval
            right = i + interval + 1

            ## shift (rather than clip) the window at the edges so the
            ## first and last data points are reproduced exactly
            if left < 0:
                right = right + left
                left = 0
            if right > len(x):
                left = left + right - len(x)
                right = len(x)

            l.append(N.average(x[left:right]))

    return N.array(l)
예제 #9
0
def runningAverage( x, interval=2, preserve_boundaries=0 ):
    """
    Running average (smoothing) over a given data window.

    @param x: data
    @type  x: list of int/float
    @param interval: window size C{ (-(interval-1)/2 to +(interval-1)/2) }
                     (default: 2)
    @type  interval: int
    @param preserve_boundaries: shrink window at edges to keep original
                                start and end value (default: 0)
    @type  preserve_boundaries: 0|1

    @return: list of floats
    @rtype: [ float ]
    """

    if interval == 0:
        return x

    l = []

    ## half-width of the averaging window
    interval = int((interval-1)/2)

    if not preserve_boundaries:

        for i in range(len(x)):

            ## clip the window at the edges of the data
            left = max(0, i - interval)
            right = min(len(x), i + interval + 1)

            ## NOTE(review): 'slice' shadows the builtin of the same name
            slice = x[left:right]

            l.append(N.average(slice))
    else:

        for i in range( len(x) ):

            left = i - interval
            right= i + interval + 1

            ## shift (rather than clip) the window at the edges so the
            ## first and last data points are reproduced exactly
            if left < 0:
                right = right + left
                left = 0
            if right > len(x):
                left = left + right - len(x)
                right = len(x)

            slice = x[left:right]

            l.append(N.average(slice))

    return N.array(l)
예제 #10
0
    def calcClusterNumber( self, min_clst=5, max_clst=30, rmsLimit=1.0,
                           weight=1.13, converged=1e-11, aMask=None, force=0 ):
        """
        Calculate the approximate number of clusters needed to pass
        the average intra-cluster rmsd limit, by bisecting between
        min_clst and max_clst.

        @param min_clst: lower limit for clusters (default: 5)
        @type  min_clst: int
        @param max_clst: upper limit for clusters (default: 30 )
        @type  max_clst: int
        @param rmsLimit: rmsd criteria that the average of all clusters
                         must meet in Angstrom (default: 1.0)
        @type  rmsLimit: float
        @param weight: fuzziness weigth (default: 1.13)
        @type  weight: float
        @param converged: stop iteration if min dist changes less than
                          converged (default: 1e-11)
        @type  converged: float
        @param force: re-calculate even if parameters haven't changed
                      (default: 0)
        @type  force: 1|0

        @return: number of clusters
        @rtype: int

        @raise ClusterError: if can't determining number of clusters
        """
        ## current bisection interval [lower, upper]
        pos = [ min_clst, max_clst ]

        while 1:
            ## probe the midpoint of the interval
            clst = int( N.average(pos) )
            self.cluster( clst, weight, converged, aMask, force=force )
            rmsLst = [ self.avgRmsd(i, aMask)[0] for i in range(clst)]

            ## too coarse -> need more clusters; fine enough -> try fewer
            if N.average( rmsLst ) > rmsLimit:
                pos[0] = clst
            else:
                pos[1] = clst

            ## interval collapsed to one step -> converged on pos[1]
            if pos[1]-pos[0] == 1:
                if self.verbose:
                    T.flushPrint('Converged at %i clusters, current average cluster rmsd %.2f\n'%( clst, N.average( rmsLst ) ))
                return pos[1]

            if pos[1]-pos[0] != 1:
                if self.verbose:
                    T.flushPrint('Current cluster setting %i, current average cluster rmsd %.2f\n'%( clst, N.average( rmsLst ) ))

            ## interval inverted or out of bounds -> bisection failed
            if pos[1]-pos[0]<= 0 or pos[0]<min_clst or pos[1]>max_clst:
                raise ClusterError, "Error determining number of clusters"
예제 #11
0
    def pairwiseRmsd(self, aMask=None, noFit=0):
        """
        Calculate rmsd between each 2 coordinate frames.

        @param aMask: atom mask
        @type  aMask: [1|0]
        @param noFit: skip superposition, take rmsd of frames as-is
                      (default: 0)
        @type  noFit: 1|0

        @return: frames x frames array of float
        @rtype: array
        """
        frames = self.frames

        ## 'is not None' -- '!= None' triggers an element-wise comparison
        ## when aMask is an array
        if aMask is not None:
            frames = N.compress(aMask, frames, 1)

        result = N.zeros((len(frames), len(frames)), N.Float32)

        for i in range(0, len(frames)):

            for j in range(i + 1, len(frames)):
                if noFit:
                    ## plain rmsd between unfitted frames
                    d = N.sqrt(N.sum(N.power(frames[i] - frames[j], 2), 1))
                    result[i, j] = result[j, i] = N.sqrt(N.average(d**2))

                else:
                    ## superimpose first, use rmsd of the last fit iteration
                    rt, rmsdLst = rmsFit.match(frames[i], frames[j], 1)
                    result[i, j] = result[j, i] = rmsdLst[0][1]

        return result
예제 #12
0
def projectOnSphere( xyz, radius=None, center=None ):
    """
    Project the coordinates xyz on a sphere with a given radius around
    a given center.

    @param xyz: cartesian coordinates
    @type  xyz: array N x 3 of float
    @param radius: radius of target sphere, if not provided the maximal
                   distance to center will be used (default: None)
    @type  radius: float
    @param center: center of the sphere, if not given the average of xyz
                   will be assigned to the center (default: None)
    @type  center: array 0 x 3 of float

    @return: array of cartesian coordinates (x, y, z)
    @rtype: array    
    """
    ## default center: geometric mean of the coordinates
    if center is None:
        center = N.average( xyz )

    ## default radius: largest distance from the center
    if radius is None:
        dist = N.sqrt( N.sum( N.power( xyz - center, 2 ), 1 ) )
        radius = max( dist )

    ## overwrite the radial component of every point, keep the angles
    polar = cartesianToPolar( xyz - center )
    polar[ :, 0 ] = radius

    return polarToCartesian( polar ) + center
예제 #13
0
def logConfidence( x, R, clip=0 ):
    """
    Estimate the probability of x NOT being a random observation from a
    lognormal distribution that is described by a set of random values.

    @param x: observed value
    @type  x: float
    @param R: sample of random values
    @type  R: [float]
    @param clip: clip zeros at this value  0->don't clip (default: 0)
    @type  clip: float

    @return: confidence that x is not random, median of random distr.
    @rtype: (float, float)
    """
    ## optionally replace zeros by a small value before the log transform
    if clip and 0 in R:
        R = N.clip( R, clip, max( R ) )
    if clip and x == 0:
        x = clip

    ## remove 0 instead of clipping
    R = N.compress( R, R )
    if x == 0:
        return 0, 0

    ## get mean and stdv of log-transformed random sample
    alpha = N.average( N.log( R ) )

    n = len( R )

    ## unbiased (n-1) standard deviation of log(R)
    beta = N.sqrt(N.sum(N.power(N.log( R ) - alpha, 2)) / (n - 1.))

    return logArea( x, alpha, beta ), logMedian( alpha )
예제 #14
0
    def pairwiseRmsd( self, aMask=None, noFit=0 ):
        """
        Calculate rmsd between each 2 coordinate frames.

        @param aMask: atom mask
        @type  aMask: [1|0]
        @param noFit: skip superposition, take rmsd of frames as-is
                      (default: 0)
        @type  noFit: 1|0

        @return: frames x frames array of float
        @rtype: array
        """
        frames = self.frames

        ## NOTE(review): '!= None' would do an element-wise comparison if
        ## aMask were a numpy array -- 'is not None' is safer
        if aMask != None:
            frames = N.compress( aMask, frames, 1 )

        result = N.zeros( (len( frames ), len( frames )), N.Float32 )

        for i in range(0, len( frames ) ):

            for j in range( i+1, len( frames ) ):
                if noFit:
                    ## plain rmsd between unfitted frames
                    d = N.sqrt(N.sum(N.power(frames[i]-frames[j], 2), 1))
                    result[i,j] = result[j,i] = N.sqrt( N.average(d**2) )

                else:
                    ## superimpose first, use rmsd of the last fit iteration
                    rt, rmsdLst = rmsFit.match( frames[i], frames[j], 1 )
                    result[i,j] = result[j,i] = rmsdLst[0][1]

        return result
예제 #15
0
    def avgRmsd2Ref( self, cluster, ref, avg=1 ):
        """
        Calculate the rmsd (or average rmsd) of all frames belonging to a
        cluster to a reference structure (in Angstrom).

        @param cluster: cluster number
        @type  cluster: int
        @param ref: reference structure
        @type  ref: model
        @param avg: return the average rmsd (1) OR a list with all rmsds (0)
                    (default: 1)
        @type  avg: float OR [float]
        """
        ## all frames of the cluster, regardless of membership degree
        traj = self.memberTraj( cluster, threshold=0 )

        ## accept either a model instance or raw coordinates
        if type( ref ) == types.InstanceType:
            ref = ref.xyz

        ## rmsd of every frame to the reference (last fitting iteration)
        rmsds = []
        for frame in traj.frames:
            rt, fitLst = rmsFit.match( ref, frame )
            rmsds.append( fitLst[0][1] )

        if avg == 1:
            return N.average( rmsds )
        return rmsds
예제 #16
0
    def avgRmsd2Ref(self, cluster, ref, avg=1):
        """
        Calculate the rmsd (or average rmsd) of all frames belonging to a
        cluster to a reference structure (in Angstrom).

        @param cluster: cluster number
        @type  cluster: int
        @param ref: reference structure
        @type  ref: model
        @param avg: return the average rmsd (1) OR a list with all rmsds (0)
                    (default: 1)
        @type  avg: float OR [float]

        """
        ## all frames of the cluster, regardless of membership degree
        eTraj = self.memberTraj(cluster, threshold=0)
        rms = []

        ## accept either a model instance or raw coordinates
        if type(ref) == types.InstanceType:
            ref = ref.xyz

        ## rmsd of every frame to the reference (last fitting iteration)
        for frame in eTraj.frames:
            rt, rmsdLst = rmsFit.match(ref, frame)
            rms += [rmsdLst[0][1]]
        if avg == 1:
            return N.average(rms)
        return rms
예제 #17
0
def logConfidence(x, R, clip=0):
    """
    Estimate the probability of x NOT being a random observation from a
    lognormal distribution that is described by a set of random values.

    @param x: observed value
    @type  x: float
    @param R: sample of random values
    @type  R: [float]
    @param clip: clip zeros at this value  0->don't clip (default: 0)
    @type  clip: float

    @return: confidence that x is not random, median of random distr.
    @rtype: (float, float)
    """
    ## optionally replace zeros by a small value before the log transform
    if clip and 0 in R:
        R = N.clip(R, clip, max(R))
    if clip and x == 0:
        x = clip

    ## without clipping, zeros are removed instead
    R = N.compress(R, R)
    if x == 0:
        return 0, 0

    ## mean and unbiased standard deviation of the log-transformed sample
    logR = N.log(R)
    alpha = N.average(logR)
    n = len(R)
    beta = N.sqrt(N.sum(N.power(logR - alpha, 2)) / (n - 1.))

    return logArea(x, alpha, beta), logMedian(alpha)
예제 #18
0
def projectOnSphere(xyz, radius=None, center=None):
    """
    Project the coordinates xyz on a sphere with a given radius around
    a given center.

    @param xyz: cartesian coordinates
    @type  xyz: array N x 3 of float
    @param radius: radius of target sphere, if not provided the maximal
                   distance to center will be used (default: None)
    @type  radius: float
    @param center: center of the sphere, if not given the average of xyz
                   will be assigned to the center (default: None)
    @type  center: array 0 x 3 of float

    @return: array of cartesian coordinates (x, y, z)
    @rtype: array    
    """
    ## default center: geometric mean of the coordinates
    if center is None:
        center = N.average(xyz)

    ## default radius: largest distance from the center
    if radius is None:
        radius = max(N.sqrt(N.sum(N.power(xyz - center, 2), 1)))

    ## overwrite the radial component of every point, keep the angles
    rtp = cartesianToPolar(xyz - center)
    rtp[:, 0] = radius

    return polarToCartesian(rtp) + center
예제 #19
0
    def averageRms( self ):
        """
        @return: average pairwise rmsd and it's standard deviation
        @rtype: (float, float)

        @raise FlexError: if there are no results yet
        """
        rms = self.rmsList()
        avg = N.average( rms )
        sd  = mathUtils.SD( rms )
        return avg, sd
예제 #20
0
 def centerModel( self, model ):
     """
     Center model: move its geometric center into the origin (in place).
     
     @param model: model to center
     @type  model: PDBModel
     """
     ## geometric center = mean over all atom positions
     center = N.average( model.getXyz() )
     model.setXyz( model.xyz - center )
예제 #21
0
 def centerModel(self, model):
     """
     Center model: move its geometric center into the origin (in place).
     
     @param model: model to center
     @type  model: PDBModel
     """
     ## geometric center = mean over all atom positions
     center = N.average(model.getXyz())
     model.setXyz(model.xyz - center)
예제 #22
0
def slidingAverage( y, window=2 ):
    """
    Smooth y with a sliding averaging window. The result is shorter than
    the input by 2 * margin entries (no padding at the edges).

    NOTE(review): each slice spans y[i-margin : i+margin] and thus excludes
    the right edge of the window -- confirm this asymmetry is intended.

    @param y: data to smooth
    @param window: window size (0 -> return y unchanged, default: 2)

    @return: list of window averages
    """
    if window == 0:
        return y

    assert window < len(y), 'window size too large for array'

    margin = int(round((window-1)/2.))

    result = []
    for i in range( margin, len(y)-margin ):
        result.append( N.average( y[i-margin : i+margin] ) )

    return result
예제 #23
0
    def getFluct_global(self, mask=None):
        """
        Get RMS of each atom from it's average position in trajectory.
        The frames should be superimposed (fit() ) to a reference.

        @param mask: N x 1 list/Numpy array of 0|1, (N=atoms),
                     atoms to be considered.
        @type  mask: [1|0]

        @return: Numpy array ( N_unmasked x 1 ) of float.
        @rtype: array
        """
        frames = self.frames
        if mask is not None:
            frames = N.compress(mask, frames, 1)

        ## mean position of each atom in all frames
        ## (relies on Numeric's N.average defaulting to axis 0)
        avg = N.average(frames)

        ## per-atom RMS deviation from the mean position, averaged over
        ## frames (outer average again along axis 0)
        return N.average(N.sqrt(N.sum(N.power(frames - avg, 2), 2)))
예제 #24
0
    def getFluct_global( self, mask=None ):
        """
        Get RMS of each atom from it's average position in trajectory.
        The frames should be superimposed (fit() ) to a reference.

        @param mask: N x 1 list/Numpy array of 0|1, (N=atoms),
                     atoms to be considered.
        @type  mask: [1|0]

        @return: Numpy array ( N_unmasked x 1 ) of float.
        @rtype: array
        """
        frames = self.frames
        if mask is not None:
            frames = N.compress( mask, frames, 1 )

        ## mean position of each atom in all frames
        ## (relies on Numeric's N.average defaulting to axis 0)
        avg = N.average( frames )

        ## per-atom RMS deviation from the mean position, averaged over
        ## frames (outer average again along axis 0)
        return N.average(N.sqrt(N.sum(N.power(frames - avg, 2), 2) ))
예제 #25
0
def slidingAverage(y, window=2):
    """
    Smooth y with a sliding averaging window. The result is shorter than
    the input by 2 * margin entries (no padding at the edges).

    NOTE(review): each slice spans y[i-margin : i+margin] and thus excludes
    the right edge of the window -- confirm this asymmetry is intended.

    @param y: data to smooth
    @param window: window size (0 -> return y unchanged, default: 2)

    @return: list of window averages
    """
    if window == 0:
        return y

    assert window < len(y), 'window size too large for array'

    ## half-width of the window
    margin = int(round((window - 1) / 2.))

    return [
        N.average(y[i - margin:i + margin])
        for i in range(margin,
                       len(y) - margin)
    ]
예제 #26
0
    def avgModel( self ):
        """
        Return a PDBModel with coordinates that are the average of
        all frames.

        @return: PDBModel with average structure of trajectory (no fitting!) 
                 this trajectory's ref is the source of result model
        @rtype: PDBModel
        """
        ## copy the reference model without coordinates ...
        avg = PDBModel( self.getRef(), noxyz=1 )
        ## ... and attach the frame-averaged coordinates
        avg.setXyz( N.average( self.frames ) )
        return avg
예제 #27
0
    def avgModel(self):
        """
        Return a PDBModel with coordinates that are the average of
        all frames.

        @return: PDBModel with average structure of trajectory (no fitting!) 
                 this trajectory's ref is the source of result model
        @rtype: PDBModel
        """
        ## copy of the reference model without coordinates
        result = PDBModel(self.getRef(), noxyz=1)
        ## attach the frame-averaged coordinates
        result.setXyz(N.average(self.frames))

        return result
예제 #28
0
    def calcProfiles(self, m):
        """
        Calculate needed profiles.

        @param m: PDBModel to calculate data for
        @type  m: PDBModel

        @return: residue-level profile (atom profiles are averaged per
                 residue)
        @rtype: list of float
        """
        if self.verbose: print "Initiating PDBDope..."
        d = PDBDope(m)

        ## atom-level profiles: add the data source that provides them
        if not self.profileName in m.atoms.keys():

            if self.profileName in ['MS', 'AS', 'curvature', 'relAS', 'relMS']:
                if self.verbose: print "Adding SurfaceRacer profile...",
                d.addSurfaceRacer()

            if self.profileName in ['relASA']:
                if self.verbose: print "Adding WhatIf ASA...",
                d.addASA()

            if self.profileName in ['density']:
                if self.verbose: print "Adding surface density...",
                d.addDensity()

        ## residue-level profiles
        if not self.profileName in m.residues.keys():

            if self.profileName in ['cons_abs', 'cons_max', 'cons_ent']:
                if self.verbose: print "Adding conservation data...",
                d.addConservation()

            if self.profileName in ['ASA_total', 'ASA_sc', 'ASA_bb']:
                if self.verbose: print "Adding WhatIf ASA...",
                d.addASA()

        if self.verbose: print 'Done.'

        ## convert atom profiles to average residue profile
        if self.profileName in m.atoms.keys():
            prof = []
            aProfile = m.profile(self.profileName)
            ## residue boundaries as atom indices (+ end sentinel)
            resIdx = m.resIndex().tolist()
            resIdx += [m.lenAtoms()]
            for i in range(len(resIdx) - 1):
                prof += [
                    N.average(N.take(aProfile, range(resIdx[i],
                                                     resIdx[i + 1])))
                ]
        else:
            prof = m.profile(self.profileName)

        return prof
예제 #29
0
    def outliers(self,
                 z=1.0,
                 mask=None,
                 prof='rmsCA_last',
                 last=10,
                 step=1,
                 verbose=1):
        """
        Identify outlier trajectories. First we calculate the CA-RMS of every
        |step|th frame to the last frame. Outliers are member trajectories for
        which the slope of this rms profile is z standard deviations below the
        mean of all members.
        
        @param z: z-value threshold
        @type  z: float
        @param mask: atom mask used (default: ref.maskCA())
        @type  mask: [int]
        @param prof: name of pre-calculated profile to use
                     (default: 'rmsCA_last')
        @type  prof: str
        @param last: skip |last| last frames from linear regression
        @type  last: int
        @param step: frame offset
        @type  step: int
        
        @return: member mask of outlier trajectories
        @rtype: [0|1]
        """
        if mask is None: mask = self.ref.maskCA()

        traj = self.compressAtoms(mask)
        if step != 1:
            traj = traj.thin(step)

        ## calculate the profile only if it is not cached yet
        if not prof in traj.profiles:
            traj.fitMembers(refIndex=-1, prof=prof, verbose=verbose)

        p_all = traj.profiles[prof]
        n = traj.n_members
        l = len(traj)

        ## de-interlace the concatenated member trajectories and drop the
        ## |last| frames of each member
        pm = [p_all[member:l:n][:-last] for member in range(n)]

        ## slope of a linear fit through each member's rms profile
        ## NOTE(review): 'l / n' relies on Python 2 integer division
        slopes = [M.linfit(range(l / n - last), p)[0] for p in pm]

        mean, sd = N.average(slopes), M.SD(slopes)

        ## outlier: slope more than z standard deviations below the mean
        return [r - mean < -z * sd for r in slopes]
예제 #30
0
def variance(x, avg = None):
    """
    Variance, S{sigma}^2 (unbiased, n-1 denominator).

    @param x: data
    @type  x: array('f') or float
    @param avg: use this average, otherwise calculated from x
    @type  avg: float OR None

    @return: float
    @rtype: float    
    """
    if avg is None:
        avg = N.average(x)

    n = len(x)

    ## a single observation has no spread
    if n == 1:
        return 0.0

    dev = N.array(x) - avg
    return N.sum(N.power(dev, 2)) / (n - 1.)
예제 #31
0
def report( tc ):
    ## print a per-cluster summary (avg rmsd +- SD and member frame names)
    ## followed by the overall average pairwise rmsd of all frames

    clTrajs = tc.memberTrajs()

    for i in range(0, tc.n_clusters ):
        
        t = clTrajs[i]
        rms = tc.avgRmsd( i, tc.aMask )

        ## short labels: last two '_'-separated parts of each frame name
        names = [ '_'.join(T.stripFilename(s).split('_')[-2:])
                  for s in t.frameNames]

        print "%i <%4.2f +-%4.2f>: " % (i, rms[0],rms[1] ), names
        print

    ## merge all cluster member trajectories into a single one
    tr = clTrajs[0].concat( *tuple( clTrajs[1:] ) )

    avgall = N.average( MaU.aboveDiagonal( tr.pairwiseRmsd( tc.aMask ) ) )
    print "avg rms all: %4.2f" %  avgall
예제 #32
0
def variance(x, avg=None):
    """
    Variance, S{sigma}^2 (unbiased, n-1 denominator).

    @param x: data
    @type  x: array('f') or float
    @param avg: use this average, otherwise calculated from x
    @type  avg: float OR None

    @return: float
    @rtype: float    
    """
    if avg is None:
        avg = N.average(x)

    ## a single observation has no spread
    if len(x) == 1:
        return 0.0

    return N.sum(N.power(N.array(x) - avg, 2)) / (len(x) - 1.)
예제 #33
0
    def outliers( self, z=1.0, mask=None, prof='rmsCA_last', 
                  last=10, step=1, verbose=1  ):
        """
        Identify outlier trajectories. First we calculate the CA-RMS of every
        |step|th frame to the last frame. Outliers are member trajectories for
        which the slope of this rms profile is z standard deviations below the
        mean of all members.
        
        @param z: z-value threshold
        @type  z: float
        @param mask: atom mask used (default: ref.maskCA())
        @type  mask: [int]
        @param prof: name of pre-calculated profile to use
                     (default: 'rmsCA_last')
        @type  prof: str
        @param last: skip |last| last frames from linear regression
        @type  last: int
        @param step: frame offset
        @type  step: int
        
        @return: member mask of outlier trajectories
        @rtype: [0|1]
        """
        if mask is None: mask = self.ref.maskCA()

        traj = self.compressAtoms( mask )
        if step != 1:
            traj = traj.thin( step )

        ## calculate the profile only if it is not cached yet
        if not prof in traj.profiles:
            traj.fitMembers( refIndex=-1, prof=prof, verbose=verbose )

        p_all = traj.profiles[ prof ]
        n = traj.n_members
        l = len( traj )

        ## de-interlace the concatenated member trajectories and drop the
        ## |last| frames of each member
        pm = [ p_all[ member : l : n ][:-last] for member in range( n ) ]

        ## slope of a linear fit through each member's rms profile
        ## NOTE(review): 'l/n' relies on Python 2 integer division
        slopes = [ M.linfit( range( l/n - last ), p )[0] for p in pm ]

        mean, sd = N.average( slopes ), M.SD( slopes )

        ## outlier: slope more than z standard deviations below the mean
        return [ r - mean < - z * sd for r in slopes ]
예제 #34
0
def report(tc):
    ## print a per-cluster summary (avg rmsd +- SD and member frame names)
    ## followed by the overall average pairwise rmsd of all frames

    clTrajs = tc.memberTrajs()

    for i in range(0, tc.n_clusters):

        t = clTrajs[i]
        rms = tc.avgRmsd(i, tc.aMask)

        ## short labels: last two '_'-separated parts of each frame name
        names = [
            '_'.join(T.stripFilename(s).split('_')[-2:]) for s in t.frameNames
        ]

        print "%i <%4.2f +-%4.2f>: " % (i, rms[0], rms[1]), names
        print

    ## merge all cluster member trajectories into a single one
    tr = clTrajs[0].concat(*tuple(clTrajs[1:]))

    avgall = N.average(MaU.aboveDiagonal(tr.pairwiseRmsd(tc.aMask)))
    print "avg rms all: %4.2f" % avgall
예제 #35
0
    def reduceAtomProfiles( self, from_model, to_model ):
        """
        reduce all atom profiles according to the calculated map by calculating
        the average over the grouped atoms.
        
        @param from_model: model to read atom profiles from
        @type  from_model: PDBModel
        @param to_model: model that receives the reduced profiles
        @type  to_model: PDBModel
        """
        for profname in from_model.atoms:

            p0 =  from_model.atoms.get(profname)
            info = from_model.profileInfo( profname )

            try:
                ## average each atom group into one value per group
                pr = [ N.average( N.take( p0, group ) ) for group in self.groups ]

                to_model.atoms.set( profname, pr )
            except:
                ## best-effort: profiles that cannot be averaged
                ## (e.g. non-numeric) are skipped silently
                pass
                
            ## profile meta data is copied either way
            to_model.atoms.setInfo( profname, **info )
예제 #36
0
    def reduceAtomProfiles(self, from_model, to_model):
        """
        reduce all atom profiles according to the calculated map by calculating
        the average over the grouped atoms.
        
        @param from_model: model to read atom profiles from
        @type  from_model: PDBModel
        @param to_model: model that receives the reduced profiles
        @type  to_model: PDBModel
        """
        for profname in from_model.atoms:

            p0 = from_model.atoms.get(profname)
            info = from_model.profileInfo(profname)

            try:
                ## average each atom group into one value per group
                pr = [N.average(N.take(p0, group)) for group in self.groups]

                to_model.atoms.set(profname, pr)
            except Exception:
                ## best-effort: profiles that cannot be averaged (e.g.
                ## non-numeric) are skipped; was a bare 'except:' which
                ## also swallowed KeyboardInterrupt
                pass

            ## profile meta data is copied either way
            to_model.atoms.setInfo(profname, **info)
예제 #37
0
    def pca(self, atomMask=None, frameMask=None, fit=1):
        """
        Calculate principal components of trajectory frames.

        @param atomMask: 1 x N_atoms, [111001110..] atoms to consider
                         (default: all)
        @type  atomMask: [1|0]
        @param frameMask: 1 x N_frames, [001111..] frames to consider
                          (default all )
        @type  frameMask: [1|0]
        @param fit: superimpose the frames on the masked atoms first
                    (default: 1)
        @type  fit: 1|0

        @return: (N_frames x N_frames), (1 x N_frames),
                 projection of each frame in PC space, eigenvalue of each PC
        @rtype: array, array, array
        """
        if frameMask is None: frameMask = N.ones(len(self.frames), N.int32)

        if atomMask is None:
            atomMask = N.ones(self.getRef().lenAtoms(), N.int32)

        if fit:
            self.fit(atomMask)

        ## mean structure over all frames (frame axis)
        refxyz = N.average(self.frames, 0)

        data = N.compress(frameMask, self.frames, 0)

        ## center the data on the mean structure
        data = data - refxyz

        data = N.compress(atomMask, data, 1)

        ## reduce to 2D array (one flattened coordinate row per frame)
        data = N.array(map(N.ravel, data))

        V, L, U = LA.singular_value_decomposition(data)

        ## principal axes, projections, and eigenvalues (squared
        ## singular values)
        return U, V * L, N.power(L, 2)
예제 #38
0
    def pca( self, atomMask=None, frameMask=None, fit=1 ):
        """
        Calculate principal components of trajectory frames.

        @param atomMask: 1 x N_atoms, [111001110..] atoms to consider
                         (default: all)
        @type  atomMask: [1|0]
        @param frameMask: 1 x N_frames, [001111..] frames to consider
                          (default all )
        @type  frameMask: [1|0]
        @param fit: superimpose the frames on the masked atoms first
                    (default: 1)
        @type  fit: 1|0

        @return: (N_frames x N_frames), (1 x N_frames),
                 projection of each frame in PC space, eigenvalue of each PC
        @rtype: array, array, array
        """
        if frameMask is None: frameMask = N.ones( len( self.frames ), N.int32 )

        if atomMask is None: atomMask = N.ones(self.getRef().lenAtoms(),
                                               N.int32)

        if fit:
            self.fit( atomMask )

        ## mean structure over all frames (frame axis)
        refxyz = N.average( self.frames, 0 )

        data = N.compress( frameMask, self.frames, 0 )

        ## center the data on the mean structure
        data = data - refxyz

        data = N.compress( atomMask, data, 1 )

        ## reduce to 2D array (one flattened coordinate row per frame)
        data = N.array( map( N.ravel, data ) )

        V, L, U = LA.singular_value_decomposition( data )

        ## principal axes, projections, and eigenvalues (squared
        ## singular values)
        return U, V * L, N.power(L, 2)
예제 #39
0
def logConfidence(x, R, clip=1e-32):
    """
    Estimate the probability of x NOT being a random observation from a
    lognormal distribution that is described by a set of random values.
    The exact solution to this problem is in L{Biskit.Statistics.lognormal}.

    @param x: observed value
    @type  x: float
    @param R: sample of random values
    @type  R: [float]
    @param clip: clip zeros at this value; 0 -> don't clip (default: 1e-32)
    @type  clip: float

    @return:  confidence that x is not random, mean of random distrib.
    @rtype: (float, float)
    """
    ## zeros would break the log-transform below
    if clip and 0 in R:
        R = N.clip(R, clip, max(R))
    ## get mean and stdv of log-transformed random sample
    mean = N.average(N.log(R))

    n = len(R)

    ## unbiased (n-1) standard deviation of the log-values
    stdv = N.sqrt(N.sum(N.power(N.log(R) - mean, 2)) / (n - 1.))

    ## create dense lognormal distribution representing the random sample
    stop = max(R) * 50.0
    step = stop / 100000
    start = step / 10.0

    X = [(v, p_lognormal(v, mean, stdv)) for v in N.arange(start, stop, step)]

    ## analyse distribution
    d = Density(X)

    return d.findConfidenceInterval(x * 1.0)[0], d.average()
예제 #40
0
파일: Density.py 프로젝트: ostrokach/biskit
def logConfidence( x, R, clip=1e-32 ):
    """
    Estimate the probability that x is NOT a random pick from the lognormal
    distribution described by the random sample R. The exact solution to
    this problem is in L{Biskit.Statistics.lognormal}.

    @param x: observed value
    @type  x: float
    @param R: sample of random values
    @type  R: [float]
    @param clip: clip zeros at this value; 0 -> don't clip (default: 1e-32)
    @type  clip: float

    @return: confidence that x is not random, mean of the random distribution
    @rtype: (float, float)
    """
    ## zeros would break the log-transform below
    if clip and 0 in R:
        R = N.clip( R, clip, max( R ) )

    ## moments of the log-transformed sample (n-1 denominator)
    logR = N.log( R )
    mean = N.average( logR )
    stdv = N.sqrt( N.sum( N.power( logR - mean, 2 ) ) / (len( R ) - 1.) )

    ## sample the lognormal density on a dense grid
    stop  = max( R ) * 50.0
    step  = stop / 100000
    start = step / 10.0

    grid = N.arange( start, stop, step )
    X = [ (v, p_lognormal( v, mean, stdv )) for v in grid ]

    ## analyse distribution
    d = Density( X )

    return d.findConfidenceInterval( x * 1.0 )[0], d.average()
예제 #41
0
def variance(x, avg=None):
    """
    Unbiased sample variance of x (n-1 denominator).

    @param x: sample values
    @type  x: [float]
    @param avg: pre-computed mean; computed from x if None
    @type  avg: float or None

    @return: sample variance
    @rtype: float
    """
    if avg is None:
        avg = Numeric.average(x)

    deviations = Numeric.array(x) - avg
    return Numeric.sum(deviations ** 2) / (len(x) - 1.)
예제 #42
0
    def getFluct_local( self, mask=None, border_res=1,
                        left_atoms=['C'], right_atoms=['N'], verbose=1 ):
        """
        Get mean displacement of each atom from it's average position after
        fitting of each residue to the reference backbone coordinates of itself
        and selected atoms of neighboring residues to the right and left.

        @param mask: N_atoms x 1 array of 0||1, atoms for which fluctuation
                     should be calculated
        @type  mask: array
        @param border_res: number of neighboring residues to use for fitting
        @type  border_res: int
        @param left_atoms: atoms (names) to use from these neighbore residues
        @type  left_atoms: [str]
        @param right_atoms: atoms (names) to use from these neighbore residues
        @type  right_atoms: [str]
        @param verbose: print progress info to STDERR (default: 1)
        @type  verbose: 1|0

        @return: Numpy array ( N_unmasked x 1 ) of float
        @rtype: array

        @note: left_atoms/right_atoms are mutable defaults shared across
               calls -- they appear to be only read here; do not mutate them.
        """
        if mask is None:
            mask = N.ones( len( self.frames[0] ), N.int32 )

        if verbose: T.errWrite( "rmsd fitting per residue..." )

        ## indices of residues that contain at least one unmasked atom
        residues = N.nonzero( self.ref.atom2resMask( mask ) )

        ## backbone atoms used for fit
        fit_atoms_right = N.nonzero( self.ref.mask( right_atoms ) )
        fit_atoms_left  = N.nonzero( self.ref.mask( left_atoms ) )
        ## chain index of each residue
        rchainMap = N.take( self.ref.chainMap(), self.ref.resIndex() )

        result = []

        for res in residues:

            ## atoms of this residue plus fit atoms of neighboring residues
            i_res, i_border = self.__resWindow(res, border_res, rchainMap,
                                               fit_atoms_left, fit_atoms_right)

            try:
                if not len( i_res ): raise PDBError, 'empty residue'

                t_res = self.takeAtoms( i_res + i_border )

                ## positions of the center-residue atoms within the window
                i_center = range( len( i_res ) )

                ## heavy backbone atoms of the window
                mask_BB = t_res.ref.maskBB() * t_res.ref.maskHeavy()

                ## fit with border atoms ..
                t_res.fit( ref=t_res.ref, mask=mask_BB, verbose=0 )
                ## .. but calculate only with center residue atoms
                frames = N.take( t_res.frames, i_center, 1 )

                avg = N.average( frames )

                ## mean distance of each atom from its average position
                rmsd = N.average(N.sqrt(N.sum(N.power(frames - avg, 2), 2) ))

                result.extend( rmsd )

                if verbose: T.errWrite('#')

            except ZeroDivisionError:
                ## fit failed for this residue -- report zero fluctuation
                result.extend( N.zeros( len(i_res), N.Float32 ) )
                T.errWrite('?' + str( res ))

        if verbose: T.errWriteln( "done" )

        return result
예제 #43
0
    def fit(self,
            mask=None,
            ref=None,
            n_it=1,
            prof='rms',
            verbose=1,
            fit=1,
            **profInfos):
        """
        Superimpose all coordinate frames on reference coordinates. Put rms
        values in a profile. If n_it > 1, the fraction of atoms considered
        for the fit is put into a profile called |prof|_considered
        (i.e. by default 'rms_considered').

        @param mask: atom mask, atoms to consider default: [all]
        @type  mask: [1|0]
        @param ref: use as reference, default: None, average Structure
        @type  ref: PDBModel
        @param n_it: number of fit iterations, kicking out outliers on the way
                     1 -> classic single fit, 0 -> until convergence
                     (default: 1)
        @type  n_it: int
        @param prof: save rms per frame in profile of this name, ['rms']
        @type  prof: str
        @param verbose: print progress info to STDERR (default: 1)
        @type  verbose: 1|0
        @param fit: transform frames after match, otherwise just calc rms
                    (default: 1)
        @type  fit: 1|0
        @param profInfos: additional key=value pairs for rms profile info []
        @type profInfos: key=value
        """
        ## fix: identity test -- 'ref == None' could invoke a rich comparison
        ## on PDBModel; 'is None' is the correct check
        if ref is None:
            refxyz = N.average(self.frames, 0)
        else:
            refxyz = ref.getXyz()

        if mask is None:
            mask = N.ones(len(refxyz), N.int32)

        refxyz = N.compress(mask, refxyz, 0)

        if verbose: T.errWrite("rmsd fitting...")

        rms = []           ## rms value of each frame
        non_outliers = []  ## fraction of atoms considered for rms and fit
        iterations = []    ## number of iterations performed on each frame
                           ## (collected but currently not saved in a profile)

        for i, xyz in enumerate(self.frames):

            if n_it != 1:
                ## iterative fit: repeatedly fit, kicking out outlier atoms
                (r, t), rmsdList = rmsFit.match(refxyz,
                                                N.compress(mask, xyz, 0), n_it)
                iterations.append(len(rmsdList))
                non_outliers.append(rmsdList[-1][0])

                xyz_transformed = N.dot(xyz, N.transpose(r)) + t

                rms += [rmsdList[-1][1]]

            else:
                ## classic single fit
                r, t = rmsFit.findTransformation(refxyz,
                                                 N.compress(mask, xyz, 0))

                xyz_transformed = N.dot(xyz, N.transpose(r)) + t

                ## per-atom distances to the reference, masked atoms only
                d = N.sqrt(N.sum(N.power( N.compress(mask, xyz_transformed,0)\
                                          - refxyz, 2), 1))

                rms += [N.sqrt(N.average(d**2))]

            if fit:
                self.frames[i] = xyz_transformed.astype(N.float32)

            if verbose and i % 100 == 0:
                T.errWrite('#')

        self.setProfile(prof, rms, n_iterations=n_it, **profInfos)

        if non_outliers:
            self.setProfile(
                prof + '_considered',
                non_outliers,
                n_iterations=n_it,
                comment='fraction of atoms considered for iterative fit')

        if verbose: T.errWrite('done\n')
예제 #44
0
    def pcMovie( self, ev, steps, factor=1., ref=0, morph=1 ):
        """
        Morph between the two extreme values of a single principal
        component.

        @param ev: EigenVector to visualize
        @type  ev: int
        @param steps: number of intermediate frames
        @type  steps: int
        @param factor: exaggeration factor (default: 1 = no exaggeration)
        @type  factor: float
        @param ref: take other eigenvectors from this frame (default: 0)
        @type  ref: int
        @param morph: morph between min and max (1) or take real values (0)
                      (default: 1)
        @type  morph: 1|0

        @return: Trajectory with frames visualizing the morphing.
        @rtype: Trajectory
        """
        ## reuse the fit setting of an earlier PCA, if one was run
        fit = 1
        if self.pc is not None:
            fit = self.pc['fit']
        pc = self.getPca( fit=fit )

        ## eigenvectors (rows)
        U = pc['u']

        ## raveled and centered frames
        x_avg = N.average(self.frames, 0)
        X = N.array( [N.ravel(x) for x in self.frames - x_avg] )

        ## ev'th eigenvector of reference frame
        alpha_0 = N.dot( X[ref], U[ev] )

        ## list of deviations of ev'th eigenvector of each frame from ref
        alpha_range = N.dot(X, U[ev]) - alpha_0

        ## get some representative alphas...
        if morph:
            ## evenly spaced steps between the (scaled) extremes
            a_min = factor * min(alpha_range)
            a_max = factor * max(alpha_range)
            delta = (a_max - a_min) / steps
            alpha_range = [ a_min + i*(delta) for i in range(0, steps) ]
        else:
            ## pick real projection values at regular intervals
            alpha_range = N.sort( alpha_range )
            delta = len(alpha_range) / (steps * 1.0)
            alpha_range = [ alpha_range[ int(round( i*delta )) ]
                            for i in range(0,steps) ]

        ## scale ev'th eigenvector of ref with different alphas 
        Y = N.array( [ X[ref] + alpha * U[ev] for alpha in alpha_range] )

        ## back convert to N x 3 coordinates
        Y = N.reshape(Y, (Y.shape[0], -1, 3))
        Y = x_avg + Y

        result = self.__class__()
        result.ref = self.ref

        result.frames = Y
        return result
예제 #45
0
    def fit( self, mask=None, ref=None, n_it=1,
             prof='rms', verbose=1, fit=1, **profInfos ):
        """
        Superimpose all coordinate frames on reference coordinates. Put rms
        values in a profile. If n_it > 1, the fraction of atoms considered
        for the fit is put into a profile called |prof|_considered
        (i.e. by default 'rms_considered').

        @param mask: atom mask, atoms to consider default: [all]
        @type  mask: [1|0]
        @param ref: use as reference, default: None, average Structure
        @type  ref: PDBModel
        @param n_it: number of fit iterations, kicking out outliers on the way
                     1 -> classic single fit, 0 -> until convergence
                     (default: 1)
        @type  n_it: int
        @param prof: save rms per frame in profile of this name, ['rms']
        @type  prof: str
        @param verbose: print progress info to STDERR (default: 1)
        @type  verbose: 1|0
        @param fit: transform frames after match, otherwise just calc rms
                    (default: 1)
        @type  fit: 1|0
        @param profInfos: additional key=value pairs for rms profile info []
        @type profInfos: key=value
        """
        ## fix: identity test -- 'ref == None' could invoke a rich comparison
        ## on PDBModel; 'is None' is the correct check
        if ref is None:
            refxyz = N.average( self.frames, 0 )
        else:
            refxyz = ref.getXyz()

        if mask is None:
            mask = N.ones( len( refxyz ), N.int32 )

        refxyz = N.compress( mask, refxyz, 0 )

        if verbose: T.errWrite( "rmsd fitting..." )

        rms = []          ## rms value of each frame
        non_outliers = [] ## fraction of atoms considered for rms and fit
        iterations = []   ## number of iterations performed on each frame
                          ## (collected but currently not saved in a profile)

        for i in range(0, len( self.frames) ):

            xyz = self.frames[i]

            if n_it != 1:
                ## iterative fit: repeatedly fit, kicking out outlier atoms
                (r, t), rmsdList = rmsFit.match( refxyz,
                                                 N.compress( mask, xyz, 0), n_it)
                iterations.append( len( rmsdList ) )
                non_outliers.append( rmsdList[-1][0] )

                xyz_transformed = N.dot( xyz, N.transpose(r)) + t

                rms += [ rmsdList[-1][1] ]

            else:
                ## classic single fit
                r, t = rmsFit.findTransformation( refxyz,
                                                  N.compress( mask, xyz, 0))

                xyz_transformed = N.dot( xyz, N.transpose(r)) + t

                ## per-atom distances to the reference, masked atoms only
                d = N.sqrt(N.sum(N.power( N.compress(mask, xyz_transformed,0)\
                                          - refxyz, 2), 1))

                rms += [ N.sqrt( N.average(d**2) ) ]

            if fit:
                self.frames[i] = xyz_transformed.astype(N.float32)

            if verbose and i%100 == 0:
                T.errWrite( '#' )

        self.setProfile( prof, rms, n_iterations=n_it, **profInfos )

        if non_outliers:
            self.setProfile( prof+'_considered', non_outliers,
                             n_iterations=n_it,
                             comment='fraction of atoms considered for iterative fit' )

        if verbose: T.errWrite( 'done\n' )
예제 #46
0
파일: Hmmer.py 프로젝트: ostrokach/biskit
    def parse_result( self ):
        """
        Extract some information about the profile as well as the
        match state emmission scores. Keys of the returned dictionary::
          'AA', 'name', 'NrSeq', 'emmScore', 'accession',
          'maxAllScale', 'seqNr', 'profLength', 'ent', 'absSum'

        @return: dictionary with various information about the profile
        @rtype: dict

        @raise HmmerError: if the result file is missing or seems truncated
        """
        ## check that the output file is there and seems valid
        if not os.path.exists( self.f_out ):
            raise HmmerError,\
                  'Hmmerfetch result file %s does not exist.'%self.f_out
        
        if T.fileLength( self.f_out ) < 10:
            raise HmmerError,\
                  'Hmmerfetch result file %s seems incomplete.'%self.f_out
        
        profileDic = {}

        ## read result
        hmm = open( self.f_out, 'r')
        out = hmm.read()
        hmm.close()

        ## collect some data about the hmm profile
        profileDic['name'] =  self.hmmName 
        profileDic['profLength'] = \
                  int( string.split(re.findall('LENG\s+[0-9]+', out)[0])[1] )
        profileDic['accession'] = \
                  string.split(re.findall('ACC\s+PF[0-9]+', out)[0])[1] 
        profileDic['NrSeq'] = \
                  int( string.split(re.findall('NSEQ\s+[0-9]+', out)[0])[1] )
        profileDic['AA'] = \
              string.split(re.findall('HMM[ ]+' + '[A-Y][ ]+'*20, out)[0] )[1:]

        ## collect null emmission scores
        pattern = 'NULE[ ]+' + '[-0-9]+[ ]+'*20
        nullEmm = [ float(j) for j in string.split(re.findall(pattern, out)[0])[1:] ]

        ## get emmision scores, one row of 20 scores per match state
        prob=[]
        for i in range(1, profileDic['profLength']+1):
            pattern = "[ ]+%i"%i + "[ ]+[-0-9]+"*20
            e = [ float(j) for j in string.split(re.findall(pattern, out)[0]) ]
            prob += [ e ]

        ## first column is the position number, the rest the 20 AA scores
        profileDic['seqNr'] = N.transpose( N.take( prob, (0,),1 ) )
        profileDic['emmScore'] = N.array(prob)[:,1:]

        ## calculate emission probablitities
        emmProb, nullProb = self.hmmEmm2Prob( nullEmm, profileDic['emmScore'])

        ## per-state entropy relative to the null model
        ent = [ N.resize( self.entropy(e, nullProb), (1,20) )[0] for e in emmProb ]
        profileDic['ent'] = N.array(ent)

        ###### TEST #####

        proba = N.array(prob)[:,1:]

##         # test set all to max score
##         p = proba
##         p1 = []
##         for i in range( len(p) ):
##             p1 += [ N.resize( p[i][N.argmax( N.array( p[i] ) )] , N.shape( p[i] ) ) ]
##         profileDic['maxAll'] = p1

        # test set all to N.sum( abs( probabilities ) )
        p = proba
        p2 = []
        for i in range( len(p) ) :
            p2 += [ N.resize( N.sum( N.absolute( p[i] )), N.shape( p[i] ) ) ]
        profileDic['absSum'] = p2

        # set all to normalized max score 
        # NOTE(review): 'math' here must be a project module providing SD()
        # -- the stdlib math module has no SD; verify the import
        p = proba
        p4 = []
        for i in range( len(p) ) :
            p_scale = (p[i] - N.average(p[i]) )/ math.SD(p[i])
            p4 += [ N.resize( p_scale[N.argmax( N.array(p_scale) )] ,
                              N.shape( p[i] ) ) ]
        profileDic['maxAllScale'] = p4

        return profileDic
예제 #47
0
파일: rmsFit.py 프로젝트: ostrokach/biskit
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing outliers.
    Superimposed array y would be C{ N.dot(y, N.transpose(r)) + t }.

    @param x: reference coordinates (N x 3)
    @type  x: array
    @param y: coordinates to be matched onto x (N x 3)
    @type  y: array
    @param n_iterations: number of calculations::
                           1 .. no iteration 
                           0 .. until convergence
    @type  n_iterations: 1|0
    @param z: number of standard deviations for outlier definition (default: 2)
    @type  z: float
    @param eps_rmsd: tolerance in rmsd (default: 0.5)
    @type  eps_rmsd: float
    @param eps_stdv: tolerance in standard deviations (default: 0.05)
    @type  eps_stdv: float

    @return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ]
    @rtype: (array, array), [float, float, int]

    @note: NOTE(review): if the masked distances have zero spread, the
           stdv ratio below divides by zero -- confirm intended behaviour.
    """
    iter_trace = []

    rmsd_old = 0
    stdv_old = 0

    n = 0
    converged = 0

    ## all atom pairs considered initially
    mask = N.ones(len(y), N.int32 )

    while not converged:

        ## find transformation for best match
        r, t = findTransformation(N.compress(mask, x, 0),
                                  N.compress(mask, y, 0))

        ## transform coordinates
        xt = N.dot(y, N.transpose(r)) + t

        ## calculate row distances (masked-out rows contribute 0)
        d = N.sqrt(N.sum(N.power(x - xt, 2), 1)) * mask

        ## calculate rmsd and stdv
        rmsd = N.sqrt(N.average(N.compress(mask, d)**2))
        stdv = MU.SD(N.compress(mask, d))

        ## check conditions for convergence
        d_rmsd = abs(rmsd - rmsd_old)
        d_stdv = abs(1 - stdv_old / stdv)

        if d_rmsd < eps_rmsd and d_stdv < eps_stdv:
            converged = 1
        else:
            rmsd_old = rmsd
            stdv_old = stdv

        ## store result
        perc = round(float(N.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows
        mask = N.logical_and(mask, N.less(d, rmsd + z * stdv))
        outliers = N.nonzero( N.logical_not( mask ) )
        iter_trace.append([perc, round(rmsd, 3), outliers])

        n += 1

        if n_iterations and n >= n_iterations:
            break

    return (r, t), iter_trace
예제 #48
0
def randomSurfaces( base_folder, label, mask ):
    """
    calculate surfaces for all peptides and return the
    average and SD

    @param base_folder: folder containing GLY-X-GLY_pcr/pcr_00 directories
    @type  base_folder: str
    @param label: not referenced in this function -- presumably kept for
                  call compatibility; TODO confirm against callers
    @type  label: any
    @param mask: residue mask used to strip the tailing GLY residues
    @type  mask: [1|0]

    @return: MS, AS, MS_sd, AS_sd -- dictionaries indexed by residue code,
             each mapping atom name -> surface value
    @rtype: (dict, dict, dict, dict)
    """
    ## container for results and standard deviations
    MS,    AS    = {}, {}
    MS_sd, AS_sd = {}, {}

    ## loop over peptide directories
    for k in MOU.aaAtoms.keys():
        ## NOTE: 'dir' shadows the builtin of the same name
        dir = base_folder + 'GLY-%s-GLY_pcr/pcr_00'%(k)
        fLst = glob.glob( dir + '/*.pdb')
        
        msLst = []
        asLst = []
        
        ## loop over pdb files for each peptide
        T.flushPrint( '\nNow collecting data in %s'%dir )
        for f in fLst:

            ## load peptide and remove waters and hydrogens
            m = PDBModel( f )
            m = m.compress( m.maskProtein() * m.maskHeavy() )
            T.flushPrint( '.')

            ## add surface data
            ## NOTE(review): the bare except below hides *all* errors from
            ## the surface calculation, not only expected ones
            try:
                d = PDBDope( m )
                d.addSurfaceRacer( probe=1.4 )

                ## remove tailing GLY
                m = m.compress( m.res2atomMask(mask) )
                
                ## collect surface data for each peptide
                msLst += [ m.profile('MS') ]
                asLst += [ m.profile('AS') ]
                       
            except:
                print 'Failed calculating exposure for GLY-%s-GLY'%(k)
                print '\t and file %s'%f
                
        ## get result dictionary for peptide
        T.flushPrint('\nCollecting data ...\n')
        msDic = {}
        asDic = {}
        msDic_sd = {}
        asDic_sd = {}

        j = 0
        #atoms =  [ a['name'] for a in m.atoms ]
        ## NOTE(review): relies on 'm' left over from the last file processed
        ## in the loop above -- atom names come from that last model
        for n in m['name']:
            msDic[n]    = N.average(msLst)[j]
            asDic[n]    = N.average(asLst)[j]
            msDic_sd[n] = MAU.SD( msLst )[j]
            asDic_sd[n] = MAU.SD( asLst )[j]
            j += 1

        MS[ k ] = msDic
        AS[ k ] = asDic
        MS_sd[ k ] = msDic_sd
        AS_sd[ k ] = asDic_sd

    return MS, AS, MS_sd, AS_sd
예제 #49
0
    def getFluct_local(self,
                       mask=None,
                       border_res=1,
                       left_atoms=['C'],
                       right_atoms=['N'],
                       verbose=1):
        """
        Get mean displacement of each atom from it's average position after
        fitting of each residue to the reference backbone coordinates of itself
        and selected atoms of neighboring residues to the right and left.

        @param mask: N_atoms x 1 array of 0||1, atoms for which fluctuation
                     should be calculated
        @type  mask: array
        @param border_res: number of neighboring residues to use for fitting
        @type  border_res: int
        @param left_atoms: atoms (names) to use from these neighbore residues
        @type  left_atoms: [str]
        @param right_atoms: atoms (names) to use from these neighbore residues
        @type  right_atoms: [str]
        @param verbose: print progress info to STDERR (default: 1)
        @type  verbose: 1|0

        @return: Numpy array ( N_unmasked x 1 ) of float
        @rtype: array

        @note: left_atoms/right_atoms are mutable defaults shared across
               calls -- they appear to be only read here; do not mutate them.
        """
        if mask is None:
            mask = N.ones(len(self.frames[0]), N.int32)

        if verbose: T.errWrite("rmsd fitting per residue...")

        ## indices of residues that contain at least one unmasked atom
        residues = N.nonzero(self.ref.atom2resMask(mask))

        ## backbone atoms used for fit
        fit_atoms_right = N.nonzero(self.ref.mask(right_atoms))
        fit_atoms_left = N.nonzero(self.ref.mask(left_atoms))
        ## chain index of each residue
        rchainMap = N.take(self.ref.chainMap(), self.ref.resIndex())

        result = []

        for res in residues:

            ## atoms of this residue plus fit atoms of neighboring residues
            i_res, i_border = self.__resWindow(res, border_res, rchainMap,
                                               fit_atoms_left, fit_atoms_right)

            try:
                if not len(i_res): raise PDBError, 'empty residue'

                t_res = self.takeAtoms(i_res + i_border)

                ## positions of the center-residue atoms within the window
                i_center = range(len(i_res))

                ## heavy backbone atoms of the window
                mask_BB = t_res.ref.maskBB() * t_res.ref.maskHeavy()

                ## fit with border atoms ..
                t_res.fit(ref=t_res.ref, mask=mask_BB, verbose=0)
                ## .. but calculate only with center residue atoms
                frames = N.take(t_res.frames, i_center, 1)

                avg = N.average(frames)

                ## mean distance of each atom from its average position
                rmsd = N.average(N.sqrt(N.sum(N.power(frames - avg, 2), 2)))

                result.extend(rmsd)

                if verbose: T.errWrite('#')

            except ZeroDivisionError:
                ## fit failed for this residue -- report zero fluctuation
                result.extend(N.zeros(len(i_res), N.Float32))
                T.errWrite('?' + str(res))

        if verbose: T.errWriteln("done")

        return result
예제 #50
0
    def pcMovie(self, ev, steps, factor=1., ref=0, morph=1):
        """
        Build a Trajectory that animates a single principal component,
        morphing between its two extreme projections.

        @param ev: eigenvector to visualize
        @type  ev: int
        @param steps: number of intermediate frames
        @type  steps: int
        @param factor: exaggeration factor (default: 1 = no exaggeration)
        @type  factor: float
        @param ref: take other eigenvectors from this frame (default: 0)
        @type  ref: int
        @param morph: morph between min and max (1) or take real values (0)
                      (default: 1)
        @type  morph: 1|0

        @return: Trajectory with frames visualizing the morphing.
        @rtype: Trajectory
        """
        ## reuse the fit setting of an earlier PCA, if one was run
        if self.pc is not None:
            fit = self.pc['fit']
        else:
            fit = 1
        pc = self.getPca(fit=fit)

        ## eigenvectors (rows)
        eigenvectors = pc['u']

        ## center frames and flatten each one into a row vector
        x_avg = N.average(self.frames, 0)
        X = N.array([N.ravel(frame) for frame in self.frames - x_avg])

        ## projection of the reference frame onto eigenvector ev
        alpha_0 = N.dot(X[ref], eigenvectors[ev])

        ## projection offsets of all frames relative to the reference
        alpha_range = N.dot(X, eigenvectors[ev]) - alpha_0

        if morph:
            ## evenly spaced alphas between the (scaled) extremes
            a_min = factor * min(alpha_range)
            a_max = factor * max(alpha_range)
            delta = (a_max - a_min) / steps
            alphas = [a_min + i * delta for i in range(0, steps)]
        else:
            ## pick real, sorted projection values at regular intervals
            alpha_range = N.sort(alpha_range)
            delta = len(alpha_range) / (steps * 1.0)
            alphas = [alpha_range[int(round(i * delta))]
                      for i in range(0, steps)]

        ## displace the reference frame along the eigenvector
        Y = N.array([X[ref] + a * eigenvectors[ev] for a in alphas])

        ## back to (frames x atoms x 3) coordinates around the average
        Y = x_avg + N.reshape(Y, (Y.shape[0], -1, 3))

        result = self.__class__()
        result.ref = self.ref

        result.frames = Y
        return result
예제 #51
0
    def parse_result(self):
        """
        Extract some information about the profile as well as the
        match state emmission scores. Keys of the returned dictionary::
          'AA', 'name', 'NrSeq', 'emmScore', 'accession',
          'maxAllScale', 'seqNr', 'profLength', 'ent', 'absSum'

        @return: dictionary with various information about the profile
        @rtype: dict

        @raise HmmerError: if the result file is missing or seems truncated
        """
        ## check that the output file is there and seems valid
        if not os.path.exists(self.f_out):
            raise HmmerError,\
                  'Hmmerfetch result file %s does not exist.'%self.f_out

        if T.fileLength(self.f_out) < 10:
            raise HmmerError,\
                  'Hmmerfetch result file %s seems incomplete.'%self.f_out

        profileDic = {}

        ## read result
        hmm = open(self.f_out, 'r')
        out = hmm.read()
        hmm.close()

        ## collect some data about the hmm profile
        profileDic['name'] = self.hmmName
        profileDic['profLength'] = \
                  int( string.split(re.findall('LENG\s+[0-9]+', out)[0])[1] )
        profileDic['accession'] = \
                  string.split(re.findall('ACC\s+PF[0-9]+', out)[0])[1]
        profileDic['NrSeq'] = \
                  int( string.split(re.findall('NSEQ\s+[0-9]+', out)[0])[1] )
        profileDic['AA'] = \
              string.split(re.findall('HMM[ ]+' + '[A-Y][ ]+'*20, out)[0] )[1:]

        ## collect null emmission scores
        pattern = 'NULE[ ]+' + '[-0-9]+[ ]+' * 20
        nullEmm = [
            float(j) for j in string.split(re.findall(pattern, out)[0])[1:]
        ]

        ## get emmision scores, one row of 20 scores per match state
        prob = []
        for i in range(1, profileDic['profLength'] + 1):
            pattern = "[ ]+%i" % i + "[ ]+[-0-9]+" * 20
            e = [float(j) for j in string.split(re.findall(pattern, out)[0])]
            prob += [e]

        ## first column is the position number, the rest the 20 AA scores
        profileDic['seqNr'] = N.transpose(N.take(prob, (0, ), 1))
        profileDic['emmScore'] = N.array(prob)[:, 1:]

        ## calculate emission probablitities
        emmProb, nullProb = self.hmmEmm2Prob(nullEmm, profileDic['emmScore'])

        ## per-state entropy relative to the null model
        ent = [
            N.resize(self.entropy(e, nullProb), (1, 20))[0] for e in emmProb
        ]
        profileDic['ent'] = N.array(ent)

        ###### TEST #####

        proba = N.array(prob)[:, 1:]

        ##         # test set all to max score
        ##         p = proba
        ##         p1 = []
        ##         for i in range( len(p) ):
        ##             p1 += [ N.resize( p[i][N.argmax( N.array( p[i] ) )] , N.shape( p[i] ) ) ]
        ##         profileDic['maxAll'] = p1

        # test set all to N.sum( abs( probabilities ) )
        p = proba
        p2 = []
        for i in range(len(p)):
            p2 += [N.resize(N.sum(N.absolute(p[i])), N.shape(p[i]))]
        profileDic['absSum'] = p2

        # set all to normalized max score
        # NOTE(review): 'math' here must be a project module providing SD()
        # -- the stdlib math module has no SD; verify the import
        p = proba
        p4 = []
        for i in range(len(p)):
            p_scale = (p[i] - N.average(p[i])) / math.SD(p[i])
            p4 += [
                N.resize(p_scale[N.argmax(N.array(p_scale))], N.shape(p[i]))
            ]
        profileDic['maxAllScale'] = p4

        return profileDic
예제 #52
0
파일: rmsFit.py 프로젝트: ostrokach/biskit
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing outliers.
    Superimposed array y would be C{ N.dot(y, N.transpose(r)) + t }.

    @param x: reference coordinates (N x 3)
    @type  x: array
    @param y: coordinates to be matched onto x (N x 3)
    @type  y: array
    @param n_iterations: number of calculations::
                           1 .. no iteration 
                           0 .. until convergence
    @type  n_iterations: 1|0
    @param z: number of standard deviations for outlier definition (default: 2)
    @type  z: float
    @param eps_rmsd: tolerance in rmsd (default: 0.5)
    @type  eps_rmsd: float
    @param eps_stdv: tolerance in standard deviations (default: 0.05)
    @type  eps_stdv: float

    @return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ]
    @rtype: (array, array), [float, float, int]

    @note: NOTE(review): if the masked distances have zero spread, the
           stdv ratio below divides by zero -- confirm intended behaviour.
    """
    iter_trace = []

    rmsd_old = 0
    stdv_old = 0

    n = 0
    converged = 0

    ## all atom pairs considered initially
    mask = N.ones(len(y), N.int32)

    while not converged:

        ## find transformation for best match
        r, t = findTransformation(N.compress(mask, x, 0),
                                  N.compress(mask, y, 0))

        ## transform coordinates
        xt = N.dot(y, N.transpose(r)) + t

        ## calculate row distances (masked-out rows contribute 0)
        d = N.sqrt(N.sum(N.power(x - xt, 2), 1)) * mask

        ## calculate rmsd and stdv
        rmsd = N.sqrt(N.average(N.compress(mask, d)**2))
        stdv = MU.SD(N.compress(mask, d))

        ## check conditions for convergence
        d_rmsd = abs(rmsd - rmsd_old)
        d_stdv = abs(1 - stdv_old / stdv)

        if d_rmsd < eps_rmsd and d_stdv < eps_stdv:
            converged = 1
        else:
            rmsd_old = rmsd
            stdv_old = stdv

        ## store result
        perc = round(float(N.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows
        mask = N.logical_and(mask, N.less(d, rmsd + z * stdv))
        outliers = N.nonzero(N.logical_not(mask))
        iter_trace.append([perc, round(rmsd, 3), outliers])

        n += 1

        if n_iterations and n >= n_iterations:
            break

    return (r, t), iter_trace