Example #1
0
    def __atomContacts(self, cutoff, rec_mask, lig_mask, cache):
        """
        Boolean atom-atom contact matrix between receptor and ligand.

        The pairwise distance matrix is taken from C{self.pw_dist} if that
        attribute exists and its shape equals (sum(rec_mask), sum(lig_mask));
        otherwise it is recalculated from the masked coordinates.

        @param cutoff: cutoff for B{atom-atom} contact in Angstrom
        @type  cutoff: float
        @param rec_mask: receptor atom mask
        @type  rec_mask: [1|0]
        @param lig_mask: ligand atom mask
        @type  lig_mask: [1|0]
        @param cache: if true, keep the distance matrix in C{self.pw_dist}
        @type  cache: 1|0

        @return: atom contact matrix, array sum_rec_mask x sum_lig_mask
        @rtype: array
        """
        ## coordinates of all receptor and ligand atoms
        rec_xyz = self.rec().getXyz()
        lig_xyz = self.lig().getXyz()

        ## a stored matrix is only trusted if its dimensions fit the masks
        ## NOTE(review): two different masks selecting the same number of
        ## atoms are not told apart by this check -- confirm this is intended
        dist = getattr(self, 'pw_dist', None)
        needs_update = dist is None or \
            N0.shape(dist) != (N0.sum(rec_mask), N0.sum(lig_mask))

        if needs_update:
            rec_selected = N0.compress(rec_mask, rec_xyz, 0)
            lig_selected = N0.compress(lig_mask, lig_xyz, 0)
            dist = self.__pairwiseDistances(rec_selected, lig_selected)

        if cache:
            self.pw_dist = dist

        ## element-wise distance < cutoff -> n_atoms_rec x n_atoms_lig
        return N0.less(dist, cutoff)
Example #2
0
    def contactResDistribution(self, cm=None):
        """
        Count how often each residue type occurs in the interface.

        @param cm: pre-calculated contact matrix; if None, the result of
                   C{self.resContacts()} is used (default: None)
        @type  cm: matrix

        @return: dict {'A':3, 'C':1, .. } with one entry for every letter
                 returned by C{molUtils.allAA()}
        @rtype: dict
        """
        if cm is None:
            cm = self.resContacts()

        ## non-zero contact sums mark the residues taking part in contacts
        lig_hits = N0.sum(cm)
        rec_hits = N0.sum(N0.transpose(cm))

        ## keep only the sequence letters of contacting residues
        lig_letters = N0.compress(lig_hits, self.lig().sequence())
        rec_letters = N0.compress(rec_hits, self.rec().sequence())

        ## ligand letters first, then receptor letters, as one string
        interface = ''.join(lig_letters) + ''.join(rec_letters)

        return {aa: interface.count(aa) for aa in molUtils.allAA()}
Example #3
0
    def contactResDistribution( self, cm=None ):
        """
        Residue composition of the protein-protein interface.

        @param cm: pre-calculated contact matrix; C{self.resContacts()} is
                   called if it is None (default: None)
        @type  cm: matrix

        @return: dict {'A':3, 'C':1, .. }, one key per letter returned by
                 C{molUtils.allAA()}
        @rtype: dict
        """
        if cm is None:
            cm = self.resContacts()

        ## summed contacts act as masks: 0 -> residue is not in the interface
        ligCounts = N0.sum( cm )
        recCounts = N0.sum( N0.transpose( cm ) )

        ## sequence letters of the contacting residues only
        ligPart = N0.compress( ligCounts, self.lig().sequence() )
        recPart = N0.compress( recCounts, self.rec().sequence() )

        ## back to a single string: ligand part followed by receptor part
        contactSeq = ''.join( ligPart ) + ''.join( recPart )

        ## number of occurrences per residue letter
        return { aa : contactSeq.count( aa ) for aa in molUtils.allAA() }
Example #4
0
    def __atomContacts(self, cutoff, rec_mask, lig_mask, cache):
        """
        Intermolecular atom pairs closer than cutoff, after masking.

        A distance matrix stored in C{self.pw_dist} is re-used when its shape
        equals (sum(rec_mask), sum(lig_mask)); in all other cases the
        distances are computed again from the masked coordinates.

        @param cutoff: cutoff for B{atom-atom} contact in Angstrom
        @type  cutoff: float
        @param rec_mask: receptor atom mask
        @type  rec_mask: [1|0]
        @param lig_mask: ligand atom mask
        @type  lig_mask: [1|0]
        @param cache: if true, store the distance matrix in C{self.pw_dist}
        @type  cache: 1|0

        @return: atom contact matrix, array sum_rec_mask x sum_lig_mask
        @rtype: array
        """
        ## full coordinate arrays of both binding partners
        xyz_rec = self.rec().getXyz()
        xyz_lig = self.lig().getXyz()

        ## shape the distance matrix must have for the given masks
        dist = getattr(self, 'pw_dist', None)
        outdated = dist is None or \
            N0.shape(dist) != (N0.sum(rec_mask), N0.sum(lig_mask))

        ## NOTE(review): only the shape is compared, not the masks themselves
        if outdated:
            masked_rec = N0.compress(rec_mask, xyz_rec, 0)
            masked_lig = N0.compress(lig_mask, xyz_lig, 0)
            dist = self.__pairwiseDistances(masked_rec, masked_lig)

        if cache:
            self.pw_dist = dist

        ## 1 where distance < cutoff, else 0 -> n_atoms_rec x n_atoms_lig
        return N0.less(dist, cutoff)
Example #5
0
    def pairwiseRmsd( self, aMask=None, noFit=0 ):
        """
        Calculate the rmsd between every pair of coordinate frames.

        :param aMask: atom mask, applied to the frames before comparison
        :type  aMask: [1|0]
        :param noFit: if true, the rmsd is computed directly from the frame
                      coordinates; otherwise it is taken from the result of
                      ``rmsFit.match`` for the two frames (default: 0)
        :type  noFit: 1|0
        :return: frames x frames array of float (symmetric, zero diagonal)
        :rtype: array
        """
        frames = self.frames
        if aMask is not None:
            frames = N0.compress( aMask, frames, 1 )

        n_frames = len( frames )
        result = N0.zeros( (n_frames, n_frames), N0.Float32 )

        ## only the upper triangle is computed, the value is then mirrored
        for i in range( n_frames ):
            for j in range( i+1, n_frames ):

                if noFit:
                    delta = frames[i] - frames[j]
                    d = N0.sqrt( N0.sum( N0.power( delta, 2 ), 1 ) )
                    rmsd = N0.sqrt( N0.average( d**2 ) )
                else:
                    rt, rmsdLst = rmsFit.match( frames[i], frames[j], 1 )
                    rmsd = rmsdLst[0][1]

                result[i,j] = rmsd
                result[j,i] = rmsd

        return result
Example #6
0
    def residusMaximus( self, atomValues, mask=None ):
        """
        Replace each atom value by the highest (masked) value found among the
        atoms of the same residue.

        :param atomValues: list 1 x N, values per atom
        :type  atomValues: [ float ]
        :param mask: list 1 x N, 0|1, 'master' atoms of each residue;
                     values of atoms with mask 0 are set to 0 before the
                     maximum is taken (default: all atoms)
        :type  mask: [1|0]

        :return: Numpy array 1 x N of float
        :rtype: array
        """
        if mask is None:
            mask = N0.ones( len( self.frames[0] ), N0.Int32 )

        ## values of unselected atoms are zeroed by the multiplication
        masked = atomValues * mask

        result = []
        last_residue = self.resMap()[-1]

        for res in range( last_residue + 1 ):

            ## masked values of all atoms mapped to this residue
            belongs = N0.equal( self.resMap(), res )
            resAtoms = N0.compress( belongs, masked )

            masterValue = max( resAtoms )

            ## one copy of the maximum for every atom of the residue
            result.extend( resAtoms * 0.0 + masterValue )

        return N0.array( result )
Example #7
0
    def takeFrames( self, indices ):
        """
        Create a new trajectory that holds only the given frames.

        Indices that are not smaller than the number of frames are dropped
        silently before frames are taken.

        :param indices: positions to take
        :type  indices: [int]

        :return: copy of this Trajectory (fewer frames, semi-deep copy of ref)
        :rtype: Trajectory
        """
        ## discard indices beyond the last frame (only the upper bound is
        ## checked here)
        within_range = N0.less( indices, len( self.frames ) )
        indices = N0.compress( within_range, indices )

        r = self.__class__()

        ## this step takes some time for large frames !
        r.frames = N0.take( self.frames, indices, 0 )

        ## semi-deep copy of reference model: take all of its atoms
        all_atoms = list( range( self.ref.lenAtoms() ) )
        r.setRef( self.ref.take( all_atoms ) )

        if self.frameNames is not None:
            taken = N0.take( self.frameNames, indices, 0 )
            r.frameNames = [ ''.join( name ) for name in taken.tolist() ]

        r.pc = self.__takePca( indices )
        r.profiles = self.profiles.take( indices )

        ## the residue index is shared with the original, not copied
        r.resIndex = self.resIndex

        return r
Example #8
0
def centerSurfDist(model, surf_mask, mask=None):
    """
    Longest and shortest distance between the center of the molecule and
    its surface atoms.

    The center is C{model.centerOfMass()}; distances are measured to all
    atoms that are selected by both mask and surf_mask.

    @param model: structure providing maskHeavy(), centerOfMass(), getXyz()
    @type  model: PDBModel
    @param mask: atom mask combined with surf_mask; C{model.maskHeavy()}
                 is used if None (default: None)
    @type  mask: [1|0]
    @param surf_mask: atom surface mask, needed for minimum surface distance
    @type  surf_mask: [1|0]

    @return: max distance, min distance
    @rtype: float, float
    """
    if mask is None:
        mask = model.maskHeavy()

    center = model.centerOfMass()

    ## an atom is used only if it is set in both masks
    selected = mask * surf_mask
    surf_xyz = N0.compress(selected, model.getXyz(), 0)

    ## euclidean distance of every selected atom from the center
    offsets = surf_xyz - center
    dist = N0.sqrt(N0.sum(offsets**2, 1))

    nearest = min(dist)
    farthest = max(dist)

    return farthest, nearest
Example #9
0
    def addDensity( self, radius=6, minasa=None, profName='density' ):
        """
        Count the number of heavy atoms within the given radius of each
        atom and store the counts as atom profile.
        Values are only collected for atoms with |minasa| accessible surface
        area; all other atoms get the default value -1.

        @param radius: in Angstrom (default: 6)
        @type  radius: float
        @param minasa: relative exposed surface - 0 to 100%; if not given,
                       all atoms are evaluated (default: None)
        @type  minasa: float
        @param profName: name of the atom profile to create
                         (default: 'density')
        @type  profName: str
        """
        mHeavy = self.m.maskHeavy()
        xyz = N0.compress( mHeavy, self.m.getXyz(), 0 )

        ## NOTE(review): if profile() returns an array, the comparison with 0
        ## is presumably element-wise -- confirm it is usable in this test
        if minasa and self.m.profile( 'relAS', 0 ) == 0:
            self.addASA()

        mSurf = self.m.profile2mask( 'relAS', minasa ) if minasa \
                else N0.ones( self.m.lenAtoms() )

        ## compare squared distances to avoid taking square roots
        radius_sq = radius**2

        ## one count per selected atom; 1 is subtracted from every count
        ## NOTE(review): presumably to exclude the atom itself, which only
        ## holds if that atom is a heavy atom -- confirm
        contacts = [
            N0.sum( N0.less( N0.sum( (xyz - self.m.xyz[i])**2, 1 ),
                             radius_sq ) ) - 1
            for i in N0.nonzero( mSurf ) ]

        self.m.atoms.set( profName, contacts, mSurf, default=-1,
                          comment='atom density radius %3.1fA' % radius,
                          version= T.dateString() + ' ' + self.version() )
Example #10
0
    def __extractLigandMatrix(self, fcomplex):
        """
        Compare the ligand structure from a hex complex with the original
        ligand model and return the transformation between the two.
        The transformation is only returned, not stored on the instance.

        @param fcomplex: pdb file with hex complex
        @type  fcomplex: str

        @return: rotation matrix and translation matrix as tuple
        @rtype: (array, array)
        """
        docked_pdb = self._extractLigandStructure(fcomplex)

        ## select the CA rows of the coordinate arrays; the axis is given
        ## explicitly (0 = atoms) like for every other coordinate selection,
        ## so that the atom mask is never applied to the x,y,z columns
        xyz_docked = N0.compress(docked_pdb.maskCA(), docked_pdb.xyz, 0)
        xyz_template = N0.compress(self.lig_model.maskCA(),
                                   self.lig_model.xyz, 0)

        (r, t) = self._findTransformation(xyz_docked, xyz_template)
        return (r, t)
Example #11
0
    def __extractLigandMatrix(self, fcomplex):
        """
        Compare the ligand structure from a hex complex with the original
        ligand model and return the transformation between the two.
        The transformation is only returned, not stored on the instance.

        @param fcomplex: pdb file with hex complex
        @type  fcomplex: str

        @return: rotation matrix and translation matrix as tuple
        @rtype: (array, array)
        """
        docked_pdb = self._extractLigandStructure(fcomplex)

        ## select the CA rows of the coordinate arrays; the axis is given
        ## explicitly (0 = atoms) like for every other coordinate selection,
        ## so that the atom mask is never applied to the x,y,z columns
        xyz_docked = N0.compress( docked_pdb.maskCA(), docked_pdb.xyz, 0 )
        xyz_template = N0.compress( self.lig_model.maskCA(),
                                    self.lig_model.xyz, 0 )

        (r, t) = self._findTransformation(xyz_docked, xyz_template)
        return (r,t)
Example #12
0
    def phi_and_psi(self, model):
        """
        Calculate backbone torsion angles for all residues in model and
        append them to self.phi and self.psi, chain by chain::

          self.phi - dihedral of N, CA, C and N of the next residue
                   - last position in a chain = None
          self.psi - dihedral of C of the previous residue, N, CA and C
                   - first position in a chain = None

        NOTE(review): by the usual convention the N-CA-C-N(+1) torsion is
        called psi and the C(-1)-N-CA-C torsion phi -- confirm the naming.

        @param model: PDBModel
        @type  model: PDBModel
        """
        n_chains = model.lenChains(breaks=1)

        for c in range(n_chains):
            cModel = model.takeChains([c], breaks=1)
            coords = cModel.xyz

            ## backbone atom coordinates, one row per residue
            ca = N0.compress(cModel.maskCA(), coords, 0)
            n = N0.compress(cModel.mask(['N']), coords, 0)
            co = N0.compress(cModel.mask(['C']), coords, 0)
            n_res = len(n)

            ## needs the N of the following residue -> undefined at chain end
            self.phi += [
                self.dihedral(n[i], ca[i], co[i], n[i + 1])
                for i in range(n_res - 1)
            ]
            self.phi += [None]

            ## needs the C of the preceding residue -> undefined at chain start
            self.psi += [None]
            self.psi += [
                self.dihedral(co[i - 1], n[i], ca[i], co[i])
                for i in range(1, n_res)
            ]
Example #13
0
    def plotContactDensity( self, step=1, cutoff=4.5 ):
        """
        Example: plot a histogram of the non-zero values returned by
        averageContacts(). (Original author's remark: something wrong??)

        @param step: passed on to averageContacts() (default: 1)
        @type  step: int
        @param cutoff: passed on to averageContacts() (default: 4.5)
        @type  cutoff: float

        @raise ComplexTrajError: if gnuplot program is not installed
        """
        if not gnuplot.installed:
            raise ComplexTrajError('gnuplot program is not installed')

        ## flatten the contact values and drop all zero entries
        contacts = N0.ravel( self.averageContacts( step, cutoff ) )
        non_zero = N0.compress( contacts, contacts )

        gnuplot.plot( hist.density( non_zero, 10 ) )
Example #14
0
    def plotContactDensity(self, step=1, cutoff=4.5):
        """
        Example: histogram plot of the contact density, built from the
        non-zero values of averageContacts(). (Original author's remark:
        something wrong??)

        @param step: handed to averageContacts() (default: 1)
        @type  step: int
        @param cutoff: handed to averageContacts() (default: 4.5)
        @type  cutoff: float

        @raise ComplexTrajError: if gnuplot program is not installed
        """
        if not gnuplot.installed:
            raise ComplexTrajError('gnuplot program is not installed')

        flat = N0.ravel(self.averageContacts(step, cutoff))

        ## the values serve as their own mask: zeros are removed
        populated = N0.compress(flat, flat)

        density = hist.density(populated, 10)
        gnuplot.plot(density)
Example #15
0
    def phi_and_psi( self, model ):
        """
        Calculate backbone torsion angles for all residues in model and
        append them to self.phi and self.psi, one chain after the other::

          self.phi - dihedral of N, CA, C and N of the next residue
                   - last position in a chain = None
          self.psi - dihedral of C of the previous residue, N, CA and C
                   - first position in a chain = None

        NOTE(review): by the usual convention the N-CA-C-N(+1) torsion is
        called psi and the C(-1)-N-CA-C torsion phi -- confirm the naming.

        @param model: PDBModel
        @type  model: PDBModel
        """
        chainCount = model.lenChains( breaks=1 )

        for c in range( chainCount ):
            cModel = model.takeChains( [c], breaks=1 )
            xyz = cModel.xyz

            ## coordinates of the three backbone atom types, per residue
            posCA = N0.compress( cModel.maskCA(), xyz, 0 )
            posN  = N0.compress( cModel.mask( ['N'] ), xyz, 0 )
            posC  = N0.compress( cModel.mask( ['C'] ), xyz, 0 )
            resCount = len( posN )

            ## N of the next residue is required -> None closes the chain
            self.phi += [ self.dihedral( posN[i], posCA[i],
                                         posC[i], posN[i+1] )
                          for i in range( resCount-1 ) ]
            self.phi += [None]

            ## C of the previous residue is required -> None opens the chain
            self.psi += [None]
            self.psi += [ self.dihedral( posC[i-1], posN[i],
                                         posCA[i], posC[i] )
                          for i in range( 1, resCount ) ]
Example #16
0
def random2DArray( matrix, ranNr=1, mask=None):
    """
    Create randomized 2D array containing ones and zeros.

    The number of ones in the (masked) matrix is kept; their positions are
    drawn by randomMask(). If ranNr > 1, the given number of random arrays
    is added up. Positions excluded by the mask are 0 in the result.

    :param matrix: matrix to randomize
    :type  matrix: 2D array
    :param mask: mask OR None (default: None); one value per matrix element
                 (flattened); may be a list or an array
    :type  mask: list(1|0)
    :param ranNr: number of matricies to add up (default: 1)
    :type  ranNr: integer

    :return: 2D array or |ranNr| added contact matricies
    :rtype:2D array

    :raise MathUtilError: if mask does not fit matrix
    """
    ## get shape of matrix
    a,b = N0.shape( matrix )
    flat = N0.ravel( matrix )

    ## get array from matrix that is to be randomized
    ## (explicit None test: the truth value of an array mask is ambiguous)
    if mask is None:
        array = flat
    else:
        if len(mask) != len( flat ):
            raise MathUtilError(
                'MatUtils.random2DArray - mask of incorrect length' +
                '\tMatrix length: %i Mask length: %i'\
                %(len( flat ), len(mask)))

        array = N0.compress( mask, flat )

    ## number of ones and length of array
    nOnes = int( N0.sum( array ) )
    lenArray = len( array )
    ranArray = N0.zeros( lenArray )

    ## create random array
    for n in range(ranNr):
        ranArray += randomMask( nOnes, lenArray )

    if mask is None:
        return N0.reshape( ranArray, (a,b) )

    ## blow up to size of original matix: random values go to the
    ## positions selected by the mask, everything else stays 0
    r = N0.zeros(a*b)
    N0.put( r, N0.nonzero(mask), ranArray)
    return N0.reshape( r, (a,b) )
Example #17
0
def random2DArray(matrix, ranNr=1, mask=None):
    """
    Create randomized 2D array containing ones and zeros.

    The number of ones in the (masked) matrix is kept; their positions are
    drawn by randomMask(). If ranNr > 1, the given number of random arrays
    is added up. Positions excluded by the mask are 0 in the result.

    :param matrix: matrix to randomize
    :type  matrix: 2D array
    :param mask: mask OR None (default: None); one value per matrix element
                 (flattened); may be a list or an array
    :type  mask: list(1|0)
    :param ranNr: number of matricies to add up (default: 1)
    :type  ranNr: integer

    :return: 2D array or |ranNr| added contact matricies
    :rtype:2D array

    :raise MathUtilError: if mask does not fit matrix
    """
    ## get shape of matrix
    a, b = N0.shape(matrix)
    flat = N0.ravel(matrix)

    ## get array from matrix that is to be randomized
    ## (explicit None test: the truth value of an array mask is ambiguous)
    if mask is None:
        array = flat
    else:
        if len(mask) != len(flat):
            raise MathUtilError(
                'MatUtils.random2DArray - mask of incorrect length' +
                '\tMatrix length: %i Mask length: %i'\
                %(len( flat ), len(mask)))

        array = N0.compress(mask, flat)

    ## number of ones and length of array
    nOnes = int(N0.sum(array))
    lenArray = len(array)
    ranArray = N0.zeros(lenArray)

    ## create random array
    for n in range(ranNr):
        ranArray += randomMask(nOnes, lenArray)

    if mask is None:
        return N0.reshape(ranArray, (a, b))

    ## blow up to size of original matix: random values go to the
    ## positions selected by the mask, everything else stays 0
    r = N0.zeros(a * b)
    N0.put(r, N0.nonzero(mask), ranArray)
    return N0.reshape(r, (a, b))
Example #18
0
    def pca( self, atomMask=None, frameMask=None, fit=1 ):
        """
        Calculate principal components of trajectory frames.

        The selected frames are centered on the average over all frames,
        reduced to the selected atoms, flattened to one row per frame and
        decomposed by singular value decomposition (LA.svd).

        :param atomMask: 1 x N_atoms, [111001110..] atoms to consider
                         (default: all)
        :type  atomMask: [1|0]
        :param frameMask: 1 x N_frames, [001111..] frames to consider
                          (default all )
        :type  frameMask: [1|0]
        :param fit: if true, call self.fit( atomMask ) before the analysis
                    (default: 1)
        :type  fit: 1|0

        :return: third matrix of the SVD, first matrix multiplied by the
                 singular values (projection of each frame in PC space),
                 squared singular values (eigenvalue of each PC)
        :rtype: array, array, array
        """
        if frameMask is None:
            frameMask = N0.ones( len( self.frames ), N0.Int32 )

        if atomMask is None:
            atomMask = N0.ones( self.getRef().lenAtoms(), N0.Int32 )

        if fit:
            self.fit( atomMask )

        ## the average structure is calculated from all frames and all atoms
        refxyz = N0.average( self.frames, 0 )

        data = N0.compress( frameMask, self.frames, 0 )

        data = data - refxyz

        data = N0.compress( atomMask, data, 1 )

        ## reduce to 2D array: one flattened row per frame.
        ## A real list is required here -- in Python 3 map() returns an
        ## iterator, which would not be converted into a 2D array.
        data = N0.array( [ N0.ravel( frame ) for frame in data ] )

        V, L, U = LA.svd( data )

        return U, V * L, N0.power(L, 2)
Example #19
0
    def __init__( self, model,  maxPerCenter=4 ):
        """
        Prepare reduction of coordinates from a given model.

        The atoms are sorted into standard order within each residue,
        hydrogens are removed from the sorted copy, and the mapping to
        reduced centers is created by makeMap().

        @param model: reference model defining atom content and order
        @type  model: PDBModel
        @param maxPerCenter: max number of atoms per side chain center atom
                             (default: 4)
        @type  maxPerCenter: int
        """
        self.m = model
        self.__addMassProfile( self.m )

        def cmpAtoms( a1, a2 ):
            """
            Comparison function for bringing atoms into standard order
            within residues as defined by L{ aaAtoms }. Atoms are compared
            by the position of their name in the name list of the residue
            of a1; if a name is not listed, the names are compared directly.

            @param a1: atom record with 'name' and 'residue_name' entries
            @type  a1: dict-like
            @param a2: atom record with a 'name' entry
            @type  a2: dict-like

            @return: -1, 0 or 1
            @rtype: int
            """
            ## cmp vanished in python 3.x (but still available in
            ## past.builtins)
            def three_way( x, y ):
                return (x > y) - (x < y)

            standard_order = self.aaAtoms[ a1['residue_name'] ]

            try:
                pos1 = standard_order.index( a1['name'] )
                pos2 = standard_order.index( a2['name'] )
            except ValueError:
                ## unknown atom name: fall back to comparing the names
                ## instead of raising an error
                return three_way( a1['name'], a2['name'] )

            return three_way( pos1, pos2 )

        ## sort atoms within residues into standard order
        self.a_indices = self.m.argsort( cmpAtoms )
        self.m_sorted = self.m.sort( self.a_indices )

        ## remove H from internal model and from list of atom positions
        ## (the value returned by remove() is used as mask on the positions)
        hydrogens = self.m_sorted.maskH()
        maskH = self.m_sorted.remove( hydrogens )
        self.a_indices = N0.compress( maskH, self.a_indices )

        self.makeMap( maxPerCenter )
Example #20
0
def area(curve, start=0.0, stop=1.0):
    """
    Numerically add up the area under the given curve (trapezoid sum).
    The curve is a 2-D array or list of tuples.
    The x-axis is the first column of this array (curve[:,0]), the y-axis
    the second one (curve[:,1]).
    (originally taken from biskit.Statistics.ROCalyzer)

    Points outside [start, stop] are discarded; the curve is then extended
    horizontally from its first and last remaining point to the boundaries.

    :param curve: a list of x,y coordinates
    :type  curve: [ (x,y), ] or N0.array
    :param start: lower boundary (in x) (default: 0.0)
    :type  start: float
    :param stop: upper boundary (in x) (default: 1.0)
    :type  stop: float
    :return: the area underneath the curve between start and stop.
    :rtype: float
    """
    points = N0.array(curve)

    ## working copy with exchanged columns: column 0 = y, column 1 = x
    c = N0.zeros(N0.shape(points), points.dtype)
    c[:, 0] = points[:, 1]
    c[:, 1] = points[:, 0]

    assert len(N0.shape(c)) == 2

    ## apply boundaries  ## here we have a problem with flat curves
    x_all = c[:, 1]
    inside = N0.logical_and(N0.greater_equal(x_all, start),
                            N0.less_equal(x_all, stop))
    c = N0.compress(inside, c, axis=0)

    ## fill to boundaries -- not absolutely accurate: we actually should
    ## interpolate to the neighboring points instead
    head = N0.array([[c[0, 0], start]])
    tail = N0.array([[c[-1, 0], stop]])
    c = N0.concatenate((head, c, tail))

    x = c[:, 1]
    y = c[:, 0]

    ## steps between neighboring points
    dx = x[1:] - x[:-1]
    dy = y[1:] - y[:-1]

    rectangles = y[:-1] * dx  # below the left point of each interval
    triangles = dx * dy / 2.0  # between left and right point

    return N0.sum(rectangles) + N0.sum(triangles)
Example #21
0
def area(curve, start=0.0, stop=1.0 ):
    """
    Numerically add up the area under the given curve (trapezoid sum).
    The curve is a 2-D array or list of tuples.
    The x-axis is the first column of this array (curve[:,0]), the y-axis
    the second one (curve[:,1]).
    (originally taken from biskit.Statistics.ROCalyzer)

    Points outside [start, stop] are discarded; the curve is then extended
    horizontally from its first and last remaining point to the boundaries.

    :param curve: a list of x,y coordinates
    :type  curve: [ (x,y), ] or N0.array
    :param start: lower boundary (in x) (default: 0.0)
    :type  start: float
    :param stop: upper boundary (in x) (default: 1.0)
    :type  stop: float
    :return: the area underneath the curve between start and stop.
    :rtype: float
    """
    ## convert; the working array holds y in column 0 and x in column 1
    xy = N0.array( curve )
    c = N0.zeros( N0.shape( xy ), xy.dtype )
    c[:,0] = xy[:,1]
    c[:,1] = xy[:,0]

    assert len( N0.shape( c ) ) == 2

    ## apply boundaries  ## here we have a problem with flat curves
    not_below = N0.greater_equal( c[:,1], start )
    not_above = N0.less_equal( c[:,1], stop )
    c = N0.compress( N0.logical_and( not_below, not_above ), c, axis=0 )

    ## fill to boundaries -- not absolutely accurate: we actually should
    ## interpolate to the neighboring points instead
    left_edge  = N0.array( [[ c[0,0], start ]] )
    right_edge = N0.array( [[ c[-1,0], stop ]] )
    c = N0.concatenate( (left_edge, c, right_edge) )

    x = c[:,1]
    y = c[:,0]

    dx = x[1:] - x[:-1] # distance on x between points
    dy = y[1:] - y[:-1] # distance on y between points

    below = y[:-1] * dx   # the rectangles between all points
    slope = dx * dy / 2.0 # the triangles between all points

    return N0.sum( below ) + N0.sum( slope )
Example #22
0
    def getFluct_global( self, mask=None ):
        """
        Get the average deviation of each atom from its average position in
        the trajectory.
        The frames should be superimposed (fit() ) to a reference.

        NOTE(review): both N0.average calls rely on the default axis of
        N0.average (presumably the frame axis) -- confirm. The result is
        the average of the per-frame distances, not the root of the mean
        squared distance.

        :param mask: N x 1 list/Numpy array of 0|1, (N=atoms),
                     atoms to be considered.
        :type  mask: [1|0]

        :return: Numpy array ( N_unmasked x 1 ) of float.
        :rtype: array
        """
        frames = self.frames
        if mask is not None:
            frames = N0.compress( mask, frames, 1 )

        ## mean position of each atom in all frames
        avg = N0.average( frames )

        ## distance of every atom from its mean position, frame by frame
        squared = N0.power( frames - avg, 2 )
        deviation = N0.sqrt( N0.sum( squared, 2 ) )

        return N0.average( deviation )
Example #23
0
def outliers(a, z=5, it=5):
    """
    Iterative detection of outliers in a set of numeric values.
    Requirement: len(a) > 0; outlier detection is only performed if len(a)>2
    and it > 0. Otherwise an all-zero mask is returned together with median
    and standard deviation of all values.

    In each iteration, median and SD are calculated from the values not
    (yet) marked as outliers, then all values further than z SD away from
    that median are marked. Iteration stops as soon as the number of
    outliers does not change any more or fewer than 3 values would remain.

    :param a: array or list of values
    :type  a: [ float ]
    :param z: z-score threshold for iterative refinement of median and SD
    :type  z: float
    :param it: maximum number of iterations
    :type  it: int

    :return: outlier mask, median and standard deviation of last iteration
    :rtype: N0.array( int or bool ), float, float
    """
    assert (len(a) > 0)
    out = N0.zeros(len(a))

    ## nothing to iterate over: too few values or no iterations requested
    ## (without this guard it=0 would leave median and SD undefined)
    if len(a) < 3 or it < 1:
        return out, N0.median(a), N0.std(a)

    values = N0.array(a)

    for i in range(it):
        ## statistics from the values currently considered as inliers
        b = N0.compress(N0.logical_not(out), a)
        me = N0.median(b)
        sd = N0.std(b)

        ## pseudo z-score of each value
        ## NOTE(review): sd can be 0 if all remaining values are equal --
        ## confirm the division result is acceptable in that case
        bz = N0.absolute((values - me) / sd)
        o = bz > z

        ## stop if converged or reached bottom
        if (N0.sum(o) == N0.sum(out)) or (N0.sum(o) > len(a) - 3):
            return o, me, sd

        out = o

    return out, me, sd
Example #24
0
def outliers( a, z=5, it=5 ):
    """
    Iterative detection of outliers in a set of numeric values.
    Requirement: len(a) > 0; outlier detection is only performed if len(a)>2
    and it > 0. Otherwise an all-zero mask is returned together with median
    and standard deviation of all values.

    In each iteration, median and SD are calculated from the values not
    (yet) marked as outliers, then all values further than z SD away from
    that median are marked. Iteration stops as soon as the number of
    outliers does not change any more or fewer than 3 values would remain.

    :param a: array or list of values
    :type  a: [ float ]
    :param z: z-score threshold for iterative refinement of median and SD
    :type  z: float
    :param it: maximum number of iterations
    :type  it: int

    :return: outlier mask, median and standard deviation of last iteration
    :rtype: N0.array( int or bool ), float, float
    """
    assert( len(a) > 0 )
    out  = N0.zeros( len(a) )

    ## nothing to iterate over: too few values or no iterations requested
    ## (without this guard it=0 would leave median and SD undefined)
    if len(a) < 3 or it < 1:
        return out, N0.median(a), N0.std(a)

    values = N0.array( a )

    for i in range( it ):
        ## statistics from the values currently considered as inliers
        b  = N0.compress( N0.logical_not(out), a )
        me = N0.median( b )
        sd = N0.std( b )

        ## pseudo z-score of each value
        ## NOTE(review): sd can be 0 if all remaining values are equal --
        ## confirm the division result is acceptable in that case
        bz = N0.absolute( (values - me) / sd )
        o  = bz > z

        ## stop if converged or reached bottom
        if (N0.sum(o) == N0.sum(out)) or (N0.sum(o) > len(a) - 3):
            return o, me, sd

        out = o

    return out, me, sd
Example #25
0
    def addDensity(self, radius=6, minasa=None, profName='density'):
        """
        Count the number of heavy atoms within the given radius of each
        atom and store the counts as atom profile.
        Values are only collected for atoms with |minasa| accessible surface
        area; all other atoms get the default value -1.

        @param radius: in Angstrom (default: 6)
        @type  radius: float
        @param minasa: relative exposed surface - 0 to 100%; if not given,
                       all atoms are evaluated (default: None)
        @type  minasa: float
        @param profName: name of the atom profile to create
                         (default: 'density')
        @type  profName: str
        """
        heavy_xyz = N0.compress(self.m.maskHeavy(), self.m.getXyz(), 0)

        ## NOTE(review): if profile() returns an array, the comparison with 0
        ## is presumably element-wise -- confirm it is usable in this test
        if minasa and self.m.profile('relAS', 0) == 0:
            self.addASA()

        if minasa:
            mSurf = self.m.profile2mask('relAS', minasa)
        else:
            mSurf = N0.ones(self.m.lenAtoms())

        ## squared distances are compared against the squared radius
        limit = radius**2

        def neighbours(i):
            ## heavy atoms within radius of atom i, reduced by 1
            ## NOTE(review): the -1 presumably removes the atom itself,
            ## which only holds if atom i is a heavy atom -- confirm
            dist = N0.sum((heavy_xyz - self.m.xyz[i])**2, 1)
            return N0.sum(N0.less(dist, limit)) - 1

        ## one value per atom selected by the surface mask
        contacts = [neighbours(i) for i in N0.nonzero(mSurf)]

        self.m.atoms.set(profName,
                         contacts,
                         mSurf,
                         default=-1,
                         comment='atom density radius %3.1fA' % radius,
                         version=T.dateString() + ' ' + self.version())
Example #26
0
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing outliers.
    Superimposed array y would be C{ N0.dot(y, N0.transpose(r)) + t }.

    In every round the transformation is fitted on the rows still selected,
    applied to all rows of y, and rows whose distance to x is not below
    rmsd + z * stdv are excluded from the following rounds.

    :param x: coordinates the transformed y is compared to
    :type  x: array
    :param y: coordinates that are transformed onto x
    :type  y: array
    :param n_iterations: number of calculations::
                           1 .. no iteration 
                           0 .. until convergence
    :type  n_iterations: 1|0
    :param z: number of standard deviations for outlier definition (default: 2)
    :type  z: float
    :param eps_rmsd: tolerance in rmsd (default: 0.5)
    :type  eps_rmsd: float
    :param eps_stdv: tolerance in standard deviations (default: 0.05)
    :type  eps_stdv: float

    :return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ]
    :rtype: (array, array), [float, float, int]
    """
    iter_trace = []

    ## values of the previous round, for the convergence test
    rmsd_old = 0
    stdv_old = 0

    rounds = 0
    mask = N0.ones(len(y), N0.Int32)

    while True:

        ## best transformation for the rows that are still selected
        r, t = findTransformation(N0.compress(mask, x, 0),
                                  N0.compress(mask, y, 0))

        ## apply it to all rows, also to the excluded ones
        xt = N0.dot(y, N0.transpose(r)) + t

        ## row distances; excluded rows are set to 0
        d = N0.sqrt(N0.sum(N0.power(x - xt, 2), 1)) * mask

        ## rmsd and stdv over the selected rows only
        d_selected = N0.compress(mask, d)
        rmsd = N0.sqrt(N0.average(d_selected**2))
        stdv = MU.SD(d_selected)

        ## converged if both values changed less than the tolerances
        d_rmsd = abs(rmsd - rmsd_old)
        d_stdv = abs(1 - stdv_old / stdv)
        converged = d_rmsd < eps_rmsd and d_stdv < eps_stdv

        if not converged:
            rmsd_old = rmsd
            stdv_old = stdv

        ## fraction of rows that entered this round
        perc = round(float(N0.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows and record what was excluded
        mask = N0.logical_and(mask, N0.less(d, rmsd + z * stdv))
        rejected = N0.nonzero(N0.logical_not(mask))
        iter_trace.append([perc, round(rmsd, 3), rejected])

        rounds += 1

        if converged:
            break
        if n_iterations and rounds >= n_iterations:
            break

    return (r, t), iter_trace
Example #27
0
    def makeMap( self, maxPerCenter=4 ):
        """
        Calculate mapping between complete and reduced atom list.
        Stores the result in self.groups (list of lists of int, atom indices
        into the original model) and self.atoms (one new center atom per
        group).

        Every residue gets one 'BB' center; residues other than GLY and ALA
        additionally get 'SC0', 'SC1', .. centers for the groups returned
        by self.group() for their non-backbone atoms.

        @param maxPerCenter: max number of atoms per side chain center atom
                             (default: 4)
        @type  maxPerCenter: int
        """
        resIndex = self.m_sorted.resIndex()
        resModels= self.m_sorted.resModels()
        m = self.m_sorted

        self.currentAtom = 0

        groups = []
        atoms = DictList()

        n_residues = len( resIndex )

        for i, first_atom in enumerate( resIndex ):

            ## a residue ends right before the next one starts
            if i < n_residues - 1:
                last_atom = resIndex[ i+1 ] - 1
            else:
                last_atom = len( self.a_indices ) - 1

            a = m.atoms[ first_atom ]

            ## position of this residue's atoms in original PDBModel (unsorted)
            a_indices = self.a_indices[ first_atom : last_atom+1 ]

            if a['residue_name'] in ('GLY', 'ALA'):
                ## all atoms of these residues go into the backbone center
                bb_a_indices = a_indices
                sc_groups = []
            else:
                ## split into backbone and remaining (side chain) atoms
                is_bb = resModels[i].maskBB()
                bb_a_indices = N0.compress( is_bb, a_indices )
                sc_a_indices = N0.compress( N0.logical_not( is_bb ),
                                            a_indices )

                sc_groups = self.group( sc_a_indices, maxPerCenter )

            groups.append( bb_a_indices )
            atoms  += [ self.nextAtom( a, 'BB' ) ]

            ## side chain centers are numbered from 0 within each residue
            for k, g in enumerate( sc_groups ):
                groups.append( g )
                atoms  += [ self.nextAtom( a, 'SC%i'%k ) ]

        self.groups = groups
        self.atoms = atoms
Example #28
0
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing outliers.
    Superimposed array y would be C{ N0.dot(y, N0.transpose(r)) + t }.

    In every round the transformation is fitted on the rows that are still
    considered, the rmsd and standard deviation of the row distances are
    calculated, and rows with a distance of C{rmsd + z * stdv} or more are
    excluded from the following rounds. If the considered distances have no
    spread at all (stdv == 0, e.g. a perfect match), no row is excluded and
    the relative change in stdv is evaluated without dividing by zero.

    :param x: reference coordinates, one row per point
    :type  x: array
    :param y: coordinates to be matched onto x (same number of rows as x)
    :type  y: array
    :param n_iterations: number of calculations::
                           1 .. no iteration 
                           0 .. until convergence
    :type  n_iterations: 1|0
    :param z: number of standard deviations for outlier definition (default: 2)
    :type  z: float
    :param eps_rmsd: tolerance in rmsd (default: 0.5)
    :type  eps_rmsd: float
    :param eps_stdv: tolerance in standard deviations (default: 0.05)
    :type  eps_stdv: float

    :return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ];
             percent_considered is the fraction (0..1, two decimals) of rows
             used for the fit of that round
    :rtype: (array, array), [float, float, int]
    """
    iter_trace = []

    rmsd_old = 0
    stdv_old = 0

    n = 0
    converged = 0

    mask = N0.ones(len(y), N0.Int32 )

    while not converged:

        ## find transformation for best match of the rows still considered
        r, t = findTransformation(N0.compress(mask, x, 0),
                                  N0.compress(mask, y, 0))

        ## transform all coordinates (including rows currently masked out)
        xt = N0.dot(y, N0.transpose(r)) + t

        ## calculate row distances; rows already excluded are set to 0
        d = N0.sqrt(N0.sum(N0.power(x - xt, 2), 1)) * mask

        ## calculate rmsd and stdv over the considered rows only
        d_considered = N0.compress(mask, d)
        rmsd = N0.sqrt(N0.average(d_considered**2))
        stdv = MU.SD(d_considered)

        ## check conditions for convergence
        d_rmsd = abs(rmsd - rmsd_old)

        if stdv == 0:
            ## no spread: the relative change is undefined (division by
            ## zero); treat 0 -> 0 as "unchanged", anything else as "changed"
            if stdv_old == 0:
                d_stdv = 0.0
            else:
                d_stdv = float('inf')
        else:
            d_stdv = abs(1 - stdv_old / stdv)

        if d_rmsd < eps_rmsd and d_stdv < eps_stdv:
            converged = 1
        else:
            rmsd_old = rmsd
            stdv_old = stdv

        ## store result (fraction of rows used for this round's fit)
        perc = round(float(N0.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows; without any spread there are no
        ## outliers and the strict comparison would discard every row
        if stdv != 0:
            mask = N0.logical_and(mask, N0.less(d, rmsd + z * stdv))

        outliers = N0.nonzero( N0.logical_not( mask ) )
        iter_trace.append([perc, round(rmsd, 3), outliers])

        n += 1

        if n_iterations and n >= n_iterations:
            break

    return (r, t), iter_trace
Example #29
0
    def fit( self, mask=None, ref=None, n_it=1,
             prof='rms', verbose=1, fit=1, **profInfos ):
        """
        Superimpose every coordinate frame onto reference coordinates and
        store the resulting rms value of each frame in a profile. If
        n_it != 1 (iterative fit), the fraction of atoms considered for the
        fit is stored in a second profile called |prof|_considered
        (i.e. by default 'rms_considered').

        :param mask: atom mask, atoms to consider default: [all]
        :type  mask: [1|0]
        :param ref: use as reference, default: None, average Structure
        :type  ref: PDBModel
        :param n_it: number of fit iterations, kicking out outliers on the way
                     1 -> classic single fit, 0 -> until convergence
                     (default: 1)
        :type  n_it: int
        :param prof: save rms per frame in profile of this name, ['rms']
        :type  prof: str
        :param verbose: print progress info to STDERR (default: 1)
        :type  verbose: 1|0
        :param fit: transform frames after match, otherwise just calc rms
                    (default: 1)          
        :type  fit: 1|0
        :param profInfos: additional key=value pairs for rms profile info []
        :type profInfos: key=value
        """
        ## reference is the given model or else the average over all frames
        refxyz = N0.average( self.frames, 0 ) if ref is None else ref.getXyz()

        if mask is None:
            mask = N0.ones( len( refxyz ), N0.Int32 )

        ## only the masked atoms of the reference take part in the fit
        refxyz = N0.compress( mask, refxyz, 0 )

        if verbose:
            T.errWrite( "rmsd fitting..." )

        rms = []          ## rms value of each frame
        non_outliers = [] ## fraction of atoms considered for rms and fit
        iterations = []   ## number of iterations performed on each frame

        single_fit = ( n_it == 1 )

        for i in range( len( self.frames ) ):

            xyz = self.frames[i]
            xyz_masked = N0.compress( mask, xyz, 0 )

            ## get rotation / translation, iteratively or in one go
            if single_fit:
                r, t = rmsFit.findTransformation( refxyz, xyz_masked )
                trace = None
            else:
                (r, t), trace = rmsFit.match( refxyz, xyz_masked, n_it )

            ## the transformation is applied to all atoms, masked or not
            xyz_transformed = N0.dot( xyz, N0.transpose( r ) ) + t

            if trace is None:
                ## single fit: rms over the masked atoms after transformation
                delta = N0.compress( mask, xyz_transformed, 0 ) - refxyz
                d = N0.sqrt( N0.sum( N0.power( delta, 2 ), 1 ) )
                rms.append( N0.sqrt( N0.average( d**2 ) ) )
            else:
                ## iterative fit: take values reported for the last round
                last_round = trace[-1]
                iterations.append( len( trace ) )
                non_outliers.append( last_round[0] )
                rms.append( last_round[1] )

            if fit:
                self.frames[i] = xyz_transformed.astype( N0.Float32 )

            ## progress mark for every 100th frame
            if verbose and i % 100 == 0:
                T.errWrite( '#' )

        self.setProfile( prof, rms, n_iterations=n_it, **profInfos )

        ## only filled by the iterative fit
        if non_outliers:
            self.setProfile(
                prof + '_considered', non_outliers,
                n_iterations=n_it,
                comment='fraction of atoms considered for iterative fit' )

        if verbose:
            T.errWrite( 'done\n' )