Esempio n. 1
0
    def __collectFrames(self, pdbs, castAll=0):
        """
        Read coordinates from list of pdb files.

        Every file is loaded as a PDBModel and compared against the
        reference model (self.ref). Atoms may be removed from a frame to
        make it fit the reference ("casting") but the reference itself
        must not need any casting.

        @param pdbs: list of file names
        @type  pdbs: [str]
        @param castAll: analyze atom content of each frame for casting;
                        otherwise the casting determined for the first
                        frame is re-used for all following frames
                        (default: 0)
        @type  castAll: 0|1

        @return: frames x (N x 3) Numpy array (of float)
        @rtype: array

        @raise TrajError: if the reference would have to be cast to match
                          a frame, or if the atom names of a checked frame
                          differ from the atom names of the reference
        """
        frameList = []
        atomCast = None

        if self.verbose: T.errWrite('reading %i pdbs...' % len(pdbs))

        refNames = self.ref.atomNames()  ## cache for atom checking

        for i, f in enumerate(pdbs):

            ## Load
            m = PDBModel(f)

            ## compare atom order & content of first frame to reference pdb
            if castAll or i == 0:
                atomCast, castRef = m.compareAtoms(self.ref)

                ## compare as plain lists: range() is not a list on Python 3
                if list(castRef) != list(range(len(self.ref))):
                    ## we can take away atoms from each frame but not from ref
                    raise TrajError("Reference PDB doesn't match %s." %
                                    m.fileName)

                if list(atomCast) == list(range(len(m))):
                    atomCast = None  ## no casting necessary
                else:
                    if self.verbose: T.errWrite(' casting ')

            ## make frame fit the reference; explicit None test because the
            ## truth value of an index sequence is not a reliable flag
            if atomCast is not None:
                m = m.take(atomCast)

            ## additional check on each 100th frame
            if i % 100 == 0 and m.atomNames() != refNames:
                raise TrajError("%s doesn't match reference pdb." % m.fileName)

            frameList.append(m.xyz)

            ## progress mark after every 10th frame
            if (i + 1) % 10 == 0 and self.verbose:
                T.errWrite('#')

        if self.verbose: T.errWrite('done\n')

        ## convert to 3-D Numpy Array
        return N.array(frameList).astype(N.Float32)
Esempio n. 2
0
    def __collectFrames( self, pdbs, castAll=0 ):
        """
        Read coordinates from list of pdb files.

        Every file is loaded as a PDBModel and compared against the
        reference model (self.ref). Atoms may be removed from a frame to
        make it fit the reference ("casting") but the reference itself
        must not need any casting.

        @param pdbs: list of file names
        @type  pdbs: [str]
        @param castAll: analyze atom content of each frame for casting;
                        otherwise the casting determined for the first
                        frame is re-used for all following frames
                        (default: 0)
        @type  castAll: 0|1

        @return: frames x (N x 3) Numpy array (of float)
        @rtype: array

        @raise TrajError: if the reference would have to be cast to match
                          a frame, or if the atom names of a checked frame
                          differ from the atom names of the reference
        """
        frameList = []
        atomCast = None

        if self.verbose: T.errWrite( 'reading %i pdbs...' % len(pdbs) )

        refNames = self.ref.atomNames()  ## cache for atom checking

        for i, f in enumerate( pdbs ):

            ## Load
            m = PDBModel( f )

            ## compare atom order & content of first frame to reference pdb
            if castAll or i == 0:
                atomCast, castRef = m.compareAtoms( self.ref )

                ## compare as plain lists: range() is not a list on Python 3
                if list( castRef ) != list( range( len( self.ref ) ) ):
                    ## we can take away atoms from each frame but not from ref
                    raise TrajError("Reference PDB doesn't match %s."
                                    % m.fileName)

                if list( atomCast ) == list( range( len( m ) ) ):
                    atomCast = None   ## no casting necessary
                else:
                    if self.verbose: T.errWrite(' casting ')

            ## make frame fit the reference; explicit None test because the
            ## truth value of an index sequence is not a reliable flag
            if atomCast is not None:
                m = m.take( atomCast )

            ## additional check on each 100th frame
            if i % 100 == 0 and m.atomNames() != refNames:
                raise TrajError("%s doesn't match reference pdb." % m.fileName )

            frameList.append( m.xyz )

            ## progress mark after every 10th frame
            if (i + 1) % 10 == 0 and self.verbose:
                T.errWrite('#')

        if self.verbose: T.errWrite( 'done\n' )

        ## convert to 3-D Numpy Array
        return N.array( frameList ).astype( N.Float32 )
Esempio n. 3
0
    def go(self, errorthreshold, n_iterations=1e10, nstep=10, verbose=1):
        """
        Start the clustering. Run until the relative change of the error
        between two iterations drops to or below the error threshold or
        until the maximum number of iterations has been run.

        The final centers, membership matrix and distance matrix are stored
        in self.centers, self.msm and self.d2. self.d2 is None if not a
        single iteration was performed.

        @param errorthreshold: threshold value for the relative error change
        @type  errorthreshold: float
        @param n_iterations: threshold value for number of iterations
                             (default: 1e10)
        @type  n_iterations: int
        @param nstep: print information for every n'th step in the iteration;
                      0 disables the progress output (default: 10)
        @type  nstep: int
        @param verbose: report iteration number and error via
                        tools.errWrite (default: 1)
        @type  verbose: 1|0

        @return: array with cluster centers
        @rtype: array('f')
        """
        iteration = 0
        rel_err = 1e10
        error = 1.e10
        d2 = None  ## stays None if the loop below is never entered

        msm = self.create_membership_matrix()
        centers = self.calc_cluster_center(msm)

        while rel_err > errorthreshold and iteration < n_iterations:
            d2, msm, centers = self.iterate(centers)
            old_error = error
            error = self.error(msm, d2)

            if old_error == 0:
                ## avoid 0/0: an error that stays at zero has converged
                if error == 0:
                    rel_err = 0.
                else:
                    rel_err = float('inf')
            else:
                rel_err = abs(1. - error / old_error)

            iteration += 1

            ## nstep is tested first so that nstep=0 cannot divide by zero
            if verbose and nstep and not iteration % nstep:
                tools.errWrite("%i %f\n" % (iteration, error))

        self.centers = centers
        self.msm = msm
        self.d2 = d2

        return centers
Esempio n. 4
0
    def go(self, errorthreshold, n_iterations=1e10, nstep=10, verbose=1):
        """
        Start the clustering. Run until the relative change of the error
        between two iterations drops to or below the error threshold or
        until the maximum number of iterations has been run.

        The final centers, membership matrix and distance matrix are stored
        in self.centers, self.msm and self.d2. self.d2 is None if not a
        single iteration was performed.

        @param errorthreshold: threshold value for the relative error change
        @type  errorthreshold: float
        @param n_iterations: threshold value for number of iterations
                             (default: 1e10)
        @type  n_iterations: int
        @param nstep: print information for every n'th step in the iteration;
                      0 disables the progress output (default: 10)
        @type  nstep: int
        @param verbose: report iteration number and error via
                        tools.errWrite (default: 1)
        @type  verbose: 1|0

        @return: array with cluster centers
        @rtype: array('f')
        """
        iteration = 0
        rel_err = 1e10
        error = 1.e10
        d2 = None  ## stays None if the loop below is never entered

        msm = self.create_membership_matrix()
        centers = self.calc_cluster_center(msm)

        while rel_err > errorthreshold and iteration < n_iterations:
            d2, msm, centers = self.iterate(centers)
            old_error = error
            error = self.error(msm, d2)

            if old_error == 0:
                ## avoid 0/0: an error that stays at zero has converged
                if error == 0:
                    rel_err = 0.
                else:
                    rel_err = float('inf')
            else:
                rel_err = abs(1. - error / old_error)

            iteration += 1

            ## nstep is tested first so that nstep=0 cannot divide by zero
            if verbose and nstep and not iteration % nstep:
                tools.errWrite("%i %f\n" % (iteration, error))

        self.centers = centers
        self.msm = msm
        self.d2 = d2

        return centers
Esempio n. 5
0
    def fit( self, mask=None, ref=None, n_it=1,
             prof='rms', verbose=1, fit=1, **profInfos ):
        """
        Superimpose all coordinate frames on reference coordinates. Put rms
        values in a profile. For iterative fits (n_it != 1), the fraction of
        atoms considered for the fit is put into a profile called
        |prof|_considered (i.e. by default 'rms_considered').

        @param mask: atom mask, atoms to consider default: [all]
        @type  mask: [1|0]
        @param ref: use as reference, default: None, average Structure
        @type  ref: PDBModel
        @param n_it: number of fit iterations, kicking out outliers on the way
                     1 -> classic single fit, 0 -> until convergence
                     (default: 1)
        @type  n_it: int
        @param prof: save rms per frame in profile of this name, ['rms']
        @type  prof: str
        @param verbose: print progress info to STDERR (default: 1)
        @type  verbose: 1|0
        @param fit: transform frames after match, otherwise just calc rms
                    (default: 1)
        @type  fit: 1|0
        @param profInfos: additional key=value pairs for rms profile info []
        @type profInfos: key=value
        """
        ## identity test: '== None' would invoke the model's own comparison
        if ref is None:
            refxyz = N.average( self.frames, 0 )
        else:
            refxyz = ref.getXyz()

        if mask is None:
            mask = N.ones( len( refxyz ), N.int32 )

        refxyz = N.compress( mask, refxyz, 0 )

        if verbose: T.errWrite( "rmsd fitting..." )

        rms = []          ## rms value of each frame
        non_outliers = [] ## fraction of atoms considered for rms and fit

        for i in range( len( self.frames ) ):

            xyz = self.frames[i]
            xyz_masked = N.compress( mask, xyz, 0 )

            if n_it != 1:
                ## iterative fit; the last entry of rmsdList provides the
                ## fraction of considered atoms [0] and the rms [1]
                (r, t), rmsdList = rmsFit.match( refxyz, xyz_masked, n_it )
                non_outliers.append( rmsdList[-1][0] )

                xyz_transformed = N.dot( xyz, N.transpose(r) ) + t

                rms.append( rmsdList[-1][1] )

            else:
                r, t = rmsFit.findTransformation( refxyz, xyz_masked )

                ## the transformation is applied to all atoms ..
                xyz_transformed = N.dot( xyz, N.transpose(r) ) + t

                ## .. but the rms is calculated over the masked atoms only
                d = N.sqrt( N.sum( N.power(
                    N.compress( mask, xyz_transformed, 0 ) - refxyz, 2 ), 1 ) )

                rms.append( N.sqrt( N.average( d**2 ) ) )

            if fit:
                self.frames[i] = xyz_transformed.astype( N.float32 )

            if verbose and i % 100 == 0:
                T.errWrite( '#' )

        self.setProfile( prof, rms, n_iterations=n_it, **profInfos )

        if non_outliers:
            self.setProfile( prof+'_considered', non_outliers,
                             n_iterations=n_it,
                             comment='fraction of atoms considered for iterative fit' )

        if verbose: T.errWrite( 'done\n' )
Esempio n. 6
0
    def getFluct_local( self, mask=None, border_res=1,
                        left_atoms=['C'], right_atoms=['N'], verbose=1 ):
        """
        Get mean displacement of each atom from its average position after
        fitting of each residue to the reference backbone coordinates of itself
        and selected atoms of neighboring residues to the right and left.

        Residues for which the calculation fails with a ZeroDivisionError
        contribute zeros and are reported as '?' + residue index via
        T.errWrite (regardless of verbose).

        @param mask: N_atoms x 1 array of 0||1, atoms for which fluctuation
                     should be calculated
        @type  mask: array
        @param border_res: number of neighboring residues to use for fitting
        @type  border_res: int
        @param left_atoms: atoms (names) to use from these neighbor residues
        @type  left_atoms: [str]
        @param right_atoms: atoms (names) to use from these neighbor residues
        @type  right_atoms: [str]
        @param verbose: report progress via T.errWrite (default: 1)
        @type  verbose: 1|0

        @return: fluctuation values, collected residue by residue
        @rtype: [float]

        @raise PDBError: if a residue window contains no atoms
        """
        if mask is None:
            mask = N.ones( len( self.frames[0] ), N.int32 )

        if verbose: T.errWrite( "rmsd fitting per residue..." )

        ## NOTE(review): the loop below iterates over this result directly,
        ## which presumably relies on N.nonzero returning a flat index
        ## array (Numeric semantics) -- confirm against the N in use
        residues = N.nonzero( self.ref.atom2resMask( mask ) )

        ## backbone atoms used for fit
        fit_atoms_right = N.nonzero( self.ref.mask( right_atoms ) )
        fit_atoms_left  = N.nonzero( self.ref.mask( left_atoms ) )
        ## chain index of each residue
        rchainMap = N.take( self.ref.chainMap(), self.ref.resIndex() )

        result = []

        for res in residues:

            i_res, i_border = self.__resWindow(res, border_res, rchainMap,
                                               fit_atoms_left, fit_atoms_right)

            try:
                if not len( i_res ):
                    raise PDBError( 'empty residue' )

                t_res = self.takeAtoms( i_res + i_border )

                ## the residue's own atoms come first in the sub-trajectory
                i_center = list( range( len( i_res ) ) )

                mask_BB = t_res.ref.maskBB() * t_res.ref.maskHeavy()

                ## fit with border atoms ..
                t_res.fit( ref=t_res.ref, mask=mask_BB, verbose=0 )
                ## .. but calculate only with center residue atoms
                frames = N.take( t_res.frames, i_center, 1 )

                ## NOTE(review): presumably averages over the frame axis
                ## (axis 0 default of Numeric-style average) -- confirm
                avg = N.average( frames )

                rmsd = N.average(N.sqrt(N.sum(N.power(frames - avg, 2), 2) ))

                result.extend( rmsd )

                if verbose: T.errWrite('#')

            except ZeroDivisionError:
                ## keep result aligned: one zero per atom of this residue
                result.extend( N.zeros( len(i_res), N.Float32 ) )
                T.errWrite('?' + str( res ))

        if verbose: T.errWriteln( "done" )

        return result
Esempio n. 7
0
    def fit(self,
            mask=None,
            ref=None,
            n_it=1,
            prof='rms',
            verbose=1,
            fit=1,
            **profInfos):
        """
        Superimpose all coordinate frames on reference coordinates. Put rms
        values in a profile. For iterative fits (n_it != 1), the fraction of
        atoms considered for the fit is put into a profile called
        |prof|_considered (i.e. by default 'rms_considered').

        @param mask: atom mask, atoms to consider default: [all]
        @type  mask: [1|0]
        @param ref: use as reference, default: None, average Structure
        @type  ref: PDBModel
        @param n_it: number of fit iterations, kicking out outliers on the way
                     1 -> classic single fit, 0 -> until convergence
                     (default: 1)
        @type  n_it: int
        @param prof: save rms per frame in profile of this name, ['rms']
        @type  prof: str
        @param verbose: print progress info to STDERR (default: 1)
        @type  verbose: 1|0
        @param fit: transform frames after match, otherwise just calc rms
                    (default: 1)
        @type  fit: 1|0
        @param profInfos: additional key=value pairs for rms profile info []
        @type profInfos: key=value
        """
        ## identity test: '== None' would invoke the model's own comparison
        if ref is None:
            refxyz = N.average(self.frames, 0)
        else:
            refxyz = ref.getXyz()

        if mask is None:
            mask = N.ones(len(refxyz), N.int32)

        refxyz = N.compress(mask, refxyz, 0)

        if verbose: T.errWrite("rmsd fitting...")

        rms = []  ## rms value of each frame
        non_outliers = []  ## fraction of atoms considered for rms and fit

        for i in range(len(self.frames)):

            xyz = self.frames[i]
            xyz_masked = N.compress(mask, xyz, 0)

            if n_it != 1:
                ## iterative fit; the last entry of rmsdList provides the
                ## fraction of considered atoms [0] and the rms [1]
                (r, t), rmsdList = rmsFit.match(refxyz, xyz_masked, n_it)
                non_outliers.append(rmsdList[-1][0])

                xyz_transformed = N.dot(xyz, N.transpose(r)) + t

                rms.append(rmsdList[-1][1])

            else:
                r, t = rmsFit.findTransformation(refxyz, xyz_masked)

                ## the transformation is applied to all atoms ..
                xyz_transformed = N.dot(xyz, N.transpose(r)) + t

                ## .. but the rms is calculated over the masked atoms only
                d = N.sqrt(N.sum(N.power(
                    N.compress(mask, xyz_transformed, 0) - refxyz, 2), 1))

                rms.append(N.sqrt(N.average(d**2)))

            if fit:
                self.frames[i] = xyz_transformed.astype(N.float32)

            if verbose and i % 100 == 0:
                T.errWrite('#')

        self.setProfile(prof, rms, n_iterations=n_it, **profInfos)

        if non_outliers:
            self.setProfile(
                prof + '_considered',
                non_outliers,
                n_iterations=n_it,
                comment='fraction of atoms considered for iterative fit')

        if verbose: T.errWrite('done\n')
Esempio n. 8
0
    def getFluct_local(self,
                       mask=None,
                       border_res=1,
                       left_atoms=['C'],
                       right_atoms=['N'],
                       verbose=1):
        """
        Get mean displacement of each atom from its average position after
        fitting of each residue to the reference backbone coordinates of itself
        and selected atoms of neighboring residues to the right and left.

        Residues for which the calculation fails with a ZeroDivisionError
        contribute zeros and are reported as '?' + residue index via
        T.errWrite (regardless of verbose).

        @param mask: N_atoms x 1 array of 0||1, atoms for which fluctuation
                     should be calculated
        @type  mask: array
        @param border_res: number of neighboring residues to use for fitting
        @type  border_res: int
        @param left_atoms: atoms (names) to use from these neighbor residues
        @type  left_atoms: [str]
        @param right_atoms: atoms (names) to use from these neighbor residues
        @type  right_atoms: [str]
        @param verbose: report progress via T.errWrite (default: 1)
        @type  verbose: 1|0

        @return: fluctuation values, collected residue by residue
        @rtype: [float]

        @raise PDBError: if a residue window contains no atoms
        """
        if mask is None:
            mask = N.ones(len(self.frames[0]), N.int32)

        if verbose: T.errWrite("rmsd fitting per residue...")

        ## NOTE(review): the loop below iterates over this result directly,
        ## which presumably relies on N.nonzero returning a flat index
        ## array (Numeric semantics) -- confirm against the N in use
        residues = N.nonzero(self.ref.atom2resMask(mask))

        ## backbone atoms used for fit
        fit_atoms_right = N.nonzero(self.ref.mask(right_atoms))
        fit_atoms_left = N.nonzero(self.ref.mask(left_atoms))
        ## chain index of each residue
        rchainMap = N.take(self.ref.chainMap(), self.ref.resIndex())

        result = []

        for res in residues:

            i_res, i_border = self.__resWindow(res, border_res, rchainMap,
                                               fit_atoms_left, fit_atoms_right)

            try:
                if not len(i_res):
                    raise PDBError('empty residue')

                t_res = self.takeAtoms(i_res + i_border)

                ## the residue's own atoms come first in the sub-trajectory
                i_center = list(range(len(i_res)))

                mask_BB = t_res.ref.maskBB() * t_res.ref.maskHeavy()

                ## fit with border atoms ..
                t_res.fit(ref=t_res.ref, mask=mask_BB, verbose=0)
                ## .. but calculate only with center residue atoms
                frames = N.take(t_res.frames, i_center, 1)

                ## NOTE(review): presumably averages over the frame axis
                ## (axis 0 default of Numeric-style average) -- confirm
                avg = N.average(frames)

                rmsd = N.average(N.sqrt(N.sum(N.power(frames - avg, 2), 2)))

                result.extend(rmsd)

                if verbose: T.errWrite('#')

            except ZeroDivisionError:
                ## keep result aligned: one zero per atom of this residue
                result.extend(N.zeros(len(i_res), N.Float32))
                T.errWrite('?' + str(res))

        if verbose: T.errWriteln("done")

        return result