예제 #1
파일: Complex.py 프로젝트: tybiot/biskit
    def contactResDistribution(self, cm=None):
        """
        Count the occurrence of each residue type in the protein-protein
        interface.

        @param cm: pre-calculated residue contact matrix; taken from
                   resContacts() if not given (default: None)
        @type  cm: matrix
        @return: dict {'A':3, 'C':1, .. } with one count for every residue
                 letter returned by molUtils.allAA()
        @rtype: dict
        """
        if cm is None:
            cm = self.resContacts()

        ## contact counts per residue double as compress masks: any
        ## non-zero count marks a residue that takes part in a contact
        ## NOTE(review): assumes cm is laid out as rec residues x lig
        ## residues and that N0.sum reduces along the first axis -- confirm
        maskLig = N0.sum(cm)
        maskRec = N0.sum(N0.transpose(cm))

        ## sequence letters of the contacting residues of both partners,
        ## converted back into one string
        seqLig = N0.compress(maskLig, self.lig().sequence())
        seqRec = N0.compress(maskRec, self.rec().sequence())
        seq = ''.join(seqLig) + ''.join(seqRec)

        ## count occurrence of every residue letter
        return dict((aa, seq.count(aa)) for aa in molUtils.allAA())
예제 #2
파일: Hmmer.py 프로젝트: graik/biskit
    def castHmmDic( self, hmmDic, repete, hmmGap, key ):
        """
        Blow up hmmDic to the number of repetes of the profile used.
        Correct scores for possible deletions in the search sequence.

        The scores stored under C{key} are copied once per repete. From
        copy number i the positions listed in C{hmmGap[i]} are removed,
        then all copies are joined with the copy of the last repete first.
        hmmDic is modified in place and also returned.

        @param hmmDic: dictionary from L{getHmmProfile}
        @type  hmmDic: dict
        @param repete: repete information from L{align}; must be at least 1,
                       otherwise no score is built and the final assignment
                       fails
        @type  repete: int
        @param hmmGap: information about gaps from L{align}, one entry
                       per repete
        @type  hmmGap: [int]
        @param key: name of scoring method to adjust for gaps and repetes
        @type  key: str
        @return: dictionary with information about the profile
        @rtype: dict
        """
        s = hmmDic[key]

        for i in range( repete ):
            ## 0 at the gap positions of repete i, 1 everywhere else
            mask = N0.ones( len(s) )
            N0.put( mask, hmmGap[i], 0 )
            kept = N0.compress( mask, s, 0 )

            if i == 0:
                score = kept
            else:
                ## later repetes are put in front of the earlier ones
                score = N0.concatenate( ( kept, score ) )

        hmmDic[key] = score

        return hmmDic
예제 #3
    def castHmmDic(self, hmmDic, repete, hmmGap, key):
        """
        Blow up hmmDic to the number of repetes of the profile used.
        Correct scores for possible deletions in the search sequence.

        The scores stored under C{key} are copied once per repete. From
        copy number i the positions listed in C{hmmGap[i]} are removed,
        then all copies are joined with the copy of the last repete first.
        hmmDic is modified in place and also returned.

        @param hmmDic: dictionary from L{getHmmProfile}
        @type  hmmDic: dict
        @param repete: repete information from L{align}; must be at least 1,
                       otherwise no score is built and the final assignment
                       fails
        @type  repete: int
        @param hmmGap: information about gaps from L{align}, one entry
                       per repete
        @type  hmmGap: [int]
        @param key: name of scoring method to adjust for gaps and repetes
        @type  key: str
        @return: dictionary with information about the profile
        @rtype: dict
        """
        s = hmmDic[key]

        for i in range(repete):
            ## 0 at the gap positions of repete i, 1 everywhere else
            mask = N0.ones(len(s))
            N0.put(mask, hmmGap[i], 0)
            kept = N0.compress(mask, s, 0)

            if i == 0:
                score = kept
            else:
                ## later repetes are put in front of the earlier ones
                score = N0.concatenate((kept, score))

        hmmDic[key] = score

        return hmmDic
예제 #4
파일: Complex.py 프로젝트: tybiot/biskit
    def __atomContacts(self, cutoff, rec_mask, lig_mask, cache):
        """
        Intermolecular distances below cutoff after applying the two masks.

        @param cutoff: cutoff for B{atom-atom} contact in Angstrom
        @type  cutoff: float
        @param rec_mask: atom mask
        @type  rec_mask: [1|0]
        @param lig_mask: atom mask
        @type  lig_mask: [1|0]
        @param cache: cache pairwise atom distance matrix in self.pw_dist
        @type  cache: 1|0
        @return: atom contact matrix, array sum_rec_mask x sum_lig_mask
        @rtype: array
        """
        ## atom coordinates; compressed along axis 0 with the masks below
        rec_xyz = self.rec().getXyz()
        lig_xyz = self.lig().getXyz()

        ## a cached distance matrix is only re-used if its shape fits the
        ## two masks
        ## NOTE(review): only the shape is compared -- a cached matrix
        ## calculated for different masks of equal size or for other
        ## coordinates would be re-used unnoticed
        expected = ( N0.sum(rec_mask), N0.sum(lig_mask) )
        dist = getattr(self, 'pw_dist', None)

        if dist is None or N0.shape( dist ) != expected:
            dist = self.__pairwiseDistances(N0.compress(rec_mask, rec_xyz, 0),
                                            N0.compress(lig_mask, lig_xyz, 0))
        if cache:
            self.pw_dist = dist

        ## reduce to 1 (distance < cutoff) or 0 -> n_atoms_rec x n_atoms_lig
        return N0.less(dist, cutoff)
예제 #5
    def takeFrames( self, indices ):
        """
        Return a copy of the trajectory containing only the specified frames.

        @param indices: positions to take
        @type  indices: [int]

        @return: copy of this Trajectory (fewer frames, semi-deep copy of ref)
        @rtype: Trajectory
        """
        ## remove out-of-bound indices (only the upper bound is checked,
        ## negative indices are passed on as they are)
        indices = N0.compress( N0.less( indices, len( self.frames) ), indices )

        r = self.__class__()

        ## this step takes some time for large frames !
        r.frames = N0.take( self.frames, indices, 0 )

        ## semi-deep copy of reference model
        r.setRef( self.ref.take( range( self.ref.lenAtoms() )) )

        if self.frameNames is not None:
            names = N0.take( self.frameNames, indices, 0 )
            ## build a real list so that the names can be indexed and
            ## iterated more than once
            r.frameNames = [ ''.join( name ) for name in names.tolist() ]

        r.pc = self.__takePca( indices )

        r.profiles = self.profiles.take( indices )

        ## the residue index is shared with the original, not copied
        r.resIndex = self.resIndex

        return r
예제 #6
    def residusMaximus( self, atomValues, mask=None ):
        """
        Take list of value per atom, return list where all atoms of any
        residue are set to the highest value of any atom in that residue.
        (after applying mask)

        Masked-out atoms are not removed but set to 0, so they still take
        part in the maximum with that value.

        @param atomValues: list 1 x N, values per atom
        @type  atomValues: [ float ]
        @param mask: list 1 x N, 0|1, 'master' atoms of each residue
                     (default: None, all atoms)
        @type  mask: [1|0]

        @return: Numpy array 1 x N of float
        @rtype: array
        """
        if mask is None:
            mask = N0.ones( len( self.frames[0] ), N0.Int32 )

        ## eliminate all values that do not belong to the selected atoms
        masked = atomValues * mask

        ## residue number of every atom; fetched once for all residues
        resMap = self.resMap()

        result = []

        ## set all atoms of each residue to uniform value
        for res in range( 0, resMap[-1]+1 ):

            ## get atom entries for this residue
            resAtoms = N0.compress( N0.equal( resMap, res ), masked )

            ## get maximum value
            masterValue = max( resAtoms )

            ## one copy of the maximum (as float) per atom of the residue
            result.extend( resAtoms * 0.0 + masterValue )

        return N0.array( result )
예제 #7
def logConfidence( x, R, clip=0 ):
    """
    Estimate the probability of x NOT being a random observation from a
    lognormal distribution that is described by a set of random values.

    Zeros are removed from R before the distribution is fitted. At least
    two non-zero values are needed because the spread is calculated with
    n - 1 as divisor.

    @param x: observed value
    @type  x: float
    @param R: sample of random values
    @type  R: [float]
    @param clip: clip zeros at this value  0->don't clip (default: 0)
    @type  clip: float

    @return: confidence that x is not random, median of random distr.;
             (0, 0) if x is 0 and not clipped
    @rtype: (float, float)
    """
    if clip and 0 in R:
        R = N0.clip( R, clip, max( R ) )
    if clip and x == 0:
        x = clip

    ## remove 0 instead of clipping
    R = N0.compress( R, R )
    if x == 0:
        return 0, 0

    ## get mean and stdv of log-transformed random sample
    logR = N0.log( R )
    alpha = N0.average( logR )

    n = len( R )

    beta = N0.sqrt(N0.sum(N0.power(logR - alpha, 2)) / (n - 1.))

    return logArea( x, alpha, beta ), logMedian( alpha )
예제 #8
    def pairwiseRmsd( self, aMask=None, noFit=0 ):
        """
        Calculate rmsd between each 2 coordinate frames.

        @param aMask: atom mask
        @type  aMask: [1|0]
        @param noFit: 1 .. compare the frames as they are,
                      0 .. superimpose each pair with rmsFit.match before
                      taking the rmsd (default: 0)
        @type  noFit: 1|0
        @return: frames x frames array of float
        @rtype: array
        """
        frames = self.frames

        if aMask is not None:
            frames = N0.compress( aMask, frames, 1 )

        n = len( frames )
        result = N0.zeros( (n, n), N0.Float32 )

        for i in range( n ):

            ## the matrix is symmetric, only the upper half is calculated
            for j in range( i+1, n ):
                if noFit:
                    d = N0.sqrt(N0.sum(N0.power(frames[i]-frames[j], 2), 1))
                    rmsd = N0.sqrt( N0.average(d**2) )
                else:
                    ## single fit without outlier removal; the rmsd is the
                    ## second field of the first (and only) trace entry
                    rt, rmsdLst = rmsFit.match( frames[i], frames[j], 1 )
                    rmsd = rmsdLst[0][1]

                result[i,j] = result[j,i] = rmsd

        return result
예제 #9
파일: hexTools.py 프로젝트: tybiot/biskit
def centerSurfDist(model, surf_mask, mask=None):
    """
    Calculate the longest and shortest distance from
    the center of the molecule to the surface.

    The center of mass is taken from the complete model; the masks only
    select the atoms whose distance to it is measured.

    @param model: model providing maskHeavy(), centerOfMass() and getXyz()
    @type  model: PDBModel
    @param surf_mask: atom surface mask, needed for minimum surface distance
    @type  surf_mask: [1|0]
    @param mask: atoms to be considered, combined with surf_mask
                 (default: None, the heavy atoms of model)
    @type  mask: [1|0]

    @return: max distance, min distance
    @rtype: float, float
    """
    if mask is None:
        mask = model.maskHeavy()

    ## calculate center of mass
    center = model.centerOfMass()

    ## coordinates of atoms that are selected by both masks
    surf_xyz = N0.compress(mask * surf_mask, model.getXyz(), 0)

    ## distance of every selected atom from the center
    dist = N0.sqrt(N0.sum((surf_xyz - center)**2, 1))

    return max(dist), min(dist)
예제 #10
파일: Complex.py 프로젝트: tybiot/biskit
    def __extractLigandMatrix(self, fcomplex):
        """
        Compare structure from hex complex with original ligand pdb
        and determine the transformation between the two. The result is
        returned; this method itself does not assign self.ligandMatrix.

        @param fcomplex: pdb file with hex complex, handed on unchanged to
                         _extractLigandStructure
        @type  fcomplex: str (NOTE(review): assumed file name -- confirm)
        @return: rotation matrix and translation matrix as tuple
        @rtype: (array, array)
        """
        docked_pdb = self._extractLigandStructure(fcomplex)

        ## CA coordinates of docked and template ligand; the atom masks
        ## select rows, so the axis is given explicitly
        xyz_docked = N0.compress(docked_pdb.maskCA(), docked_pdb.xyz, 0)
        xyz_template = N0.compress(self.lig_model.maskCA(),
                                   self.lig_model.xyz, 0)

        (r, t) = self._findTransformation(xyz_docked, xyz_template)
        return (r, t)
예제 #11
    def phi_and_psi(self, model):
        """
        Calculate phi and psi torsion angles for all
        residues in model::
          phi - rotation about the N-CA bond
              - last position in a chain = None
          psi - rotation about CA-C
              - first position in a chain = None

        The angles are appended chain by chain to self.phi and self.psi;
        nothing is returned.

        NOTE(review): the atom quadruple used for 'phi' below
        (N, CA, C, N of next residue) is the one usually called psi and
        vice versa -- confirm before changing, callers may rely on the
        current assignment.

        @param model: PDBModel
        @type  model: PDBModel
        """
        for c in range(model.lenChains(breaks=1)):
            cModel = model.takeChains([c], breaks=1)

            xyz = cModel.xyz

            xyz_CA = N0.compress(cModel.maskCA(), xyz, 0)
            xyz_N = N0.compress(cModel.mask(['N']), xyz, 0)
            xyz_C = N0.compress(cModel.mask(['C']), xyz, 0)

            ## phi: c1 - N
            ##      c2 - CA
            ##      c3 - C
            ##      c4 - N of next residue
            for i in range(len(xyz_N) - 1):
                self.phi += [
                    self.dihedral(xyz_N[i], xyz_CA[i], xyz_C[i], xyz_N[i + 1])
                ]
            ## no following residue for the last position of the chain
            self.phi += [None]

            ## psi: c1 - C of previous residue
            ##      c2 - N
            ##      c3 - CA
            ##      c4 - C
            ## no previous residue for the first position of the chain
            self.psi += [None]
            for i in range(1, len(xyz_N)):
                self.psi += [
                    self.dihedral(xyz_C[i - 1], xyz_N[i], xyz_CA[i], xyz_C[i])
                ]
예제 #12
    def plotContactDensity(self, step=1, cutoff=4.5):
        """
        Example. plot histogram of contact density. Something wrong??

        @param step: handed on to averageContacts (default: 1)
        @type  step: int
        @param cutoff: handed on to averageContacts (default: 4.5)
        @type  cutoff: float

        @raise ComplexTrajError: if gnuplot program is not installed
        """
        if not gnuplot.installed:
            raise ComplexTrajError('gnuplot program is not installed')

        ## flatten the averaged contacts and keep the non-zero entries only
        r = N0.ravel(self.averageContacts(step, cutoff))
        r = N0.compress(r, r)

        gnuplot.plot(hist.density(r, 10))
예제 #13
def compareSequences(seqAA_1, seqAA_2):
    """
    Compare two sequences and return, for each of them, a mask that marks
    the positions selected by the comparison.

    The comparison runs in two passes: positions reported by getSkipLists
    (extended by expandRepeats) are removed first, then the remaining
    sequences are compared again and reduced with getEqualLists / getEqual.
    NOTE(review): the exact meaning of 'skip' and 'equal' is defined by
    these helpers, which are not part of this block -- confirm there.

    @param seqAA_1: first sequence
    @type  seqAA_1: [str]
    @param seqAA_2: second sequence
    @type  seqAA_2: [str]

    @return: two masks with one entry per position of seqAA_1 and seqAA_2,
             1 for the positions left after both passes, 0 otherwise
    @rtype: array, array
    """
    aa_1 = list(seqAA_1)
    aa_2 = list(seqAA_2)

    ## original position of every residue, filtered alongside the residues
    nr_1 = range(len(aa_1))
    nr_2 = range(len(aa_2))

    ## result masks, all 0 to start with
    mask_1 = N0.zeros(len(nr_1))
    mask_2 = N0.zeros(len(nr_2))

    ## first pass: find and remove the positions that are to be skipped
    skip_1, skip_2 = getSkipLists(getOpCodes(aa_1, aa_2))

    skip_1 = [expandRepeats(aa_1, *pos) for pos in skip_1]
    skip_2 = [expandRepeats(aa_2, *pos) for pos in skip_2]

    keep_1 = del2mask(aa_1, *skip_1)
    keep_2 = del2mask(aa_2, *skip_2)

    aa_1 = N0.compress(keep_1, aa_1).tolist()
    nr_1 = N0.compress(keep_1, nr_1).tolist()
    aa_2 = N0.compress(keep_2, aa_2).tolist()
    nr_2 = N0.compress(keep_2, nr_2).tolist()

    ## second pass: reduce what is left to the equal parts
    equal_1, equal_2 = getEqualLists(getOpCodes(aa_1, aa_2))
    aa_1, nr_1 = getEqual(aa_1, nr_1, equal_1)
    aa_2, nr_2 = getEqual(aa_2, nr_2, equal_2)

    ## switch on the surviving original positions
    N0.put(mask_1, nr_1, 1)
    N0.put(mask_2, nr_2, 1)

    return mask_1, mask_2
예제 #14
    def pca( self, atomMask=None, frameMask=None, fit=1 ):
        """
        Calculate principal components of trajectory frames.

        The average structure of all frames (not only of the frames
        selected by frameMask) is subtracted before the decomposition.

        @param atomMask: 1 x N_atoms, [111001110..] atoms to consider
                         (default: all)
        @type  atomMask: [1|0]
        @param frameMask: 1 x N_frames, [001111..] frames to consider
                          (default all )
        @type  frameMask: [1|0]
        @param fit: call self.fit( atomMask ) before the calculation
                    (default: 1)
        @type  fit: 1|0

        @return: U, V * L and L**2 of the decomposition
                 V, L, U = LA.svd( data ); according to the original
                 description the second value is the projection of each
                 frame in PC space and the third the eigenvalue of each PC
        @rtype: array, array, array
        """
        if frameMask is None:
            frameMask = N0.ones( len( self.frames ), N0.Int32 )

        if atomMask is None:
            atomMask = N0.ones( self.getRef().lenAtoms(), N0.Int32 )

        if fit:
            self.fit( atomMask )

        refxyz = N0.average( self.frames, 0 )

        data = N0.compress( frameMask, self.frames, 0 )

        data = data - refxyz

        data = N0.compress( atomMask, data, 1 )

        ## reduce to 2D array, one flattened frame per row
        data = N0.array( [ N0.ravel( frame ) for frame in data ] )

        V, L, U = LA.svd( data )

        return U, V * L, N0.power(L, 2)
예제 #15
파일: Benchmark.py 프로젝트: graik/biskit
    def calc_rmsd(self, fitted_model_if, fitted_model_wo_if, reference, model):
        """
        Takes the two fitted structures (with and without iterative fitting),
        the known structure (reference), and the associated model inside the
        pdb_list. Calculates the different RMSD and stores them, together
        with the fraction of outlier atoms, in model.info.

        Keys written to model.info: rmsd2ref_aa_wo_if, rmsd2ref_ca_wo_if,
        rmsd2ref_aa_if, rmsd2ref_ca_if, rmsd2ref_aa_outliers,
        rmsd2ref_ca_outliers.

        @param fitted_model_if: iteratively fitted model, must carry the
                                profile 'rms_outliers'
        @type  fitted_model_if: PDBModel
        @param fitted_model_wo_if: normally fitted model
        @type  fitted_model_wo_if: PDBModel
        @param reference: reference model
        @type  reference: PDBModel
        @param model: model
        @type  model: PDBModel
        """
        ## first calculate rmsd for all atoms of the model (no mask) and
        ## for CA without removing any residues from the model
        mask_CA = fitted_model_wo_if.maskCA()

        rmsd_aa = fitted_model_wo_if.rms( reference, fit=0 )
        rmsd_ca = fitted_model_wo_if.rms( reference, mask=mask_CA, fit=1 )

        model.info["rmsd2ref_aa_wo_if"] = rmsd_aa
        model.info["rmsd2ref_ca_wo_if"] = rmsd_ca

        ## 1 for every atom that was NOT an outlier in the iterative fit
        outliers_mask = N0.logical_not(fitted_model_if.profile("rms_outliers"))

        ## CA mask of the complete model -- has the same length as
        ## outliers_mask and is needed for the CA outlier fraction below
        mask_CA_all = fitted_model_if.maskCA()

        ## Now remove the residues that were outliers in the iterative fit
        ## and calculate the rmsd again
        fitted_model_if = fitted_model_if.compress( outliers_mask )
        reference = reference.compress( outliers_mask )

        mask_CA = fitted_model_if.maskCA()

        rmsd_aa_if = fitted_model_if.rms( reference, fit=0 )
        rmsd_ca_if = fitted_model_if.rms( reference, mask=mask_CA, fit=1 )

        model.info["rmsd2ref_aa_if"] = rmsd_aa_if
        model.info["rmsd2ref_ca_if"] = rmsd_ca_if

        ## fraction of all atoms / of all CA atoms that were outliers
        n_atoms = len( outliers_mask )
        n_CA = N0.sum( mask_CA_all )
        n_CA_kept = N0.sum( N0.compress( mask_CA_all, outliers_mask ) )

        model.info["rmsd2ref_aa_outliers"] = \
             1.*( n_atoms - N0.sum( outliers_mask ) ) / n_atoms
        model.info["rmsd2ref_ca_outliers"] = 1.*( n_CA - n_CA_kept ) / n_CA
예제 #16
    def calc_rmsd(self, fitted_model_if, fitted_model_wo_if, reference, model):
        """
        Takes the two fitted structures (with and without iterative fitting),
        the known structure (reference), and the associated model inside the
        pdb_list. Calculates the different RMSD and stores them, together
        with the fraction of outlier atoms, in model.info.

        Keys written to model.info: rmsd2ref_aa_wo_if, rmsd2ref_ca_wo_if,
        rmsd2ref_aa_if, rmsd2ref_ca_if, rmsd2ref_aa_outliers,
        rmsd2ref_ca_outliers.

        @param fitted_model_if: iteratively fitted model, must carry the
                                profile 'rms_outliers'
        @type  fitted_model_if: PDBModel
        @param fitted_model_wo_if: normally fitted model
        @type  fitted_model_wo_if: PDBModel
        @param reference: reference model
        @type  reference: PDBModel
        @param model: model
        @type  model: PDBModel
        """
        ## first calculate rmsd for all atoms of the model (no mask) and
        ## for CA without removing any residues from the model
        mask_CA = fitted_model_wo_if.maskCA()

        rmsd_aa = fitted_model_wo_if.rms(reference, fit=0)
        rmsd_ca = fitted_model_wo_if.rms(reference, mask=mask_CA, fit=1)

        model.info["rmsd2ref_aa_wo_if"] = rmsd_aa
        model.info["rmsd2ref_ca_wo_if"] = rmsd_ca

        ## 1 for every atom that was NOT an outlier in the iterative fit
        outliers_mask = N0.logical_not(fitted_model_if.profile("rms_outliers"))

        ## CA mask of the complete model -- has the same length as
        ## outliers_mask and is needed for the CA outlier fraction below
        mask_CA_all = fitted_model_if.maskCA()

        ## Now remove the residues that were outliers in the iterative fit
        ## and calculate the rmsd again
        fitted_model_if = fitted_model_if.compress(outliers_mask)
        reference = reference.compress(outliers_mask)

        mask_CA = fitted_model_if.maskCA()

        rmsd_aa_if = fitted_model_if.rms(reference, fit=0)
        rmsd_ca_if = fitted_model_if.rms(reference, mask=mask_CA, fit=1)

        model.info["rmsd2ref_aa_if"] = rmsd_aa_if
        model.info["rmsd2ref_ca_if"] = rmsd_ca_if

        ## fraction of all atoms / of all CA atoms that were outliers
        n_atoms = len(outliers_mask)
        n_CA = N0.sum(mask_CA_all)
        n_CA_kept = N0.sum(N0.compress(mask_CA_all, outliers_mask))

        model.info["rmsd2ref_aa_outliers"] = \
             1.*(n_atoms - N0.sum(outliers_mask)) / n_atoms
        model.info["rmsd2ref_ca_outliers"] = 1.*(n_CA - n_CA_kept) / n_CA
예제 #17
    def getFluct_global( self, mask=None ):
        """
        Get RMS of each atom from it's average position in trajectory.
        The frames should be superimposed (fit() ) to a reference.

        NOTE(review): the value returned is the distance of each atom from
        its mean position averaged over all frames (mean of the distances,
        not root of the mean squared distance) -- confirm this is intended.

        @param mask: N x 1 list/Numpy array of 0|1, (N=atoms),
                     atoms to be considered.
        @type  mask: [1|0]

        @return: Numpy array ( N_unmasked x 1 ) of float.
        @rtype: array
        """
        frames = self.frames
        if mask is not None:
            frames = N0.compress( mask, frames, 1 )

        ## mean position of each atom in all frames (average over axis 0,
        ## the frame axis, given explicitly)
        avg = N0.average( frames, 0 )

        ## distance of each atom from its mean position in every frame
        dist = N0.sqrt( N0.sum( N0.power( frames - avg, 2 ), 2 ) )

        return N0.average( dist, 0 )
예제 #18
    def test_Ramachandran(self):
        """Ramachandran test"""
        ## load the test trajectory shipped below T.testRoot()
        self.traj = T.load(T.testRoot() + '/lig_pcr_00/traj.dat')

        ## attach the atom masses of the reference as atom profile 'mass'
        self.traj.ref.atoms.set('mass', self.traj.ref.masses())

        ## two frames of the trajectory, reduced to their protein atoms
        self.mdl = [self.traj[0], self.traj[11]]
        self.mdl = [md.compress(md.maskProtein()) for md in self.mdl]

        ## NOTE(review): the remaining arguments and the closing bracket of
        ## this call are missing in this listing
        self.rama = Ramachandran(self.mdl,

        self.psi = N0.array(self.rama.psi)

        ## NOTE(review): the body of this branch is missing in this listing
        if self.local:

        ## sum of all psi angles that are defined (not None), compared to a
        ## stored reference value with 2 decimal places
        r = N0.sum(
            N0.compress(N0.logical_not(N0.equal(self.psi, None)), self.psi))
        self.assertAlmostEqual(r, -11717.909796797909, 2)
예제 #19
    def addDensity(self, radius=6, minasa=None, profName='density'):
        """
        Count the number of heavy atoms within the given radius.
        Values are only collected for atoms with |minasa| accessible surface

        NOTE(review): several statements of this method are missing in this
        listing (see the notes in the body); the description covers the
        visible part only.

        @param minasa: relative exposed surface - 0 to 100%
        @type  minasa: float
        @param radius: in Angstrom
        @type  radius: float
        @param profName: not used in the visible part of the method;
                         presumably the name of the resulting profile
                         -- confirm (default: 'density')
        @type  profName: str
        """
        mHeavy = self.m.maskHeavy()

        ## coordinates of the heavy atoms only
        xyz = N0.compress(mHeavy, self.m.getXyz(), 0)

        ## NOTE(review): the body of this branch is missing in this listing
        if minasa and self.m.profile('relAS', 0) == 0:

        ## NOTE(review): as listed, the second assignment overwrites the
        ## first one; an 'else:' line in between looks to be missing
        if minasa:
            mSurf = self.m.profile2mask('relAS', minasa)
            mSurf = N0.ones(self.m.lenAtoms())

        ## loop over all surface atoms
        surf_pos = N0.nonzero(mSurf)
        contacts = []

        for i in surf_pos:
            ## squared distance of atom i to every heavy atom
            dist = N0.sum((xyz - self.m.xyz[i])**2, 1)
            ## number of heavy atoms within radius; 1 is subtracted,
            ## presumably for the atom itself -- confirm
            contacts += [N0.sum(N0.less(dist, radius**2)) - 1]

        ## NOTE(review): the beginning of the call that these two keyword
        ## arguments belong to is missing in this listing
                         comment='atom density radius %3.1fA' % radius,
                         version=T.dateString() + ' ' + self.version())
예제 #20
    def go(self, model_list=None, reference=None):
        """
        Run benchmarking.

        NOTE(review): the end of the loop body is truncated in this listing
        (see the note there); the description covers the visible part only.

        @param model_list: list of models
                           (default: None S{->} outFolder/L{F_PDBModels})
        @type  model_list: ModelList
        @param reference: reference model
                        (default: None S{->} outFolder/L{F_INPUT_REFERENCE})
        @type  reference: PDBModel
        """
        ## fall back to the default files in the output folder
        model_list = model_list or self.outFolder + self.F_PDBModels
        reference = reference or self.outFolder + self.F_INPUT_REFERENCE

        pdb_list = T.load('%s' % model_list)
        reference = PDBModel(reference)

        # check with python 2.4
        iref, imodel = reference.compareAtoms(pdb_list[0])

        ## 1 for every atom of the first model listed in imodel
        mask_casting = N0.zeros(len(pdb_list[0]))
        N0.put(mask_casting, imodel, 1)

        ## reduce the reference to the atoms listed in iref
        reference = reference.take(iref)
        #reference_mask_CA = reference_rmsd.maskCA()

        ## NOTE(review): built exactly like mask_casting and not used again
        ## in the visible part of the method
        atom_mask = N0.zeros(len(pdb_list[0]))
        N0.put(atom_mask, imodel, 1)

        ## atom mask derived from the residue profile 'n_templates'
        rmask = pdb_list[0].profile2mask("n_templates", 1, 1000)
        amask = pdb_list[0].res2atomMask(rmask)

        ## the same selection expressed for the reduced reference
        ## (mask_final_ref) and for the complete models (mask_final)
        mask_final_ref = N0.compress(mask_casting, amask)
        mask_final = mask_casting * amask

        reference = reference.compress(mask_final_ref)

        for i in range(len(pdb_list)):

            #self.cad(reference, pdb_list[i])

            pdb_list[i], pdb_wo_if = self.output_fittedStructures(\
                pdb_list[i], reference, i, mask_final)

            fitted_model_if = pdb_list[i].compress(mask_final)
            fitted_model_wo_if = pdb_wo_if.compress(mask_final)

            coord1 = reference.getXyz()
            coord2 = fitted_model_if.getXyz()

            aprofile = self.rmsd_res(coord1, coord2)

            ## NOTE(review): the end of this call and the start of the
            ## statement that the 'comment=' argument below belongs to are
            ## missing in this listing
            self.calc_rmsd(fitted_model_if, fitted_model_wo_if, reference,

                                  comment="rmsd to known reference structure")


예제 #21
    def _removeDuplicateChains(self, chainMask=None):
        """
        Get rid of identical chains by comparing all chains with Blast2seq.

        NOTE(review): several lines of this method (the start of some log
        calls and at least one 'else:') are missing in this listing; see
        the notes in the body.

        @param chainMask: chain mask for overriding the
                          chain identity checking (default: None)
        @type  chainMask: [int]
        @return: number of chains removed
        @rtype: int
        """
        chainCount = len(self.chains)
        ## float matrix, only the upper triangle (j >= i) is filled below
        matrix = 1.0 * N0.zeros((chainCount, chainCount))
        chain_ids = []

        ## create identity matrix for all chains against all chains
        for i in range(0, chainCount):
            chain_ids = chain_ids + [self.chains[i].chain_id
                                     ]  # collect for log file
            for j in range(i, len(self.chains)):

                # convert 3-letter-code res list into 1-letter-code String
                seq1 = singleAA(self.chains[i].sequence())
                seq2 = singleAA(self.chains[j].sequence())

                ##                 if len(seq1) > len(seq2):           # take shorter sequence
                ##                 # aln len at least half the len of the shortest sequence
                ##                     alnCutoff = len(seq2) * 0.5
                ##                 else:
                ##                     alnCutoff = len(seq1) * 0.5
                ##                 if id['aln_len'] > alnCutoff:
                ##                     matrix[i,j] = id['aln_id']
                ##                 else:                           # aln length too short, ignore
                ##                     matrix[i,j] = 0

                matrix[i, j] = self._compareSequences(seq1, seq2)

        ## report activity
        self.log.add("\n  Chain ID's of compared chains: " + str(chain_ids))
        self.log.add("  Cross-Identity between chains:\n" + str(matrix))
        self.log.add("  Identity threshold used: " + str(self.threshold))

        ## override the automatic chain deletion by supplying a
        ## chain mask to this function
        ## NOTE(review): the log call around the "NOTE: chain mask" text,
        ## the 'else:' in front of the error report and the arguments of
        ## the two '%' expressions are missing in this listing
        if chainMask:
            if len(chainMask) == chainCount:
                self.chains = N0.compress(chainMask, self.chains)
                    "NOTE: chain mask %s used for removing chains.\n" %

                self.log.add("########## ERROR ###############")
                self.log.add("# Chain mask is only %i chains long" %
                self.log.add("# when a mask of length %i is needed" %
                self.log.add("# No cleaning will be performed.\n")

        if not chainMask:
            ## look at diagonals in "identity matrix"
            ## (each chain against each)
            duplicate = len(self.chains)
            for offset in range(1, chainCount):
                diag = N0.diagonal(matrix, offset, 0, 1)
                # diagonal of 1's mark begin of duplicate
                avg = 1.0 * N0.sum(diag) / len(diag)
                ## no break: the largest offset reaching the threshold wins
                if (avg >= self.threshold):
                    duplicate = offset
            ## keep the chains in front of the first duplicate only
            self.chains = self.chains[:duplicate]
            ## NOTE(review): the log call around the following text is
            ## missing in this listing
                "NOTE: Identity matrix will be used for removing identical chains."

        ## report activity
        self.log.add(str(chainCount - len(self.chains))+\
                     " chains have been removed.\n")

        # how many chains have been removed?
        return (chainCount - len(self.chains))
예제 #22
파일: rmsFit.py 프로젝트: tybiot/biskit
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing outliers.
    Superimposed array y would be C{ N0.dot(y, N0.transpose(r)) + t }.

    @param x: first set of coordinates, the target of the superposition
    @type  x: array
    @param y: second set of coordinates, same length as x; this is the
              set that is transformed
    @type  y: array
    @param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
                           n .. at most n calculations
    @type  n_iterations: int
    @param z: number of standard deviations for outlier definition (default: 2)
    @type  z: float
    @param eps_rmsd: tolerance in rmsd (default: 0.5)
    @type  eps_rmsd: float
    @param eps_stdv: tolerance in standard deviations (default: 0.05)
    @type  eps_stdv: float

    @return: (r,t), [ [fraction_considered, rmsd_for_it, outliers] ];
             one trace entry per calculation. The fraction (0..1) refers
             to the rows used in that calculation, outliers are the
             positions excluded after it.
    @rtype: (array, array), [float, float, int]
    """
    iter_trace = []

    rmsd_old = 0
    stdv_old = 0

    n = 0
    converged = 0

    ## rows that still take part in the fit; all of them to start with
    mask = N0.ones(len(y), N0.Int32)

    while not converged:

        ## find transformation for best match
        r, t = findTransformation(N0.compress(mask, x, 0),
                                  N0.compress(mask, y, 0))

        ## transform coordinates
        xt = N0.dot(y, N0.transpose(r)) + t

        ## calculate row distances (0 for rows that are masked out)
        d = N0.sqrt(N0.sum(N0.power(x - xt, 2), 1)) * mask

        ## calculate rmsd and stdv from the rows considered
        d_used = N0.compress(mask, d)
        rmsd = N0.sqrt(N0.average(d_used**2))
        stdv = MU.SD(d_used)

        ## check conditions for convergence
        d_rmsd = abs(rmsd - rmsd_old)
        d_stdv = abs(1 - stdv_old / stdv)

        if d_rmsd < eps_rmsd and d_stdv < eps_stdv:
            converged = 1
        else:
            ## remember the values for the comparison in the next round
            rmsd_old = rmsd
            stdv_old = stdv

        ## store result
        perc = round(float(N0.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows
        mask = N0.logical_and(mask, N0.less(d, rmsd + z * stdv))
        outliers = N0.nonzero(N0.logical_not(mask))
        iter_trace.append([perc, round(rmsd, 3), outliers])

        n += 1

        ## stop after the requested number of calculations (0 = no limit)
        if n_iterations and n >= n_iterations:
            break

    return (r, t), iter_trace
예제 #23
    surf_lig = lig.profile2mask('MS', 0.0001, 101)

    ## kick out non-surface
    rec = rec.compress(surf_rec)
    lig = lig.compress(surf_lig)

    com = Complex(rec, lig)

    ## get interface patch
    cont = com.atomContacts(cutoff=6.0)
    rec_if = N0.sum(cont, 1)
    lig_if = N0.sum(cont, 0)

    ## center distance
    c2c = N0.sqrt(N0.sum((rec.center() - lig.center())**2, 0))
    print "Center2Center: ", c2c

    ## get patches and put them into Pymoler for display
    print "Patching"
    excl = N0.compress(N0.ones(len(rec_if)), rec_if)
    pm = test(rec, c2c, nAtoms=len(N0.nonzero(rec_if)), exclude=rec_if)

    pm.addPdb(rec.compress(rec_if), 'rec_interface')
    pm.addPdb(lig.compress(lig_if), 'lig_interface')
    pm.addPdb(com.model(), 'complex')

    ## show everything
    ## the patches are as movie in 'model'
예제 #24
class ReduceCoordinates:
    """
    Translate a PDBModel or frames from a trajectory to structure(s) with
    only one backbone and up to 2 side chain atoms per residue.
    The new atoms are the centers of mass of several atoms and carry the
    weight of the pooled atoms in an atom profile called 'mass'.

      >>> ## create with reference PDBModel
      >>> reducer = ReduceCoordinates( m_ref )
      >>> ## creates reduced PDBModel from m_ref
      >>> m_red = reducer.reduceToModel()

      >>> m_red_1 = reducer.reduceToModel( m1.getXyz() ) ## reduce many models
      >>> m_red_2 = reducer.reduceToModel( m2.getXyz() ) ## with identical atoms

      >>> ## reduce a complete Trajectory
      >>> reducer = ReduceCoordinates( traj.ref )
      >>> red_ref= reducer.reduceToModel()
      >>> frames = reducer.reduceXyz( traj.frames )
      >>> traj_red = Trajectory( ref=red_ref )
      >>> traj_red.frames = frames
    """

    ## modify order of TYR/PHE ring atoms to move centers away from ring axis
    ## NOTE(review): aaAtoms is the same dict object as MU.aaAtoms, so the two
    ## assignments below also change the TYR/PHE entries seen through MU.
    aaAtoms = MU.aaAtoms
    aaAtoms['TYR'] = [
        'N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CE1', 'CD2', 'CE2', 'CZ',
        'OH', 'OXT'
    ]
    ## NOTE(review): the tail of this list was lost; 'OXT' is restored by
    ## analogy with the TYR entry above -- confirm.
    aaAtoms['PHE'] = [
        'N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CE1', 'CD2', 'CE2', 'CZ',
        'OXT'
    ]

    def __init__(self, model, maxPerCenter=4):
        """
        Prepare reduction of coordinates from a given model.

        Stores the model, a copy sorted into standard atom order with the
        atoms flagged by maskH() removed (self.m_sorted), and the matching
        positions of the remaining atoms in the original model
        (self.a_indices).

        @param model: reference model defining atom content and order
        @type  model: PDBModel
        @param maxPerCenter: max number of atoms per side chain center atom
                             (default: 4)
        @type  maxPerCenter: int
        """
        ## NOTE(review): maxPerCenter is not used in the statements below;
        ## presumably it is meant to be forwarded to makeMap() -- confirm.
        self.m = model

        ## sort atoms within residues into standard order
        def cmpAtoms(a1, a2):
            """
            Comparison function for bringing atoms into standard order
            within residues as defined by L{ aaAtoms }.

            @param a1: atom record providing 'residue_name' and 'name'
            @type  a1: dict
            @param a2: atom record providing 'name'
            @type  a2: dict

            @return: result of cmp() on the positions of the two atom names
                     in the residue's standard atom list; atoms with a name
                     missing from that list are compared by name instead
            @rtype: [-1|0|1]
            """
            res = a1['residue_name']
            target = self.aaAtoms[res]
            try:
                return cmp(target.index(a1['name']), target.index(a2['name']))
            except ValueError:
                ## unknown atom name: fall back to alphabetical order
                ## rather than raising an error
                return cmp(a1['name'], a2['name'])

        self.a_indices = self.m.argsort(cmpAtoms)
        self.m_sorted = self.m.sort(self.a_indices)

        ## remove H from internal model and from list of atom positions
        ## (assumes remove() returns the mask of atoms that were kept --
        ## TODO confirm)
        maskH = self.m_sorted.remove(self.m_sorted.maskH())
        self.a_indices = N0.compress(maskH, self.a_indices)

예제 #25
    def makeMap(self, maxPerCenter=4):
        """
        Calculate mapping between complete and reduced atom list.

        Nothing is returned; the result is stored on the instance:
          - self.groups: list of lists of int, one group of atom indices
            (into the original model) per center
          - self.atoms: the new center atoms, one per group

        Every residue contributes one 'BB' center. All residues except GLY
        and ALA additionally contribute side chain centers 'SC0', 'SC1', ..
        as delivered by self.group(); for GLY and ALA all atoms of the
        residue are pooled into the 'BB' center.

        @param maxPerCenter: max number of atoms per side chain center atom
                             (default: 4)
        @type  maxPerCenter: int
        """
        resIndex = self.m_sorted.resIndex()
        resModels = self.m_sorted.resModels()
        m = self.m_sorted

        ## NOTE(review): presumably the running atom counter consumed by
        ## nextAtom() -- confirm
        self.currentAtom = 0

        groups = []
        atoms = DictList()

        n_res = len(resIndex)

        for i in range(n_res):

            first_atom = resIndex[i]

            ## the residue ends right before the next one starts; the last
            ## residue extends to the end of the atom list
            if i < n_res - 1:
                last_atom = resIndex[i + 1] - 1
            else:
                last_atom = len(self.a_indices) - 1

            a = m.atoms[first_atom]

            ## position of this residue's atoms in original PDBModel (unsorted)
            a_indices = self.a_indices[first_atom:last_atom + 1]

            ## for each center create list of atom indices and a center atom
            if a['residue_name'] != 'GLY' and a['residue_name'] != 'ALA':

                mask_bb = resModels[i].maskBB()

                bb_a_indices = N0.compress(mask_bb, a_indices)
                sc_a_indices = N0.compress(N0.logical_not(mask_bb), a_indices)

                sc_groups = self.group(sc_a_indices, maxPerCenter)

            else:
                bb_a_indices = a_indices
                sc_groups = []

            groups.append(bb_a_indices)
            atoms += [self.nextAtom(a, 'BB')]

            ## separate counter so that the residue index i is not clobbered
            for j, g in enumerate(sc_groups):
                groups.append(g)
                atoms += [self.nextAtom(a, 'SC%i' % j)]

        self.groups = groups
        self.atoms = atoms
예제 #26
    ## Fragment of a demo script: reduce receptor and ligand to surface atoms,
    ## locate the interface and hand patches to a viewer object.
    ## rec, lig, surf_rec and test() are defined before this fragment.

    ## mask from the 'MS' profile with limits 0.0001 and 101
    ## NOTE(review): presumably selects atoms with non-zero molecular
    ## surface -- confirm against profile2mask
    surf_lig = lig.profile2mask( 'MS', 0.0001, 101 )

    ## kick out non-surface atoms
    rec = rec.compress( surf_rec )
    lig = lig.compress( surf_lig )

    com = Complex( rec, lig )

    ## get interface patch: sum the atom contact matrix along each axis
    ## (presumably rows = receptor atoms, columns = ligand atoms -- confirm)
    cont = com.atomContacts( cutoff=6.0 )
    rec_if = N0.sum( cont, 1 )
    lig_if = N0.sum( cont, 0 )

    ## Euclidean distance between the centers of receptor and ligand
    c2c = N0.sqrt( N0.sum( (rec.center() - lig.center())**2, 0 ) )
    print "Center2Center: ", c2c

    ## get patches and put them into Pymoler for display
    print "Patching"
    ## NOTE(review): excl is not referenced again within this fragment;
    ## the all-ones condition keeps every element of rec_if
    excl = N0.compress( N0.ones( len( rec_if ) ), rec_if )
    pm = test( rec, c2c, nAtoms=len(N0.nonzero(rec_if)), exclude=rec_if )

    ## add the interface atoms of both partners and the whole complex
    pm.addPdb( rec.compress( rec_if ), 'rec_interface' )
    pm.addPdb( lig.compress( lig_if ), 'lig_interface' )
    pm.addPdb( com.model(), 'complex')

    ## show everything
    ## the patches are available as a movie in 'model'
    ## (the display call itself is not part of this fragment)
예제 #27
    def fit( self, mask=None, ref=None, n_it=1,
             prof='rms', verbose=1, fit=1, **profInfos ):
        """
        Superimpose all coordinate frames on reference coordinates. Put rms
        values in a profile. If n_it > 1, the fraction of atoms considered
        for the fit is put into a profile called |prof|_considered
        (i.e. by default 'rms_considered').

        @param mask: atom mask, atoms to consider default: [all]
        @type  mask: [1|0]
        @param ref: use as reference, default: None, average Structure
        @type  ref: PDBModel
        @param n_it: number of fit iterations, kicking out outliers on the way
                     1 -> classic single fit, 0 -> until convergence
                     (default: 1)
        @type  n_it: int
        @param prof: save rms per frame in profile of this name, ['rms']
        @type  prof: str
        @param verbose: print progress info to STDERR (default: 1)
        @type  verbose: 1|0
        @param fit: transform frames after match, otherwise just calc rms
                    (default: 1)
        @type  fit: 1|0
        @param profInfos: additional key=value pairs for rms profile info []
        @type profInfos: key=value
        """
        ## reference coordinates: average over all frames unless given
        if ref is None:
            refxyz = N0.average( self.frames, 0 )
        else:
            refxyz = ref.getXyz()

        if mask is None:
            mask = N0.ones( len( refxyz ), N0.Int32 )

        refxyz = N0.compress( mask, refxyz, 0 )

        if verbose: T.errWrite( "rmsd fitting..." )

        rms = []          ## rms value of each frame
        non_outliers = [] ## fraction of atoms considered for rms and fit

        for i in range(0, len( self.frames) ):

            xyz = self.frames[i]
            xyz_masked = N0.compress( mask, xyz, 0 )

            if n_it != 1:
                ## iterative fit; the last entry of the returned trace is
                ## taken as the final result: [0] -> fraction of atoms
                ## considered, [1] -> rms
                (r, t), rmsdList = rmsFit.match( refxyz, xyz_masked, n_it )
                non_outliers.append( rmsdList[-1][0] )

                xyz_transformed = N0.dot( xyz, N0.transpose(r)) + t

                rms += [ rmsdList[-1][1] ]

            else:
                ## classic single fit on the masked atoms
                r, t = rmsFit.findTransformation( refxyz, xyz_masked )

                xyz_transformed = N0.dot( xyz, N0.transpose(r)) + t

                ## per-atom distance of the fitted masked atoms to reference
                d = N0.sqrt(N0.sum(N0.power( N0.compress(mask, xyz_transformed,0)\
                                          - refxyz, 2), 1))

                rms += [ N0.sqrt( N0.average(d**2) ) ]

            ## the transformation is always applied to ALL atoms of the frame
            if fit:
                self.frames[i] = xyz_transformed.astype(N0.Float32)

            if verbose and i%100 == 0:
                T.errWrite( '#' )

        self.setProfile( prof, rms, n_iterations=n_it, **profInfos )

        ## only filled by the iterative branch
        if non_outliers:
            self.setProfile( prof+'_considered', non_outliers,
                             comment='fraction of atoms considered for iterative fit' )

        if verbose: T.errWrite( 'done\n' )
예제 #28
파일: Benchmark.py 프로젝트: graik/biskit
    def go(self, model_list = None, reference = None):
        """
        Run benchmarking.

        Every model is fitted to the reference by output_fittedStructures();
        the per-residue rmsd to the reference is stored in the atom profile
        'rmsd2ref_if' of each model.

        @param model_list: file name of the pickled list of models
                           (default: None S{->} outFolder/L{F_PDBModels})
        @type  model_list: str
        @param reference: source of the reference model, handed to PDBModel()
                        (default: None S{->} outFolder/L{F_INPUT_REFERENCE})
        @type  reference: str
        """
        ## NOTE(review): both arguments are treated as file names below
        ## (string formatting + T.load, PDBModel constructor) -- confirm
        ## that callers never pass ready-made objects.
        model_list = model_list or self.outFolder + self.F_PDBModels
        reference = reference or self.outFolder + self.F_INPUT_REFERENCE

        pdb_list = T.load('%s'%model_list)
        reference = PDBModel(reference)

        # check with python 2.4
        ## presumably the positions of equivalent atoms in reference and
        ## first model -- confirm against compareAtoms
        iref, imodel = reference.compareAtoms(pdb_list[0])

        ## 1 for every atom of the first model listed in imodel
        mask_casting = N0.zeros(len(pdb_list[0]))
        N0.put(mask_casting, imodel, 1)

        reference = reference.take(iref)
        #reference_mask_CA = reference_rmsd.maskCA()

        ## NOTE(review): atom_mask is not used in the statements visible here
        atom_mask = N0.zeros(len(pdb_list[0]))

        ## atom mask derived from the 'n_templates' profile (limits 1, 1000)
        rmask = pdb_list[0].profile2mask("n_templates", 1,1000)
        amask = pdb_list[0].res2atomMask(rmask)

        ## same selection once in reference numbering, once in model numbering
        mask_final_ref = N0.compress(mask_casting, amask)
        mask_final = mask_casting * amask

        reference = reference.compress(mask_final_ref)

        for i in range(len(pdb_list)):

            #self.cad(reference, pdb_list[i])

            pdb_list[i], pdb_wo_if = self.output_fittedStructures(\
                pdb_list[i], reference, i, mask_final)

            fitted_model_if = pdb_list[i].compress(mask_final)
            fitted_model_wo_if = pdb_wo_if.compress(mask_final)

            coord1 = reference.getXyz()
            coord2 = fitted_model_if.getXyz()

            aprofile = self.rmsd_res(coord1,coord2)

            self.calc_rmsd(fitted_model_if, fitted_model_wo_if,
                           reference, pdb_list[i])

            ## atoms outside mask_final receive the default value -1
            pdb_list[i].atoms.set('rmsd2ref_if', aprofile,
                                  mask=mask_final, default = -1,
                                  comment="rmsd to known reference structure")

