예제 #1
0
파일: Complex.py 프로젝트: tybiot/biskit
    def contactResDistribution(self, cm=None):
        """
        Tally how often each amino acid letter occurs among the residues
        that take part in the receptor-ligand interface.

        @param cm: pre-calculated residue contact matrix; when None it is
                   fetched from self.resContacts() (default: None)
        @type  cm: matrix

        @return: dict mapping every letter of molUtils.allAA() to its
                 number of occurrences, e.g. {'A':3, 'C':1, .. }
        @rtype: dict
        """
        contacts = self.resContacts() if cm is None else cm

        ## summing the matrix along either direction gives a non-zero entry
        ## for every residue that has at least one contact
        lig_mask = N0.sum(contacts)
        rec_mask = N0.sum(N0.transpose(contacts))

        ## keep only the sequence letters of contacting residues
        lig_letters = N0.compress(lig_mask, self.lig().sequence())
        rec_letters = N0.compress(rec_mask, self.rec().sequence())

        ## ligand letters first, then receptor letters, as one plain string
        interface_seq = ''.join(lig_letters) + ''.join(rec_letters)

        return dict((aa, interface_seq.count(aa))
                    for aa in molUtils.allAA())
예제 #2
0
파일: Hmmer.py 프로젝트: graik/biskit
    def castHmmDic( self, hmmDic, repete, hmmGap, key ):
        """
        Expand one score list of hmmDic to cover every repeat of the
        profile, dropping the positions listed as gaps for each repeat.

        For every repeat i the positions hmmGap[i] are removed from
        hmmDic[key]; the gap-free copies are chained with the copy of the
        latest repeat in front.

        @param hmmDic: dictionary from L{getHmmProfile}
        @type  hmmDic: dict
        @param repete: number of repeats, from L{align}; must be at least 1,
                       otherwise no score is built and the assignment fails
        @type  repete: int
        @param hmmGap: gap positions per repeat, from L{align}
        @type  hmmGap: [int]
        @param key: name of the scoring entry to adjust for gaps and repeats
        @type  key: str

        @return: the same dictionary, with hmmDic[key] replaced
        @rtype: dict
        """
        profile = hmmDic[key]

        for rep in range( repete ):
            ## 1 = keep position, 0 = position is a gap in this repeat
            keep = N0.ones( len(profile) )
            N0.put( keep, hmmGap[rep], 0 )

            gapless = N0.compress( keep, profile, 0 )

            if rep == 0:
                score = gapless
            else:
                ## newer repeats are placed in front of the older ones
                score = N0.concatenate( ( gapless, score ) )

        hmmDic[key] = score

        return hmmDic
예제 #3
0
    def castHmmDic(self, hmmDic, repete, hmmGap, key):
        """
        Expand one score list of hmmDic to cover every repeat of the
        profile, dropping the positions listed as gaps for each repeat.

        For every repeat i the positions hmmGap[i] are removed from
        hmmDic[key]; the gap-free copies are chained with the copy of the
        latest repeat in front.

        @param hmmDic: dictionary from L{getHmmProfile}
        @type  hmmDic: dict
        @param repete: number of repeats, from L{align}; must be at least 1,
                       otherwise no score is built and the assignment fails
        @type  repete: int
        @param hmmGap: gap positions per repeat, from L{align}
        @type  hmmGap: [int]
        @param key: name of the scoring entry to adjust for gaps and repeats
        @type  key: str

        @return: the same dictionary, with hmmDic[key] replaced
        @rtype: dict
        """
        values = hmmDic[key]

        for repeat_nr in range(repete):
            ## 1 = keep position, 0 = position is a gap in this repeat
            gap_mask = N0.ones(len(values))
            N0.put(gap_mask, hmmGap[repeat_nr], 0)

            piece = N0.compress(gap_mask, values, 0)

            if repeat_nr == 0:
                score = piece
            else:
                ## newer repeats are placed in front of the older ones
                score = N0.concatenate((piece, score))

        hmmDic[key] = score

        return hmmDic
예제 #4
0
파일: Complex.py 프로젝트: tybiot/biskit
    def __atomContacts(self, cutoff, rec_mask, lig_mask, cache):
        """
        Boolean contact matrix between the masked receptor and ligand atoms.

        A distance matrix stored in self.pw_dist is re-used when its shape
        matches the number of selected atoms in both masks; otherwise the
        distances are re-calculated.

        @param cutoff: cutoff for B{atom-atom} contact in Angstrom
        @type  cutoff: float
        @param rec_mask: atom mask
        @type  rec_mask: [1|0]
        @param lig_mask: atom mask
        @type  lig_mask: [1|0]
        @param cache: store the pairwise distance matrix in self.pw_dist
        @type  cache: 1|0

        @return: atom contact matrix, array sum_rec_mask x sum_lig_mask
        @rtype: array
        """
        rec_xyz = self.rec().getXyz()
        lig_xyz = self.lig().getXyz()

        dist = getattr(self, 'pw_dist', None)

        ## only the shape of a cached matrix is checked, not which atoms
        ## it was calculated for
        outdated = dist is None
        if not outdated:
            expected = ( N0.sum(rec_mask), N0.sum(lig_mask) )
            outdated = N0.shape( dist ) != expected

        if outdated:
            dist = self.__pairwiseDistances(
                N0.compress(rec_mask, rec_xyz, 0),
                N0.compress(lig_mask, lig_xyz, 0))

        if cache:
            self.pw_dist = dist

        ## true where distance < cutoff -> n_atoms_rec x n_atoms_lig
        return N0.less(dist, cutoff)
예제 #5
0
    def takeFrames( self, indices ):
        """
        Return a copy of the trajectory containing only the specified frames.

        Indices at or beyond the number of frames are silently dropped
        (only the upper bound is checked).

        @param indices: positions to take
        @type  indices: [int]

        @return: copy of this Trajectory (fewer frames, semi-deep copy of ref)
        @rtype: Trajectory
        """
        ## remove out-of-bound indices
        indices = N0.compress( N0.less( indices, len( self.frames ) ), indices )

        r = self.__class__()

        ## this step takes some time for large frames !
        r.frames = N0.take( self.frames, indices, 0 )

        ## semi-deep copy of reference model
        r.setRef( self.ref.take( range( self.ref.lenAtoms() ) ) )

        if self.frameNames is not None:
            names = N0.take( self.frameNames, indices, 0 )
            ## build a real list (map() would be a lazy iterator on Python 3)
            r.frameNames = [ ''.join( name ) for name in names.tolist() ]

        r.pc = self.__takePca( indices )

        r.profiles = self.profiles.take( indices )

        ## the residue index is shared with the original, not copied
        r.resIndex = self.resIndex

        return r
예제 #6
0
    def residusMaximus( self, atomValues, mask=None ):
        """
        Take list of value per atom, return list where all atoms of any
        residue are set to the highest value of any atom in that residue.
        (after applying mask)

        @param atomValues: list 1 x N, values per atom
        @type  atomValues: [ float ]
        @param mask: list 1 x N, 0|1, 'master' atoms of each residue
                     (default: None -> all atoms)
        @type  mask: [1|0]

        @return: Numpy array 1 x N of float
        @rtype: array
        """
        if mask is None:
            mask = N0.ones( len( self.frames[0] ), N0.Int32 )

        ## values of atoms outside the mask become 0
        masked = atomValues * mask

        ## fetch the atom -> residue map once instead of once per residue
        resMap = self.resMap()

        result = []

        ## set all atoms of each residue to uniform value
        for res in range( 0, resMap[-1]+1 ):

            ## get atom entries for this residue
            resAtoms = N0.compress( N0.equal( resMap, res ), masked )

            ## get maximum value
            masterValue = max( resAtoms )

            ## one copy of the maximum per atom of the residue
            result.extend( resAtoms * 0.0 + masterValue )

        return N0.array( result )
예제 #7
0
def logConfidence( x, R, clip=0 ):
    """
    Estimate the probability of x NOT being a random observation from a
    lognormal distribution that is described by a sample of random values.

    Zeros are removed from R before the log-transform. If x is 0 (and was
    not replaced by clip) the result is (0, 0).

    @param x: observed value
    @type  x: float
    @param R: sample of random values
    @type  R: [float]
    @param clip: replace zeros in R and a zero x by this value,
                 0 -> don't clip (default: 0)
    @type  clip: float

    @return: confidence that x is not random, median of random distr.
    @rtype: (float, float)
    """
    if clip:
        if 0 in R:
            R = N0.clip( R, clip, max( R ) )
        if x == 0:
            x = clip

    ## drop whatever zeros are left instead of clipping them
    R = N0.compress( R, R )
    if x == 0:
        return 0, 0

    ## mean and standard deviation of the log-transformed random sample
    log_R = N0.log( R )
    alpha = N0.average( log_R )

    n = len( R )

    ## sample standard deviation (n - 1 in the denominator)
    beta = N0.sqrt( N0.sum( N0.power( log_R - alpha, 2 ) ) / (n - 1.) )

    return logArea( x, alpha, beta ), logMedian( alpha )
예제 #8
0
    def pairwiseRmsd( self, aMask=None, noFit=0 ):
        """
        Calculate the rmsd between every pair of coordinate frames.

        @param aMask: atom mask; only these atoms are compared
                      (default: None -> all atoms)
        @type  aMask: [1|0]
        @param noFit: 1 -> compare the frames as they are; 0 -> take the
                      rmsd reported by rmsFit.match for each pair
                      (default: 0)
        @type  noFit: 1|0

        @return: symmetric frames x frames array of float, zero diagonal
        @rtype: array
        """
        frames = self.frames

        if aMask is not None:
            frames = N0.compress( aMask, frames, 1 )

        n_frames = len( frames )
        result = N0.zeros( (n_frames, n_frames), N0.Float32 )

        for i in range( n_frames ):

            ## only the upper triangle is calculated, then mirrored
            for j in range( i + 1, n_frames ):

                if not noFit:
                    rt, rmsdLst = rmsFit.match( frames[i], frames[j], 1 )
                    rmsd = rmsdLst[0][1]

                else:
                    ## per-atom distance, then root of the mean square
                    d = N0.sqrt(N0.sum(N0.power(frames[i]-frames[j], 2), 1))
                    rmsd = N0.sqrt( N0.average(d**2) )

                result[i,j] = result[j,i] = rmsd

        return result
예제 #9
0
파일: hexTools.py 프로젝트: tybiot/biskit
def centerSurfDist(model, surf_mask, mask=None):
    """
    Calculate the longest and shortest distance from the center of mass
    of the molecule to its surface atoms.

    Only atoms selected by both mask and surf_mask are measured.

    @param model: model providing coordinates and center of mass
    @type  model: PDBModel
    @param surf_mask: atom surface mask, needed for minimum surface distance
    @type  surf_mask: [1|0]
    @param mask: atoms to take into account
                 (default: None -> model.maskHeavy())
    @type  mask: [1|0]

    @return: max distance, min distance
    @rtype: float, float
    """
    if mask is None:
        mask = model.maskHeavy()

    center = model.centerOfMass()

    ## coordinates of atoms that are selected in both masks
    selected = mask * surf_mask
    surf_xyz = N0.compress(selected, model.getXyz(), 0)

    ## distance of each selected atom from the center
    dist = N0.sqrt(N0.sum((surf_xyz - center)**2, 1))

    shortest = min(dist)
    longest = max(dist)

    return longest, shortest
예제 #10
0
파일: Complex.py 프로젝트: tybiot/biskit
    def __extractLigandMatrix(self, fcomplex):
        """
        Compare the ligand structure extracted from a hex complex with the
        original ligand model and determine the transformation between the
        C-alpha coordinates of the two.

        @param fcomplex: pdb file with hex complex
        @type  fcomplex: str

        @return: rotation matrix and translation matrix as tuple
        @rtype: (array, array)
        """
        docked_pdb = self._extractLigandStructure(fcomplex)

        ## select the C-alpha rows; the axis is given explicitly because the
        ## mask has one entry per atom (assumes xyz is an atoms x 3 array)
        xyz_docked = N0.compress(docked_pdb.maskCA(), docked_pdb.xyz, 0)
        xyz_template = N0.compress(self.lig_model.maskCA(),
                                   self.lig_model.xyz, 0)

        (r, t) = self._findTransformation(xyz_docked, xyz_template)
        return (r, t)
예제 #11
0
    def phi_and_psi(self, model):
        """
        Calculate two backbone torsion angles per residue for all chains
        of model and append them to self.phi and self.psi::

          self.phi - torsion N, CA, C, N of next residue
                   - last position in a chain = None
          self.psi - torsion C of previous residue, N, CA, C
                   - first position in a chain = None

        NOTE(review): by the usual convention the N-CA-C-N(next) torsion is
        called psi and C(prev)-N-CA-C is called phi; the labels used here
        look swapped -- confirm before relying on the names.

        @param model: PDBModel
        @type  model: PDBModel
        """
        for c in range(model.lenChains(breaks=1)):
            chain = model.takeChains([c], breaks=1)

            coords = chain.xyz

            ca = N0.compress(chain.maskCA(), coords, 0)
            n = N0.compress(chain.mask(['N']), coords, 0)
            co = N0.compress(chain.mask(['C']), coords, 0)

            n_res = len(n)

            ## every residue except the last one of the chain has a
            ## following N atom
            self.phi += [self.dihedral(n[i], ca[i], co[i], n[i + 1])
                         for i in range(n_res - 1)]
            self.phi += [None]

            ## every residue except the first one of the chain has a
            ## preceding C atom
            self.psi += [None]
            self.psi += [self.dihedral(co[i - 1], n[i], ca[i], co[i])
                         for i in range(1, n_res)]
예제 #12
0
    def plotContactDensity(self, step=1, cutoff=4.5):
        """
        Example. Plot a histogram of the non-zero average contact values.
        (The original author marked this method as possibly wrong.)

        @param step: passed on to averageContacts (default: 1)
        @type  step: int
        @param cutoff: passed on to averageContacts (default: 4.5)
        @type  cutoff: float

        @raise ComplexTrajError: if gnuplot program is not installed
        """
        if not gnuplot.installed:
            ## call syntax works on Python 2 and 3 alike
            raise ComplexTrajError('gnuplot program is not installed')

        r = self.averageContacts(step, cutoff)
        r = N0.ravel(r)

        ## drop all zero entries before building the histogram
        r = N0.compress(r, r)
        gnuplot.plot(hist.density(r, 10))
예제 #13
0
def compareSequences(seqAA_1, seqAA_2):
    """
    Compare two sequences and flag, for each of them, the positions that
    remain as equal parts after the stretches reported by getSkipLists
    (expanded by expandRepeats) have been removed.

    @param seqAA_1: first sequence
    @type  seqAA_1: str or [str]
    @param seqAA_2: second sequence
    @type  seqAA_2: str or [str]

    @return: two masks, one per input sequence, with 1 at every position
             that is part of the equal regions and 0 elsewhere
    @rtype: array, array
    """
    aa_1 = list(seqAA_1)
    aa_2 = list(seqAA_2)

    ## original positions, carried along through all filtering steps
    nr_1 = range(len(aa_1))
    nr_2 = range(len(aa_2))

    ## first pass: find stretches to delete from either sequence
    opcodes = getOpCodes(aa_1, aa_2)
    skip_1, skip_2 = getSkipLists(opcodes)

    skip_1 = [expandRepeats(aa_1, *pos) for pos in skip_1]
    skip_2 = [expandRepeats(aa_2, *pos) for pos in skip_2]

    keep_1 = del2mask(aa_1, *skip_1)
    keep_2 = del2mask(aa_2, *skip_2)

    aa_1 = N0.compress(keep_1, aa_1).tolist()
    nr_1 = N0.compress(keep_1, nr_1).tolist()
    aa_2 = N0.compress(keep_2, aa_2).tolist()
    nr_2 = N0.compress(keep_2, nr_2).tolist()

    ## second pass: reduce the remainder to the equal parts
    opcodes = getOpCodes(aa_1, aa_2)
    same_1, same_2 = getEqualLists(opcodes)
    aa_1, nr_1 = getEqual(aa_1, nr_1, same_1)
    aa_2, nr_2 = getEqual(aa_2, nr_2, same_2)

    ## translate the surviving positions into masks of the original length
    mask_1 = N0.zeros(len(seqAA_1))
    mask_2 = N0.zeros(len(seqAA_2))
    N0.put(mask_1, nr_1, 1)
    N0.put(mask_2, nr_2, 1)

    return mask_1, mask_2
예제 #14
0
    def pca( self, atomMask=None, frameMask=None, fit=1 ):
        """
        Calculate principal components of trajectory frames.

        The average over all frames (before frameMask is applied) is
        subtracted from the selected frames, the selected atoms are
        flattened into one row per frame and the result is decomposed
        with LA.svd.

        @param atomMask: 1 x N_atoms, [111001110..] atoms to consider
                         (default: all)
        @type  atomMask: [1|0]
        @param frameMask: 1 x N_frames, [001111..] frames to consider
                          (default all )
        @type  frameMask: [1|0]
        @param fit: 1 -> call self.fit( atomMask ) first, which changes
                    this trajectory (default: 1)
        @type  fit: 1|0

        @return: three arrays taken from V, L, U = svd( data ):
                 U, V * L (projection of each frame in PC space) and
                 L squared (eigenvalue of each PC)
        @rtype: array, array, array
        """
        if frameMask is None:
            frameMask = N0.ones( len( self.frames ), N0.Int32 )

        if atomMask is None:
            atomMask = N0.ones( self.getRef().lenAtoms(), N0.Int32 )

        if fit:
            self.fit( atomMask )

        refxyz = N0.average( self.frames, 0 )

        data = N0.compress( frameMask, self.frames, 0 )

        data = data - refxyz

        data = N0.compress( atomMask, data, 1 )

        ## reduce to 2D array; a list (not map) so that it also builds a
        ## proper matrix on Python 3
        data = N0.array( [ N0.ravel( frame ) for frame in data ] )

        V, L, U = LA.svd( data )

        return U, V * L, N0.power(L, 2)
예제 #15
0
파일: Benchmark.py 프로젝트: graik/biskit
    def calc_rmsd(self, fitted_model_if, fitted_model_wo_if, reference, model):
        """
        Compare the two fitted structures (with and without iterative
        fitting) with the known reference and store the resulting rmsd
        values and outlier fractions in model.info.

        Entries written to model.info: rmsd2ref_aa_wo_if, rmsd2ref_ca_wo_if,
        rmsd2ref_aa_if, rmsd2ref_ca_if, rmsd2ref_aa_outliers and
        rmsd2ref_ca_outliers.

        @param fitted_model_if: iteratively fitted model
        @type  fitted_model_if: PDBModel
        @param fitted_model_wo_if: normally fitted model
        @type  fitted_model_wo_if: PDBModel
        @param reference: reference model
        @type  reference: PDBModel
        @param model: model whose info dictionary receives the results
        @type  model: PDBModel
        """
        ## rmsd over all given atoms and over CA only, nothing removed yet
        ca_mask = fitted_model_wo_if.maskCA()

        rmsd_all = fitted_model_wo_if.rms( reference, fit=0 )
        rmsd_ca = fitted_model_wo_if.rms( reference, mask=ca_mask, fit=1 )

        model.info["rmsd2ref_aa_wo_if"] = rmsd_all
        model.info["rmsd2ref_ca_wo_if"] = rmsd_ca

        ## 1 for every atom that was NOT flagged as outlier
        outliers_mask = N0.logical_not(fitted_model_if.profile("rms_outliers"))

        ## drop the outliers of the iterative fit from both structures
        ## and repeat the two rmsd calculations
        fitted_model_if = fitted_model_if.compress( outliers_mask )
        reference = reference.compress( outliers_mask )

        mask_CA = fitted_model_if.maskCA()

        rmsd_all_if = fitted_model_if.rms( reference, fit=0 )
        rmsd_ca_if = fitted_model_if.rms( reference, mask=mask_CA, fit=1 )

        model.info["rmsd2ref_aa_if"] = rmsd_all_if
        model.info["rmsd2ref_ca_if"] = rmsd_ca_if

        ## fraction of atoms that were removed as outliers
        n_atoms = len( outliers_mask )
        model.info["rmsd2ref_aa_outliers"] = \
            1. * (n_atoms - N0.sum( outliers_mask )) / n_atoms

        ## NOTE(review): mask_CA belongs to the already compressed model
        ## while outliers_mask still has the uncompressed length -- confirm
        ## that this gives the intended CA outlier fraction
        n_ca = N0.sum( mask_CA )
        model.info["rmsd2ref_ca_outliers"] = \
            1. * (n_ca - N0.sum( N0.compress( mask_CA, outliers_mask ) )) / n_ca
예제 #16
0
    def calc_rmsd(self, fitted_model_if, fitted_model_wo_if, reference, model):
        """
        Compare the two fitted structures (with and without iterative
        fitting) with the known reference and store the resulting rmsd
        values and outlier fractions in model.info.

        Entries written to model.info: rmsd2ref_aa_wo_if, rmsd2ref_ca_wo_if,
        rmsd2ref_aa_if, rmsd2ref_ca_if, rmsd2ref_aa_outliers and
        rmsd2ref_ca_outliers.

        @param fitted_model_if: iteratively fitted model
        @type  fitted_model_if: PDBModel
        @param fitted_model_wo_if: normally fitted model
        @type  fitted_model_wo_if: PDBModel
        @param reference: reference model
        @type  reference: PDBModel
        @param model: model whose info dictionary receives the results
        @type  model: PDBModel
        """
        ## rmsd over all given atoms and over CA only, nothing removed yet
        ca_mask = fitted_model_wo_if.maskCA()

        rmsd_all = fitted_model_wo_if.rms(reference, fit=0)
        rmsd_ca = fitted_model_wo_if.rms(reference, mask=ca_mask, fit=1)

        model.info["rmsd2ref_aa_wo_if"] = rmsd_all
        model.info["rmsd2ref_ca_wo_if"] = rmsd_ca

        ## 1 for every atom that was NOT flagged as outlier
        outliers_mask = N0.logical_not(fitted_model_if.profile("rms_outliers"))

        ## drop the outliers of the iterative fit from both structures
        ## and repeat the two rmsd calculations
        fitted_model_if = fitted_model_if.compress(outliers_mask)
        reference = reference.compress(outliers_mask)

        mask_CA = fitted_model_if.maskCA()

        rmsd_all_if = fitted_model_if.rms(reference, fit=0)
        rmsd_ca_if = fitted_model_if.rms(reference, mask=mask_CA, fit=1)

        model.info["rmsd2ref_aa_if"] = rmsd_all_if
        model.info["rmsd2ref_ca_if"] = rmsd_ca_if

        ## fraction of atoms that were removed as outliers
        n_atoms = len(outliers_mask)
        model.info["rmsd2ref_aa_outliers"] = \
            1. * (n_atoms - N0.sum(outliers_mask)) / n_atoms

        ## NOTE(review): mask_CA belongs to the already compressed model
        ## while outliers_mask still has the uncompressed length -- confirm
        ## that this gives the intended CA outlier fraction
        n_ca = N0.sum(mask_CA)
        model.info["rmsd2ref_ca_outliers"] = \
            1. * (n_ca - N0.sum(N0.compress(mask_CA, outliers_mask))) / n_ca
예제 #17
0
    def getFluct_global( self, mask=None ):
        """
        Get the fluctuation of each atom around its average position in
        the trajectory. The frames should be superimposed (fit() ) to a
        reference beforehand.

        Both averages rely on the default axis of N0.average
        (presumably the frame axis -- TODO confirm).

        @param mask: N x 1 list/Numpy array of 0|1, (N=atoms),
                     atoms to be considered (default: None -> all)
        @type  mask: [1|0]

        @return: Numpy array ( N_unmasked x 1 ) of float.
        @rtype: array
        """
        if mask is None:
            frames = self.frames
        else:
            frames = N0.compress( mask, self.frames, 1 )

        ## average coordinates of each atom
        mean_xyz = N0.average( frames )

        ## distance of every atom from its average position in every frame
        sq_dev = N0.power( frames - mean_xyz, 2 )
        dist = N0.sqrt( N0.sum( sq_dev, 2 ) )

        return N0.average( dist )
예제 #18
0
    def test_Ramachandran(self):
        """Ramachandran test"""
        self.traj = T.load(T.testRoot() + '/lig_pcr_00/traj.dat')

        ## the atom masses serve as the profile handed to Ramachandran
        self.traj.ref.atoms.set('mass', self.traj.ref.masses())

        ## two frames of the trajectory, reduced to their protein atoms
        self.mdl = [md.compress(md.maskProtein())
                    for md in (self.traj[0], self.traj[11])]

        self.rama = Ramachandran(self.mdl,
                                 name='test',
                                 profileName='mass',
                                 verbose=self.local)

        self.psi = N0.array(self.rama.psi)

        if self.local:
            self.rama.show()

        ## sum over all psi entries that are not None
        defined = N0.logical_not(N0.equal(self.psi, None))
        total = N0.sum(N0.compress(defined, self.psi))

        self.assertAlmostEqual(total, -11717.909796797909, 2)
예제 #19
0
    def addDensity(self, radius=6, minasa=None, profName='density'):
        """
        Count the number of heavy atoms within the given radius of each
        selected atom and store the counts as atom profile.

        Counts are only collected for atoms selected through the 'relAS'
        profile and minasa; without minasa all atoms are used. Atoms that
        are not selected receive the default value -1. One is subtracted
        from every count (presumably to exclude the atom itself).

        @param radius: in Angstrom (default: 6)
        @type  radius: float
        @param minasa: relative exposed surface - 0 to 100%
                       (default: None -> all atoms)
        @type  minasa: float
        @param profName: name of the atom profile to create
                         (default: 'density')
        @type  profName: str
        """
        mHeavy = self.m.maskHeavy()

        xyz = N0.compress(mHeavy, self.m.getXyz(), 0)

        if minasa:
            ## calculate the surface profile first if it is not there yet
            if self.m.profile('relAS', 0) == 0:
                self.addASA()
            mSurf = self.m.profile2mask('relAS', minasa)
        else:
            mSurf = N0.ones(self.m.lenAtoms())

        ## squared distances are compared against the squared radius
        cutoff_sq = radius**2

        contacts = []
        for i in N0.nonzero(mSurf):
            sq_dist = N0.sum((xyz - self.m.xyz[i])**2, 1)
            contacts.append(N0.sum(N0.less(sq_dist, cutoff_sq)) - 1)

        self.m.atoms.set(profName,
                         contacts,
                         mSurf,
                         default=-1,
                         comment='atom density radius %3.1fA' % radius,
                         version=T.dateString() + ' ' + self.version())
예제 #20
0
    def go(self, model_list=None, reference=None):
        """
        Run benchmarking: fit every model to the reference, calculate the
        rmsd values and write the result files.

        NOTE(review): model_list is formatted into a string and handed to
        T.load, and reference is handed to the PDBModel constructor, so
        both appear to be expected as file names (or something these calls
        accept) -- confirm against the callers.

        @param model_list: list of models
                           (default: None S{->} outFolder/L{F_PDBModels})
        @type  model_list: ModelList
        @param reference: reference model
                        (default: None S{->} outFolder/L{F_INPUT_REFERENCE})
        @type  reference: PDBModel
        """
        model_list = model_list or self.outFolder + self.F_PDBModels
        reference = reference or self.outFolder + self.F_INPUT_REFERENCE

        pdb_list = T.load('%s' % model_list)
        reference = PDBModel(reference)

        # check with python 2.4
        iref, imodel = reference.compareAtoms(pdb_list[0])

        ## 1 for every atom of the first model that also has a
        ## counterpart in the reference
        mask_casting = N0.zeros(len(pdb_list[0]))
        N0.put(mask_casting, imodel, 1)

        reference = reference.take(iref)

        ## atoms of residues whose "n_templates" profile lies in the
        ## range given to profile2mask
        rmask = pdb_list[0].profile2mask("n_templates", 1, 1000)
        amask = pdb_list[0].res2atomMask(rmask)

        ## the same selection, once in the atom order of the (reduced)
        ## reference and once in the atom order of the models
        mask_final_ref = N0.compress(mask_casting, amask)
        mask_final = mask_casting * amask

        reference = reference.compress(mask_final_ref)

        for i in range(len(pdb_list)):

            pdb_list[i], pdb_wo_if = self.output_fittedStructures(\
                pdb_list[i], reference, i, mask_final)

            fitted_model_if = pdb_list[i].compress(mask_final)
            fitted_model_wo_if = pdb_wo_if.compress(mask_final)

            coord1 = reference.getXyz()
            coord2 = fitted_model_if.getXyz()

            aprofile = self.rmsd_res(coord1, coord2)

            self.calc_rmsd(fitted_model_if, fitted_model_wo_if, reference,
                           pdb_list[i])

            pdb_list[i].atoms.set('rmsd2ref_if',
                                  aprofile,
                                  mask=mask_final,
                                  default=-1,
                                  comment="rmsd to known reference structure")

        self.output_rmsd_aa(pdb_list)
        self.output_rmsd_ca(pdb_list)
        self.output_rmsd_res(pdb_list)

        self.write_PDBModels(pdb_list)
예제 #21
0
    def _removeDuplicateChains(self, chainMask=None):
        """
        Get rid of identical chains by comparing all chains with each
        other (via self._compareSequences).

        Without chainMask, the first off-diagonal of the identity matrix
        whose average reaches self.threshold marks the start of the
        duplicates; all chains from that offset on are dropped. With a
        chainMask of the right length, the mask alone decides which chains
        are kept. A chainMask of the wrong length is reported to the log
        and no chain is removed.

        @param chainMask: chain mask for overriding the
                          chain identity checking (default: None)
        @type  chainMask: [int]

        @return: number of chains removed
        @rtype: int
        """
        chainCount = len(self.chains)
        matrix = 1.0 * N0.zeros((chainCount, chainCount))

        ## collect for log file
        chain_ids = [chain.chain_id for chain in self.chains]

        ## convert each 3-letter-code res list into a 1-letter-code string
        ## once per chain rather than once per compared pair
        seqs = [singleAA(chain.sequence()) for chain in self.chains]

        ## create identity matrix for all chains against all chains
        ## (upper triangle including the diagonal)
        for i in range(chainCount):
            for j in range(i, chainCount):
                matrix[i, j] = self._compareSequences(seqs[i], seqs[j])

        ## report activity
        self.log.add("\n  Chain ID's of compared chains: " + str(chain_ids))
        self.log.add("  Cross-Identity between chains:\n" + str(matrix))
        self.log.add("  Identity threshold used: " + str(self.threshold))

        if chainMask:
            ## override the automatic chain deletion by supplying a
            ## chain mask to this function
            if len(chainMask) == chainCount:
                self.chains = N0.compress(chainMask, self.chains)
                self.log.add(
                    "NOTE: chain mask %s used for removing chains.\n" %
                    chainMask)

            else:
                self.log.add("########## ERROR ###############")
                self.log.add("# Chain mask is only %i chains long" %
                             len(chainMask))
                self.log.add("# when a mask of length %i is needed" %
                             chainCount)
                self.log.add("# No cleaning will be performed.\n")

        else:
            ## look at diagonals in "identity matrix"
            ## (each chain against each)
            duplicate = chainCount
            for offset in range(1, chainCount):
                diag = N0.diagonal(matrix, offset, 0, 1)
                # diagonal of 1's mark begin of duplicate
                avg = 1.0 * N0.sum(diag) / len(diag)
                if avg >= self.threshold:
                    duplicate = offset
                    break
            self.chains = self.chains[:duplicate]
            self.log.add(
                "NOTE: Identity matrix will be used for removing identical chains."
            )

        ## how many chains have been removed?
        removed = chainCount - len(self.chains)

        ## report activity
        self.log.add(str(removed) + " chains have been removed.\n")

        return removed
예제 #22
0
파일: rmsFit.py 프로젝트: tybiot/biskit
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing outliers.
    Superimposed array y would be C{ N0.dot(y, N0.transpose(r)) + t }.

    In every round the transformation is determined from the rows that are
    still considered, all rows of y are transformed and rows whose distance
    to x is not below rmsd + z * stdv are excluded from the next round.

    @param x: target coordinates, one row per point
    @type  x: array
    @param y: coordinates that are transformed onto x, same length as x
    @type  y: array
    @param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
    @type  n_iterations: 1|0
    @param z: number of standard deviations for outlier definition (default: 2)
    @type  z: float
    @param eps_rmsd: tolerance in rmsd (default: 0.5)
    @type  eps_rmsd: float
    @param eps_stdv: tolerance in standard deviations (default: 0.05)
    @type  eps_stdv: float

    @return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ]
    @rtype: (array, array), [float, float, int]
    """
    iter_trace = []

    rmsd_old = 0
    stdv_old = 0

    n = 0

    ## 1 = row is considered for fitting, 0 = row was thrown out
    mask = N0.ones(len(y), N0.Int32)

    while True:

        ## find transformation for best match of the considered rows
        r, t = findTransformation(N0.compress(mask, x, 0),
                                  N0.compress(mask, y, 0))

        ## transform all coordinates of y
        xt = N0.dot(y, N0.transpose(r)) + t

        ## row distances; rows that are already excluded count as 0
        d = N0.sqrt(N0.sum(N0.power(x - xt, 2), 1)) * mask

        ## rmsd and stdv over the considered rows only
        d_considered = N0.compress(mask, d)
        rmsd = N0.sqrt(N0.average(d_considered**2))
        stdv = MU.SD(d_considered)

        ## check conditions for convergence
        ## NOTE(review): stdv == 0 (identical point sets) leads to a
        ## division by zero here -- confirm how MU.SD results behave
        d_rmsd = abs(rmsd - rmsd_old)
        d_stdv = abs(1 - stdv_old / stdv)

        converged = d_rmsd < eps_rmsd and d_stdv < eps_stdv
        if not converged:
            rmsd_old = rmsd
            stdv_old = stdv

        ## fraction of rows that went into this round
        perc = round(float(N0.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows for the next round
        mask = N0.logical_and(mask, N0.less(d, rmsd + z * stdv))
        outliers = N0.nonzero(N0.logical_not(mask))
        iter_trace.append([perc, round(rmsd, 3), outliers])

        n += 1

        ## stop on convergence or when the requested number of rounds
        ## is reached (n_iterations == 0 means no limit)
        if converged or (n_iterations and n >= n_iterations):
            break

    return (r, t), iter_trace
예제 #23
0
    doper.addSurfaceRacer(probe=1.4)
    surf_lig = lig.profile2mask('MS', 0.0001, 101)

    ## kick out non-surface
    rec = rec.compress(surf_rec)
    lig = lig.compress(surf_lig)

    com = Complex(rec, lig)

    ## get interface patch
    cont = com.atomContacts(cutoff=6.0)
    rec_if = N0.sum(cont, 1)
    lig_if = N0.sum(cont, 0)

    ## center distance
    c2c = N0.sqrt(N0.sum((rec.center() - lig.center())**2, 0))
    print "Center2Center: ", c2c

    ## get patches and put them into Pymoler for display
    print "Patching"
    excl = N0.compress(N0.ones(len(rec_if)), rec_if)
    pm = test(rec, c2c, nAtoms=len(N0.nonzero(rec_if)), exclude=rec_if)

    pm.addPdb(rec.compress(rec_if), 'rec_interface')
    pm.addPdb(lig.compress(lig_if), 'lig_interface')
    pm.addPdb(com.model(), 'complex')

    ## show everything
    ## the patches are as movie in 'model'
    pm.show()
예제 #24
0
class ReduceCoordinates:
    """
    ReduceCoordinates
    =================

    Translate a PDBModel or frames from a trajectory to structure(s) with
    only one backbone and up to 2 side chain atoms per residue.
    The new atoms are the centers of mass of several atoms and carry the
    weight of the pooled atoms in an atom profile called 'mass'.

    Examples
    --------
      >>> ## create with reference PDBModel
      >>> reducer = ReduceCoordinates( m_ref )
      >>> ## creates reduced PDBModel from m_ref
      >>> m_red = reducer.reduceToModel()

    OR:
      >>> m_red_1 = reducer.reduceToModel( m1.getXyz() ) ## reduce many models
      >>> m_red_2 = reducer.reduceToModel( m2.getXyz() ) ## with identical atoms

    OR:
      >>> ## reduce a complete Trajectory
      >>> reducer = ReduceCoordinates( traj.ref )
      >>> red_ref= reducer.reduceToModel()
      >>> frames = reducer.reduceXyz( traj.frames )
      >>> traj_red = Trajectory( ref=red_ref )
      >>> traj_red.frames = frames
    """
    ## Class-private copy of the standard atom order per residue.
    ## A shallow copy is taken so that the TYR / PHE overrides below stay
    ## local to this class instead of silently re-ordering the shared
    ## MU.aaAtoms table for every other module that uses it.
    aaAtoms = dict(MU.aaAtoms)

    ## modify order of TYR/PHE ring atoms to move centers away from ring axis
    aaAtoms['TYR'] = [
        'N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CE1', 'CD2', 'CE2', 'CZ',
        'OH', 'OXT'
    ]
    aaAtoms['PHE'] = [
        'N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CE1', 'CD2', 'CE2', 'CZ',
        'OXT'
    ]

    def __init__(self, model, maxPerCenter=4):
        """
        Prepare reduction of coordinates from a given model.

        Adds the mass profile to the model, sorts the atoms of each residue
        into the order given by L{aaAtoms}, removes hydrogens from the sorted
        copy and finally builds the atom -> center mapping (L{makeMap}).

        @param model: reference model defining atom content and order
        @type  model: PDBModel
        @param maxPerCenter: max number of atoms per side chain center atom
                             (default: 4)
        @type  maxPerCenter: int
        """
        self.m = model
        self.__addMassProfile(self.m)

        ## sort atoms within residues into standard order
        def cmpAtoms(a1, a2):
            """
            Comparison function for bringing atoms into standard order
            within residues as defined by L{ aaAtoms }.

            The residue name is taken from a1 only. Atoms whose name is not
            listed for that residue are compared alphabetically by name.
            A residue name that is missing from L{aaAtoms} is not handled
            here (the lookup fails).

            @param a1: first atom (record with 'residue_name' and 'name')
            @type  a1: dict-like
            @param a2: second atom (record with 'name')
            @type  a2: dict-like

            @return: result of cmp() on the two positions (or names)
            @rtype: [-1|0|1]
            """
            target = self.aaAtoms[a1['residue_name']]
            try:
                return cmp(target.index(a1['name']), target.index(a2['name']))
            except ValueError:
                ## at least one atom name is unknown for this residue type
                return cmp(a1['name'], a2['name'])

        self.a_indices = self.m.argsort(cmpAtoms)
        self.m_sorted = self.m.sort(self.a_indices)

        ## remove H from internal model and from list of atom positions
        ## (the value returned by remove() is applied as mask to a_indices so
        ## that both stay in sync)
        maskH = self.m_sorted.remove(self.m_sorted.maskH())
        self.a_indices = N0.compress(maskH, self.a_indices)

        self.makeMap(maxPerCenter)
예제 #25
0
    def makeMap(self, maxPerCenter=4):
        """
        Calculate mapping between complete and reduced atom list.

        For every residue of the sorted model one backbone center ('BB') is
        created. All residues except GLY and ALA additionally get one center
        per side chain group ('SC0', 'SC1', ..) as returned by self.group().
        The result is stored in two parallel attributes:

          - self.groups: list of index arrays, each holding the positions
            (in the original, unsorted model) of the atoms pooled into one
            center
          - self.atoms: DictList with the matching center atoms created by
            self.nextAtom()

        self.currentAtom is reset to 0 before the centers are created.

        @param maxPerCenter: max number of atoms per side chain center atom
                             (default: 4)
        @type  maxPerCenter: int
        """
        m = self.m_sorted
        resIndex = m.resIndex()
        resModels = m.resModels()

        n_residues = len(resIndex)
        n_atoms = len(self.a_indices)

        self.currentAtom = 0

        groups = []
        atoms = DictList()

        for res_i, first_atom in enumerate(resIndex):

            ## a residue ends where the next one starts; the last residue
            ## runs to the end of the atom list
            if res_i < n_residues - 1:
                end_atom = resIndex[res_i + 1]
            else:
                end_atom = n_atoms

            ## first atom of the residue serves as template for the centers
            a = m.atoms[first_atom]

            ## position of this residue's atoms in original PDBModel (unsorted)
            a_indices = self.a_indices[first_atom:end_atom]

            ## for each center create list of atom indices and a center atom
            if a['residue_name'] not in ('GLY', 'ALA'):
                bb_mask = resModels[res_i].maskBB()

                bb_a_indices = N0.compress(bb_mask, a_indices)
                sc_a_indices = N0.compress(N0.logical_not(bb_mask), a_indices)

                sc_groups = self.group(sc_a_indices, maxPerCenter)
            else:
                ## GLY and ALA are pooled completely into the backbone center
                bb_a_indices = a_indices
                sc_groups = []

            groups.append(bb_a_indices)
            atoms += [self.nextAtom(a, 'BB')]

            for sc_i, g in enumerate(sc_groups):
                groups.append(g)
                atoms += [self.nextAtom(a, 'SC%i' % sc_i)]

        self.groups = groups
        self.atoms = atoms
예제 #26
0
    ## NOTE: excerpt of a larger test script; rec, lig, surf_rec and test()
    ## are defined before this point and are not visible here.
    ## mask built from the 'MS' profile of lig with the bounds 0.0001 and 101
    surf_lig = lig.profile2mask( 'MS', 0.0001, 101 )

    ## kick out non-surface
    rec = rec.compress( surf_rec )
    lig = lig.compress( surf_lig )

    com = Complex( rec, lig )

    ## get interface patch
    ## contact matrix summed along axis 1 / axis 0; the results are used
    ## further down as masks on rec / lig respectively
    cont = com.atomContacts( cutoff=6.0 )
    rec_if = N0.sum( cont, 1 )
    lig_if = N0.sum( cont, 0 )

    ## center distance
    ## (Euclidean distance between the two values returned by center())
    c2c = N0.sqrt( N0.sum( (rec.center() - lig.center())**2, 0 ) )
    print "Center2Center: ", c2c

    ## get patches and put them into Pymoler for display
    print "Patching"
    ## NOTE(review): excl is not used in the visible remainder of the script;
    ## the condition is all ones, so nothing is filtered out of rec_if here
    excl = N0.compress( N0.ones( len( rec_if ) ), rec_if )
    ## NOTE(review): nAtoms assumes N0.nonzero returns a flat index array
    ## (one entry per non-zero element) -- confirm for the N0 backend in use
    pm = test( rec, c2c, nAtoms=len(N0.nonzero(rec_if)), exclude=rec_if )


    ## add the interface atoms of both partners and the whole complex
    pm.addPdb( rec.compress( rec_if ), 'rec_interface' )
    pm.addPdb( lig.compress( lig_if ), 'lig_interface' )
    pm.addPdb( com.model(), 'complex')

    ## show everything
    ## the patches are as movie in 'model'
    pm.show()
예제 #27
0
    def fit( self, mask=None, ref=None, n_it=1,
             prof='rms', verbose=1, fit=1, **profInfos ):
        """
        Superimpose all coordinate frames on reference coordinates. Put rms
        values in a profile. If n_it != 1 (iterative fit), the fraction of
        atoms considered for the fit is put into a profile called
        |prof|_considered (i.e. by default 'rms_considered').

        Only the atoms selected by mask enter the fit and the rms value, but
        the resulting transformation is applied to all atoms of a frame.

        @param mask: atom mask, atoms to consider default: [all]
        @type  mask: [1|0]
        @param ref: use as reference, default: None, average Structure
        @type  ref: PDBModel
        @param n_it: number of fit iterations, kicking out outliers on the way
                     1 -> classic single fit, 0 -> until convergence
                     (default: 1)
        @type  n_it: int
        @param prof: save rms per frame in profile of this name, ['rms']
        @type  prof: str
        @param verbose: print progress info to STDERR (default: 1)
        @type  verbose: 1|0
        @param fit: transform frames after match, otherwise just calc rms
                    (default: 1)
        @type  fit: 1|0
        @param profInfos: additional key=value pairs for rms profile info []
        @type profInfos: key=value
        """
        if ref is None:
            refxyz = N0.average( self.frames, 0 )
        else:
            refxyz = ref.getXyz()

        if mask is None:
            mask = N0.ones( len( refxyz ), N0.Int32 )

        ## from here on the reference only contains the atoms to be fitted
        refxyz = N0.compress( mask, refxyz, 0 )

        if verbose: T.errWrite( "rmsd fitting..." )

        rms = []          ## rms value of each frame
        non_outliers = [] ## fraction of atoms considered for rms and fit

        for i in range( len( self.frames ) ):

            xyz = self.frames[i]
            xyz_masked = N0.compress( mask, xyz, 0 )

            if n_it != 1:
                ## iterative fit; last entry of the trace describes the
                ## final iteration: [ fraction considered, rmsd, .. ]
                (r, t), rmsdList = rmsFit.match( refxyz, xyz_masked, n_it )
                non_outliers.append( rmsdList[-1][0] )

                xyz_transformed = N0.dot( xyz, N0.transpose(r)) + t

                rms.append( rmsdList[-1][1] )

            else:
                ## classic single fit, rms is calculated here
                r, t = rmsFit.findTransformation( refxyz, xyz_masked )

                xyz_transformed = N0.dot( xyz, N0.transpose(r)) + t

                ## per-atom distance between fitted frame and reference
                d = N0.sqrt(N0.sum(N0.power(
                    N0.compress(mask, xyz_transformed, 0) - refxyz, 2), 1))

                rms.append( N0.sqrt( N0.average(d**2) ) )

            if fit:
                self.frames[i] = xyz_transformed.astype(N0.Float32)

            ## progress mark for every 100th frame
            if verbose and i%100 == 0:
                T.errWrite( '#' )

        self.setProfile( prof, rms, n_iterations=n_it, **profInfos )

        ## only filled by the iterative fit
        if non_outliers:
            self.setProfile( prof+'_considered', non_outliers,
                             n_iterations=n_it,
                             comment='fraction of atoms considered for iterative fit' )

        if verbose: T.errWrite( 'done\n' )
예제 #28
0
파일: Benchmark.py 프로젝트: graik/biskit
    def go(self, model_list = None, reference = None):
        """
        Run benchmarking.

        Loads the model list and the reference, reduces both to the atoms
        they have in common and that lie in residues selected by the
        'n_templates' profile (values 1..1000) of the first model, fits every
        model to the reference, stores the per-atom result of rmsd_res() in
        the atom profile 'rmsd2ref_if' and writes the rmsd reports and the
        updated models.

        NOTE(review): model_list is formatted with '%s' and handed to
        T.load(), reference is handed to the PDBModel constructor -- both
        are presumably expected to be file names or loadable sources rather
        than ready-made objects; confirm against callers.

        @param model_list: list of models
                           (default: None S{->} outFolder/L{F_PDBModels})
        @type  model_list: ModelList
        @param reference: reference model
                        (default: None S{->} outFolder/L{F_INPUT_REFERENCE})
        @type  reference: PDBModel
        """
        model_list = model_list or self.outFolder + self.F_PDBModels
        reference = reference or self.outFolder + self.F_INPUT_REFERENCE

        pdb_list = T.load('%s'%model_list)
        reference = PDBModel(reference)

        # check with python 2.4
        iref, imodel = reference.compareAtoms(pdb_list[0])

        ## 1 for every atom of the first model that also exists in reference
        mask_casting = N0.zeros(len(pdb_list[0]))
        N0.put(mask_casting, imodel, 1)

        reference = reference.take(iref)

        ## atoms in residues whose 'n_templates' profile is within 1..1000
        rmask = pdb_list[0].profile2mask("n_templates", 1,1000)
        amask = pdb_list[0].res2atomMask(rmask)

        ## same selection, once in the numbering of the (already reduced)
        ## reference and once in the numbering of the models
        mask_final_ref = N0.compress(mask_casting, amask)
        mask_final = mask_casting * amask

        reference = reference.compress(mask_final_ref)

        ## index loop on purpose: entries of pdb_list are replaced in place
        for i in range(len(pdb_list)):

            pdb_list[i], pdb_wo_if = self.output_fittedStructures(\
                pdb_list[i], reference, i, mask_final)

            fitted_model_if = pdb_list[i].compress(mask_final)
            fitted_model_wo_if = pdb_wo_if.compress(mask_final)

            coord1 = reference.getXyz()
            coord2 = fitted_model_if.getXyz()

            aprofile = self.rmsd_res(coord1,coord2)

            self.calc_rmsd(fitted_model_if, fitted_model_wo_if,
                           reference, pdb_list[i])

            ## atoms outside mask_final get the default value -1
            pdb_list[i].atoms.set('rmsd2ref_if', aprofile,
                                  mask=mask_final, default = -1,
                                  comment="rmsd to known reference structure")

        self.output_rmsd_aa(pdb_list)
        self.output_rmsd_ca(pdb_list)
        self.output_rmsd_res(pdb_list)

        self.write_PDBModels(pdb_list)