Python sum Examples, Biskit.oldnumeric.sum Python Examples

Example #1

0

Show file

File: Complex.py Project: tybiot/biskit

    def contactResDistribution(self, cm=None):
        """
        Tally how often each amino acid letter occurs among the residues
        that take part in the receptor-ligand interface.

        @param cm: residue contact matrix; if None it is fetched from
                   resContacts() (default: None)
        @type  cm: matrix

        @return: dict with one entry per letter returned by
                 molUtils.allAA(), e.g. {'A':3, 'C':1, .. }
        @rtype: dict
        """
        contacts = self.resContacts() if cm is None else cm

        ## N0.sum of the matrix is used as mask over the ligand sequence,
        ## N0.sum of the transposed matrix as mask over the receptor sequence
        lig_hits = N0.sum(contacts)
        rec_hits = N0.sum(N0.transpose(contacts))

        ## pick the sequence letters of the contacting residues
        lig_letters = N0.compress(lig_hits, self.lig().sequence())
        rec_letters = N0.compress(rec_hits, self.rec().sequence())

        ## glue the selected letters back into one string (ligand first)
        interface_seq = ''.join(lig_letters) + ''.join(rec_letters)

        ## one counter per amino acid letter
        return dict((aa, interface_seq.count(aa)) for aa in molUtils.allAA())

Example #2

0

Show file

File: ContactSlave.py Project: tybiot/biskit

    def calcReducedContacts(self, soln, c):
        """
        Get contact matrices and/or fnarc from reduced-atom models.

        Returns without doing anything if no reduced receptor or ligand
        models are set or if requested() reports nothing to do for
        'c_ratom_10' / 'fnarc_10'. Errors raised during the calculation are
        handed to reportError() instead of being propagated.

        @param soln: solution number (only used for error reporting)
        @type  soln: int
        @param c: Complex; receives the entries 'c_ratom_10' and 'fnarc_10'
        @type  c: Complex
        """
        if not (self.reduced_recs and self.reduced_ligs):
            return

        if not self.requested(c, 'c_ratom_10', 'fnarc_10'):
            return

        try:
            ## create Complex with same orientation but reduced coordinates
            red_rec = self.reduced_recs[c.rec_model.source]
            red_lig = self.reduced_ligs[c.lig_model.source]
            red_com = Complex(red_rec, red_lig, c.ligandMatrix)

            contacts = red_com.atomContacts(10.0, cache=1)

            if self.requested(c, 'c_ratom_10'):
                c['c_ratom_10'] = MU.packBinaryMatrix(contacts)

            if self.c_ref_ratom_10 is not None:
                ## contacts shared with the reference divided by the
                ## number of reference contacts
                ref = N0.ravel(self.c_ref_ratom_10)
                shared = N0.sum(N0.ravel(contacts) * ref)
                c['fnarc_10'] = shared / float(N0.sum(ref))

        except Exception:
            ## only trap real errors; KeyboardInterrupt and SystemExit
            ## must still be able to stop the process
            self.reportError('reduced contacts error', soln)

Example #3

0

Show file

    def test_molTools(self):
        """molTools test"""
        from Biskit import PDBModel

        ## Loading PDB...
        ## (only the atoms selected by maskProtein() are kept)
        self.m = PDBModel(T.testRoot() + '/lig/1A19.pdb')
        self.m = self.m.compress(self.m.maskProtein())

        ## potential h-bonds of the model
        hb = hbonds(self.m)

        ## coordinates of the nearest covalently attached atom of atom 40
        xyz = xyzOfNearestCovalentNeighbour(40, self.m)

        if self.local:
            print '\nThe nearest covalently attached atom to the'
            print '  atom with index 40 has the coordinates:'
            print xyz

            print 'Potential h-bonds in model:'
            print '(donor index, acceptor index, distance and angle)'
            for h in hb:
                print h

            ## NOTE: copies every local name of this test (self, hb, xyz,
            ## h, PDBModel) into the module namespace, so the local names
            ## used above are part of the observable behaviour
            globals().update(locals())

        ## fold part of the h-bond list and the coordinates into a single
        ## number that is compared against the stored expectation
        self.r = N0.sum(N0.ravel(hb[3:5])) + N0.sum(xyz)

        ## agreement is required to 3 decimal places
        self.assertAlmostEqual(self.r, self.EXPECT, 3)

Example #4

0

Show file

File: molTools.py Project: graik/biskit

    def test_molTools(self):
        """molTools test"""
        from Biskit import PDBModel

        ## Loading PDB...
        ## (only the atoms selected by maskProtein() are kept)
        self.m = PDBModel( T.testRoot() + '/lig/1A19.pdb' )
        self.m = self.m.compress( self.m.maskProtein() )

        ## potential h-bonds of the model
        hb = hbonds( self.m )

        ## coordinates of the nearest covalently attached atom of atom 40
        xyz = xyzOfNearestCovalentNeighbour( 40, self.m )

        if self.local:
            print '\nThe nearest covalently attached atom to the'
            print '  atom with index 40 has the coordinates:'
            print xyz

            print 'Potential h-bonds in model:'
            print '(donor index, acceptor index, distance and angle)'
            for h in hb:
                print h

            ## NOTE: copies every local name of this test (self, hb, xyz,
            ## h, PDBModel) into the module namespace, so the local names
            ## used above are part of the observable behaviour
            globals().update( locals() )

        ## fold part of the h-bond list and the coordinates into a single
        ## number that is compared against the stored expectation
        self.r = N0.sum(N0.ravel(hb[3:5])) + N0.sum(xyz)

        ## agreement is required to 3 decimal places
        self.assertAlmostEqual( self.r, self.EXPECT, 3 )

Example #5

0

Show file

File: Complex.py Project: tybiot/biskit

    def __findTransformation(self, x, y):
        """
        Determine the rotation matrix and translation vector that match
        coordinate array y with coordinate array x.

        Applying the result to a single atom i::
            y_new[i] = N0.dot(r, y[i]) + t

        Applying it to all atoms in one step::
            y_new = N0.dot(y, N0.transpose(r)) + t

        @param x: coordinates
        @type  x: array
        @param y: coordinates
        @type  y: array

        @return: rotation matrix, translation vector
        @rtype: array, array

        @author: Michael Habeck
        """
        from numpy.linalg import svd

        ## average position of each coordinate set
        x_center = N0.sum(x) / len(x)
        y_center = N0.sum(y) / len(y)

        ## correlation matrix of the centered coordinates
        correlation = N0.dot(N0.transpose(x - x_center), y - y_center)

        ## the singular values themselves are not needed
        left, _singular, right = svd(correlation)

        rotation = N0.dot(left, right)
        translation = x_center - N0.dot(rotation, y_center)

        return rotation, translation

Example #6

0

Show file

File: ContactSlave.py Project: graik/biskit

    def calcReducedContacts( self, soln, c ):
        """
        Get contact matrices and/or fnarc from reduced-atom models.

        Returns without doing anything if no reduced receptor or ligand
        models are set or if requested() reports nothing to do for
        'c_ratom_10' / 'fnarc_10'. Errors raised during the calculation are
        handed to reportError() instead of being propagated.

        @param soln: solution number (only used for error reporting)
        @type  soln: int
        @param c: Complex; receives the entries 'c_ratom_10' and 'fnarc_10'
        @type  c: Complex
        """
        if not (self.reduced_recs and self.reduced_ligs):
            return

        if not self.requested(c,'c_ratom_10','fnarc_10'):
            return

        try:
            ## create Complex with same orientation but reduced coordinates
            red_rec = self.reduced_recs[ c.rec_model.source ]
            red_lig = self.reduced_ligs[ c.lig_model.source ]
            red_com = Complex( red_rec, red_lig, c.ligandMatrix )

            contacts = red_com.atomContacts( 10.0, cache=1 )

            if self.requested(c, 'c_ratom_10'):
                c['c_ratom_10'] = MU.packBinaryMatrix(contacts)

            if self.c_ref_ratom_10 is not None:
                ## contacts shared with the reference divided by the
                ## number of reference contacts
                ref = N0.ravel( self.c_ref_ratom_10 )
                shared = N0.sum( N0.ravel(contacts) * ref )
                c['fnarc_10'] = shared / float( N0.sum(ref) )

        except Exception:
            ## only trap real errors; KeyboardInterrupt and SystemExit
            ## must still be able to stop the process
            self.reportError('reduced contacts error', soln)

Example #7

0

Show file

File: Complex.py Project: tybiot/biskit

    def __atomContacts(self, cutoff, rec_mask, lig_mask, cache):
        """
        Contact matrix of the masked receptor atoms versus the masked
        ligand atoms: an entry is true where the pairwise distance is
        below the cutoff.

        A distance matrix stored in self.pw_dist is re-used if its shape
        equals (sum of rec_mask, sum of lig_mask); otherwise the distances
        are re-calculated.
        NOTE(review): the stored matrix is validated by its shape only, so
        different masks selecting the same number of atoms would re-use
        it -- confirm that callers account for this.

        @param cutoff: cutoff for B{atom-atom} contact in Angstrom
        @type  cutoff: float
        @param rec_mask: atom mask
        @type  rec_mask: [1|0]
        @param lig_mask: atom mask
        @type  lig_mask: [1|0]
        @param cache: store the pairwise atom distance matrix in self.pw_dist
        @type  cache: 1|0

        @return: atom contact matrix, array sum_rec_mask x sum_lig_mask
        @rtype: array
        """
        rec_xyz = self.rec().getXyz()
        lig_xyz = self.lig().getXyz()

        dist = getattr(self, 'pw_dist', None)

        ## is there a usable distance matrix from a previous call?
        stale = dist is None
        if not stale:
            wanted_shape = (N0.sum(rec_mask), N0.sum(lig_mask))
            stale = N0.shape(dist) != wanted_shape

        if stale:
            ## pairwise distances between the selected atoms only
            dist = self.__pairwiseDistances(
                N0.compress(rec_mask, rec_xyz, 0),
                N0.compress(lig_mask, lig_xyz, 0))

        if cache:
            self.pw_dist = dist

        ## true where distance < cutoff
        return N0.less(dist, cutoff)

Example #8

0

Show file

    def randomPatches(self, size, n=None, exclude=None, max_overlap=0,
                      exclude_all=None):
        """
        Collect atom patches around randomly placed centers.

        size - int, number of atoms per patch
        n    - int, number of patch centers that are tried
               (None or 0 -> 500)
        exclude     - [ 1|0 ], atom mask; a patch may share at most
                      |max_overlap| atoms with it. The atoms of every
                      accepted patch are added to this mask before the
                      next center is tried.
        max_overlap - int
        exclude_all - [ 1|0 ], don't touch ANY of these atoms
        -> [ [ 1|0 ] ], list of atom masks (the accepted patches)
        """
        if exclude is None:
            exclude = N0.zeros(self.model.lenAtoms(), 'i')
        if exclude_all is None:
            exclude_all = N0.zeros(self.model.lenAtoms(), 'i')

        n = n or 500

        centers = self.random_translations(n=n, center=self.center)

        ## the centers are ordered relative to the center of the forbidden
        ## atoms, or relative to the first random center if there are none.
        ## NOTE(review): only exclude_all moves the origin, the exclude
        ## mask never does -- confirm that this is intended.
        origin = centers[0]
        if N0.any(exclude_all):
            origin = self.model.center(mask=exclude_all)

        centers = self.orderCenters(centers, origin)

        accepted = []
        for idx in range(n):
            patch = self.patchAround(centers[idx], size)

            within_limit = N0.sum(patch * exclude) <= max_overlap
            if within_limit and N0.sum(patch * exclude_all) == 0:
                ## later patches must respect the atoms taken by this one
                exclude = exclude + patch
                accepted.append(patch)

        return accepted

Example #9

0

Show file

File: Complex.py Project: tybiot/biskit

 def contactsDiff(self, ref, cutoff=None):
     """
     Count the B{residue-residue} contacts that are found in only one of
     the two complexes: the number of contacts present in this OR the
     reference complex minus the result of contactsShared().

     @param ref: to compare this one with
     @type  ref: Complex
     @param cutoff: maximal atom-atom distance, None .. previous setting
     @type  cutoff: float

     @return: number of contacts different in this and reference complex.
     @rtype: int
     """
     own = self.resContacts(cutoff)
     other = ref.resContacts(cutoff)

     ## contacts that show up in at least one of the two matrices
     n_either = N0.sum(N0.sum(N0.logical_or(own, other)))

     return n_either - self.contactsShared(ref, cutoff)

Example #10

0

Show file

File: Analyse.py Project: tybiot/biskit

    def get_identities(self, nb_templates, validation_folder=None):
        """
        Calculate, for each template, the mean of its percentage of
        identities with the other templates.

        @param nb_templates: number of templates used in the cross-validation
        @type  nb_templates: int
        @param validation_folder: folder with validation data
                           (default: None S{->} outFolder/L{F_TEMPLATE_FOLDER})
        @type  validation_folder: str

        @return: dictionary with mean percent identities for each template
        @rtype: {str:float}
        """
        if not validation_folder:
            validation_folder = self.outFolder + self.F_TEMPLATE_FOLDER

        identities = {}

        for folder in self.__listDir(validation_folder):
            ## identities file inside this template's folder
            path = "%s/%s" % (validation_folder,
                              folder + CI.F_OUTPUT_IDENTITIES_COV)
            parsed = self.parseFile(path)

            ## sum of the first parsed row (without its first entry)
            ## divided by the number of templates
            identities[folder] = N0.sum(parsed[0][1:]) / nb_templates

        return identities

Example #11

0

Show file

    def test_ComplexTraj(self):
        """Dock.ComplexTraj test"""

        import Biskit.tools as T

        ## the test folder holds no complex trajectory, so fake one from
        ## five copies of the same complex structure
        pdb_files = [T.testRoot() + '/com/1BGS.pdb'] * 5
        plain_traj = Trajectory(pdb_files, verbose=self.local)

        t = ComplexTraj(plain_traj, recChains=[0])

        ## relabel atoms 1191..1967 to fake a second chain in the ligand
        for atom_i in range(1093 + 98, 1968):
            t.ref.atoms['chain_id'][atom_i] = 'B'

        t.ref.chainIndex(force=1, cache=1)
        t.cl = [1, 2]

        ## new atom order: 1093..1190 first, then 0..1092, then 1191..1967
        new_order = N0.concatenate((range(1093, 1191),
                                    range(0, 1093),
                                    range(1191, 1968)))

        reordered = t.takeAtoms(new_order)
        contactMat = reordered.atomContacts(1)

        if self.local:
            print('Receptor chains: %s    Ligand chains: %s' % (t.cr, t.cl))

        n_contacts = N0.sum(N0.ravel(contactMat))
        self.assertEqual(n_contacts, 308)

Example #12

0

Show file

    def mergeProfiles(self, p0, p1, maxOverlap=3):
        """
        Merge profile p0 with profile p1, as long as they overlap in
        at most maxOverlap positions. A position counts as overlapping if
        both profiles have a value greater than 0 there.

        The overlapping positions are set to 0 with N0.put, i.e. in place.
        NOTE(review): whether this also alters the objects handed in
        depends on __list2array returning a copy -- confirm.

        @param p0: profile
        @type  p0: [float]
        @param p1: profile
        @type  p1: [float]
        @param maxOverlap: maximal allowed overlap between profiles
        @type  maxOverlap: int

        @return: p0 + p1 with the overlapping positions set to 0 or, if
                 the profiles overlap in more than maxOverlap positions,
                 p0 as returned by __list2array
        @rtype: array
        """
        p0 = self.__list2array(p0)
        p1 = self.__list2array(p1)

        ## positions where BOTH profiles are positive. The comparison
        ## results must not be added up and tested for > 1: the sum of two
        ## boolean arrays is again boolean and never exceeds 1.
        overlap = N0.logical_and(N0.greater(p0, 0), N0.greater(p1, 0))

        if N0.sum(overlap) <= maxOverlap:
            ## one of the two profiles will in most cases not belong to these
            ## positions. We can't decide which one is wrong, let's eliminate
            ## both values. Alternatively we could keep one, or the average, ..
            clash = N0.nonzero(overlap)
            N0.put(p1, clash, 0)
            N0.put(p0, clash, 0)

            p0 = p0 + p1

        return p0

Example #13

0

Show file

File: Hmmer.py Project: graik/biskit

    def mergeProfiles( self, p0, p1, maxOverlap=3 ):
        """
        Merge profile p0 with profile p1, as long as they overlap in
        at most maxOverlap positions. A position counts as overlapping if
        both profiles have a value greater than 0 there.

        The overlapping positions are set to 0 with N0.put, i.e. in place.
        NOTE(review): whether this also alters the objects handed in
        depends on __list2array returning a copy -- confirm.

        @param p0: profile
        @type  p0: [float]
        @param p1: profile
        @type  p1: [float]
        @param maxOverlap: maximal allowed overlap between profiles
        @type  maxOverlap: int

        @return: p0 + p1 with the overlapping positions set to 0 or, if
                 the profiles overlap in more than maxOverlap positions,
                 p0 as returned by __list2array
        @rtype: array
        """
        p0 = self.__list2array( p0 )
        p1 = self.__list2array( p1 )

        ## positions where BOTH profiles are positive. The comparison
        ## results must not be added up and tested for > 1: the sum of two
        ## boolean arrays is again boolean and never exceeds 1.
        overlap = N0.logical_and( N0.greater(p0,0), N0.greater(p1,0) )

        if N0.sum( overlap ) <= maxOverlap:
            ## one of the two profiles will in most cases not belong to these
            ## positions. We can't decide which one is wrong, let's eliminate
            ## both values. Alternatively we could keep one, or the average, ..
            clash = N0.nonzero( overlap )
            N0.put( p1, clash, 0 )
            N0.put( p0, clash, 0 )

            p0 = p0 + p1

        return p0

Example #14

0

Show file

    def test_EnsembleTraj(self):
        """EnsembleTraj.fit/fitMembers/plotMembers test """
        ## The second part of the test will fail with the slimmed
        ## down test trajectory of T.testRoot(). To run the full
        ## test please select a larger trajectory.

        self.tr = traj2ensemble(self.tr)
        ensemble = self.tr

        ## the member mask itself is not used further down
        ensemble.memberMask(1)

        ## fit all frames to the reference structure
        ensemble.fit(ref=ensemble.ref,
                     mask=ensemble.ref.maskCA(),
                     prof='rms_CA_ref',
                     verbose=self.local)

        ## fit each member with refIndex 0 ...
        ca_mask = ensemble.ref.maskCA()
        ensemble.fitMembers(mask=ca_mask, prof='rms_CA_0', refIndex=0,
                            verbose=self.local)

        ## ... and once more without an explicit refIndex
        ca_mask = ensemble.ref.maskCA()
        ensemble.fitMembers(mask=ca_mask, prof='rms_CA_av',
                            verbose=self.local)

        plotted = ('rms_CA_av', 'rms_CA_0', 'rms_CA_ref')
        self.p = ensemble.plotMemberProfiles(*plotted, xlabel='frame')

        if self.local or self.VERBOSITY > 2:
            self.p.show()

        total_rms = N0.sum(ensemble.profile('rms_CA_av'))
        self.assertAlmostEqual(26.19851, total_rms, 2)

Example #15

0

Show file

File: WhatIf.py Project: graik/biskit

    def __exposedResidues( self, ASA_values, sidechainCut=0.0,
                           backboneCut=0.0, totalCut=0.0 ):
        """
        Decide what is a surface exposed residue and what is not.

        Column 0 of ASA_values is compared against totalCut, column 1
        against backboneCut and column 2 against sidechainCut. A residue
        is flagged when N0.sum over the three stacked test results is
        greater than 0.
        NOTE(review): earlier documentation stated that all three values
        have to pass the test, but the sum > 0 check looks like it accepts
        a residue as soon as a single test passes -- confirm which one is
        intended. Also confirm the column order of ASA_values.

        @param ASA_values: array with one row per residue and (at least)
                           three ASA columns, calculated in
                           L{__read_residueASA}.
        @type  ASA_values: array
        @param sidechainCut: cutoff for the side chain ASA (default: 0.0)
        @type  sidechainCut: float
        @param backboneCut: cutoff value for back bone ASA
        @type  backboneCut: float
        @param totalCut: cutoff for total ASA
        @type  totalCut: float

        @return: residue mask, where 0 = buried
        @rtype: [1|0]
        """
        by_column = N0.transpose(ASA_values)

        ## one test per ASA column, value > cutoff
        over_total = N0.greater(by_column[0], totalCut)
        over_backbone = N0.greater(by_column[1], backboneCut)
        over_sidechain = N0.greater(by_column[2], sidechainCut)

        ## stack the three test results into one 3 x n_residues array
        stacked = N0.concatenate(([over_total],
                                  [over_backbone],
                                  [over_sidechain]))

        return N0.greater(N0.sum(stacked), 0)

Example #16

0

Show file

File: hexTools.py Project: tybiot/biskit

def centerSurfDist(model, surf_mask, mask=None):
    """
    Calculate the longest and shortest distance between the center of
    mass of the model and its selected surface atoms.

    Only atoms that are set in both mask and surf_mask (the two masks are
    multiplied) enter the calculation.

    @param model: structure providing maskHeavy(), centerOfMass() and
                  getXyz()
    @type  model: PDBModel
    @param surf_mask: atom surface mask, needed for minimum surface distance
    @type  surf_mask: [1|0]
    @param mask: additional atom mask (default: None S{->} model.maskHeavy())
    @type  mask: [1|0]

    @return: max distance, min distance
    @rtype: float, float
    """
    if mask is None:
        mask = model.maskHeavy()

    center = model.centerOfMass()

    ## coordinates of the atoms selected by both masks
    selected_xyz = N0.compress(mask * surf_mask, model.getXyz(), 0)

    ## distance of each selected atom from the center
    offsets = selected_xyz - center
    dist = N0.sqrt(N0.sum(offsets**2, 1))

    return max(dist), min(dist)

Example #17

0

Show file

    def pairwiseRmsd( self, aMask=None, noFit=0 ):
        """
        Calculate the rmsd between every pair of coordinate frames.

        @param aMask: atom mask; if given, only these atoms are compared
        @type  aMask: [1|0]
        @param noFit: if true, compare the frames as they are; otherwise
                      the rmsd is taken from rmsFit.match (default: 0)
        @type  noFit: 1|0

        @return: frames x frames array of float (symmetric, 0 on diagonal)
        @rtype: array
        """
        frames = self.frames
        if aMask is not None:
            frames = N0.compress( aMask, frames, 1 )

        n_frames = len( frames )
        result = N0.zeros( (n_frames, n_frames), N0.Float32 )

        ## only the upper triangle is calculated, the result is mirrored
        for i in range( n_frames ):
            for j in range( i+1, n_frames ):

                if noFit:
                    delta = frames[i] - frames[j]
                    d = N0.sqrt( N0.sum( N0.power(delta, 2), 1 ) )
                    rmsd = N0.sqrt( N0.average(d**2) )
                else:
                    _rt, rmsd_list = rmsFit.match( frames[i], frames[j], 1 )
                    rmsd = rmsd_list[0][1]

                result[i,j] = rmsd
                result[j,i] = rmsd

        return result

Example #18

0

Show file

File: molUtils.py Project: neb9/biskit

    def test_molUtils(self):
        """molUtils test"""
        from Biskit import PDBModel

        ## load a structure and keep the atoms selected by maskProtein()
        self.m = PDBModel(t.testRoot() + '/lig/1A19.pdb')
        self.model_1 = self.m.compress(self.m.maskProtein())

        ## second model with the atoms sorted into standard order
        self.model_2 = sortAtomsOfModel(self.model_1)

        ## compare the two atom orders position by position
        order_cmp = [cmpAtoms(self.model_1.atoms[a], self.model_2.atoms[a])
                     for a in self.model_1.atomRange()]

        self.assertEqual(N0.sum(order_cmp), 159)

        ## round trip of the sequence: string -> three letter code list
        ## -> one letter code list
        self.seq = self.model_1.sequence()
        self.seq = single2longAA(self.seq)
        self.seq = singleAA(self.seq)

        self.assertEqual(''.join(self.seq), self.model_1.sequence())

Example #19

0

Show file

File: Analyse.py Project: graik/biskit

    def get_identities(self, nb_templates, validation_folder = None):
        """
        Calculate, for each template, the mean of its percentage of
        identities with the other templates.

        @param nb_templates: number of templates used in the cross-validation
        @type  nb_templates: int
        @param validation_folder: folder with validation data
                           (default: None S{->} outFolder/L{F_TEMPLATE_FOLDER})
        @type  validation_folder: str

        @return: dictionary with mean percent identities for each template
        @rtype: {str:float}
        """
        if not validation_folder:
            validation_folder = self.outFolder + self.F_TEMPLATE_FOLDER

        identities = {}

        for folder in self.__listDir(validation_folder):
            ## identities file inside this template's folder
            path = "%s/%s"%(validation_folder,
                            folder + CI.F_OUTPUT_IDENTITIES_COV)
            parsed = self.parseFile( path )

            ## sum of the first parsed row (without its first entry)
            ## divided by the number of templates
            identities[folder] = N0.sum(parsed[0][1:])/nb_templates

        return identities

Example #20

0

Show file

File: WhatIf.py Project: suliat16/biskit

    def __exposedResidues(self, ASA_values, sidechainCut=0.0,
                          backboneCut=0.0, totalCut=0.0):
        """
        Decide what is a surface exposed residue and what is not.

        Column 0 of ASA_values is compared against totalCut, column 1
        against backboneCut and column 2 against sidechainCut. A residue
        is flagged when N0.sum over the three stacked test results is
        greater than 0.
        NOTE(review): earlier documentation stated that all three values
        have to pass the test, but the sum > 0 check looks like it accepts
        a residue as soon as a single test passes -- confirm which one is
        intended. Also confirm the column order of ASA_values.

        @param ASA_values: array with one row per residue and (at least)
                           three ASA columns, calculated in
                           L{__read_residueASA}.
        @type  ASA_values: array
        @param sidechainCut: cutoff for the side chain ASA (default: 0.0)
        @type  sidechainCut: float
        @param backboneCut: cutoff value for back bone ASA
        @type  backboneCut: float
        @param totalCut: cutoff for total ASA
        @type  totalCut: float

        @return: residue mask, where 0 = buried
        @rtype: [1|0]
        """
        by_column = N0.transpose(ASA_values)

        ## one test per ASA column, value > cutoff
        over_total = N0.greater(by_column[0], totalCut)
        over_backbone = N0.greater(by_column[1], backboneCut)
        over_sidechain = N0.greater(by_column[2], sidechainCut)

        ## stack the three test results into one 3 x n_residues array
        stacked = N0.concatenate(([over_total],
                                  [over_backbone],
                                  [over_sidechain]))

        return N0.greater(N0.sum(stacked), 0)

Example #21

0

Show file

File: ReduceCoordinates.py Project: tybiot/biskit

    def reduceToModel(self, xyz=None, reduce_profiles=1):
        """
        Create a reduced PDBModel from coordinates. The new model gets the
        atom profiles stored in self.atoms, the reduced coordinates and a
        'mass' profile holding the summed mass of each atom group.

        @param xyz: coordinate array (N_atoms x 3) or
                    None (->use reference coordinates)
        @type  xyz: array OR None
        @param reduce_profiles: if true, reduceAtomProfiles() is applied to
                    the source model and the result, and the residue
                    profiles of the source model are attached to the
                    result (default: 1)
        @type  reduce_profiles: 1|0

        @return: PDBModel with reduced atom set and profile 'mass'
        @rtype: PDBModel
        """
        mass = self.m.atoms.get('mass')

        if xyz is None:
            xyz = self.m.getXyz()

        ## total mass of the atoms pooled in each group
        group_mass = [N0.sum(N0.take(mass, group)) for group in self.groups]
        reduced_xyz = self.reduceXyz(xyz)

        result = PDBModel()

        ## copy the pre-calculated profiles of the reduced atoms
        for key in self.atoms.keys():
            result.atoms.set(key, self.atoms.valuesOf(key))

        result.setXyz(reduced_xyz)
        result.atoms.set('mass', group_mass)

        if reduce_profiles:
            self.reduceAtomProfiles(self.m, result)

            ## the residue profiles are taken over as they are
            result.residues = self.m.residues

        return result

Example #22

0

Show file

File: ReduceCoordinates.py Project: tybiot/biskit

    def group(self, a_indices, maxPerCenter):
        """
        Group a bunch of integers (atom indices in PDBModel) so that each
        group has at most maxPerCenter items. The items are spread as
        evenly as possible: group sizes differ by at most one and the
        larger groups come first.

        @param a_indices: atom indices
        @type  a_indices: [int]
        @param maxPerCenter: max entries per group (must be positive)
        @type  maxPerCenter: int

        @return: list of index arrays; empty list for empty input
        @rtype: [[int],[int]..]

        @raise ValueError: if maxPerCenter is negative
        @raise ZeroDivisionError: if maxPerCenter is 0
        """
        n_items = len(a_indices)

        ## nothing to distribute (also avoids a division by 0 groups below)
        if n_items == 0:
            return []

        if maxPerCenter < 0:
            raise ValueError('maxPerCenter must be positive')

        ## how many groups are necessary? (explicit integer division so
        ## that the result does not depend on the meaning of '/')
        n_centers = n_items // maxPerCenter
        if n_items % maxPerCenter:
            n_centers += 1

        ## how many items/atoms go into each group? Every group gets the
        ## same base size, the first `extra` groups take one more item
        base, extra = divmod(n_items, n_centers)
        sizes = [base + 1] * extra + [base] * (n_centers - extra)

        ## distribute atom indices into groups
        result = []
        pos = 0
        for n in sizes:
            result.append(N0.take(a_indices, N0.arange(n) + pos))
            pos += n

        return result

Example #23

0

Show file

File: rmsFit.py Project: tybiot/biskit

    def test_rmsFit(self):
        """rmsFit test"""
        import Biskit.tools as T

        self.traj = T.load(T.testRoot() + '/lig_pcr_00/traj.dat')

        ## match the last frame of the trajectory against the reference
        reference_xyz = self.traj.ref.xyz
        last_xyz = self.traj[-1].xyz
        rt, rmsdLst = match(reference_xyz, last_xyz)

        if self.local:
            print('RMSD: %.2f' % rmsdLst[0][1])

        ## compare the summed elements of the first item of rt with the
        ## summed expectation (absolute values)
        observed = abs(N0.sum(N0.ravel(rt[0])))
        expected = abs(N0.sum(N0.ravel(self.EXPECT)))

        self.assertAlmostEqual(observed, expected, 6)

Example #24

0

Show file

def logConfidence( x, R, clip=0 ):
    """
    Estimate the probability of x NOT being a random observation from a
    lognormal distribution that is described by a set of random values.

    Zeros in R are dropped before the log transformation unless they were
    clipped to the given value first. If x is 0 (and was not clipped),
    the result is (0, 0).

    @param x: observed value
    @type  x: float
    @param R: sample of random values
    @type  R: [float]
    @param clip: clip zeros at this value  0->don't clip (default: 0)
    @type  clip: float

    @return: confidence that x is not random, median of random distr.
    @rtype: (float, float)
    """
    if clip:
        ## lift zeros in the sample and in the observation up to clip
        if 0 in R:
            R = N0.clip( R, clip, max( R ) )
        if x == 0:
            x = clip

    ## remove any zeros that are still left in the sample
    R = N0.compress( R, R )

    if x == 0:
        return 0, 0

    ## mean and standard deviation of the log-transformed random sample
    log_R = N0.log( R )
    alpha = N0.average( log_R )

    n = len( R )
    beta = N0.sqrt( N0.sum( N0.power( log_R - alpha, 2 ) ) / (n - 1.) )

    return logArea( x, alpha, beta ), logMedian( alpha )

Example #25

0

Show file

 def prepare(self):
     """
     Overrides Executor method.

     Reduces the model to its heavy atoms and writes it to self.f_pdb.

     @raise Dssp_Error: if every remaining atom is selected by maskCA(),
                        i.e. the structure seems to be a C-alpha trace
     """
     self.model = self.model.compress(self.model.maskHeavy())

     ## maskCA is a method and has to be called; summing the bound method
     ## itself never yields the number of C-alpha atoms
     n_ca = N0.sum(self.model.maskCA())

     if self.model.lenAtoms() == n_ca:
         raise Dssp_Error('The structure you want to calculate the secondary structure for seems to be a carbon alpha trace. Terminating')

     self.model.writePdb(self.f_pdb)

Example #26

0

Show file

File: Complex.py Project: tybiot/biskit

 def contactsShared(self, reference, cutoff=None):
     """
     Number of B{residue-residue} contacts that are present in both this
     and the reference complex.

     @param reference: reference complex
     @type  reference: Complex
     @param cutoff: cutoff for atom-atom contact to be counted
     @type  cutoff: float

     @return: the number of residue-residue contacts that are common to
              both this and reference::
                abs( N0.sum( N0.sum( N0.logical_and( contactMatrix_a,
                                                     contactMatrix_b ))))
     @rtype: int
     """
     own = self.resContacts(cutoff=cutoff)
     other = reference.resContacts(cutoff=cutoff)

     ## positions that are set in both contact matrices
     in_both = N0.logical_and(own, other)

     return abs(N0.sum(N0.sum(in_both)))

Example #27

0

Show file

File: Complex.py Project: tybiot/biskit

    def fractionNativeContacts(self, ref, cutoff=None):
        """
        Fraction of the B{residue-residue} contacts of the native complex
        that are also found in this complex.

        @param ref: native complex
        @type  ref: Complex
        @param cutoff: maximal atom-atom distance, None .. previous setting
        @type  cutoff: float

        @return: fraction of native contacts
        @rtype: float
        """
        cont = self.resContacts(cutoff, refComplex=ref)
        ref_cont = ref.resContacts(cutoff)

        ## native contacts recovered here (as float) / all native contacts
        n_recovered = N0.sum(N0.sum(ref_cont * cont)) * 1.0
        n_native = N0.sum(N0.sum(ref_cont))

        return n_recovered / n_native

Example #28

0

Show file

File: Analyzer.py Project: suliat16/biskit

    def random_contacts( self, contMat, n, maskRec=None, maskLig=None ):
        """
        Create randomized surface contact matrix with same number of
        contacts and same shape as given contact matrix.

        Random contacts are only placed at positions where both the
        receptor and the ligand mask are non-zero. A mask that is not
        given (None) defaults to all ones, independently of the other one.

        @param contMat: template contact matrix
        @type  contMat: matrix
        @param n: number of matrices to generate
        @type  n: int
        @param maskRec: surface masks (or something similar)
                        (default: None S{->} all receptor positions)
        @type  maskRec: [1|0]
        @param maskLig: surface masks (or something similar)
                        (default: None S{->} all ligand positions)
        @type  maskLig: [1|0]

        @return: list of [n] random contact matrices
        @rtype: [matrix]
        """
        a,b = N0.shape( contMat )
        nContacts = N0.sum( N0.sum( contMat ))

        ## test against None explicitly: the truth value of an array mask
        ## is ambiguous and an all-zero mask is a valid input
        if maskRec is None:
            maskRec = N0.ones( a )
        if maskLig is None:
            maskLig = N0.ones( b )

        ## flat indices of all positions allowed by both masks
        c_mask = N0.ravel( N0.outerproduct( maskRec, maskLig ) )
        c_pos = N0.nonzero( c_mask )

        # get array with surface positions from complex
        cont = N0.take( N0.ravel(contMat), c_pos )
        length = len( cont )

        result = []

        for i in range( n ):
            # create random array
            ranCont = mathUtils.randomMask( nContacts,length )

            # blow up to size of original matrix
            r = N0.zeros(a*b)
            N0.put( r, c_pos, ranCont)

            result.append( N0.reshape( r, (a,b) ) )

        return result

Example #29

0

Show file

File: PatchGeneratorFromOrbit.py Project: graik/biskit

    def randomPatches( self, size, n=None, exclude=None,
                       max_overlap=0, exclude_all=None ):
        """
        Collect atom patches around randomly placed centers.

        size - int, number of atoms per patch
        n    - int, number of patch centers that are tried
               (None or 0 -> 500)
        exclude     - [ 1|0 ], atom mask; a patch may share at most
                      |max_overlap| atoms with it. The atoms of every
                      accepted patch are added to this mask before the
                      next center is tried.
        max_overlap - int
        exclude_all - [ 1|0 ], don't touch ANY of these atoms
        -> [ [ 1|0 ] ], list of atom masks (the accepted patches)
        """
        if exclude is None:
            exclude = N0.zeros( self.model.lenAtoms(), 'i' )
        if exclude_all is None:
            exclude_all = N0.zeros( self.model.lenAtoms(), 'i' )

        n = n or 500

        centers = self.random_translations( n=n, center=self.center )

        ## the centers are ordered relative to the center of the forbidden
        ## atoms, or relative to the first random center if there are none.
        ## NOTE(review): only exclude_all moves the origin, the exclude
        ## mask never does -- confirm that this is intended.
        origin = centers[0]
        if N0.any( exclude_all ):
            origin = self.model.center( mask=exclude_all )

        centers = self.orderCenters( centers, origin )

        accepted = []
        for idx in range(n):
            patch = self.patchAround( centers[idx], size )

            within_limit = N0.sum( patch * exclude ) <= max_overlap
            if within_limit and N0.sum( patch * exclude_all ) == 0:
                ## later patches must respect the atoms taken by this one
                exclude = exclude + patch
                accepted.append( patch )

        return accepted

Example #30

0

Show file

 def __distances(self, point, xyz=None):
     """
     Euclidean distance of each coordinate (each row of xyz, the squares
     are summed along axis 1) to the given point.

     point - array of float; point of origin
     xyz   - array of float; coordinates, if None -- take model atoms
             NOTE(review): formerly documented as 3 x n, but the sum
             along axis 1 suggests n x 3 -- confirm
     -> distances of all atoms to given point
     """
     coords = self.model.getXyz() if xyz is None else xyz

     squared = N0.power(coords - point, 2)
     return N0.sqrt(N0.sum(squared, 1))

Example #31

0

Show file

File: PatchGeneratorFromOrbit.py Project: graik/biskit

 def __distances( self, point, xyz=None ):
     """
     Euclidean distance of each coordinate (each row of xyz, the squares
     are summed along axis 1) to the given point.

     point - array of float; point of origin
     xyz   - array of float; coordinates, if None -- take model atoms
             NOTE(review): formerly documented as 3 x n, but the sum
             along axis 1 suggests n x 3 -- confirm
     -> distances of all atoms to given point
     """
     coords = self.model.getXyz() if xyz is None else xyz

     squared = N0.power( coords - point, 2 )
     return N0.sqrt( N0.sum( squared, 1 ) )

Example #32

0

Show file

File: Benchmark.py Project: graik/biskit

    def calc_rmsd(self, fitted_model_if, fitted_model_wo_if, reference, model):
        """
        Compare the two fitted structures (with and without iterative
        fitting) with the known structure and store the results in
        model.info under the keys 'rmsd2ref_aa_wo_if', 'rmsd2ref_ca_wo_if',
        'rmsd2ref_aa_if', 'rmsd2ref_ca_if', 'rmsd2ref_aa_outliers' and
        'rmsd2ref_ca_outliers'.

        @param fitted_model_if: iteratively fitted model; must carry the
                                profile 'rms_outliers'
        @type  fitted_model_if: PDBModel
        @param fitted_model_wo_if: normally fitted model
        @type  fitted_model_wo_if: PDBModel
        @param reference: reference model
        @type  reference: PDBModel
        @param model: model whose info dictionary receives the results
        @type  model: PDBModel
        """
        ## rmsd over all atoms (no fitting) and over CA atoms (with
        ## fitting) before anything is removed from the model
        ca_all = fitted_model_wo_if.maskCA()

        rmsd_aa = fitted_model_wo_if.rms( reference, fit=0 )
        rmsd_ca = fitted_model_wo_if.rms( reference, mask=ca_all, fit=1 )

        model.info["rmsd2ref_aa_wo_if"] = rmsd_aa
        model.info["rmsd2ref_ca_wo_if"] = rmsd_ca

        ## everything NOT flagged in the 'rms_outliers' profile is kept
        keep = N0.logical_not( fitted_model_if.profile("rms_outliers") )

        ## same comparison again, now without the outliers of the
        ## iterative fit
        fitted_model_if = fitted_model_if.compress( keep )
        reference = reference.compress( keep )

        ca_kept = fitted_model_if.maskCA()

        rmsd_aa_if = fitted_model_if.rms( reference, fit=0 )
        rmsd_ca_if = fitted_model_if.rms( reference, mask=ca_kept, fit=1 )

        model.info["rmsd2ref_aa_if"] = rmsd_aa_if
        model.info["rmsd2ref_ca_if"] = rmsd_ca_if

        ## fraction of all positions that were flagged as outliers
        n_total = len( keep )
        model.info["rmsd2ref_aa_outliers"] = \
            1.*( n_total - N0.sum(keep) ) / n_total

        ## the same fraction with respect to the CA mask.
        ## NOTE(review): ca_kept belongs to the already compressed model
        ## while keep has the length of the uncompressed one -- confirm
        ## that compressing one with the other is intended.
        n_ca = N0.sum( ca_kept )
        n_ca_kept = N0.sum( N0.compress(ca_kept, keep) )
        model.info["rmsd2ref_ca_outliers"] = 1.*( n_ca - n_ca_kept ) / n_ca

Example #33

0

Show file

    def calc_rmsd(self, fitted_model_if, fitted_model_wo_if, reference, model):
        """
        Compare the two fitted structures (with and without iterative
        fitting) to the known structure (reference) and record the
        resulting RMSD values and outlier fractions in the info dictionary
        of the associated model from the pdb_list.

        Keys written to model.info: 'rmsd2ref_aa_wo_if', 'rmsd2ref_ca_wo_if',
        'rmsd2ref_aa_if', 'rmsd2ref_ca_if', 'rmsd2ref_aa_outliers' and
        'rmsd2ref_ca_outliers'.

        @param fitted_model_if: iteratively fitted model
        @type  fitted_model_if: PDBModel
        @param fitted_model_wo_if: normally fitted model
        @type  fitted_model_wo_if: PDBModel
        @param reference: reference model
        @type  reference: PDBModel
        @param model: model
        @type  model: PDBModel
        """
        ## rmsd for all atoms (no re-fitting) and for CA atoms (with
        ## fitting) before any residue is removed from the model
        mask_CA = fitted_model_wo_if.maskCA()

        model.info["rmsd2ref_aa_wo_if"] = \
             fitted_model_wo_if.rms(reference, fit=0)
        model.info["rmsd2ref_ca_wo_if"] = \
             fitted_model_wo_if.rms(reference, mask=mask_CA, fit=1)

        ## 1 for every position that was NOT an outlier of the iterative fit
        outliers_mask = N0.logical_not(fitted_model_if.profile("rms_outliers"))

        ## remember the CA mask of the complete model: the mask taken after
        ## compression is shorter than outliers_mask and must not be
        ## combined with it in the outlier statistics at the end
        ## (assumes maskCA() has one entry per atom, like outliers_mask)
        ca_before = fitted_model_if.maskCA()

        ## Now remove the residues that were outliers in the iterative fit
        ## and calculate the rmsd again
        fitted_model_if = fitted_model_if.compress(outliers_mask)
        reference = reference.compress(outliers_mask)

        mask_CA = fitted_model_if.maskCA()

        model.info["rmsd2ref_aa_if"] = fitted_model_if.rms(reference, fit=0)
        model.info["rmsd2ref_ca_if"] = fitted_model_if.rms(reference,
                                                           mask=mask_CA, fit=1)

        ## fraction of outliers among all positions ...
        total = len(outliers_mask)
        model.info["rmsd2ref_aa_outliers"] = \
             1.*(total - N0.sum(outliers_mask)) / total

        ## ... and among the CA positions only
        total_ca = N0.sum(ca_before)
        kept_ca = N0.sum(N0.compress(ca_before, outliers_mask))
        model.info["rmsd2ref_ca_outliers"] = 1.*(total_ca - kept_ca) / total_ca

Example #34

0

Show file

File: Complex.py Project: tybiot/biskit

    def rmsInterface(self, ref, cutoff=4.5, fit=1):
        """
        Rmsd between the interface of this complex and the interface of a
        reference complex. The interface is taken from the residue contacts
        of the reference (ref.resContacts(cutoff)) and restricted to heavy
        atoms.

        @param ref: reference complex
        @type  ref: Complex
        @param cutoff: atom distance cutoff for interface residue definition
                       (default: 4.5)
        @type  cutoff: float
        @param fit: least-squares fit before calculating the rms (default: 1)
        @type  fit: 1|0

        @return: interface rmsd
        @rtype: float
        """
        ## cast both complexes to their common atoms if receptor or ligand
        ## models are not atom-identical
        this = self
        same_rec = ref.rec_model.equals(self.rec_model)[1]

        if not (same_rec and ref.lig_model.equals(self.lig_model)[1]):

            mask_rec, mask_rec_ref, mask_lig, mask_lig_ref = \
                    self.equalAtoms(ref)
            this = self.compress(mask_rec, mask_lig)
            ref = ref.compress(mask_rec_ref, mask_lig_ref)

        ## interface residues of the reference, expanded to atom masks
        res_contacts = ref.resContacts(cutoff)

        rec_atoms = ref.rec_model.res2atomMask(N0.sum(res_contacts, 1))
        lig_atoms = ref.lig_model.res2atomMask(N0.sum(res_contacts, 0))
        interface = N0.concatenate((rec_atoms, lig_atoms))

        ## only heavy atoms of the interface enter the rms calculation
        heavy = N0.concatenate((ref.rec().maskHeavy(),
                                ref.lig_model.maskHeavy()))
        interface = interface * heavy

        ## rms between the two complete complex models
        return ref.model().rms(this.model(), interface, fit=fit)

Example #35

0

Show file

File: PDBParseModel.py Project: tybiot/biskit

    def test_PDBParseModel(self):
        """PDBParseModel test"""

        ## loading output file from X-plor
        if self.local:
            print 'Loading pdb file ..'

        self.p = PDBParseModel()
        self.m = self.p.parse2new(B.PDBModel(T.testRoot() + '/rec/1A2P.pdb'))

        self.assertAlmostEqual(N0.sum(self.m.centerOfMass()), 113.682601929, 2)

Example #36

0

Show file

File: Complex.py Project: tybiot/biskit

    def contactsOverlap(self, ref, cutoff=None):
        """
        Fraction of overlapping B{residue-residue} contacts between this and
        reference complex.

        @param ref: reference complex
        @type  ref: Complex
        @param cutoff: maximal atom-atom distance, None .. previous setting
        @type  cutoff: float

        @return: fraction of contacts shared between this and ref
                 (normalized to number of all contacts)
        @rtype: float
        """
        ## fetch each residue contact matrix only once and reuse it for
        ## both the intersection and the union
        own = self.resContacts(cutoff=cutoff)
        other = ref.resContacts(cutoff=cutoff)

        equal = N0.logical_and(own, other)
        total = N0.logical_or(own, other)

        ## nested sum collapses the 2-D matrices into a single count
        return N0.sum(N0.sum(equal)) * 1.0 / N0.sum(N0.sum(total))

Example #37

0

Show file

File: ReduceCoordinates.py Project: tybiot/biskit

    def reduceXyz(self, xyz, axis=0):
        """
        Reduce the number of atoms in the given coordinate set. The set must
        have the same length and order as the reference model. It may have
        an additional (time) dimension as first axis. Each group of atom
        indices in self.groups is replaced by its mass-weighted center.

        @param xyz: coordinates (N_atoms x 3) or (N_frames x N_atoms x 3)
        @type  xyz: array
        @param axis: axis with atoms (default: 0)
        @type  axis: int

        @return: coordinate array (N_less_atoms x 3) or
                 (N_frames x N_less_atoms x 3); None if self.groups is empty
        @rtype: array
        """
        masses = self.m.atoms.get('mass')
        centers = []

        for atom_indices in self.groups:

            x = N0.take(xyz, atom_indices, axis)
            m = N0.take(masses, atom_indices)

            ## masses as column so that they scale the x, y, z of each atom
            center = N0.sum(x * N0.transpose([m]), axis=axis) / N0.sum(m)

            ## re-insert the atom axis that was removed by the summation
            if axis == 0:
                center = center[N0.NewAxis, :]

            if axis == 1:
                center = center[:, N0.NewAxis, :]

            centers.append(center)

        if not centers:
            return None

        ## join all centers in one step instead of growing the array
        ## pairwise inside the loop
        return N0.concatenate(centers, axis)

Example #38

0

Show file

File: PDBParsePickle.py Project: tybiot/biskit

    def test_PDBParsePickle(self):
        """PDBParsePickle test"""

        import Biskit.oldnumeric as N0

        ## loading output file from X-plor
        if self.local:
            print 'Loading pickled model ..'

        self.p = PDBParsePickle()
        self.m = self.p.parse2new(T.testRoot() + '/rec/1A2P_dry.model')

        self.assertAlmostEqual(N0.sum(self.m.centerOfMass()), 114.18037, 5)

Example #39

0

Show file

File: Hmmer.py Project: graik/biskit

    def entropy( self, emmProb, nullProb ):
        """
        Relative entropy (Kullback-Leibler distance) between the observed
        and the background amino acid distribution at a given position.
        High values mean high conservation. Emission probabilities that sum
        up to 0 yield the score 0.
        See also: BMC Bioinformatics. 2006; 7: 385

        @param emmProb: emission probabilities of one position
        @type  emmProb: array
        @param nullProb: null (background) probabilities
        @type  nullProb: array

        @return: relative entropy score
        @rtype:  float
        """
        ## an all-zero profile position would only feed zeros into the log
        total = N0.sum( emmProb )
        if total == 0.:
            return 0.

        log_ratio = N0.log( emmProb / nullProb )

        return N0.sum( emmProb * log_ratio )

Example #40

0

Show file

File: ContactSlave.py Project: graik/biskit

    def calcContacts( self, soln, c ):
        """
        Calculate the requested contact-based values of a complex and store
        them in it: fraction of native atom contacts (cutoffs 4.5 and 10),
        fraction of native residue contacts (cutoff 4.5) and fraction of
        native surface. Values are only calculated if they are requested
        and the matching reference contact matrix is available. Any
        exception is caught and handed to self.reportError.

        @param soln: solution number (passed on to the error report)
        @type  soln: int
        @param c: Complex
        @type  c: Complex
        """
        try:
            ## fraction of native atom contacts, cutoff 4.5
            if self.requested(c, 'fnac_4.5') \
               and self.c_ref_atom_4_5 is not None:
                ## cache pairwise atom distances for following calculations
                atom_cont = c.atomContacts( 4.5, self.mask_rec,
                                            self.mask_lig,
                                            cache=1, map_back=0 )
                native = N0.ravel( self.c_ref_atom_4_5 )

                shared = N0.sum( N0.ravel(atom_cont) * native )
                c['fnac_4.5'] = shared / float( N0.sum(native) )

            ## fraction of native atom contacts, cutoff 10
            if self.requested(c, 'fnac_10') \
               and self.c_ref_atom_10 is not None:

                atom_cont = c.atomContacts( 10., self.mask_rec,
                                            self.mask_lig,
                                            cache=1, map_back=0 )
                native = N0.ravel( self.c_ref_atom_10 )

                shared = N0.sum( N0.ravel(atom_cont) * native )
                c['fnac_10'] = shared / float( N0.sum(native) )

            ## residue contacts are needed if they are requested themselves
            ## or if a value derived from them is requested and a reference
            ## matrix exists
            if self.requested(c, 'c_res_4.5') \
               or ( self.c_ref_res_4_5 is not None \
                    and self.requested(c, 'fnrc_4.5', 'fnSurf_rec') ):

                keep = self.requested(c, 'c_res_4.5')
                res_cont = c.resContacts( 4.5, cache=keep )

                have_ref = self.c_ref_res_4_5 is not None

                if have_ref and self.requested(c, 'fnrc_4.5'):
                    native = N0.ravel( self.c_ref_res_4_5 )
                    shared = N0.sum( N0.ravel(res_cont) * native )
                    c['fnrc_4.5'] = shared / float( N0.sum(native) )

                if have_ref and self.requested(c, 'fnSurf_rec'):
                    surf_rec, surf_lig = c.fractionNativeSurface(
                        res_cont, self.c_ref_res_4_5 )
                    c['fnSurf_rec'] = surf_rec
                    c['fnSurf_lig'] = surf_lig

        except:
            ## catch-all on purpose: the failure is reported, not raised
            rec_no = lig_no = soln_no = 0
            try:
                ## best effort: identify the complex for the error message
                rec_no, lig_no, soln_no = c.get('model1',0), \
                        c.get('model2',0), c.get('soln',0)
            except:
                pass

            msg = 'contact error (r %i : l %i, #%i)'%(rec_no, lig_no, soln_no)
            self.reportError( msg, soln )

Example #41

0

Show file

File: dope.py Project: graik/biskit

def changeModel( inFile, prefix, sourceModel ):

    print '\nget ' + os.path.basename( inFile ) + '..',

    model = PDBModel( inFile )

    model.update()

    model = model.sort()

    eq = model.equals( sourceModel )
    if not eq[0] and eq[1]:
        raise ConvertError('source and other models are not equal: ' + str(eq))

#    model.validSource()
    model.setSource( sourceModel.validSource() )

    #model.atomsChanged = 0
    for k in model.atoms:
        model.atoms[k,'changed'] = N0.all( model[k] == sourceModel[k] )

    model.xyzChanged = ( 0 != N0.sum( N0.ravel( model.xyz - sourceModel.xyz)) )

    model.update( updateMissing=1 )

    if model.xyzChanged:

        doper = PDBDope( model )

        if 'MS' in sourceModel.atoms.keys():
            doper.addSurfaceRacer( probe=1.4 )

        if 'density' in sourceModel.atoms.keys():
            doper.addDensity()

##        if 'foldX' in sourceModel.info.keys():
##            doper.addFoldX()
            
        if 'delphi' in sourceModel.info.keys():
            doper.addDelphi()

    outFile = os.path.dirname( inFile ) + '/' + prefix +\
            T.stripFilename( inFile ) + '.model' 

    T.dump( model, outFile )

    print '-> ' + os.path.basename( outFile )

Example #42

0

Show file

File: WhatIf.py Project: graik/biskit

    def test_Whatif(self):
        """Whatif test"""

        from Biskit import PDBModel

        ## Loading PDB...

        f = T.testRoot()+"/com/1BGS.pdb"
        m = PDBModel(f)

        m = m.compress( m.maskProtein() )
        m = m.compress( m.maskHeavy() )

        ## Starting WhatIf
        x = WhatIf( m, debug=0, verbose=0 )

        ## Running
        atomAcc, resAcc, resMask = x.run()

        if self.local:
            ## check that model hasn't changed
            m_ref = PDBModel(f)
            m_ref = m.compress( m.maskProtein() )
            for k in m_ref.atoms.keys():
                if not N0.all(m_ref[k] == m[k]):
                    print 'Not equal ', k
                else:
                    print 'Equal ', k

            ## display exposed residues in PyMol
            from Pymoler import Pymoler
            pm = Pymoler()
            model = pm.addPdb( m, '1' )
            pm.colorRes( '1', resAcc[:,0] )
            pm.show()

            print "\nResult for first 10 atoms/residues: "
            print '\nAccessability (A^2):\n', atomAcc[:10]
            print '\nResidue accessability (A^2)'
            print '[total, backbone, sidechain]:\n', resAcc[:10]
            print '\nExposed residue mask:\n',resMask[:10]
            print '\nTotal atom    accessability (A^2): %.2f'%sum(atomAcc) 
            print '      residue accessability (A^2): %.2f'%sum(resAcc)[0]

        self.assertAlmostEqual( N0.sum(resAcc[:,0]), 2814.6903, 7 )

Example #43

0

Show file

File: Analyzer.py Project: graik/biskit

    def __categorizeHexSurf(self, cutoff=0.1):
        """
        Compare the contacts of each complex in self.hexContacts to the
        native contacts (self.contacts) and count for how many of the
        values returned by fractionNativeSurface the fraction exceeds the
        cutoff.

        @param cutoff: fraction cutoff for defining a overlap (default: 0.1)
        @type  cutoff: float

        @return: list of len(self.hexContacts) overlapping with
                 native contact surface of lig and rec (0 - no overlap,
                 1 - rec OR lig overlaps, 2 - rec AND lig overlaps)
        @rtype: [0|1|2]
        """
        ## first pass: fractions of native surface for every complex
        fractions = []
        for contacts in self.hexContacts:
            fractions.append(
                self.com.fractionNativeSurface( contacts, self.contacts ) )

        ## second pass: number of fractions above the cutoff
        categories = []
        for overlap in fractions:
            categories.append( N0.sum( N0.greater( overlap, cutoff ) ) )

        return categories

Example #44

0

Show file

File: PatchGeneratorFromOrbit.py Project: graik/biskit

    def random_translations( self, n=1, center=None ):
        """
        n Random translations on a sphere around center with fixed radius.
        The radius is taken from self.orbit (given as orbit to __init__).
        n      - int, number of random coordinates to generate
        center - 3 array of float (default: self.center)
        -> array n x 3 of float
        """
        if center is None:
            center = self.center

        ## random directions, centered around the origin
        xyz = ra.random( (n,3) ) - 0.5

        ## factor that stretches each direction vector to length self.orbit
        scale = self.orbit*1.0 / N0.sqrt( N0.sum( xyz**2, 1 ) )

        ## scale all rows at once; also yields an empty n x 3 array for n=0
        return xyz * scale[:, N0.NewAxis] + center

Example #45

0

Show file

File: PatchGeneratorFromOrbit.py Project: graik/biskit

def test( model, center2center, nAtoms=10, exclude=None ):
    """
    Generate random patches on a model and load them into a Pymoler
    instance for display.

    model         - model the patches are generated for
    center2center - passed on to PatchGeneratorFromOrbit
    nAtoms        - int, passed as first argument to randomPatches; a
                    quarter of it (rounded) is the allowed overlap
    exclude       - passed on to randomPatches (default: None)
    -> Pymoler instance with the model ('all') and one movie frame per patch
    """
    from Biskit import Pymoler, PDBModel

    g = PatchGeneratorFromOrbit( model, center2center )

    overlap = int( round( nAtoms / 4.0 ) )

    r = g.randomPatches( nAtoms, 500, max_overlap=overlap, exclude=exclude )

    pm  = Pymoler()
    pm.addPdb( model, 'all' )

    ## one sub-model per patch mask
    ms = [ model.take( N0.nonzero(mask) ) for mask in r ]

    pm.addMovie( ms )

    return pm

Example #46

0

Show file

File: molTools.py Project: graik/biskit

def xyzOfNearestCovalentNeighbour( i, model ):
    """
    Closest atom among the atoms that share the residue number of the
    atom with index i.

    @param model: PDBModel 
    @type  model: PDBModel
    @param i: atom index 
    @type  i: int

    @return: coordinates of the nearest atom 
    @rtype: [float, float, float]
    """
    res_number = model.atoms['residue_number'][i]
    residue = model.filter( residue_number=res_number )

    ## distance of every atom of the residue to atom i
    offsets = residue.xyz - model.xyz[i]
    dist = N0.sqrt( N0.sum( offsets**2 , 1) )

    ## set distance to self to something high
    self_pos = N0.argmin(dist)
    dist[ self_pos ] = 100.

    ## first position that has the (now) smallest distance
    is_closest = ( dist == min(dist) )
    pos_shortest = N0.nonzero( is_closest )[0]

    return residue.xyz[ pos_shortest ]

Example #47

0

Show file

File: CheckIdentities.py Project: graik/biskit

    def identities(self, aln_dictionary):
        """
        Create a dictionary that contains information about all the
        alignments in the aln_dictionary using pairwise comparisons.
        The given dictionary is modified in place and returned.

        @param aln_dictionary: alignment dictionary; must contain an entry
                               for every name in self.sequences_name and an
                               entry 'target' whose 'seq' defines the length
                               of the alignment
        @type  aln_dictionary: dict

        @return: a dictionary of dictionaries with the sequence name as the
        top key. Each sub dictionary then has the keys: 
         - 'name' - str, sequence name
         - 'seq' - str, sequence of
         - 'template_info' - list of the same length as the 'key'
             sequence excluding deletions. The number of sequences
             in the multiple alignment that contain information at
             this position.
         - 'ID' - dict, sequence identity in percent comparing the
            'key'  sequence to all other sequences (excluding deletions)
         - 'info_ID' - dict, same as 'ID' but compared to the template
             sequence length (i.e excluding deletions and insertions
             in the 'key' sequence )
         - 'cov_ID' - dict, same as 'info_ID' but insertions are defined
             comparing to all template sequences (i.e where
             'template_info' is zero )
        @rtype: dict
        """
        ## loop over all sequences in alignment
        for i in self.sequences_name:
            seq_i = aln_dictionary[i]["seq"]
            aln_length = len(aln_dictionary["target"]["seq"])

            ## don't compare to self, remove current sequence
            template_names = [name for name in self.sequences_name
                              if name != i]

            ## alignment columns in which sequence i is not a deletion;
            ## gaps are compared by value ('!='), not by object identity
            columns = [w for w in range(aln_length) if seq_i[w] != '-']
            nb_of_residues = len(columns)

            ## per column: how many of the other sequences contain
            ## alignment information; this only depends on i and is
            ## therefore calculated once, outside the pairwise loop
            template_info = [
                len([z for z in template_names
                     if aln_dictionary[z]["seq"][w] != '-'])
                for w in columns]

            ## number of positions in which any other sequence
            ## contains alignment information
            nb_cov_res = len([n for n in template_info if n > 0])

            ## pairwise comparison to all sequences (including i itself)
            info_ID, ID, cov_ID = {}, {}, {}
            for y in self.sequences_name:
                seq_y = aln_dictionary[y]["seq"]

                ## count identities
                nb_of_identities = len([w for w in columns
                                        if seq_i[w] == seq_y[w]])

                ## length excluding insertions
                nb_of_template = len([w for w in columns
                                      if seq_y[w] != '-'])

                ## calculate identities
                info_ID[y] = ID[y] = cov_ID[y] = 0
                ## RAIK: Hack, nb_of_... can turn 0 for fragmented alignments
                if nb_of_template:
                    info_ID[y] = 100. * nb_of_identities / nb_of_template
                if nb_of_residues:
                    ID[y]      = 100. * nb_of_identities / nb_of_residues
                if nb_cov_res:
                    cov_ID[y]  = 100. * nb_of_identities / nb_cov_res

            aln_dictionary[i]["info_ID"] = info_ID
            aln_dictionary[i]["ID"] = ID
            aln_dictionary[i]["cov_ID"] = cov_ID
            aln_dictionary[i]["template_info"] = template_info

        return aln_dictionary

Example #48

0

Show file

File: Hmmer.py Project: graik/biskit

    def parse_result( self ):
        """
        Extract some information about the profile as well as the
        match state emission scores from the result file self.f_out.
        Keys of the returned dictionary::
          'AA', 'name', 'NrSeq', 'emmScore', 'accession',
          'maxAllScale', 'seqNr', 'profLength', 'ent', 'absSum'

        @return: dictionary with various information about the profile
        @rtype: dict

        @raise HmmerError: if the result file does not exist or if
                           T.fileLength reports a length below 10
        """
        ## check that the output file is there and seems valid
        if not os.path.exists( self.f_out ):
            raise HmmerError(
                'Hmmerfetch result file %s does not exist.'%self.f_out )

        if T.fileLength( self.f_out ) < 10:
            raise HmmerError(
                'Hmmerfetch result file %s seems incomplete.'%self.f_out )

        profileDic = {}

        ## read result; the file is closed even if reading fails
        hmm = open( self.f_out, 'r')
        try:
            out = hmm.read()
        finally:
            hmm.close()

        ## collect some data about the hmm profile
        profileDic['name'] = self.hmmName
        profileDic['profLength'] = \
                  int( re.findall(r'LENG\s+[0-9]+', out)[0].split()[1] )
        profileDic['accession'] = \
                  re.findall(r'ACC\s+PF[0-9]+', out)[0].split()[1]
        profileDic['NrSeq'] = \
                  int( re.findall(r'NSEQ\s+[0-9]+', out)[0].split()[1] )
        profileDic['AA'] = \
              re.findall('HMM[ ]+' + '[A-Y][ ]+'*20, out)[0].split()[1:]

        ## collect null emission scores
        pattern = 'NULE[ ]+' + '[-0-9]+[ ]+'*20
        nullEmm = [ float(j) for j in re.findall(pattern, out)[0].split()[1:] ]

        ## get emission scores; each row is the position number followed
        ## by 20 scores
        prob = []
        for i in range(1, profileDic['profLength']+1):
            pattern = "[ ]+%i"%i + "[ ]+[-0-9]+"*20
            e = [ float(j) for j in re.findall(pattern, out)[0].split() ]
            prob.append( e )

        profileDic['seqNr'] = N0.transpose( N0.take( prob, (0,),1 ) )
        profileDic['emmScore'] = N0.array(prob)[:,1:]

        ## calculate emission probabilities
        emmProb, nullProb = self.hmmEmm2Prob( nullEmm, profileDic['emmScore'])

        ## one entropy value per position, repeated for all 20 columns
        ent = [ N0.resize( self.entropy(e, nullProb), (1,20) )[0]
                for e in emmProb ]
        profileDic['ent'] = N0.array(ent)

        ###### TEST #####

        ## separate copy of the scores, independent of 'emmScore'
        proba = N0.array(prob)[:,1:]

        # test set all to N0.sum( abs( probabilities ) )
        p2 = []
        for row in proba:
            p2.append( N0.resize( N0.sum( N0.absolute( row )),
                                  N0.shape( row ) ) )
        profileDic['absSum'] = p2

        # set all to normalized max score 
        p4 = []
        for row in proba:
            p_scale = (row - N0.average(row) )/ math.SD(row)
            p4.append( N0.resize( p_scale[N0.argmax( N0.array(p_scale) )] ,
                                  N0.shape( row ) ) )
        profileDic['maxAllScale'] = p4

        return profileDic

Example #49

0

Show file

File: PatchGeneratorFromOrbit.py Project: graik/biskit

    doper.addSurfaceRacer( probe=1.4 )
    surf_rec = rec.profile2mask( 'MS', 0.0001, 101 )

    doper = PDBDope( lig )
    doper.addSurfaceRacer( probe=1.4 )
    surf_lig = lig.profile2mask( 'MS', 0.0001, 101 )

    ## kick out non-surface
    rec = rec.compress( surf_rec )
    lig = lig.compress( surf_lig )

    com = Complex( rec, lig )

    ## get interface patch
    cont = com.atomContacts( cutoff=6.0 )
    rec_if = N0.sum( cont, 1 )
    lig_if = N0.sum( cont, 0 )

    ## center distance
    c2c = N0.sqrt( N0.sum( (rec.center() - lig.center())**2, 0 ) )
    print "Center2Center: ", c2c

    ## get patches and put them into Pymoler for display
    print "Patching"
    excl = N0.compress( N0.ones( len( rec_if ) ), rec_if )
    pm = test( rec, c2c, nAtoms=len(N0.nonzero(rec_if)), exclude=rec_if )


    pm.addPdb( rec.compress( rec_if ), 'rec_interface' )
    pm.addPdb( lig.compress( lig_if ), 'lig_interface' )
    pm.addPdb( com.model(), 'complex')