예제 #1
0
    def takeFrames( self, indices ):
        """
        Build a new trajectory that holds only the selected frames.

        Positions that are not smaller than the number of frames are
        silently dropped before anything is copied.

        @param indices: frame positions to keep
        @type  indices: [int]

        @return: new object of the same class with the selected frames, a
                 reference model obtained from ref.take over all atoms, and
                 the matching frame names, PCA data and profiles; resIndex
                 is shared with this object (not copied)
        @rtype: Trajectory
        """
        ## discard positions beyond the last frame
        in_range = N0.less( indices, len( self.frames ) )
        indices = N0.compress( in_range, indices )

        result = self.__class__()

        ## potentially slow for large frame arrays
        result.frames = N0.take( self.frames, indices, 0 )

        ## taking *all* atoms of the reference gives the new object its
        ## own reference model
        all_atoms = range( self.ref.lenAtoms() )
        result.setRef( self.ref.take( all_atoms ) )

        if self.frameNames is not None:
            names = N0.take( self.frameNames, indices, 0 )
            result.frameNames = map( ''.join, names.tolist() )

        result.pc = self.__takePca( indices )
        result.profiles = self.profiles.take( indices )
        result.resIndex = self.resIndex

        return result
예제 #2
0
파일: Complex.py 프로젝트: tybiot/biskit
    def take(self, rec_pos, lig_pos):
        """
        Get copy of this complex with given atoms of rec and lig.

        The info dictionary is deep-copied, the ligand matrix is
        shallow-copied and a cached pairwise distance matrix (if any) is
        reduced to the selected positions.

        @param rec_pos: receptor indices to take
        @type  rec_pos: [int]
        @param lig_pos: ligand  indices to take
        @type  lig_pos: [int]

        @return: new complex
        @rtype: Complex
        """
        r = self.__class__()
        r.lig_model = self.lig_model.take(lig_pos)
        r.rec_model = self.rec_model.take(rec_pos)
        r.info = deepcopy(self.info)

        ## Test explicitly for a missing / empty matrix: the truth value of
        ## an array is not a reliable "is it there?" check (it raises for
        ## multi-element numpy arrays and means "any element non-zero" in
        ## old Numeric).
        pw_dist = self.pw_dist
        if pw_dist is not None and len(pw_dist) > 0:
            ## NOTE(review): rec_pos is applied along axis 1 and lig_pos
            ## along N0.take's default axis -- confirm that this matches
            ## the orientation of pw_dist.
            r.pw_dist = N0.take(pw_dist, rec_pos, 1)
            r.pw_dist = N0.take(r.pw_dist, lig_pos)

        r.ligandMatrix = copy(self.ligandMatrix)

        ## todo: take cached contacts as well

        return r
예제 #3
0
    def rmsMatrixByMember(self, mirror=0, step=1):
        """
        Return the result matrix with rows and columns re-ordered by the
        argsortMember() ordering of the trajectories (requires
        EnsembleTraj).

        Without a second trajectory the same ordering is used for both
        axes and, unless mirror is set, every element [j, i] with j >= i
        (diagonal and below) is set to 0.

        @param mirror: keep the mirrored half (only for intra-traj. rms)
                       (default: 0)
        @type  mirror: 0|1

        @param step: take only every step frame [1]
        @type  step: int

        @return: re-ordered matrix
        @rtype: array
        """
        single_traj = self.traj_2 is None

        matrix = self.getResult(mirror=single_traj)

        rows = self.traj_1.argsortMember(step=step)
        if single_traj:
            cols = rows
        else:
            cols = self.traj_2.argsortMember(step=step)

        ordered = N0.take(N0.take(matrix, rows, 0), cols, 1)

        if mirror or not single_traj:
            return ordered

        ## blank the diagonal and everything below it
        n_rows = N0.shape(ordered)[0]
        n_cols = N0.shape(ordered)[1]
        for col in range(n_rows):
            for row in range(col, n_cols):
                ordered[row, col] = 0.

        return ordered
예제 #4
0
    def rmsMatrixByMember( self, mirror=0, step=1 ):
        """
        Get the result matrix sorted by the argsortMember() order of the
        trajectories (requires EnsembleTraj).

        For a single trajectory both axes share one ordering and, if
        mirror is not set, all elements on and below the diagonal are
        zeroed.

        @param mirror: keep the mirrored half (only for intra-traj. rms)
                       (default: 0)
        @type  mirror: 0|1

        @param step: take only every step frame [1]
        @type  step: int

        @return: re-ordered matrix
        @rtype: array
        """
        has_second = self.traj_2 is not None

        result = self.getResult( mirror=not has_second )

        order_1 = self.traj_1.argsortMember( step=step )
        order_2 = order_1
        if has_second:
            order_2 = self.traj_2.argsortMember( step=step )

        result = N0.take( result, order_1, 0 )
        result = N0.take( result, order_2, 1 )

        if has_second or mirror:
            return result

        ## zero element [j,i] for every j >= i
        dim_0, dim_1 = N0.shape( result )[0], N0.shape( result )[1]
        for i in range( dim_0 ):
            for j in range( i, dim_1 ):
                result[j,i] = 0.

        return result
예제 #5
0
파일: MatrixPlot.py 프로젝트: tybiot/biskit
 def __thinarray(self, a, step):
     """
     Keep only every step'th row and every step'th column of an array,
     starting with row 0 and column 0.

     @param a: input array
     @type  a: N0.array
     @param step: stepping in both dimensions
     @type  step: int
     @return: smaller array
     @rtype: N0.array
     """
     kept_rows = range(0, len(a), step)
     thinned = N0.take(a, kept_rows, axis=0)

     ## the column count is read from the first remaining row
     kept_cols = range(0, len(thinned[0]), step)
     return N0.take(thinned, kept_cols, axis=1)
예제 #6
0
    def takeAtoms( self, indices, returnClass=None ):
        """
        Create a copy of this trajectory restricted to the given atoms::
          takeAtoms( indices, returnClass=None ) -> copy of Trajectory

        All instance fields are first copied over by reference; frames and
        reference model are then replaced by reduced versions, frameNames
        is shallow-copied, profiles are cloned, resIndex is reset to None
        and the PCA data (pc) is shared with this object.

        @param indices: list of atom indices
        @type  indices: [int]
        @param returnClass: default: None, same class as this object
        @type  returnClass: class OR None

        @return: copy of this Trajectory (with fewer atoms)
        @rtype: Trajectory
        """
        if not returnClass:
            returnClass = self.__class__
        result = returnClass()

        ## carry every field across so that sub-classes keep their own
        ## attributes, then blank the ones that are rebuilt below
        result.__dict__.update( self.__dict__ )
        for field in ( 'frames', 'ref', 'frameNames', 'profiles' ):
            setattr( result, field, None )

        ## atoms are selected along axis 1 of the frame array
        result.frames = N0.take( self.frames, indices, 1 )
        result.setRef( self.ref.take( indices ) )

        result.frameNames = copy.copy( self.frameNames )
        result.resIndex = None
        result.profiles = self.profiles.clone()

        ## shared, not reduced (flagged as "not really clean" by the author)
        result.pc = self.pc

        return result
예제 #7
0
    def getResFluct( self, atomFluctList=None ):
        """
        Convert list of atomic fluctuations to list of residue
        fluctuation.

        Each residue receives the value that residusMaximus() reports for
        it with the backbone mask; the value is read at the positions given
        by ref.resIndex().

        @param atomFluctList: array 1 x N_atoms of float
                              (default: None, result of getFluct_global())
        @type  atomFluctList: [float]

        @return: array 1 x N_residues of float
        @rtype: [float]

        @raise TrajError: if result length != N_residues
        """
        if atomFluctList is None:
            atomFluctList = self.getFluct_global()

        ## Give all atoms of each res. the same fluct. value
        ## (the highest fluctuation of any backbone atom)
        result = self.residusMaximus( atomFluctList, self.ref.maskBB() )

        ## take first atoms only
        result = N0.take( result, self.ref.resIndex() )

        ## check dimension ('!=' replaces the obsolete '<>' operator)
        n_residues = self.ref.lenResidues()
        if len( result ) != n_residues:
            raise TrajError(
                "getResFluct(): Length of result list (%i) <>"
                " number of residues (%i)." % ( len(result), n_residues ) )

        return result
예제 #8
0
    def __translateChainIndices(self, atomIndices, newChainMap):
        """
        Map every current chain index of the selected atoms to the chain
        index the same atom carries in newChainMap.

        @param atomIndices: indices of atoms
        @type  atomIndices: [int]
        @param newChainMap: chain map [0000011133333..], one entry per
                            selected atom
        @type  newChainMap: [int]

        @return: { int:int, .. } map current chain indices to new ones
        @rtype: {int:int}

        @raise ComplexTrajError: if one current chain would have to be
                                 mapped to two different new chains
        """
        current = N0.take(self.ref.chainMap(), atomIndices)

        mapping = {}
        for pos, old in enumerate(current):
            new = newChainMap[pos]

            ## the first atom of a chain fixes the translation; every later
            ## atom of that chain has to agree with it
            if mapping.setdefault(old, new) != new:
                raise ComplexTrajError(
                    "Can't insert different chains into each other.")

        return mapping
예제 #9
0
    def group(self, a_indices, maxPerCenter):
        """
        Group a bunch of integers (atom indices in PDBModel) so that each
        group has at most maxPerCenter items. The items are spread as
        evenly as possible; an empty input yields an empty list.

        @param a_indices: atom indices
        @type  a_indices: [int]
        @param maxPerCenter: max entries per group
        @type  maxPerCenter: int

        @return: list of lists of int
        @rtype: [[int],[int]..]
        """
        n_items = len(a_indices)

        ## nothing to distribute (also avoids a division by zero below)
        if n_items == 0:
            return []

        ## how many groups are necessary? (ceiling of n_items/maxPerCenter)
        n_centers = n_items // maxPerCenter
        if n_items % maxPerCenter:
            n_centers += 1

        ## how many items/atoms go into each group? Start with the rounded
        ## down average and hand the remainder to the first groups.
        nAtoms = N0.ones(n_centers, N0.Int) * (n_items // n_centers)
        i = 0
        while N0.sum(nAtoms) != n_items:
            nAtoms[i] += 1
            i += 1

        ## distribute atom indices into groups
        result = []
        pos = 0
        for n in nAtoms:
            result.append(N0.take(a_indices, N0.arange(n) + pos))
            pos += n

        return result
예제 #10
0
파일: Analyzer.py 프로젝트: suliat16/biskit
    def shuffledLists( self, n, lst, mask=None ):
        """
        shuffle order of a list n times, leaving masked(0) elements untouched

        @param n: number of times to shuffle the list
        @type  n: int
        @param lst: list to shuffle
        @type  lst: [any]
        @param mask: mask to be applied to lst (default: None, all
                     elements are shuffled)
        @type  mask: [1|0]

        @return: list of shuffeled lists
        @rtype: [[any]]
        """
        ## Only a missing or empty mask triggers the default. Testing the
        ## truth value instead is ambiguous for array masks.
        if mask is None or len( mask ) == 0:
            mask = N0.ones( len(lst)  )

        if isinstance( lst, list ):
            lst = N0.array( lst )

        pos = N0.nonzero( mask )

        rand_pos = N0.array( [ self.__shuffleList( pos ) for i in range(n) ] )

        ## values at the unmasked positions; identical for every round
        movable = N0.take( lst, pos )

        result = []
        for p in rand_pos:

            r = copy.copy( lst )
            N0.put( r, p, movable )
            result.append( r )

        return result
예제 #11
0
    def reduceToModel(self, xyz=None, reduce_profiles=1):
        """
        Build a PDBModel from reduced coordinates. The new model receives
        all profiles stored in self.atoms, the reduced coordinates and a
        'mass' profile holding the summed mass of each atom group.

        @param xyz: coordinte array (N_atoms x 3) or
                    None (->use reference coordinates)
        @type  xyz: array OR None
        @param reduce_profiles: if true, additionally call
                                reduceAtomProfiles() for the source model
                                and hand over its residues (default: 1)
        @type  reduce_profiles: 1|0

        @return: PDBModel with reduced atom set and profile 'mass'
        @rtype: PDBModel
        """
        atom_mass = self.m.atoms.get('mass')
        if xyz is None:
            xyz = self.m.getXyz()

        ## total mass per group of atoms
        group_mass = [N0.sum(N0.take(atom_mass, members))
                      for members in self.groups]
        reduced_xyz = self.reduceXyz(xyz)

        model = PDBModel()
        for key in self.atoms.keys():
            model.atoms.set(key, self.atoms.valuesOf(key))

        model.setXyz(reduced_xyz)
        model.atoms.set('mass', group_mass)

        if not reduce_profiles:
            return model

        self.reduceAtomProfiles(self.m, model)
        model.residues = self.m.residues

        return model
예제 #12
0
    def valuesOf(self, infoKey, default=None, indices=None, unique=0 ):
        """
        Get all values of a certain info record of all or some Complexes.

        @param infoKey: key for info dict
        @type  infoKey: str
        @param default: default value if infoKey is not found (None)
        @type  default: any
        @param indices: list of int OR None(=all), indices of Complexes (None)
        @type  indices: [int] OR None
        @param unique: report each value only once, in order of first
                       occurrence (default 0)
        @type  unique: 1|0

        @return: list of values
        @rtype: [any]
        """
        l = self
        if indices is not None:
            l = N0.take( N0.array(l,'O'), indices )

        ## look every value up exactly once, always with the default, so
        ## that the unique and the non-unique result agree
        values = [ c.info.get(infoKey, default) for c in l ]

        if not unique:
            return values

        r = []
        for v in values:
            if v not in r:
                r.append( v )
        return r
예제 #13
0
 def convertChainIdsNter(self, model, chains):
     """
     Translate normal chain ids into the chain ids that the model reports
     when chain breaks are taken into account.

     @param model: model to look the chains up in
     @param chains: chain indices; an empty sequence is returned unchanged

     @return: result of model.atom2chainIndices( .., breaks=1 ) for the
              chainIndex() entries of the given chains
     """
     if not len(chains):
         return chains

     ## presumably the first atom of every requested chain -- verify
     ## against model.chainIndex()
     atoms = N0.take(model.chainIndex(), chains)

     ## back to chain indices, this time counting chain breaks
     return model.atom2chainIndices(atoms, breaks=1)
예제 #14
0
    def __find_intervals(self, l):
        """
        Split a collection of integers into runs of consecutive values.
        The values are sorted first; a new run starts wherever two
        neighboring sorted values differ by more than 1.

        @param l: integer values (e.g. array positions)
        @type  l: [int]

        @return: list of tuples, each holding one run of values
        @rtype: [(int,..)]
        """
        l = N0.array(l)
        l = N0.take(l, N0.argsort(l))

        ## positions after which the sorted sequence jumps by more than 1
        break_points = N0.nonzero(N0.greater(l[1:] - l[:-1], 1))

        start = 0
        intervals = []

        for index in break_points:
            intervals.append(tuple(N0.take(l, range(start, index + 1))))
            start = index + 1

        ## remaining values behind the last break
        intervals.append(tuple(l[start:]))

        return intervals
예제 #15
0
 def convertChainIdsCter(self, model, chains):
     """
     Translate normal chain ids into the chain ids that the model reports
     when chain breaks are taken into account, looked up via the last
     atom of each chain.

     @param model: model to look the chains up in
     @param chains: chain indices; an empty sequence is returned unchanged

     @return: result of model.atom2chainIndices( .., breaks=1 ) for the
              last atoms of the given chains
     """
     if not len(chains):
         return chains

     ## chain starts plus one closing entry (the model length); the entry
     ## following a chain, minus 1, is taken as that chain's last atom
     starts = N0.concatenate((model.chainIndex(), [len(model)]))
     following = N0.array(chains) + 1
     last_atoms = N0.take(starts, following) - 1

     ## back to chain indices, this time counting chain breaks
     return model.atom2chainIndices(last_atoms, breaks=1)
예제 #16
0
파일: Analyzer.py 프로젝트: suliat16/biskit
    def __shuffleList(self, lst ):
        """
        Return the items of lst in a random order drawn from
        R.permutation.

        @param lst: list to shuffle
        @type  lst: [any]

        @return: shuffeled list
        @rtype: [any]
        """
        order = R.permutation( len( lst ))
        shuffled = N0.take( lst, order )
        return shuffled
예제 #17
0
    def reduceXyz(self, xyz, axis=0):
        """
        Reduce the number of atoms in the given coordinate set. The set must
        have the same length and order as the reference model. It may have
        an additional (time) dimension as first axis. Every group of atoms
        in self.groups is replaced by its mass-weighted center.

        @param xyz: coordinates (N_atoms x 3) or (N_frames x N_atoms x 3)
        @type  xyz: array
        @param axis: axis with atoms (default: 0)
        @type  axis: int

        @return: coordinate array (N_less_atoms x 3) or
                 (N_frames x N_less_atoms x 3); None if there are no groups
        @rtype: array
        """
        masses = self.m.atoms.get('mass')
        centers = []

        for atom_indices in self.groups:

            x = N0.take(xyz, atom_indices, axis)
            m = N0.take(masses, atom_indices)

            ## masses as a column so that they multiply each coordinate row
            center = N0.sum(x * N0.transpose([m]), axis=axis) / N0.sum(m)

            ## re-insert the atom axis that the sum has removed
            if axis == 0:
                center = center[N0.NewAxis, :]
            elif axis == 1:
                center = center[:, N0.NewAxis, :]

            centers.append(center)

        if not centers:
            return None

        ## join once at the end instead of re-copying the growing result
        ## for every group
        return N0.concatenate(centers, axis)
예제 #18
0
    def __takePca( self, indices ):
        """
        Extract the PCA results that belong to certain frames. Works on a
        deep copy of self.pc, the stored data are not modified.

        @param indices: frame indices
        @type  indices: [int]

        @return: copy of the pc dictionary with the entries 'p', 'u' and
                 (if set) 'fMask' reduced to the given frames, or None if
                 this object has no pc data
        @rtype: dict OR None
        """
        pca = copy.deepcopy( getattr(self, 'pc', None ))

        if pca is None:
            return pca

        for key in ( 'p', 'u' ):
            pca[key] = N0.take( pca[key], indices, 0 )

        if pca['fMask'] is not None:
            pca['fMask'] = N0.take( pca['fMask'], indices, 0 )

        return pca
예제 #19
0
    def __resWindow( self, res, n_neighbores, rchainMap=None,
                     left_allowed=None, right_allowed=None ):
        """
        Get indices of all atoms of a residue and some atoms of its
        neighboring residues (if they belong to the same chain).

        @param res: residue index
        @type  res: int
        @param n_neighbores: number of residues to include right and left
        @type  n_neighbores: int
        @param right_allowed: array 1 x N_atoms of 1|0, possible neighbore
                              atoms
        @type  right_allowed: array
        @param left_allowed: array 1 x N_atoms of 1|0, possible neighbore atoms
        @type  left_allowed: array 
        @param rchainMap: array 1 x N_residues of int, chain id of each res
        @type  rchainMap: array

        @return: atoms of res, atoms of neighbores
        @rtype: [ int ], [ int ]
        """
        ## some defaults.. time-consuming..
        if rchainMap is None:
            rchainMap = N0.take( self.chainMap(), self.resIndex() )

        if left_allowed  is None: left_allowed = N0.nonzero( self.ref.maskBB() )
        if right_allowed is None: right_allowed= N0.nonzero( self.ref.maskBB() )

        ## atom indices of center residue
        result = self.ref.res2atomIndices( [ res ] ).tolist()

        ## get indices of neighbore residues that still belong to same chain
        l = self.ref.lenResidues()
        chain = rchainMap[res]

        outer_left = range( res-n_neighbores, res )
        outer_right= range( res+1, res+n_neighbores+1 )

        ## Residue 0 is a valid left neighbor; only negative positions fall
        ## outside the model. (The former test 'i > 0' also dropped residue
        ## 0, so the second residue never saw its left neighbor.)
        outer_left = [ i for i in outer_left  if i >= 0 and rchainMap[i]==chain]
        outer_right= [ i for i in outer_right if i < l and rchainMap[i]==chain]

        ## convert to atom indices, filter them against allowed neighbore atoms
        if outer_left:
            outer_left = self.ref.res2atomIndices( outer_left )
            outer_left = MU.intersection( left_allowed,  outer_left )

        if outer_right:
            outer_right= self.ref.res2atomIndices( outer_right)
            outer_right= MU.intersection( right_allowed, outer_right)

        return result, outer_left + outer_right
예제 #20
0
    def orderCenters(self, points, origin=None):
        """
        Order random points by increasing distance to the origin or, if no
        origin is given, to the first point.

        @param points: n x 3 array of float, random center coordinates
        @type  points: array
        @param origin: 3 array of float (default: None, use points[0])
        @type  origin: array OR None

        @return: entries of points as selected by N0.take in the order of
                 increasing distance. NOTE(review): older documentation
                 announced a list of indices here; the code returns the
                 taken points.
        @rtype: array
        """
        if origin is None:
            origin = points[0]

        ranking = N0.argsort(self.__distances(origin, points))

        return N0.take(points, ranking)
예제 #21
0
    def orderCenters( self, points, origin=None ):
        """
        Sort random points by their distance to a reference position,
        closest first. The first point serves as reference if no origin
        is given.

        @param points: n x 3 array of float, random center coordinates
        @type  points: array
        @param origin: 3 array of float (default: None, use points[0])
        @type  origin: array OR None

        @return: entries of points as selected by N0.take in the order of
                 increasing distance. NOTE(review): older documentation
                 announced a list of indices here; the code returns the
                 taken points.
        @rtype: array
        """
        reference = origin
        if reference is None:
            reference = points[0]

        dist = self.__distances( reference, points )
        by_distance = N0.argsort( dist )

        return N0.take( points, by_distance )
예제 #22
0
    def memberIndices(self, member, step=1):
        """
        Frame indices that belong to one member trajectory::
          memberIndices( int_member, [int_step] )

        The frames of a member are member, member + n_members,
        member + 2 * n_members, .. up to the number of frames.

        @param member: member trajectory
        @type  member: int
        @param step: return only every i'th frame (default: 1)
        @type  step: int

        @return: indices for members
        @rtype: [int]
        """
        frames = range(member, self.lenFrames(), self.n_members)

        if step == 1:
            return frames

        ## thin out: keep every step'th entry, beginning with the first
        kept = range(0, len(frames), step)
        return N0.take(frames, kept).tolist()
예제 #23
0
파일: Analyzer.py 프로젝트: suliat16/biskit
    def random_contacts( self, contMat, n, maskRec=None, maskLig=None ):
        """
        Create randomized surface contact matrix with same number of
        contacts and same shape as given contact matrix. Random contacts
        are only placed at positions where both masks are non-zero.

        @param contMat: template contact matrix
        @type  contMat: matrix
        @param n: number of matrices to generate
        @type  n: int
        @param maskRec: surface masks (or something similar), one entry per
                        row of contMat (default: None, all rows allowed)
        @type  maskRec: [1|0]
        @param maskLig: surface masks (or something similar), one entry per
                        column of contMat (default: None, all columns
                        allowed)
        @type  maskLig: [1|0]

        @return: list of [n] random contact matricies
        @rtype: [matrix]
        """
        a,b = N0.shape( contMat )
        nContacts = N0.sum( N0.sum( contMat ))

        ## Default each mask on its own and only when it is missing or
        ## empty. Testing the truth value of maskLig alone is ambiguous
        ## for arrays, discarded a maskRec given without maskLig, and
        ## failed for a maskLig given without maskRec.
        if maskRec is None or len( maskRec ) == 0:
            maskRec = N0.ones( a )
        if maskLig is None or len( maskLig ) == 0:
            maskLig = N0.ones( b )

        ## flat positions that are allowed to carry a contact
        c_mask = N0.ravel( N0.outerproduct( maskRec, maskLig ) )
        c_pos = N0.nonzero( c_mask )

        # get array with surface positions from complex
        cont = N0.take( N0.ravel(contMat), c_pos )
        length = len( cont )

        result = []

        for i in range( n ):
            # create random array
            ranCont = mathUtils.randomMask( nContacts,length )

            # blow up to size of original matrix
            r = N0.zeros(a*b)
            N0.put( r, c_pos, ranCont)

            result.append( N0.reshape( r, (a,b) ) )

        return result
예제 #24
0
    def calcProfiles(self, m):
        """
        Calculate needed profiles.

        @param m: PDBModel to calculate data for
        @type  m: PDBModel
        """
        if self.verbose: print "Initiating PDBDope..."
        d = PDBDope(m)

        if not self.profileName in m.atoms.keys():

            if self.profileName in ['MS', 'AS', 'curvature', 'relAS', 'relMS']:
                if self.verbose: print "Adding SurfaceRacer profile...",
                d.addSurfaceRacer()

            if self.profileName in ['density']:
                if self.verbose: print "Adding surface density...",
                d.addDensity()

        if not self.profileName in m.residues.keys():

            if self.profileName in ['cons_abs', 'cons_max', 'cons_ent']:
                if self.verbose: print "Adding conservation data...",
                d.addConservation()

        if self.verbose: print 'Done.'

        ## convert atom profiles to average residue profile
        if self.profileName in m.atoms.keys():
            prof = []
            aProfile = m.profile(self.profileName)
            resIdx = m.resIndex().tolist()
            resIdx += [m.lenAtoms()]
            for i in range(len(resIdx) - 1):
                prof += [
                    N0.average(
                        N0.take(aProfile, range(resIdx[i], resIdx[i + 1])))
                ]
        else:
            prof = m.profile(self.profileName)

        return prof
예제 #25
0
파일: SparseArray.py 프로젝트: graik/biskit
    def __setAll_1D( self, a ):
        """
        Replace content of this sparseArray with values from Numeric array
        or list of numbers -- only for 1-dimensional arrays.

        Only the positions whose value differs from the default value are
        stored, together with their values.

        @param a: array OR list
        @type  a: array OR [ number ]

        @raise SparseArrayError: if the shape of a differs from the shape
                                 of this array
        """
        ## isinstance also converts list sub-classes, which have no shape
        if isinstance( a, list ):
            a = N0.array( a, self.__typecode )

        if self.shape != a.shape:
            raise SparseArrayError( 'dimensions not aligned' )

        ## positions that do not hold the default value
        differs = N0.logical_not( N0.equal(a, self.__default) )
        self.indices = N0.nonzero( differs ).tolist()

        self.values = N0.take( a, self.indices ).tolist()
예제 #26
0
    def __setAll_1D(self, a):
        """
        Replace content of this sparseArray with values from Numeric array
        or list of numbers -- only for 1-dimensional arrays.

        Positions holding the default value are skipped; all other
        positions and their values are stored as plain lists.

        @param a: array OR list
        @type  a: array OR [ number ]

        @raise SparseArrayError: if the shape of a differs from the shape
                                 of this array
        """
        ## isinstance also converts list sub-classes, which have no shape
        if isinstance(a, list):
            a = N0.array(a, self.__typecode)

        if self.shape != a.shape:
            raise SparseArrayError('dimensions not aligned')

        ## mask of positions that deviate from the default value
        non_default = N0.logical_not(N0.equal(a, self.__default))
        self.indices = N0.nonzero(non_default).tolist()

        self.values = N0.take(a, self.indices).tolist()
예제 #27
0
    def valuesOf( self, infoKey, version=None, default=None,
                  indices=None, unique=0 ):
        """
        Get all values of a certain info record of all or some Complexes.

        @param infoKey: key for info dict
        @type  infoKey: str
        @param version: index in history or None (=current) (default: None)
        @type  version: int
        @param default: default value if infoKey is not found (default: None)
        @type  default: any
        @param indices: list of int OR None(=all), indices of Complexes
                        (default: None)
        @type  indices: [int] OR None
        @param unique: report each value only once, in order of first
                       occurrence (default: 0)
        @type  unique: 1|0

        @return: list of values
        @rtype: [any]
        """
        l = self
        if indices is not None:
            l = N0.take( l, indices )

        if not unique:
            if version is None:
                return [ c.get(infoKey, default) for c in l ]
            return [ c[version].get( infoKey, default) for c in l ]

        r = []
        for c in l:
            if version is not None:
                c = c[ version ]

            ## single lookup, always with the default: the value that is
            ## tested is the value that is stored
            value = c.info.get( infoKey, default )
            if value not in r:
                r.append( value )

        return r
예제 #28
0
    def thin(self, step=1):
        """
        Keep only each step'th frame of every ensemble member.

        @param step: 1..keep all frames, 2..skip first and every second, ..
                     (default: 1)
        @type  step: int

        @return: reduced EnsembleTraj
        @rtype: EnsembleTraj
        """
        T.ensure(step, int, forbidden=[0])

        ## n_members x frames-per-member, frame indices of each member
        per_member = N0.array(
            [self.memberIndices(i) for i in range(self.n_members)])

        ## counting starts at -1 and the first hit is dropped, which leaves
        ## the positions step-1, 2*step-1, ..
        kept = range(-1, N0.shape(per_member)[1], step)[1:]
        per_member = N0.take(per_member, kept, 1)

        ## transposing before flattening interleaves the members again
        frames = N0.ravel(N0.transpose(per_member))

        return self.takeFrames(frames)
예제 #29
0
    def reduceAtomProfiles(self, from_model, to_model):
        """
        reduce all atom profiles according to the calculated map by calculating
        the average over the grouped atoms.

        Profiles that cannot be averaged are skipped (best effort); their
        info record is transferred nevertheless.

        @param from_model: model
        @type  from_model: PDBModel
        @param to_model: model
        @type  to_model: PDBModel
        """
        for profname in from_model.atoms:

            p0 = from_model.atoms.get(profname)
            info = from_model.profileInfo(profname)

            try:
                pr = [N0.average(N0.take(p0, group)) for group in self.groups]

                to_model.atoms.set(profname, pr)
            except Exception:
                ## deliberately ignored, but without swallowing
                ## KeyboardInterrupt or SystemExit like a bare except does
                pass

            to_model.atoms.setInfo(profname, **info)
예제 #30
0
    def confidenceInterval(self, level):
        """
        confidenceInterval(self, level)

        The probabilities are sorted in ascending order and accumulated
        (times delta_x). All points from the first one whose accumulated
        value reaches 1 - level onwards are kept and grouped into runs of
        neighboring positions.

        @param level: confidence level (e.g. 0.68 for stdev interval)
        @type  level: float

        @return: start and end x value of each run of kept points
        @rtype: ((float, float), ..)
        """
        ascending = N0.argsort(self.p).tolist()
        accumulated = N0.add.accumulate(N0.take(self.p, ascending))
        accumulated = accumulated * self.delta_x

        reached = N0.nonzero(N0.greater_equal(accumulated, 1. - level))

        ## positions of the most probable points
        selected = ascending[reached[0]:]

        runs = self.__find_intervals(selected)

        return tuple([(self.x[run[0]], self.x[run[-1]]) for run in runs])
예제 #31
0
    def findConfidenceInterval(self, x):
        """
        findConfidenceInterval(self, x)
        Find the smallest possible density interval that still includes x.

        All points whose probability is at least the probability at the
        grid point closest to x are selected and grouped into runs of
        neighboring positions.

        @param x: value
        @type  x: float

        @return: confidence level (summed probability of the selected
                 points times delta_x), start and end x value of each run
        @rtype: float, ((float,float), ..)
        """
        nearest = N0.argmin(abs(self.x - x))
        threshold = self.p[nearest]

        selected = N0.nonzero(N0.greater_equal(self.p, threshold)).tolist()

        runs = self.__find_intervals(selected)

        run_sums = [N0.sum(N0.take(self.p, run)) for run in runs]
        level = N0.sum(run_sums) * self.delta_x

        limits = tuple([(self.x[run[0]], self.x[run[-1]]) for run in runs])

        return level, limits
예제 #32
0
파일: Hmmer.py 프로젝트: graik/biskit
    def parse_result( self ):
        """
        Extract some information about the profile as well as the
        match state emmission scores. Keys of the returned dictionary::
          'AA', 'name', 'NrSeq', 'emmScore', 'accession',
          'maxAllScale', 'seqNr', 'profLength', 'ent', 'absSum'

        @return: dictionary with warious information about the profile
        @rtype: dict

        @raise HmmerError: if the result file is missing or has fewer than
                           10 lines (as counted by T.fileLength)
        """
        ## check that the output file is there and seems valid
        if not os.path.exists( self.f_out ):
            raise HmmerError(
                'Hmmerfetch result file %s does not exist.'%self.f_out )

        if T.fileLength( self.f_out ) < 10:
            raise HmmerError(
                'Hmmerfetch result file %s seems incomplete.'%self.f_out )

        profileDic = {}

        ## read result; the file is closed even if reading fails
        hmm = open( self.f_out, 'r')
        try:
            out = hmm.read()
        finally:
            hmm.close()

        ## collect some data about the hmm profile
        profileDic['name'] =  self.hmmName
        profileDic['profLength'] = \
                  int( re.findall( r'LENG\s+[0-9]+', out )[0].split()[1] )
        profileDic['accession'] = \
                  re.findall( r'ACC\s+PF[0-9]+', out )[0].split()[1]
        profileDic['NrSeq'] = \
                  int( re.findall( r'NSEQ\s+[0-9]+', out )[0].split()[1] )
        profileDic['AA'] = \
              re.findall( 'HMM[ ]+' + '[A-Y][ ]+'*20, out )[0].split()[1:]

        ## collect null emmission scores
        pattern = 'NULE[ ]+' + '[-0-9]+[ ]+'*20
        nullEmm = [ float(j) for j in re.findall(pattern, out)[0].split()[1:] ]

        ## get emmision scores: one row per profile position, the position
        ## number followed by 20 scores
        prob=[]
        for i in range(1, profileDic['profLength']+1):
            pattern = "[ ]+%i"%i + "[ ]+[-0-9]+"*20
            e = [ float(j) for j in re.findall(pattern, out)[0].split() ]
            prob.append( e )

        ## first column: position numbers; remaining columns: scores
        profileDic['seqNr'] = N0.transpose( N0.take( prob, (0,),1 ) )
        profileDic['emmScore'] = N0.array(prob)[:,1:]

        ## calculate emission probablitities
        emmProb, nullProb = self.hmmEmm2Prob( nullEmm, profileDic['emmScore'])

        ent = [ N0.resize( self.entropy(e, nullProb), (1,20) )[0] for e in emmProb ]
        profileDic['ent'] = N0.array(ent)

        ###### TEST #####

        proba = N0.array(prob)[:,1:]

        # test set all to N0.sum( abs( probabilities ) )
        profileDic['absSum'] = [
            N0.resize( N0.sum( N0.absolute( row )), N0.shape( row ) )
            for row in proba ]

        # set all to normalized max score
        ## NOTE(review): 'math' has to provide SD() here, so it is
        ## presumably not the standard library module -- confirm import
        p4 = []
        for row in proba:
            p_scale = (row - N0.average(row) )/ math.SD(row)
            p4.append( N0.resize( p_scale[N0.argmax( N0.array(p_scale) )] ,
                                  N0.shape( row ) ) )
        profileDic['maxAllScale'] = p4

        return profileDic
예제 #33
0
    def parse_result(self):
        """
        Extract some information about the profile as well as the
        match state emmission scores. Keys of the returned dictionary::
          'AA', 'name', 'NrSeq', 'emmScore', 'accession',
          'maxAllScale', 'seqNr', 'profLength', 'ent', 'absSum'

        @return: dictionary with warious information about the profile
        @rtype: dict

        @raise HmmerError: if the result file is missing or has fewer than
                           10 lines (as counted by T.fileLength)
        """
        ## check that the output file is there and seems valid
        if not os.path.exists(self.f_out):
            raise HmmerError(
                'Hmmerfetch result file %s does not exist.' % self.f_out)

        if T.fileLength(self.f_out) < 10:
            raise HmmerError(
                'Hmmerfetch result file %s seems incomplete.' % self.f_out)

        profileDic = {}

        ## read result; the file is closed even if reading fails
        hmm = open(self.f_out, 'r')
        try:
            out = hmm.read()
        finally:
            hmm.close()

        ## collect some data about the hmm profile
        profileDic['name'] = self.hmmName
        profileDic['profLength'] = int(
            re.findall(r'LENG\s+[0-9]+', out)[0].split()[1])
        profileDic['accession'] = \
            re.findall(r'ACC\s+PF[0-9]+', out)[0].split()[1]
        profileDic['NrSeq'] = int(
            re.findall(r'NSEQ\s+[0-9]+', out)[0].split()[1])
        profileDic['AA'] = \
            re.findall('HMM[ ]+' + '[A-Y][ ]+' * 20, out)[0].split()[1:]

        ## collect null emmission scores
        pattern = 'NULE[ ]+' + '[-0-9]+[ ]+' * 20
        nullEmm = [float(j) for j in re.findall(pattern, out)[0].split()[1:]]

        ## get emmision scores: one row per profile position, the position
        ## number followed by 20 scores
        prob = []
        for i in range(1, profileDic['profLength'] + 1):
            pattern = "[ ]+%i" % i + "[ ]+[-0-9]+" * 20
            e = [float(j) for j in re.findall(pattern, out)[0].split()]
            prob.append(e)

        ## first column: position numbers; remaining columns: scores
        profileDic['seqNr'] = N0.transpose(N0.take(prob, (0, ), 1))
        profileDic['emmScore'] = N0.array(prob)[:, 1:]

        ## calculate emission probablitities
        emmProb, nullProb = self.hmmEmm2Prob(nullEmm, profileDic['emmScore'])

        ent = [
            N0.resize(self.entropy(e, nullProb), (1, 20))[0] for e in emmProb
        ]
        profileDic['ent'] = N0.array(ent)

        ###### TEST #####

        proba = N0.array(prob)[:, 1:]

        # test set all to N0.sum( abs( probabilities ) )
        profileDic['absSum'] = [
            N0.resize(N0.sum(N0.absolute(row)), N0.shape(row))
            for row in proba
        ]

        # set all to normalized max score
        ## NOTE(review): 'math' has to provide SD() here, so it is
        ## presumably not the standard library module -- confirm import
        p4 = []
        for row in proba:
            p_scale = (row - N0.average(row)) / math.SD(row)
            p4.append(
                N0.resize(p_scale[N0.argmax(N0.array(p_scale))],
                          N0.shape(row)))
        profileDic['maxAllScale'] = p4

        return profileDic
예제 #34
0
    def getFluct_local( self, mask=None, border_res=1,
                        left_atoms=['C'], right_atoms=['N'], verbose=1 ):
        """
        Get mean displacement of each atom from its average position after
        fitting of each residue to the reference backbone coordinates of itself
        and selected atoms of neighboring residues to the right and left.

        @param mask: N_atoms x 1 array of 0||1, atoms for which fluctuation
                     should be calculated (default: None, all atoms)
        @type  mask: array
        @param border_res: number of neighboring residues to use for fitting
        @type  border_res: int
        @param left_atoms: atoms (names) to use from these neighbor residues
        @type  left_atoms: [str]
        @param right_atoms: atoms (names) to use from these neighbor residues
        @type  right_atoms: [str]
        @param verbose: report progress via T.errWrite (default: 1)
        @type  verbose: 1||0

        @return: fluctuation values of all selected residues, concatenated
                 into one flat list (a plain list, not an array); residues
                 that trigger a ZeroDivisionError contribute zeros
        @rtype: [float]

        @raise PDBError: if the atom window of a residue is empty
        """
        ## note: the list defaults of left_atoms / right_atoms are never
        ## modified here, they are only passed on to self.ref.mask
        if mask is None:
            mask = N0.ones( len( self.frames[0] ), N0.Int32 )

        if verbose: T.errWrite( "rmsd fitting per residue..." )

        ## indices of all residues selected by the atom mask
        residues = N0.nonzero( self.ref.atom2resMask( mask ) )

        ## backbone atoms used for fit
        fit_atoms_right = N0.nonzero( self.ref.mask( right_atoms ) )
        fit_atoms_left  = N0.nonzero( self.ref.mask( left_atoms ) )
        ## chain index of each residue
        rchainMap = N0.take( self.ref.chainMap(), self.ref.resIndex() )

        result = []

        for res in residues:

            i_res, i_border = self.__resWindow(res, border_res, rchainMap,
                                               fit_atoms_left, fit_atoms_right)

            ## an empty residue is a hard error and is not handled by the
            ## ZeroDivisionError fallback below
            if not len( i_res ):
                raise PDBError( 'empty residue' )

            try:
                ## center residue atoms come first, border atoms after them
                t_res = self.takeAtoms( i_res + i_border )

                i_center = range( len( i_res ) )

                mask_BB = t_res.ref.maskBB() * t_res.ref.maskHeavy()

                ## fit with border atoms ..
                t_res.fit( ref=t_res.ref, mask=mask_BB, verbose=0 )
                ## .. but calculate only with center residue atoms
                frames = N0.take( t_res.frames, i_center, 1 )

                avg = N0.average( frames )

                rmsd = N0.average(N0.sqrt(N0.sum(N0.power(frames - avg, 2), 2) ))

                result.extend( rmsd )

                if verbose: T.errWrite('#')

            except ZeroDivisionError:
                ## fall back to zero fluctuation for this residue and flag
                ## it in the output (reported regardless of verbose)
                result.extend( N0.zeros( len(i_res), N0.Float32 ) )
                T.errWrite('?' + str( res ))

        if verbose: T.errWriteln( "done" )

        return result