Python transpose 예제들, Biskit.oldnumeric.transpose Python 예제들

예제 #1

0

파일 보기

파일: WhatIf.py 프로젝트: suliat16/biskit

    def __exposedResidues(self,
                          ASA_values,
                          sidechainCut=0.0,
                          backboneCut=0.0,
                          totalCut=0.0):
        """
        Build a per-residue exposure mask from accessible surface areas.

        The first three columns of ASA_values are each compared (strictly
        greater than) against their own cutoff: column 0 against totalCut,
        column 1 against backboneCut and column 2 against sidechainCut.

        NOTE(review): the three comparison results are summed and the sum is
        tested for > 0, so -- assuming N0.sum reduces over the first axis --
        a residue is flagged as soon as any single comparison succeeds.
        Earlier documentation asked for all three to pass; confirm the
        intended rule.

        @param ASA_values: ASA values, presumably one row per residue as
                           calculated in L{__read_residueASA}
        @type  ASA_values: array
        @param sidechainCut: cutoff compared against column 2 (default: 0.0)
        @type  sidechainCut: float
        @param backboneCut: cutoff compared against column 1 (default: 0.0)
        @type  backboneCut: float
        @param totalCut: cutoff compared against column 0 (default: 0.0)
        @type  totalCut: float

        @return: residue mask, where 0 = buried
        @rtype: [1|0]
        """
        by_column = N0.transpose(ASA_values)
        cutoffs = (totalCut, backboneCut, sidechainCut)

        ## one wrapped row of 1|0 flags per criterion
        flags = [[N0.greater(by_column[k], cutoffs[k])] for k in range(3)]
        stacked = N0.concatenate(tuple(flags))

        return N0.greater(N0.sum(stacked), 0)

예제 #2

0

파일 보기

파일: WhatIf.py 프로젝트: graik/biskit

    def __exposedResidues( self, ASA_values, sidechainCut=0.0,
                         backboneCut=0.0, totalCut=0.0  ):
        """
        Decide which residues count as surface exposed.

        Column 0 of ASA_values is compared against totalCut, column 1
        against backboneCut and column 2 against sidechainCut (strictly
        greater than in each case).

        NOTE(review): the three results are added up and tested for > 0,
        so -- assuming N0.sum reduces over the first axis -- passing any
        one cutoff marks the residue as exposed. Earlier documentation
        stated that all three values have to pass; confirm which is meant.

        @param ASA_values: ASA values, presumably one row per residue as
                           calculated in L{__read_residueASA}
        @type  ASA_values: array
        @param sidechainCut: cutoff for column 2 (default: 0.0)
        @type  sidechainCut: float
        @param backboneCut: cutoff for column 1 (default: 0.0)
        @type  backboneCut: float
        @param totalCut: cutoff for column 0 (default: 0.0)
        @type  totalCut: float

        @return: residue mask, where 0 = buried
        @rtype: [1|0]
        """
        columns = N0.transpose( ASA_values )

        passes_total = N0.greater( columns[0], totalCut )
        passes_backbone = N0.greater( columns[1], backboneCut )
        passes_sidechain = N0.greater( columns[2], sidechainCut )

        ## stack the three masks and count the passed criteria
        votes = N0.sum( N0.concatenate(
            ([passes_total], [passes_backbone], [passes_sidechain]) ) )

        return N0.greater( votes, 0 )

예제 #3

0

파일 보기

def squared_distance_matrix(x, y):
    """
    Squared Euclidean distances between all rows of x and all rows of y,
    using |a - b|^2 = |a|^2 + |b|^2 - 2 a.b.

    @param x: first set of row vectors (n x d)
    @type  x: array
    @param y: second set of row vectors (m x d)
    @type  y: array

    @return: n x m array, element [i, j] is the squared distance between
             x[i] and y[j]
    @rtype: array
    """
    ## squared length of every row; summing the squares directly avoids
    ## building full n x n and m x m dot-product matrices only to read
    ## their diagonals
    d1 = N0.sum(N0.power(x, 2), 1)
    d2 = N0.sum(N0.power(y, 2), 1)

    a1 = N0.add.outer(d1, d2)
    a2 = N0.dot(x, N0.transpose(y))

    return a1 - 2 * a2

예제 #4

0

파일 보기

파일: gnuplot.py 프로젝트: tybiot/biskit

    def test_plot(self):
        """gnuplot.plot test"""
        import Biskit.oldnumeric as N0

        ## Two curves over the same x values; each one is handed to plot()
        ## as a 2d array with one (x, y) pair per row.
        xs = N0.arange(10)
        rising = N0.transpose(N0.array([xs, xs**2]))
        falling = N0.transpose(N0.array([xs, (10 - xs)**2]))

        plot(rising, falling)

예제 #5

0

파일 보기

파일: Complex.py 프로젝트: tybiot/biskit

    def __pairwiseDistances(self, u, v):
        """
        Pairwise Euclidean distances between two sets of atom coordinates,
        computed from |a - b|^2 = |a|^2 + |b|^2 - 2 a.b.

        @param u: coordinates, one row per point
        @type  u: array
        @param v: coordinates, one row per point
        @type  v: array

        @return: Numpy array len(u) x len(v); element [i, j] is the distance
                 between u[i] and v[j]
        @rtype: array

        @raise ComplexError: if u or v is not an array

        @author: Wolfgang Rieping.
        """
        ## check input
        if not isinstance(u, arraytype) or not isinstance(v, arraytype):
            raise ComplexError('unsupported argument type ' +
                               str(type(u)) + ' or ' + str(type(v)))

        ## squared length of every point in u and v
        diag1 = N0.diagonal(N0.dot(u, N0.transpose(u)))
        diag2 = N0.diagonal(N0.dot(v, N0.transpose(v)))

        ## len(v) x len(u) matrix holding -2 * (v[j] . u[i])
        dist = -N0.dot(v, N0.transpose(u)) - N0.transpose(
            N0.dot(u, N0.transpose(v)))

        ## add |u[i]|^2 to every column i and |v[j]|^2 to every row j;
        ## broadcasting replaces the former per-row map(lambda ...) calls,
        ## which relied on Python 2's list-returning map()
        dist = dist + diag1 + diag2[:, N0.NewAxis]

        ## rounding can leave the squared distance of (nearly) coincident
        ## points slightly negative, which sqrt would turn into NaN
        dist = N0.maximum(dist, 0)

        return N0.transpose(N0.sqrt(dist))

예제 #6

0

파일 보기

파일: Complex.py 프로젝트: tybiot/biskit

    def contactResDistribution(self, cm=None):
        """
        Count how often each residue type occurs among the residues that
        take part in receptor-ligand contacts.

        @param cm: pre-calculated contact matrix; if None, the result of
                   self.resContacts() is used (default: None)
        @type  cm: matrix

        @return: dict {'A':3, 'C':1, .. } with one entry per residue letter
                 returned by molUtils.allAA()
        @rtype: dict
        """
        contacts = cm
        if contacts is None:
            contacts = self.resContacts()

        ## contact sums along each dimension act as selection masks
        ## (presumably N0.sum reduces over the first axis -- TODO confirm)
        lig_mask = N0.sum(contacts)
        rec_mask = N0.sum(N0.transpose(contacts))

        ## keep only the sequence letters of residues with contacts
        lig_letters = N0.compress(lig_mask, self.lig().sequence())
        rec_letters = N0.compress(rec_mask, self.rec().sequence())
        contact_seq = ''.join(lig_letters) + ''.join(rec_letters)

        ## count occurrence of every residue letter
        return dict([(aa, contact_seq.count(aa)) for aa in molUtils.allAA()])

예제 #7

0

파일 보기

파일: rmsFit.py 프로젝트: tybiot/biskit

def findTransformation(x, y):
    """
    Find the rotation and translation that match two coordinate arrays.
    Both sets are centered, the correlation matrix transpose(x) . y is
    decomposed by svd, and the rotation is the product of the two
    outer svd factors.

    @param x: first set of coordinates
    @type  x: array('f')
    @param y: second set of coordinates
    @type  y: array('f')

    @return: rotation matrix (3x3) and translation vector (1x3)
    @rtype:  array, array
    """
    ## move both configurations onto their respective centers
    center_x = N0.average(x)
    center_y = N0.average(y)
    x = x - center_x
    y = y - center_y

    ## singular value decomposition of the correlation matrix
    left, singular_values, right = svd(N0.dot(N0.transpose(x), y))

    ## rotation from the svd factors, translation from the two centers
    rotation = N0.dot(left, right)
    translation = center_x - N0.dot(rotation, center_y)

    return rotation, translation

예제 #8

0

파일 보기

def histogram(data, nbins, range=None):
    """
    Create a histogram.
    Comes from Konrad Hinsen: Scientific Python

    Values equal to the upper limit are counted in the last bin.

    @param data: data list or array
    @type  data: [any]
    @param nbins: number of bins
    @type  nbins: int
    @param range: data range to create histogram from (min val, max val);
                  values outside this range are discarded. If None, the
                  smallest and largest value of data are used.
    @type  range: (float, float) OR None

    @return: array (nbins x 2) with the center of each bin in the first
             column and the number of values in that bin in the second
    @rtype: array
    """
    data = N0.array(data, N0.Float)
    if range is None:
        lower = N0.minimum.reduce(data)
        upper = N0.maximum.reduce(data)
    else:
        lower, upper = range
        ## repeating each value 0 or 1 times keeps only the values
        ## that lie within [lower, upper]
        data = N0.repeat(
            data,
            N0.logical_and(N0.less_equal(data, upper),
                           N0.greater_equal(data, lower)))

    ## float() prevents integer division (Python 2) when both the range
    ## limits and nbins are given as ints
    bin_width = float(upper - lower) / nbins

    ## bin index of every value
    data = N0.floor((data - lower) / bin_width).astype(N0.Int)
    histo = N0.add.reduce(N0.equal(N0.arange(nbins)[:, N0.NewAxis], data), -1)

    ## values sitting exactly on the upper limit get index nbins;
    ## count them in the last bin
    histo[-1] = histo[-1] + N0.add.reduce(N0.equal(nbins, data))

    bins = lower + bin_width * (N0.arange(nbins) + 0.5)
    return N0.transpose(N0.array([bins, histo]))

예제 #9

0

파일 보기

    def takeMembers(self, mIndices):
        """
        Take all frames belonging to the members in mIndices::
          takeMembers( mIndices ) -> EnsembleTraj with frames of given members

        @param mIndices: list of member indices
        @type  mIndices: [int] OR array('i')

        @return: EnsembleTraj with specified members
        @rtype: EnsembleTraj

        @raise EnsembleTrajError: if a TypeError occurs while the frames
                                  are collected

        @todo: return self.__class__ instead of EnsembleTraj
        """
        try:
            ## one row of frame indices per requested member;
            ## assumes that each member traj has same number of frames
            fi = N0.array([self.memberIndices(i) for i in mIndices])

            ## transpose + ravel interleaves the rows: first entry of every
            ## member, then the second entry of every member, ...
            fi = N0.ravel(N0.transpose(fi))

            n_members = len(mIndices)

            ## has wrong n_members and member order
            t = self.takeFrames(fi)

            ## copy the taken frames into a fresh EnsembleTraj and correct
            ## the member count
            result = EnsembleTraj(n_members=n_members)

            result.__dict__.update(t.__dict__)
            result.n_members = n_members
            result.resetFrameNames()

            return result

        except TypeError:
            ## call form of raise is valid in both Python 2 and Python 3
            raise EnsembleTrajError(
                'takeMembers TypeError ' + str(mIndices) +
                "\nlenFrames: %i; n_members: %i" % (len(self), self.n_members))

예제 #10

0

파일 보기

파일: Complex.py 프로젝트: tybiot/biskit

    def __findTransformation(self, x, y):
        """
        Find the rotation matrix and translation vector that match two
        coordinate arrays.

        Back transformation, for a single atom i::
            y_new[i] = N0.dot(r, y[i]) + t

        and for all atoms in one step::
            y_new = N0.dot(y, N0.transpose(r)) + t

        @param x: coordinates
        @type  x: array
        @param y: coordinates
        @type  y: array

        @return: rotation matrix, translation vector
        @rtype: array, array

        @author: Michael Habeck
        """
        from numpy.linalg import svd

        ## center both configurations
        center_x = N0.sum(x) / len(x)
        center_y = N0.sum(y) / len(y)
        x = x - center_x
        y = y - center_y

        ## svd of the correlation matrix
        left, singular_values, right = svd(N0.dot(N0.transpose(x), y))

        ## rotation from the svd factors, translation from the two centers
        rotation = N0.dot(left, right)
        translation = center_x - N0.dot(rotation, center_y)

        return rotation, translation

예제 #11

0

파일 보기

파일: PDBParseFile.py 프로젝트: tybiot/biskit

    def __parseBiomt(self, pdbFile, firstLine):
        """
        Extract BIOMT (biological unit) information from REMARK 350 lines
        and collect it in a dictionary keyed by biomolecule number. Each
        value is a tuple (target chains, list of rotation/translation
        matrices), where every matrix combines three BIOMT rows into a
        3 x 4 array (rotation columns followed by a translation column).

        Lines are consumed from pdbFile for as long as they are
        REMARK 350 records.

        NOTE(review): within the code visible here the entry of the last
        biomolecule is never stored in biomtDict and nothing is returned --
        the listing may be truncated; confirm against the full source.

        @param pdbFile: line source; only its readLine() method is used here
        @type  pdbFile: object with readLine()
        @param firstLine: first line to examine; indexed as line[0]
                          (record name) and line[1] (remainder of the line)
        @type  firstLine: sequence
        """
        line = firstLine
        biomtDict = {}
        moleculeNum = -1    # -1 .. no BIOMOLECULE record seen yet

        while line[0] == 'REMARK' and line[1].startswith(' 350'):
            # 5 = len(' 350 ')
            biomtLine = line[1][5:].lstrip()
            if biomtLine.startswith('BIOMOLECULE:'):  # start a new molecule

                if moleculeNum != -1:
                    # store what has been collected for the previous molecule
                    biomtDict[moleculeNum] = (targetChains, rtList)

                # 12 = len('BIOMOLECULE:')
                moleculeNum = int(biomtLine[12:].strip())
                targetChains = []
                rotation = []
                translation = []
                rtList = []

                matrixLine = 0

            if biomtLine.startswith('APPLY THE FOLLOWING TO CHAINS:'):
                # parse targeted chains, we assume this comes after BIOMOLECULE line
                # 30 = len('APPLY THE FOLLOWING TO CHAINS:')
                targetChains.extend(c.strip()
                                    for c in biomtLine[30:].split(','))
            if biomtLine.startswith('AND CHAINS:'):
                # continuation of the chain list
                # 11 = len('AND CHAINS:')
                targetChains.extend(c.strip()
                                    for c in biomtLine[11:].split(','))

            if biomtLine.startswith('BIOMT'):
                # parse rotate-translate matri{x/ces}, we assume this comes after BIOMOLECULE line
                matrixLine += 1
                # 6 = len('BIOMT#')
                rawCoords = biomtLine[6:].split()
                # fields 1-3 form one rotation row, field 4 the translation
                rotation.append([float(x) for x in rawCoords[1:4]])
                translation.append(float(rawCoords[4]))
                if matrixLine % 3 == 0:
                    # three rows collected: append the translation as a
                    # fourth column and store the resulting 3 x 4 matrix
                    rotation = N0.array(rotation)
                    translation = N0.transpose([translation])
                    rotation = N0.concatenate((rotation, translation), axis=1)
                    rtList.append(N0.array(rotation))
                    ## rtList.append((rotation,translation))
                    rotation = []
                    translation = []

            try:
                line = pdbFile.readLine()
            except ValueError, what:
                # NOTE(review): neither `i` nor `fname` is defined in this
                # method; unless they exist as module globals this handler
                # raises NameError instead of logging -- TODO confirm.
                # NOTE(review): `line` is unchanged when readLine() fails, so
                # the `continue` re-processes the previous line -- verify.
                self.log.add('Warning: Error parsing line %i of %s' %
                             (i, T.stripFilename(fname)))
                self.log.add('\tError: ' + str(what))
                continue

예제 #12

0

파일 보기

    def error(self, msm, d2):
        """
        Weighted clustering error: the trace of the product of the
        membership matrix raised to the power self.w with the transposed
        distance matrix.

        @param msm: membership matrix
        @type  msm: array('f')
        @param d2: distance from data to the centers
        @type  d2: array('f')

        @return: weighted error
        @rtype: float
        """
        weighted = N0.power(msm, self.w)
        return N0.trace(N0.dot(weighted, N0.transpose(d2)))

예제 #13

0

파일 보기

    def create_membership_matrix(self):
        """
        Create a random membership matrix. The random generator is seeded
        with (self.seedx, self.seedy) unless one of the two is 0, in which
        case it is seeded without arguments.

        A (npoints x n_cluster) array of random samples is divided by its
        N0.sum and returned transposed.
        NOTE(review): which axis N0.sum normalises over depends on its
        default axis -- TODO confirm.

        @return: random membership array, transposed with respect to the
                 (npoints x n_cluster) sample array
        @rtype: array('f')
        """
        ## default signature has changed oldnumeric->numpy
        use_fixed_seed = not (self.seedx == 0 or self.seedy == 0)

        if use_fixed_seed:
            R.seed((self.seedx, self.seedy))
        else:
            R.seed()

        raw = R.random_sample((self.npoints, self.n_cluster))
        normalized = raw / N0.sum(raw)

        return N0.transpose(normalized)

예제 #14

0

파일 보기

파일: ComplexRandomizer.py 프로젝트: tybiot/biskit

    def __random_matrix( self ):
        """
        Combine a random rotation and a random translation into a single
        square transformation matrix.

        @return: 4 x 4 array of float, random rotation and translation matrix
        @rtype: array
        """
        rot = ma.randomRotation()
        trans = self.__random_translation()

        ## 3 x 4 matrix: columns 0:3 hold the rotation, column 3 the
        ## translation
        trans_column = N0.transpose( [ trans.tolist() ] )
        top = N0.concatenate( (rot, trans_column), 1 )

        ## make it square by appending the row 0 0 0 1
        bottom = N0.array( [[0,0,0,1]], N0.Float32 )

        return N0.concatenate( (top, bottom), 0 )

예제 #15

0

파일 보기

파일: msms.py 프로젝트: graik/biskit

    def prepare( self ):
        """
        Write a xyzrn coordinate file to disc.
        Overrides Executor method.

        One line is written per row of the model coordinates: the three
        coordinates and the radius, followed by ' 1 ' and the matching
        name from Pdb2xyzrn.
        """
        ## get radii and name array
        p2x = Pdb2xyzrn(self.model, verbose=self.verbose, debug=self.debug )
        r, n = p2x.run()

        ## append the radii as an additional column to the coordinates
        xyz = self.model.xyz
        xyzr = N0.concatenate( ( xyz, N0.transpose([r]) ) ,axis=1 )

        f = open( self.f_xyzrn, 'w' )
        try:
            for i, row in enumerate( xyzr ):
                ## Format every number explicitly. Slicing the printed form
                ## of the array (str(row)[2:-1]) depends on how arrays are
                ## printed and can cut off a minus sign or a leading digit.
                numbers = ' '.join( [ '%f' % value for value in row ] )
                f.write( numbers + ' 1 ' + n[i] + '\n' )
        finally:
            ## release the file handle even if writing fails
            f.close()

예제 #16

0

파일 보기

    def prepare( self ):
        """
        Write a xyzrn coordinate file to disc.
        Overrides Executor method.

        One line is written per row of the model coordinates: the three
        coordinates and the radius, followed by ' 1 ' and the matching
        name from Pdb2xyzrn.
        """
        ## get radii and name array
        p2x = Pdb2xyzrn(self.model, verbose=self.verbose, debug=self.debug )
        r, n = p2x.run()

        ## append the radii as an additional column to the coordinates
        xyz = self.model.xyz
        xyzr = N0.concatenate( ( xyz, N0.transpose([r]) ) ,axis=1 )

        f = open( self.f_xyzrn, 'w' )
        try:
            for i, row in enumerate( xyzr ):
                ## Format every number explicitly. Slicing the printed form
                ## of the array (str(row)[2:-1]) depends on how arrays are
                ## printed and can cut off a minus sign or a leading digit.
                numbers = ' '.join( [ '%f' % value for value in row ] )
                f.write( numbers + ' 1 ' + n[i] + '\n' )
        finally:
            ## release the file handle even if writing fails
            f.close()

예제 #17

0

파일 보기

    def transform( self, *rt ):
        """
        Apply given transformation to all frames (in place). Every frame
        is replaced by dot( frame, transpose(r) ) + t.

        @param rt: either a single combined matrix, from which the rotation
                   is read as [0:3, 0:3] and the translation as [0:3, 3],
                   or two arguments: rotation and translation
        @type  rt: array( 4 x 4 ) OR array(3 x 3), array(3 x 1)
        """
        if len(rt) == 2:
            rot, trans = rt
        else:
            ## split the combined matrix into its two parts
            combined = rt[0]
            rot = combined[0:3,0:3]
            trans = combined[0:3, 3]

        rot = N0.transpose( rot ).astype(N0.Float32)
        trans = trans.astype(N0.Float32)

        for idx, frame in enumerate( self.frames ):
            self.frames[ idx ] = N0.array( N0.dot( frame, rot ) ) + trans

예제 #18

0

파일 보기

파일: Complex.py 프로젝트: tybiot/biskit

    def rtTuple2matrix(self, r, t):
        """
        Put rotation matrix and translation vector into a single 4x4 matrix.

        @param r: rotation matrix, array 3x3 of float
        @type  r: array
        @param t: translation vector, array 1x3 of float
        @type  t: vector

        @return: rotation/translation matrix, array 4x4 of float
        @rtype: array
        """
        ## 3 x 4 matrix: columns 0:3 hold the rotation, column 3 the
        ## translation
        t_column = N0.transpose([t.tolist()])
        upper = N0.concatenate((r, t_column), 1)

        ## make it square by appending the row 0 0 0 1
        last_row = N0.array([[0, 0, 0, 1]], N0.Float32)
        square = N0.concatenate((upper, last_row), 0)

        return square.astype(N0.Float32)

예제 #19

0

파일 보기

파일: rmsFit.py 프로젝트: tybiot/biskit

def rowDistances(x, y):
    """
    Distances between corresponding rows of two arrays (of same shape)
    after y has been superimposed onto x with the transformation found by
    findTransformation.

    @param x: first set of coordinates
    @type  x: array('f')
    @param y: second set of coordinates
    @type  y: array('f')

    @return: array( len(x), 'f' ), distance between x[i] and y[i] for all i
    @rtype: array
    """
    ## transformation for the best match
    rotation, translation = findTransformation(x, y)

    ## move y onto x
    fitted = N0.dot(y, N0.transpose(rotation)) + translation

    ## length of the difference vector of every row
    squared = N0.power(x - fitted, 2)
    return N0.sqrt(N0.sum(squared, 1))

예제 #20

0

파일 보기

파일: ReduceCoordinates.py 프로젝트: tybiot/biskit

    def reduceXyz(self, xyz, axis=0):
        """
        Reduce the number of atoms in the given coordinate set by replacing
        every atom group in self.groups with its mass-weighted center. The
        set must have the same length and order as the reference model. It
        may have an additional (time) dimension as first axis.

        @param xyz: coordinates (N_atoms x 3) or (N_frames x N_atoms x 3)
        @type  xyz: array
        @param axis: axis with atoms (default: 0)
        @type  axis: int

        @return: coordinate array (N_less_atoms x 3) or
                 (N_frames x N_less_atoms x 3); None if there are no groups
        @rtype: array
        """
        masses = self.m.atoms.get('mass')
        centers = []

        for atom_indices in self.groups:

            x = N0.take(xyz, atom_indices, axis)
            m = N0.take(masses, atom_indices)

            ## masses as a column so that they scale each atom's coordinates
            center = N0.sum(x * N0.transpose([m, ]), axis=axis) / N0.sum(m)

            ## re-insert the atom axis that the sum has removed
            if axis == 0:
                center = center[N0.NewAxis, :]

            if axis == 1:
                center = center[:, N0.NewAxis, :]

            centers.append(center)

        if not centers:
            return None

        ## join all centers in one step instead of re-copying the growing
        ## result once per group
        return N0.concatenate(centers, axis)

예제 #21

0

파일 보기

    def thin(self, step=1):
        """
        Keep only each step'th frame of every ensemble member.

        @param step: 1..keep all frames, 2..skip first and every second, ..
                     (default: 1)
        @type  step: int

        @return: reduced EnsembleTraj
        @rtype: EnsembleTraj
        """
        T.ensure(step, int, forbidden=[0])

        ## n_members x frames-per-member, frame indices of each member
        per_member = N0.array(
            [self.memberIndices(m) for m in range(self.n_members)])

        ## positions step-1, 2*step-1, .. within each member
        n_per_member = N0.shape(per_member)[1]
        keep = range(-1, n_per_member, step)[1:]
        per_member = N0.take(per_member, keep, 1)

        ## interleave the members again before taking the frames
        return self.takeFrames(N0.ravel(N0.transpose(per_member)))

예제 #22

0

파일 보기

 def clusterEntropy(self):
     """
     Diagonal of dot( msm, transpose( log(msm) ) ), multiplied by
     -1 / npoints.

     @return: one value per row of self.msm
     @rtype: array
     """
     log_msm = N0.log(self.msm)
     centropy = N0.diagonal(N0.dot(self.msm, N0.transpose(log_msm)))
     scale = -1/float(self.npoints)
     return scale*centropy

예제 #23

0

파일 보기

 def calc_cluster_center(self, msm):
     """
     Weighted averages of self.data: the weights are msm raised to the
     power self.w, and every weighted sum is divided by the total weight
     of its row.

     @param msm: membership matrix
     @type  msm: array

     @return: weighted centers, one row per row of msm
     @rtype: array
     """
     weights = N0.power(msm, self.w)
     weighted_sums = N0.dot(weights, self.data)
     totals = N0.sum(weights, 1)
     return N0.transpose(N0.transpose(weighted_sums) / totals)

예제 #24

0

파일 보기

    def fit( self, mask=None, ref=None, n_it=1,
             prof='rms', verbose=1, fit=1, **profInfos ):
        """
        Superimpose every coordinate frame onto reference coordinates and
        store the rms value of each frame in a profile. For n_it != 1 the
        fraction of atoms considered in the last fit iteration is stored
        in a second profile called |prof|_considered (i.e. by default
        'rms_considered').

        @param mask: atom mask, atoms to consider default: [all]
        @type  mask: [1|0]
        @param ref: use as reference, default: None, average Structure
        @type  ref: PDBModel
        @param n_it: number of fit iterations, kicking out outliers on the way
                     1 -> classic single fit, 0 -> until convergence
                     (default: 1)
        @type  n_it: int
        @param prof: save rms per frame in profile of this name, ['rms']
        @type  prof: str
        @param verbose: print progress info to STDERR (default: 1)
        @type  verbose: 1|0
        @param fit: transform frames after match, otherwise just calc rms
                    (default: 1)
        @type  fit: 1|0
        @param profInfos: additional key=value pairs for rms profile info []
        @type profInfos: key=value
        """
        if ref is not None:
            refxyz = ref.getXyz()
        else:
            ## no reference given: fit onto the average of all frames
            refxyz = N0.average( self.frames, 0 )

        if mask is None:
            mask = N0.ones( len( refxyz ), N0.Int32 )

        refxyz = N0.compress( mask, refxyz, 0 )

        if verbose: T.errWrite( "rmsd fitting..." )

        rms = []          ## rms value of each frame
        non_outliers = [] ## fraction of atoms considered for rms and fit
        iterations = []   ## number of iterations performed on each frame

        single_fit = (n_it == 1)

        for i, xyz in enumerate( self.frames ):

            selected = N0.compress( mask, xyz, 0 )

            if single_fit:
                r, t = rmsFit.findTransformation( refxyz, selected )

                xyz_transformed = N0.dot( xyz, N0.transpose(r)) + t

                ## distance of every considered atom to its reference
                moved = N0.compress( mask, xyz_transformed, 0 )
                d = N0.sqrt( N0.sum( N0.power( moved - refxyz, 2 ), 1 ) )

                rms.append( N0.sqrt( N0.average(d**2) ) )

            else:
                ## iterative fit; the last trace entry describes the
                ## final iteration
                (r, t), rmsdList = rmsFit.match( refxyz, selected, n_it )
                iterations.append( len( rmsdList ) )
                non_outliers.append( rmsdList[-1][0] )

                xyz_transformed = N0.dot( xyz, N0.transpose(r)) + t

                rms.append( rmsdList[-1][1] )

            if fit:
                self.frames[i] = xyz_transformed.astype(N0.Float32)

            if verbose and i%100 == 0:
                T.errWrite( '#' )

        self.setProfile( prof, rms, n_iterations=n_it, **profInfos )

        if non_outliers:
            self.setProfile( prof+'_considered', non_outliers,
                             n_iterations=n_it,
                             comment='fraction of atoms considered for iterative fit' )

        if verbose: T.errWrite( 'done\n' )

예제 #25

0

파일 보기

    def parse_result(self):
        """
        Extract some information about the profile as well as the
        match state emission scores. Keys of the returned dictionary::
          'AA', 'name', 'NrSeq', 'emmScore', 'accession',
          'maxAllScale', 'seqNr', 'profLength', 'ent', 'absSum'

        @return: dictionary with various information about the profile
        @rtype: dict

        @raise HmmerError: if the result file is missing or has fewer
                           than 10 lines/characters as reported by
                           T.fileLength
        """
        ## check that the output file is there and seems valid
        if not os.path.exists(self.f_out):
            raise HmmerError(
                'Hmmerfetch result file %s does not exist.' % self.f_out)

        if T.fileLength(self.f_out) < 10:
            raise HmmerError(
                'Hmmerfetch result file %s seems incomplete.' % self.f_out)

        profileDic = {}

        ## read result; close the file even if reading fails
        hmm = open(self.f_out, 'r')
        try:
            out = hmm.read()
        finally:
            hmm.close()

        ## collect some data about the hmm profile
        ## (each value is the second field of the first matching record)
        profileDic['name'] = self.hmmName
        profileDic['profLength'] = \
            int(re.findall(r'LENG\s+[0-9]+', out)[0].split()[1])
        profileDic['accession'] = \
            re.findall(r'ACC\s+PF[0-9]+', out)[0].split()[1]
        profileDic['NrSeq'] = \
            int(re.findall(r'NSEQ\s+[0-9]+', out)[0].split()[1])
        profileDic['AA'] = \
            re.findall('HMM[ ]+' + '[A-Y][ ]+' * 20, out)[0].split()[1:]

        ## collect null emission scores
        pattern = 'NULE[ ]+' + '[-0-9]+[ ]+' * 20
        nullEmm = [float(j) for j in re.findall(pattern, out)[0].split()[1:]]

        ## get emission scores: one row per profile position, consisting of
        ## the position number followed by 20 scores
        prob = []
        for i in range(1, profileDic['profLength'] + 1):
            pattern = "[ ]+%i" % i + "[ ]+[-0-9]+" * 20
            prob.append([float(j) for j in re.findall(pattern, out)[0].split()])

        profileDic['seqNr'] = N0.transpose(N0.take(prob, (0, ), 1))
        profileDic['emmScore'] = N0.array(prob)[:, 1:]

        ## calculate emission probabilities
        emmProb, nullProb = self.hmmEmm2Prob(nullEmm, profileDic['emmScore'])

        ent = [
            N0.resize(self.entropy(e, nullProb), (1, 20))[0] for e in emmProb
        ]
        profileDic['ent'] = N0.array(ent)

        ###### TEST #####

        proba = N0.array(prob)[:, 1:]

        ## every position filled with the sum of its absolute scores
        profileDic['absSum'] = [
            N0.resize(N0.sum(N0.absolute(row)), N0.shape(row))
            for row in proba
        ]

        ## every position filled with its largest normalized score
        ## (math is presumably a project module providing SD -- the
        ## standard library math module has no SD; TODO confirm)
        maxAllScale = []
        for row in proba:
            scaled = (row - N0.average(row)) / math.SD(row)
            maxAllScale.append(
                N0.resize(scaled[N0.argmax(N0.array(scaled))], N0.shape(row)))
        profileDic['maxAllScale'] = maxAllScale

        return profileDic

예제 #26

0

파일 보기

파일: rmsFit.py 프로젝트: tybiot/biskit

def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing outliers.
    Superimposed array y would be C{ N0.dot(y, N0.transpose(r)) + t }.

    In every round the rows whose distance is not below rmsd + z * stdv
    are excluded from the next fit. The loop ends once both the change in
    rmsd and the relative change in stdv fall below their tolerances, or
    after n_iterations rounds if n_iterations is not 0.

    @param x: first set of coordinates
    @type  x: array
    @param y: second set of coordinates
    @type  y: array
    @param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
    @type  n_iterations: int
    @param z: number of standard deviations for outlier definition (default: 2)
    @type  z: float
    @param eps_rmsd: tolerance in rmsd (default: 0.5)
    @type  eps_rmsd: float
    @param eps_stdv: tolerance in standard deviations (default: 0.05)
    @type  eps_stdv: float

    @return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ]
    @rtype: (array, array), [float, float, int]
    """
    iter_trace = []

    prev_rmsd = 0
    prev_stdv = 0
    rounds = 0

    mask = N0.ones(len(y), N0.Int32)

    while True:

        ## find transformation for best match of the rows still considered
        r, t = findTransformation(N0.compress(mask, x, 0),
                                  N0.compress(mask, y, 0))

        ## transform all coordinates
        xt = N0.dot(y, N0.transpose(r)) + t

        ## row distances, zeroed for rows that are masked out
        d = N0.sqrt(N0.sum(N0.power(x - xt, 2), 1)) * mask

        ## rmsd and stdv over the considered rows only
        kept = N0.compress(mask, d)
        rmsd = N0.sqrt(N0.average(kept**2))
        stdv = MU.SD(kept)

        ## check conditions for convergence
        rmsd_shift = abs(rmsd - prev_rmsd)
        stdv_shift = abs(1 - prev_stdv / stdv)
        converged = rmsd_shift < eps_rmsd and stdv_shift < eps_stdv

        if not converged:
            prev_rmsd = rmsd
            prev_stdv = stdv

        ## fraction of rows that went into this round
        perc = round(float(N0.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows
        mask = N0.logical_and(mask, N0.less(d, rmsd + z * stdv))
        outliers = N0.nonzero(N0.logical_not(mask))
        iter_trace.append([perc, round(rmsd, 3), outliers])

        rounds += 1

        if converged or (n_iterations and rounds >= n_iterations):
            break

    return (r, t), iter_trace

예제 #27

0

파일 보기

파일: Hmmer.py 프로젝트: graik/biskit

    def parse_result( self ):
        """
        Extract some information about the profile as well as the
        match state emission scores. Keys of the returned dictionary::
          'AA', 'name', 'NrSeq', 'emmScore', 'accession',
          'maxAllScale', 'seqNr', 'profLength', 'ent', 'absSum'

        @return: dictionary with various information about the profile
        @rtype: dict

        @raise HmmerError: if the result file is missing or has fewer
                           than 10 lines/characters as reported by
                           T.fileLength
        """
        ## check that the output file is there and seems valid
        if not os.path.exists( self.f_out ):
            raise HmmerError(
                'Hmmerfetch result file %s does not exist.'%self.f_out )

        if T.fileLength( self.f_out ) < 10:
            raise HmmerError(
                'Hmmerfetch result file %s seems incomplete.'%self.f_out )

        profileDic = {}

        ## read result; close the file even if reading fails
        hmm = open( self.f_out, 'r')
        try:
            out = hmm.read()
        finally:
            hmm.close()

        ## collect some data about the hmm profile
        ## (each value is the second field of the first matching record)
        profileDic['name'] =  self.hmmName
        profileDic['profLength'] = \
                  int( re.findall(r'LENG\s+[0-9]+', out)[0].split()[1] )
        profileDic['accession'] = \
                  re.findall(r'ACC\s+PF[0-9]+', out)[0].split()[1]
        profileDic['NrSeq'] = \
                  int( re.findall(r'NSEQ\s+[0-9]+', out)[0].split()[1] )
        profileDic['AA'] = \
              re.findall('HMM[ ]+' + '[A-Y][ ]+'*20, out)[0].split()[1:]

        ## collect null emission scores
        pattern = 'NULE[ ]+' + '[-0-9]+[ ]+'*20
        nullEmm = [ float(j) for j in re.findall(pattern, out)[0].split()[1:] ]

        ## get emission scores: one row per profile position, consisting of
        ## the position number followed by 20 scores
        prob=[]
        for i in range(1, profileDic['profLength']+1):
            pattern = "[ ]+%i"%i + "[ ]+[-0-9]+"*20
            prob.append( [ float(j)
                           for j in re.findall(pattern, out)[0].split() ] )

        profileDic['seqNr'] = N0.transpose( N0.take( prob, (0,),1 ) )
        profileDic['emmScore'] = N0.array(prob)[:,1:]

        ## calculate emission probabilities
        emmProb, nullProb = self.hmmEmm2Prob( nullEmm, profileDic['emmScore'])

        ent = [ N0.resize( self.entropy(e, nullProb), (1,20) )[0] for e in emmProb ]
        profileDic['ent'] = N0.array(ent)

        ###### TEST #####

        proba = N0.array(prob)[:,1:]

        ## every position filled with the sum of its absolute scores
        profileDic['absSum'] = [
            N0.resize( N0.sum( N0.absolute( row )), N0.shape( row ) )
            for row in proba ]

        ## every position filled with its largest normalized score
        ## (math is presumably a project module providing SD -- the
        ## standard library math module has no SD; TODO confirm)
        maxAllScale = []
        for row in proba:
            scaled = ( row - N0.average(row) )/ math.SD(row)
            maxAllScale.append(
                N0.resize( scaled[N0.argmax( N0.array(scaled) )],
                           N0.shape( row ) ) )
        profileDic['maxAllScale'] = maxAllScale

        return profileDic

예제 #28

0

파일 보기

파일: Complex.py 프로젝트: tybiot/biskit

    def __alignMatrixDimension(self, cm, thisSeq, castSeq, axis=0):
        """
        Adapt one dimension of a contact matrix to another sequence by
        removing and inserting columns, so that it can later be compared to
        contact matrices based on slightly different sequences. Only the
        'delete' and 'insert' operations reported by SequenceMatcher for
        thisSeq -> castSeq are applied; inserted columns are filled with 0.

        @param cm: contact matrix, 2D matrix of residue contacts
                   receptor x ligand sequence
        @type  cm: array
        @param thisSeq: AA sequence of this dimension of the contactMatrix
        @type  thisSeq: string
        @param castSeq: AA sequence of this dimension in the other contact
        @type  castSeq: string
        @param axis: which dimension to adapt (0=receptor, 1=ligand)
        @type  axis: 1|0

        @return: contact matrix with residue contacts compatible to castSeq.
        @rtype: 2D array
        """
        ## edit operations turning thisSeq into castSeq
        opcodes = SequenceMatcher(None, thisSeq, castSeq).get_opcodes()

        ## always work on columns; flip the matrix if rows are to be adapted
        transposed = not axis
        if transposed:
            cm = N0.transpose(cm)

        offset = 0  # column shift caused by the edits applied so far

        for tag, i1, i2, j1, j2 in opcodes:

            if tag == 'delete':
                ## cut out the columns i1:i2 of the original numbering
                before = cm[:, :i1 + offset]
                after = cm[:, i2 + offset:]
                cm = N0.concatenate((before, after), 1)
                offset = offset + i1 - i2

            elif tag == 'insert':
                ## block of zeros, as wide as the inserted stretch
                width = j2 - j1
                filler = N0.array([[0] * width] * N0.size(cm, 0))
                before = cm[:, :i1 + offset]
                after = cm[:, i2 + offset:]
                cm = N0.concatenate((before, filler, after), 1)
                offset = offset + width

        if transposed:
            return N0.transpose(cm)
        return cm