def squared_distance_matrix(x, y):
    """
    Squared Euclidean distances between all rows of x and all rows of y.

    Uses |a - b|^2 = |a|^2 + |b|^2 - 2 a.b. The squared row norms are
    summed directly from the rows instead of being read off the diagonal
    of the full x.x^T and y.y^T products, which avoids two temporaries of
    size len(x)^2 and len(y)^2. Values are subject to floating point
    round-off (entries for coinciding rows may be tiny non-zero numbers).

    :param x: first set of row vectors (n x d)
    :type  x: array
    :param y: second set of row vectors (m x d)
    :type  y: array

    :return: n x m array, entry [i, j] is the squared distance x[i] - y[j]
    :rtype: array
    """
    x = N0.asarray(x)
    y = N0.asarray(y)

    ## squared length of every row
    d1 = N0.sum(x * x, 1)
    d2 = N0.sum(y * y, 1)

    a1 = N0.add.outer(d1, d2)          ## |x_i|^2 + |y_j|^2
    a2 = N0.dot(x, N0.transpose(y))    ## x_i . y_j

    return a1 - 2 * a2
def squared_distance_matrix(x, y):
    """
    Squared Euclidean distances between all rows of x and all rows of y.

    Uses |a - b|^2 = |a|^2 + |b|^2 - 2 a.b. The squared row norms are
    summed directly from the rows instead of being read off the diagonal
    of the full x.x^T and y.y^T products, which avoids two temporaries of
    size len(x)^2 and len(y)^2. Values are subject to floating point
    round-off (entries for coinciding rows may be tiny non-zero numbers).

    :param x: first set of row vectors (n x d)
    :type  x: array
    :param y: second set of row vectors (m x d)
    :type  y: array

    :return: n x m array, entry [i, j] is the squared distance x[i] - y[j]
    :rtype: array
    """
    x = N0.asarray(x)
    y = N0.asarray(y)

    ## squared length of every row
    d1 = N0.sum(x * x, 1)
    d2 = N0.sum(y * y, 1)

    a1 = N0.add.outer(d1, d2)          ## |x_i|^2 + |y_j|^2
    a2 = N0.dot(x, N0.transpose(y))    ## x_i . y_j

    return a1 - 2 * a2
def test_plot(self):
    """gnuplot.plot test"""
    # Alternative calls that were left commented out:
    #   List of (x, y) pairs
    #     plot([(0.,1),(1.,5),(2.,3),(3.,4)])
    #     plot( zip( range(10), range(10) ) )
    import biskit.core.oldnumeric as N0

    # Two plots; each given by a 2d array with one (x, y) pair per row
    xs = N0.arange(10)
    rising = xs**2
    falling = (10 - xs)**2

    first_curve = N0.transpose(N0.array([xs, rising]))
    second_curve = N0.transpose(N0.array([xs, falling]))

    plot(first_curve, second_curve)
def test_plot(self):
    """gnuplot.plot test"""
    # Alternative calls that were left commented out:
    #   List of (x, y) pairs
    #     plot([(0.,1),(1.,5),(2.,3),(3.,4)])
    #     plot( zip( range(10), range(10) ) )
    import biskit.core.oldnumeric as N0

    # Two plots; each given by a 2d array with one (x, y) pair per row
    xs = N0.arange(10)
    rising = xs**2
    falling = (10 - xs)**2

    first_curve = N0.transpose(N0.array([xs, rising]))
    second_curve = N0.transpose(N0.array([xs, falling]))

    plot(first_curve, second_curve)
def __pairwiseDistances(self, u, v):
    """
    Pairwise Euclidean distances between two sets of atom coordinates.

    @param u: coordinates, one point per row
    @type  u: array
    @param v: coordinates, one point per row
    @type  v: array

    @return: Numpy array len(u) x len(v)
    @rtype: array

    @raise ComplexError: if u or v is not an array

    @author: Wolfgang Rieping.
    """
    ## check input (isinstance also lets array subclasses through)
    if not (isinstance(u, arraytype) and isinstance(v, arraytype)):
        raise ComplexError('unsupported argument type ' +
                           str(type(u)) + ' or ' + str(type(v)))

    ## squared length of every coordinate, summed row by row; this avoids
    ## building the len(u)^2 and len(v)^2 products just for their diagonal
    sq_u = N0.sum(u * u, 1)
    sq_v = N0.sum(v * v, 1)

    ## |u_i - v_j|^2 = |u_i|^2 + |v_j|^2 - 2 u_i.v_j
    sq_dist = N0.add.outer(sq_u, sq_v) - 2 * N0.dot(u, N0.transpose(v))

    ## round-off can push the value for (nearly) identical points slightly
    ## below zero, which would turn into NaN under the square root
    return N0.sqrt(N0.maximum(sq_dist, 0))
def __pairwiseDistances(self, u, v):
    """
    Pairwise Euclidean distances between two sets of atom coordinates.

    @param u: coordinates, one point per row
    @type  u: array
    @param v: coordinates, one point per row
    @type  v: array

    @return: Numpy array len(u) x len(v)
    @rtype: array

    @raise ComplexError: if u or v is not an array

    @author: Wolfgang Rieping.
    """
    ## check input (isinstance also lets array subclasses through)
    if not (isinstance(u, arraytype) and isinstance(v, arraytype)):
        raise ComplexError('unsupported argument type ' +
                           str(type(u)) + ' or ' + str(type(v)))

    ## squared length of every coordinate, summed row by row; this avoids
    ## building the len(u)^2 and len(v)^2 products just for their diagonal
    sq_u = N0.sum(u * u, 1)
    sq_v = N0.sum(v * v, 1)

    ## |u_i - v_j|^2 = |u_i|^2 + |v_j|^2 - 2 u_i.v_j
    sq_dist = N0.add.outer(sq_u, sq_v) - 2 * N0.dot(u, N0.transpose(v))

    ## round-off can push the value for (nearly) identical points slightly
    ## below zero, which would turn into NaN under the square root
    return N0.sqrt(N0.maximum(sq_dist, 0))
def contactResDistribution(self, cm=None):
    """
    Count how often each residue type occurs among the residues that
    take part in the protein-protein interface.

    @param cm: pre-calculated contact matrix; taken from resContacts()
               if not given (default: None)
    @type  cm: matrix

    @return: dict {'A':3, 'C':1, .. } with one entry for every residue
             letter returned by molUtils.allAA()
    @rtype: dict
    """
    contacts = self.resContacts() if cm is None else cm

    ## non-zero where a residue has at least one contact
    ## NOTE(review): relies on the default axis of N0.sum -- presumably
    ## axis 0 as in old Numeric; confirm against biskit.core.oldnumeric
    lig_mask = N0.sum(contacts)
    rec_mask = N0.sum(N0.transpose(contacts))

    ## keep the sequence letters of contacting residues only
    lig_letters = N0.compress(lig_mask, self.lig().sequence())
    rec_letters = N0.compress(rec_mask, self.rec().sequence())

    ## convert back to one string
    interface_seq = ''.join(lig_letters) + ''.join(rec_letters)

    ## count occurrence of letters
    return {aa: interface_seq.count(aa) for aa in molUtils.allAA()}
def __findTransformation(self, x, y):
    """
    Find the rotation and translation that superimpose y onto x.

    Back transformation:
    for atom i new coordinates will be::
        y_new[i] = N0.dot(r, y[i]) + t

    for all atoms in one step::
        y_new = N0.dot(y, N0.transpose(r)) + t

    @param x: coordinates
    @type  x: array
    @param y: coordinates
    @type  y: array

    @return: rotation matrix, translation vector
    @rtype: array, array

    @author: Michael Habeck
    """
    from numpy.linalg import svd

    ## shift both configurations to their centers
    ## NOTE(review): N0.sum is called without axis -- presumably sums over
    ## axis 0 (old Numeric default); confirm against biskit.core.oldnumeric
    center_x = N0.sum(x) / len(x)
    center_y = N0.sum(y) / len(y)

    x_centered = x - center_x
    y_centered = y - center_y

    ## singular value decomposition of the correlation matrix
    correlation = N0.dot(N0.transpose(x_centered), y_centered)
    left, _singular_values, right = svd(correlation)

    ## rotation from the two orthogonal factors, translation from the centers
    rotation = N0.dot(left, right)
    translation = center_x - N0.dot(rotation, center_y)

    return rotation, translation
def findTransformation(x, y):
    """
    Find the rotation and translation that superimpose y onto x.
    The superimposed y is C{ N0.dot(y, N0.transpose(r)) + t }.

    :param x: first set of coordinates
    :type  x: array('f')
    :param y: second set of coordinates
    :type  y: array('f')

    :return: rotation matrix (3x3) and translation vector (1x3)
    :rtype:  array, array
    """
    ## shift both configurations to their centers
    ## NOTE(review): N0.average is called without axis -- presumably
    ## averages over axis 0; confirm against biskit.core.oldnumeric
    center_x = N0.average(x)
    center_y = N0.average(y)

    x_centered = x - center_x
    y_centered = y - center_y

    ## singular value decomposition of the correlation matrix
    correlation = N0.dot(N0.transpose(x_centered), y_centered)
    left, _singular_values, right = svd(correlation)

    ## rotation from the two orthogonal factors, translation from the centers
    rotation = N0.dot(left, right)
    translation = center_x - N0.dot(rotation, center_y)

    return rotation, translation
def takeMembers(self, mIndices):
    """
    Extract all frames that belong to the given members::
      takeMembers( mIndices ) -> EnsembleTraj with frames of given members

    :param mIndices: list of member indices
    :type  mIndices: [int] OR array('i')

    :return: EnsembleTraj with specified members
    :rtype: EnsembleTraj

    :raise EnsembleTrajError: if a TypeError occurs while collecting frames

    @todo: return self.__class__ instead of EnsembleTraj
    """
    try:
        ## assumes that each member traj has same number of frames
        per_member = [self.memberIndices(m) for m in mIndices]

        ## transpose + ravel orders the frame indices frame by frame
        frame_idx = N0.ravel(N0.transpose(N0.array(per_member)))

        member_count = len(mIndices)

        ## has wrong n_members and member order
        picked = self.takeFrames(frame_idx)

        result = EnsembleTraj(n_members=member_count)
        result.__dict__.update(picked.__dict__)
        result.n_members = member_count
        result.resetFrameNames()

        return result

    except TypeError:
        head = 'takeMembers TypeError ' + str(mIndices)
        tail = "\nlenFrames: %i; n_members: %i" % (len(self), self.n_members)
        raise EnsembleTrajError(head + tail)
def histogram(data, nbins, range=None):
    """
    Create a histogram.
    Comes from Konrad Hinsen: Scientific Python

    :param data: data list or array
    :type  data: [any]
    :param nbins: number of bins
    :type  nbins: int
    :param range: data range to create histogram from (min val, max val);
                  values outside this range are ignored. By default the
                  smallest and largest value of data are used.
    :type  range: (float, float) OR None

    :return: array (nbins x 2); column 0 holds the center of each bin,
             column 1 the number of values that fall into it
    :rtype: array
    """
    data = N0.array(data, N0.Float)

    if range is None:
        lower = N0.minimum.reduce(data)
        upper = N0.maximum.reduce(data)
    else:
        lower, upper = range
        ## repeat() with a 0/1 count per item drops the out-of-range values
        inside = N0.logical_and(N0.less_equal(data, upper),
                                N0.greater_equal(data, lower))
        data = N0.repeat(data, inside)

    bin_width = (upper - lower) / nbins

    ## replace every value by the index of its bin
    data = N0.floor((data - lower) / bin_width).astype(N0.Int)

    ## compare each bin index (one per row) with all values and count hits
    histo = N0.add.reduce(N0.equal(N0.arange(nbins)[:, N0.NewAxis], data), -1)

    ## values equal to the upper limit get index nbins; count them in the
    ## last bin
    histo[-1] = histo[-1] + N0.add.reduce(N0.equal(nbins, data))

    bins = lower + bin_width * (N0.arange(nbins) + 0.5)

    return N0.transpose(N0.array([bins, histo]))
def contactResDistribution(self, cm=None):
    """
    Count how often each residue type occurs among the residues that
    take part in the protein-protein interface.

    @param cm: pre-calculated contact matrix; taken from resContacts()
               if not given (default: None)
    @type  cm: matrix

    @return: dict {'A':3, 'C':1, .. } with one entry for every residue
             letter returned by molUtils.allAA()
    @rtype: dict
    """
    contacts = self.resContacts() if cm is None else cm

    ## non-zero where a residue has at least one contact
    ## NOTE(review): relies on the default axis of N0.sum -- presumably
    ## axis 0 as in old Numeric; confirm against biskit.core.oldnumeric
    lig_mask = N0.sum(contacts)
    rec_mask = N0.sum(N0.transpose(contacts))

    ## keep the sequence letters of contacting residues only
    lig_letters = N0.compress(lig_mask, self.lig().sequence())
    rec_letters = N0.compress(rec_mask, self.rec().sequence())

    ## convert back to one string
    interface_seq = ''.join(lig_letters) + ''.join(rec_letters)

    ## count occurrence of letters
    return {aa: interface_seq.count(aa) for aa in molUtils.allAA()}
def error(self, msm, d2):
    """
    Weighted error: trace of (msm ** self.w) multiplied with the
    transposed distance matrix.

    @param msm: membership matrix
    @type  msm: array('f')
    @param d2: distance from data to the centers
    @type  d2: array('f')

    @return: weighted error
    @rtype: float
    """
    ## memberships raised to the fuzziness exponent
    weights = N0.power(msm, self.w)

    ## diagonal entry k sums weight * distance over row k of both matrices
    return N0.trace(N0.dot(weights, N0.transpose(d2)))
def polarToCartesian(rtp):
    """
    Convert polar coordinate array to cartesian coordinate array:
    C{ r, S{theta}, S{phi} -> x,y,z }

    :param rtp: array of polar coordinates (r, theta, phi), one point
                per row
    :type  rtp: array

    :return: array of cartesian coordinates (x, y, z)
    :rtype: array
    """
    radius, theta, phi = rtp[:, 0], rtp[:, 1], rtp[:, 2]

    x = radius * N0.cos(theta) * N0.sin(phi)
    y = radius * N0.sin(theta) * N0.sin(phi)
    z = radius * N0.cos(phi)

    ## stack the three components as rows, then flip to one point per row
    stacked = N0.concatenate(([x], [y], [z]))
    return N0.transpose(stacked)
def create_membership_matrix(self):
    """
    Create a random membership matrix.

    @return: random array of shape length of data to cluster times
             number of clusters
    @rtype: array('f')
    """
    ## default signature has changed oldnumeric->numpy
    ## an explicit seed is only used if both seed values are non-zero
    if self.seedx != 0 and self.seedy != 0:
        R.seed((self.seedx, self.seedy))
    else:
        R.seed()

    random_weights = R.random_sample((self.npoints, self.n_cluster))

    ## NOTE(review): N0.sum is called without axis -- presumably sums over
    ## axis 0 (old Numeric default); confirm against biskit.core.oldnumeric
    normalized = random_weights / N0.sum(random_weights)

    return N0.transpose(normalized)
def create_membership_matrix(self):
    """
    Create a random membership matrix.

    @return: random array of shape length of data to cluster times
             number of clusters
    @rtype: array('f')
    """
    ## default signature has changed oldnumeric->numpy
    ## an explicit seed is only used if both seed values are non-zero
    if self.seedx != 0 and self.seedy != 0:
        R.seed((self.seedx, self.seedy))
    else:
        R.seed()

    random_weights = R.random_sample((self.npoints, self.n_cluster))

    ## NOTE(review): N0.sum is called without axis -- presumably sums over
    ## axis 0 (old Numeric default); confirm against biskit.core.oldnumeric
    normalized = random_weights / N0.sum(random_weights)

    return N0.transpose(normalized)
def polarToCartesian(rtp):
    """
    Convert polar coordinate array to cartesian coordinate array:
    C{ r, S{theta}, S{phi} -> x,y,z }

    :param rtp: array of polar coordinates (r, theta, phi), one point
                per row
    :type  rtp: array

    :return: array of cartesian coordinates (x, y, z)
    :rtype: array
    """
    radius, theta, phi = rtp[:, 0], rtp[:, 1], rtp[:, 2]

    x = radius * N0.cos(theta) * N0.sin(phi)
    y = radius * N0.sin(theta) * N0.sin(phi)
    z = radius * N0.cos(phi)

    ## stack the three components as rows, then flip to one point per row
    stacked = N0.concatenate(([x], [y], [z]))
    return N0.transpose(stacked)
def __random_matrix(self):
    """
    Random rotation matrix.

    @return: 4 x 4 array of float, random rotation and translation matrix
    @rtype: array
    """
    rotation = ma.randomRotation()
    translation = self.__random_translation()

    ## 3 x 4 matrix: rotation in [0:3, 0:3], translation appended as the
    ## fourth column
    shift_column = N0.transpose([translation.tolist()])
    upper = N0.concatenate((rotation, shift_column), 1)

    ## make it square by adding the row (0, 0, 0, 1)
    bottom = N0.array([[0, 0, 0, 1]], N0.Float32)

    return N0.concatenate((upper, bottom), 0)
def rtTuple2matrix(self, r, t):
    """
    Combine a rotation matrix and a translation vector into a single
    4x4 matrix.

    @param r: rotation matric, array 3x3 of float
    @type  r: array
    @param t: translation vector, array 1x3 of float
    @type  t: vector

    @return: rotation/translation matrix, array 4x4 of float
    @rtype: array
    """
    ## 3 x 4 matrix: rotation in [0:3, 0:3], translation appended as the
    ## fourth column
    shift_column = N0.transpose([t.tolist()])
    upper = N0.concatenate((r, shift_column), 1)

    ## make it square by adding the row (0, 0, 0, 1)
    bottom = N0.array([[0, 0, 0, 1]], N0.Float32)
    square = N0.concatenate((upper, bottom), 0)

    return square.astype(N0.Float32)
def pairwiseDistances(u, v):
    """
    Pairwise Euclidean distances between the rows of two arrays.

    :param u: first array, one point per row
    :type  u: array
    :param v: second array, one point per row
    :type  v: array

    :return: array( len(u) x len(v) ) of distances; entry [i, j] is the
             distance between u[i] and v[j]
    :rtype: array
    """
    u = N0.asarray(u)
    v = N0.asarray(v)

    ## squared length of every row, summed row by row; this avoids
    ## building the len(u)^2 and len(v)^2 products just for their diagonal
    sq_u = N0.sum(u * u, 1)
    sq_v = N0.sum(v * v, 1)

    ## |u_i - v_j|^2 = |u_i|^2 + |v_j|^2 - 2 u_i.v_j
    sq_dist = N0.add.outer(sq_u, sq_v) - 2 * N0.dot(u, N0.transpose(v))

    ## round-off can push the value for (nearly) identical points slightly
    ## below zero, which would turn into NaN under the square root
    return N0.sqrt(N0.maximum(sq_dist, 0))
def pairwiseDistances(u, v):
    """
    Pairwise Euclidean distances between the rows of two arrays.

    :param u: first array, one point per row
    :type  u: array
    :param v: second array, one point per row
    :type  v: array

    :return: array( len(u) x len(v) ) of distances; entry [i, j] is the
             distance between u[i] and v[j]
    :rtype: array
    """
    u = N0.asarray(u)
    v = N0.asarray(v)

    ## squared length of every row, summed row by row; this avoids
    ## building the len(u)^2 and len(v)^2 products just for their diagonal
    sq_u = N0.sum(u * u, 1)
    sq_v = N0.sum(v * v, 1)

    ## |u_i - v_j|^2 = |u_i|^2 + |v_j|^2 - 2 u_i.v_j
    sq_dist = N0.add.outer(sq_u, sq_v) - 2 * N0.dot(u, N0.transpose(v))

    ## round-off can push the value for (nearly) identical points slightly
    ## below zero, which would turn into NaN under the square root
    return N0.sqrt(N0.maximum(sq_dist, 0))
def transform(self, *rt):
    """
    Apply given transformation to all frames (in place).

    :param rt: rotation translation matrix
    :type  rt: array( 4 x 4 ) OR array(3 x 3), array(3 x 1)
    """
    if len(rt) == 2:
        rotation, shift = rt[0], rt[1]
    else:
        ## single combined matrix: rotation block and translation column
        matrix = rt[0]
        rotation, shift = matrix[0:3, 0:3], matrix[0:3, 3]

    ## coordinates are multiplied from the left, hence the transposed
    ## rotation
    rotation = N0.transpose(rotation).astype(N0.Float32)
    shift = shift.astype(N0.Float32)

    for i, frame in enumerate(self.frames):
        self.frames[i] = N0.array(N0.dot(frame, rotation)) + shift
def rtTuple2matrix(self, r, t):
    """
    Combine a rotation matrix and a translation vector into a single
    4x4 matrix.

    @param r: rotation matric, array 3x3 of float
    @type  r: array
    @param t: translation vector, array 1x3 of float
    @type  t: vector

    @return: rotation/translation matrix, array 4x4 of float
    @rtype: array
    """
    ## 3 x 4 matrix: rotation in [0:3, 0:3], translation appended as the
    ## fourth column
    shift_column = N0.transpose([t.tolist()])
    upper = N0.concatenate((r, shift_column), 1)

    ## make it square by adding the row (0, 0, 0, 1)
    bottom = N0.array([[0, 0, 0, 1]], N0.Float32)
    square = N0.concatenate((upper, bottom), 0)

    return square.astype(N0.Float32)
def cartesianToPolar(xyz):
    """
    Convert cartesian coordinate array to polar coordinate array:
    C{ x,y,z -> r, S{theta}, S{phi} }

    :param xyz: array of cartesian coordinates (x, y, z), one point per row
    :type  xyz: array

    :return: array of polar coordinates (r, theta, phi)
    :rtype: array
    """
    radius = N0.sqrt(N0.sum(xyz**2, 1))
    phi = N0.arccos(xyz[:, 2] / radius)

    ## atan2 looks at the signs of y and x, so theta ends up in the
    ## correct quadrant
    theta = [math.atan2(point[1], point[0]) for point in xyz]

    ## stack the three components as rows, then flip to one point per row
    stacked = N0.concatenate(([radius], [theta], [phi]))
    return N0.transpose(stacked)
def rowDistances(x, y):
    """
    Distances between corresponding rows of two arrays (of same shape)
    after least-squares superpositioning of y onto x.

    :param x: first set of coordinates
    :type  x: array('f')
    :param y: second set of coordinates
    :type  y: array('f')

    :return: array( len(x), 'f' ), distance between x[i] and y[i] for all i
    :rtype: array
    """
    ## transformation for the best match
    rotation, shift = findTransformation(x, y)

    ## y moved onto x
    fitted = N0.dot(y, N0.transpose(rotation)) + shift

    ## length of the difference vector of every row
    deviation = x - fitted
    return N0.sqrt(N0.sum(N0.power(deviation, 2), 1))
def rowDistances(x, y):
    """
    Distances between corresponding rows of two arrays (of same shape)
    after least-squares superpositioning of y onto x.

    :param x: first set of coordinates
    :type  x: array('f')
    :param y: second set of coordinates
    :type  y: array('f')

    :return: array( len(x), 'f' ), distance between x[i] and y[i] for all i
    :rtype: array
    """
    ## transformation for the best match
    rotation, shift = findTransformation(x, y)

    ## y moved onto x
    fitted = N0.dot(y, N0.transpose(rotation)) + shift

    ## length of the difference vector of every row
    deviation = x - fitted
    return N0.sqrt(N0.sum(N0.power(deviation, 2), 1))
def cartesianToPolar(xyz):
    """
    Convert cartesian coordinate array to polar coordinate array:
    C{ x,y,z -> r, S{theta}, S{phi} }

    :param xyz: array of cartesian coordinates (x, y, z), one point per row
    :type  xyz: array

    :return: array of polar coordinates (r, theta, phi)
    :rtype: array
    """
    radius = N0.sqrt(N0.sum(xyz**2, 1))
    phi = N0.arccos(xyz[:, 2] / radius)

    ## atan2 looks at the signs of y and x, so theta ends up in the
    ## correct quadrant
    theta = [math.atan2(point[1], point[0]) for point in xyz]

    ## stack the three components as rows, then flip to one point per row
    stacked = N0.concatenate(([radius], [theta], [phi]))
    return N0.transpose(stacked)
def reduceXyz(self, xyz, axis=0):
    """
    Reduce the number of atoms in the given coordinate set by replacing
    every atom group in self.groups with its mass-weighted center. The set
    must have the same length and order as the reference model. It may
    have an additional (time) dimension as first axis.

    @param xyz: coordinates (N_atoms x 3) or (N_frames x N_atoms x 3)
    @type  xyz: array
    @param axis: axis with atoms (default: 0)
    @type  axis: int

    @return: coordinate array (N_less_atoms x 3) or
             (N_frames x N_less_atoms x 3); None if there are no groups
    @rtype: array
    """
    masses = self.m.atoms.get('mass')

    centers = []

    for atom_indices in self.groups:
        x = N0.take(xyz, atom_indices, axis)
        m = N0.take(masses, atom_indices)

        ## masses as a column so that every coordinate row is scaled by
        ## the mass of its atom
        center = N0.sum(x * N0.transpose([m, ]), axis=axis) / N0.sum(m)

        ## put back the atom axis that the sum has removed
        if axis == 0:
            center = center[N0.NewAxis, :]
        if axis == 1:
            center = center[:, N0.NewAxis, :]

        centers.append(center)

    if not centers:
        return None

    ## one concatenation at the end instead of copying the growing result
    ## once per group
    return N0.concatenate(centers, axis)
def thin(self, step=1):
    """
    Keep only each step'th frame of every ensemble member.

    :param step: 1..keep all frames, 2..skip first and every second, ..
                 (default: 1)
    :type  step: int

    :return: reduced EnsembleTraj
    :rtype: EnsembleTraj
    """
    T.ensure(step, int, forbidden=[0])

    ## one row of frame indices per member
    member_frames = N0.array(
        [self.memberIndices(i) for i in range(self.n_members)])

    ## columns step-1, 2*step-1, ..; the leading -1 is only a start
    ## value and is dropped again
    n_columns = N0.shape(member_frames)[1]
    keep = range(-1, n_columns, step)[1:]
    member_frames = N0.take(member_frames, keep, 1)

    ## transposing before flattening lists the indices frame by frame
    ## rather than member by member
    return self.takeFrames(N0.ravel(N0.transpose(member_frames)))
def calc_cluster_center(self, msm):
    """
    Calculate the cluster centers as averages of self.data weighted by
    msm ** self.w.

    @param msm: membership matrix (one row per cluster)
    @type  msm: array('f')

    @return: cluster centers, one row per cluster
    @rtype: array
    """
    weights = N0.power(msm, self.w)

    ## weighted sum of the data points for every cluster
    weighted_sum = N0.dot(weights, self.data)

    ## the transposed layout lets the division by the per-cluster weight
    ## sum broadcast along the last axis
    scaled = N0.transpose(weighted_sum) / N0.sum(weights, 1)

    return N0.transpose(scaled)
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Fit array y onto array x, optionally repeating the fit while rows
    that deviate too much are excluded as outliers.

    Superimposed array y would be C{ N0.dot(y, N0.transpose(r)) + t }.

    :param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
    :type  n_iterations: 1|0
    :param z: number of standard deviations for outlier definition
              (default: 2)
    :type  z: float
    :param eps_rmsd: tolerance in rmsd (default: 0.5)
    :type  eps_rmsd: float
    :param eps_stdv: tolerance in standard deviations (default: 0.05)
    :type  eps_stdv: float

    :return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ]
    :rtype: (array, array), [float, float, int]
    """
    trace = []
    last_rmsd = 0
    last_stdv = 0
    rounds = 0
    converged = False

    ## 1 .. row takes part in the fit, 0 .. row was rejected as outlier
    mask = N0.ones(len(y), N0.Int32)

    while not converged:

        ## best transformation using the currently accepted rows only
        r, t = findTransformation(N0.compress(mask, x, 0),
                                  N0.compress(mask, y, 0))

        ## apply it to all rows of y
        fitted = N0.dot(y, N0.transpose(r)) + t

        ## row distances; rejected rows are set to 0
        d = N0.sqrt(N0.sum(N0.power(x - fitted, 2), 1)) * mask

        ## rmsd and standard deviation over the accepted rows
        considered = N0.compress(mask, d)
        rmsd = N0.sqrt(N0.average(considered**2))
        stdv = MU.SD(considered)

        ## change with respect to the previous round
        d_rmsd = abs(rmsd - last_rmsd)
        d_stdv = abs(1 - last_stdv / stdv)

        if d_rmsd < eps_rmsd and d_stdv < eps_stdv:
            converged = True
        else:
            last_rmsd, last_stdv = rmsd, stdv

        ## fraction of rows that went into this round
        perc = round(float(N0.sum(mask)) / float(len(mask)), 2)

        ## reject rows further away than rmsd + z standard deviations
        mask = N0.logical_and(mask, N0.less(d, rmsd + z * stdv))
        outliers = N0.nonzero(N0.logical_not(mask))

        trace.append([perc, round(rmsd, 3), outliers])

        rounds += 1

        ## n_iterations == 0 means: go on until convergence
        if n_iterations and rounds >= n_iterations:
            break

    return (r, t), trace
def clusterEntropy(self):
    """
    Entropy-like measure for every row of the membership matrix:
    -1/npoints * sum( msm * log(msm) ) along each row.

    @return: one value per row of self.msm
    @rtype: array
    """
    log_msm = N0.log(self.msm)

    ## diagonal entry k is the sum over msm[k] * log(msm[k])
    row_sums = N0.diagonal(N0.dot(self.msm, N0.transpose(log_msm)))

    scale = -1 / float(self.npoints)
    return scale * row_sums
def __parseBiomt(self, pdbFile, firstLine):
    """
    Extract BIOMT (biological unit) information from REMARK 350 lines
    Creates a 'BIOMT' dictionary.

    Records are consumed until the first one that is not a REMARK 350
    record. A record that cannot be read (ValueError from readLine) is
    reported to self.log and skipped.

    @param pdbFile: record source offering readLine(), which returns the
                    next record as a pair (record type, remaining text)
                    -- presumably the PDB file parser; confirm against caller
    @type  pdbFile: object
    @param firstLine: first REMARK 350 record, already read by the caller
    @type  firstLine: (str, str)

    @return: {'BIOMT': {biomolecule number: ([chain ids], [3x4 arrays])}}
             and the first record that does not belong to REMARK 350
    @rtype: dict, (str, str)
    """
    line = firstLine
    biomtDict = {}
    moleculeNum = -1

    ## start with empty collectors so that chain or BIOMT lines without a
    ## preceding BIOMOLECULE line cannot hit unbound names
    targetChains = []
    rotation = []
    translation = []
    rtList = []
    matrixLine = 0

    while line[0] == 'REMARK' and line[1].startswith(' 350'):
        # 5 = len(' 350 ')
        biomtLine = line[1][5:].lstrip()

        if biomtLine.startswith('BIOMOLECULE:'):  # start a new molecule
            if moleculeNum != -1:
                # lets update the dictionary with what we've got
                biomtDict[moleculeNum] = (targetChains, rtList)

            # 12 = len('BIOMOLECULE:')
            moleculeNum = int(biomtLine[12:].strip())
            targetChains = []
            rotation = []
            translation = []
            rtList = []
            matrixLine = 0

        if biomtLine.startswith('APPLY THE FOLLOWING TO CHAINS:'):
            # parse targeted chains, we assume this comes after BIOMOLECULE line
            # 30 = len('APPLY THE FOLLOWING TO CHAINS:')
            targetChains.extend(c.strip() for c in biomtLine[30:].split(','))

        if biomtLine.startswith('AND CHAINS:'):
            # 11 = len('AND CHAINS:')
            targetChains.extend(c.strip() for c in biomtLine[11:].split(','))

        if biomtLine.startswith('BIOMT'):
            # parse rotate-translate matri{x/ces}, we assume this comes after BIOMOLECULE line
            matrixLine += 1
            # 6 = len('BIOMT#')
            rawCoords = biomtLine[6:].split()
            rotation.append([float(x) for x in rawCoords[1:4]])
            translation.append(float(rawCoords[4]))

            # every third BIOMT line completes one 3 x 4 matrix
            if matrixLine % 3 == 0:
                rotation = N0.array(rotation)
                translation = N0.transpose([translation])
                rotation = N0.concatenate((rotation, translation), axis=1)
                rtList.append(N0.array(rotation))
                rotation = []
                translation = []

        # fetch the next record; an unreadable record is reported and
        # skipped, so that the previous record is not processed twice
        # NOTE(review): assumes readLine() moves on after a ValueError
        while True:
            try:
                line = pdbFile.readLine()
                break
            except ValueError as what:
                self.log.add('Warning: Error parsing REMARK 350 (BIOMT) record')
                self.log.add('\tError: ' + str(what))

    # process last molecule group (if anything was collected)
    if moleculeNum != -1 or targetChains or rtList:
        biomtDict[moleculeNum] = (targetChains, rtList)

    # return (indexed transformation dictionary , last line which isn't ours)
    return {'BIOMT': biomtDict}, line
def __alignMatrixDimension(self, cm, thisSeq, castSeq, axis=0):
    """
    Correct one dimension of contactMatrix by inserting and deleting
    columns, so that it can be later compared to contact matrices based
    on slightly different sequences.

    @param cm: contact matrix, 2D matrix of residue contacts
               recceptor x ligand sequence
    @type  cm: array
    @param thisSeq: AA sequence of this dimension of the contactMatrix
    @type  thisSeq: string
    @param castSeq: AA sequence of this dimension in the other contact
    @type  castSeq: string
    @param axis: which dimension to adapt (0=receptor, 1=ligand)
    @type  axis: 1|0

    @return: contact matrix with residue contacts compatible to refSeq.
    @rtype: 2D array
    """
    # edit operations that turn thisSeq into castSeq
    opcodes = SequenceMatcher(None, thisSeq, castSeq).get_opcodes()

    # the edits below always act on columns; flip the matrix if the
    # first dimension is the one to adapt
    if not axis:
        cm = N0.transpose(cm)

    seqCount = 0   # keep track of sequence length changes

    # NOTE(review): only 'delete' and 'insert' are handled; a 'replace'
    # of unequal length would leave the dimension unadjusted
    for tag, i1, i2, j1, j2 in opcodes:

        if tag == 'delete':
            # drop the columns of the residues missing in castSeq
            before = cm[:, :i1 + seqCount]
            after = cm[:, i2 + seqCount:]
            cm = N0.concatenate((before, after), 1)
            seqCount = seqCount + i1 - i2

        elif tag == 'insert':
            # put zero columns where castSeq has additional residues
            insertZeros = j2 - j1
            insertColumns = N0.array([[0] * insertZeros] * N0.size(cm, 0))
            before = cm[:, :i1 + seqCount]
            after = cm[:, i2 + seqCount:]
            cm = N0.concatenate((before, insertColumns, after), 1)
            seqCount = seqCount + j2 - j1

    if not axis:
        return N0.transpose(cm)
    return cm
def __alignMatrixDimension(self, cm, thisSeq, castSeq, axis=0):
    """
    Correct one dimension of contactMatrix by inserting and deleting
    columns, so that it can be later compared to contact matrices based
    on slightly different sequences.

    @param cm: contact matrix, 2D matrix of residue contacts
               recceptor x ligand sequence
    @type  cm: array
    @param thisSeq: AA sequence of this dimension of the contactMatrix
    @type  thisSeq: string
    @param castSeq: AA sequence of this dimension in the other contact
    @type  castSeq: string
    @param axis: which dimension to adapt (0=receptor, 1=ligand)
    @type  axis: 1|0

    @return: contact matrix with residue contacts compatible to refSeq.
    @rtype: 2D array
    """
    # edit operations that turn thisSeq into castSeq
    opcodes = SequenceMatcher(None, thisSeq, castSeq).get_opcodes()

    # the edits below always act on columns; flip the matrix if the
    # first dimension is the one to adapt
    if not axis:
        cm = N0.transpose(cm)

    seqCount = 0   # keep track of sequence length changes

    # NOTE(review): only 'delete' and 'insert' are handled; a 'replace'
    # of unequal length would leave the dimension unadjusted
    for tag, i1, i2, j1, j2 in opcodes:

        if tag == 'delete':
            # drop the columns of the residues missing in castSeq
            before = cm[:, :i1 + seqCount]
            after = cm[:, i2 + seqCount:]
            cm = N0.concatenate((before, after), 1)
            seqCount = seqCount + i1 - i2

        elif tag == 'insert':
            # put zero columns where castSeq has additional residues
            insertZeros = j2 - j1
            insertColumns = N0.array([[0] * insertZeros] * N0.size(cm, 0))
            before = cm[:, :i1 + seqCount]
            after = cm[:, i2 + seqCount:]
            cm = N0.concatenate((before, insertColumns, after), 1)
            seqCount = seqCount + j2 - j1

    if not axis:
        return N0.transpose(cm)
    return cm
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Fit array y onto array x, optionally repeating the fit while rows
    that deviate too much are excluded as outliers.

    Superimposed array y would be C{ N0.dot(y, N0.transpose(r)) + t }.

    :param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
    :type  n_iterations: 1|0
    :param z: number of standard deviations for outlier definition
              (default: 2)
    :type  z: float
    :param eps_rmsd: tolerance in rmsd (default: 0.5)
    :type  eps_rmsd: float
    :param eps_stdv: tolerance in standard deviations (default: 0.05)
    :type  eps_stdv: float

    :return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ]
    :rtype: (array, array), [float, float, int]
    """
    trace = []
    last_rmsd = 0
    last_stdv = 0
    rounds = 0
    converged = False

    ## 1 .. row takes part in the fit, 0 .. row was rejected as outlier
    mask = N0.ones(len(y), N0.Int32)

    while not converged:

        ## best transformation using the currently accepted rows only
        r, t = findTransformation(N0.compress(mask, x, 0),
                                  N0.compress(mask, y, 0))

        ## apply it to all rows of y
        fitted = N0.dot(y, N0.transpose(r)) + t

        ## row distances; rejected rows are set to 0
        d = N0.sqrt(N0.sum(N0.power(x - fitted, 2), 1)) * mask

        ## rmsd and standard deviation over the accepted rows
        considered = N0.compress(mask, d)
        rmsd = N0.sqrt(N0.average(considered**2))
        stdv = MU.SD(considered)

        ## change with respect to the previous round
        d_rmsd = abs(rmsd - last_rmsd)
        d_stdv = abs(1 - last_stdv / stdv)

        if d_rmsd < eps_rmsd and d_stdv < eps_stdv:
            converged = True
        else:
            last_rmsd, last_stdv = rmsd, stdv

        ## fraction of rows that went into this round
        perc = round(float(N0.sum(mask)) / float(len(mask)), 2)

        ## reject rows further away than rmsd + z standard deviations
        mask = N0.logical_and(mask, N0.less(d, rmsd + z * stdv))
        outliers = N0.nonzero(N0.logical_not(mask))

        trace.append([perc, round(rmsd, 3), outliers])

        rounds += 1

        ## n_iterations == 0 means: go on until convergence
        if n_iterations and rounds >= n_iterations:
            break

    return (r, t), trace
def __parseBiomt(self, pdbFile, firstLine):
    """
    Extract BIOMT (biological unit) information from REMARK 350 lines
    Creates a 'BIOMT' dictionary.

    Records are consumed until the first one that is not a REMARK 350
    record. A record that cannot be read (ValueError from readLine) is
    reported to self.log and skipped.

    @param pdbFile: record source offering readLine(), which returns the
                    next record as a pair (record type, remaining text)
                    -- presumably the PDB file parser; confirm against caller
    @type  pdbFile: object
    @param firstLine: first REMARK 350 record, already read by the caller
    @type  firstLine: (str, str)

    @return: {'BIOMT': {biomolecule number: ([chain ids], [3x4 arrays])}}
             and the first record that does not belong to REMARK 350
    @rtype: dict, (str, str)
    """
    line = firstLine
    biomtDict = {}
    moleculeNum = -1

    ## start with empty collectors so that chain or BIOMT lines without a
    ## preceding BIOMOLECULE line cannot hit unbound names
    targetChains = []
    rotation = []
    translation = []
    rtList = []
    matrixLine = 0

    while line[0] == 'REMARK' and line[1].startswith(' 350'):
        # 5 = len(' 350 ')
        biomtLine = line[1][5:].lstrip()

        if biomtLine.startswith('BIOMOLECULE:'):  # start a new molecule
            if moleculeNum != -1:
                # lets update the dictionary with what we've got
                biomtDict[moleculeNum] = (targetChains, rtList)

            # 12 = len('BIOMOLECULE:')
            moleculeNum = int(biomtLine[12:].strip())
            targetChains = []
            rotation = []
            translation = []
            rtList = []
            matrixLine = 0

        if biomtLine.startswith('APPLY THE FOLLOWING TO CHAINS:'):
            # parse targeted chains, we assume this comes after BIOMOLECULE line
            # 30 = len('APPLY THE FOLLOWING TO CHAINS:')
            targetChains.extend(c.strip() for c in biomtLine[30:].split(','))

        if biomtLine.startswith('AND CHAINS:'):
            # 11 = len('AND CHAINS:')
            targetChains.extend(c.strip() for c in biomtLine[11:].split(','))

        if biomtLine.startswith('BIOMT'):
            # parse rotate-translate matri{x/ces}, we assume this comes after BIOMOLECULE line
            matrixLine += 1
            # 6 = len('BIOMT#')
            rawCoords = biomtLine[6:].split()
            rotation.append([float(x) for x in rawCoords[1:4]])
            translation.append(float(rawCoords[4]))

            # every third BIOMT line completes one 3 x 4 matrix
            if matrixLine % 3 == 0:
                rotation = N0.array(rotation)
                translation = N0.transpose([translation])
                rotation = N0.concatenate((rotation, translation), axis=1)
                rtList.append(N0.array(rotation))
                rotation = []
                translation = []

        # fetch the next record; an unreadable record is reported and
        # skipped, so that the previous record is not processed twice
        # NOTE(review): assumes readLine() moves on after a ValueError
        while True:
            try:
                line = pdbFile.readLine()
                break
            except ValueError as what:
                self.log.add('Warning: Error parsing REMARK 350 (BIOMT) record')
                self.log.add('\tError: ' + str(what))

    # process last molecule group (if anything was collected)
    if moleculeNum != -1 or targetChains or rtList:
        biomtDict[moleculeNum] = (targetChains, rtList)

    # return (indexed transformation dictionary , last line which isn't ours)
    return {'BIOMT': biomtDict}, line
def clusterEntropy(self):
    """
    Entropy-like measure for every row of the membership matrix:
    -1/npoints * sum( msm * log(msm) ) along each row.

    @return: one value per row of self.msm
    @rtype: array
    """
    log_msm = N0.log(self.msm)

    ## diagonal entry k is the sum over msm[k] * log(msm[k])
    row_sums = N0.diagonal(N0.dot(self.msm, N0.transpose(log_msm)))

    scale = -1 / float(self.npoints)
    return scale * row_sums
def fit(self, mask=None, ref=None, n_it=1, prof='rms', verbose=1, fit=1,
        **profInfos):
    """
    Superimpose all coordinate frames on reference coordinates. Put rms
    values in a profile. If n_it != 1, the fraction of atoms considered
    for the fit is put into a profile called |prof|_considered
    (i.e. by default 'rms_considered').

    :param mask: atom mask, atoms to consider default: [all]
    :type  mask: [1|0]
    :param ref: use as reference, default: None, average Structure
    :type  ref: PDBModel
    :param n_it: number of fit iterations, kicking out outliers on the way
                 1 -> classic single fit, 0 -> until convergence
                 (default: 1)
    :type  n_it: int
    :param prof: save rms per frame in profile of this name, ['rms']
    :type  prof: str
    :param verbose: print progress info to STDERR (default: 1)
    :type  verbose: 1|0
    :param fit: transform frames after match, otherwise just calc rms
                (default: 1)
    :type  fit: 1|0
    :param profInfos: additional key=value pairs for rms profile info []
    :type profInfos: key=value
    """
    ## reference: given model or the average over all frames
    refxyz = N0.average(self.frames, 0) if ref is None else ref.getXyz()

    if mask is None:
        mask = N0.ones(len(refxyz), N0.Int32)

    refxyz = N0.compress(mask, refxyz, 0)

    if verbose:
        T.errWrite("rmsd fitting...")

    rms = []           ## rms value of each frame
    non_outliers = []  ## fraction of atoms considered for rms and fit
    iterations = []    ## number of iterations performed on each frame

    for i, xyz in enumerate(self.frames):

        selected = N0.compress(mask, xyz, 0)

        if n_it != 1:
            ## iterative fit; the last trace entry describes the final round
            (r, t), rmsdList = rmsFit.match(refxyz, selected, n_it)
            iterations.append(len(rmsdList))
            non_outliers.append(rmsdList[-1][0])

            xyz_transformed = N0.dot(xyz, N0.transpose(r)) + t
            frame_rms = rmsdList[-1][1]

        else:
            ## single fit; rms over the masked atoms of the moved frame
            r, t = rmsFit.findTransformation(refxyz, selected)

            xyz_transformed = N0.dot(xyz, N0.transpose(r)) + t

            moved = N0.compress(mask, xyz_transformed, 0)
            d = N0.sqrt(N0.sum(N0.power(moved - refxyz, 2), 1))
            frame_rms = N0.sqrt(N0.average(d**2))

        rms.append(frame_rms)

        if fit:
            self.frames[i] = xyz_transformed.astype(N0.Float32)

        ## progress mark for every 100th frame
        if verbose and i % 100 == 0:
            T.errWrite('#')

    self.setProfile(prof, rms, n_iterations=n_it, **profInfos)

    if non_outliers:
        self.setProfile(prof + '_considered', non_outliers,
                        n_iterations=n_it,
                        comment='fraction of atoms considered for iterative fit')

    if verbose:
        T.errWrite('done\n')