def __exposedResidues(self, ASA_values, sidechainCut=0.0, backboneCut=0.0,
                      totalCut=0.0):
    """
    Decide which residues are surface exposed, based on three cutoffs.

    Column 0 of ASA_values is compared against totalCut, column 1 against
    backboneCut and column 2 against sidechainCut. The three test results
    are stacked and summed with N0.sum (no axis given); a residue is
    flagged as exposed if that sum is greater than 0.

    NOTE(review): assuming N0.sum reduces over the first axis (the three
    tests), a residue is exposed as soon as ONE cutoff is exceeded -- older
    documentation claimed all three have to pass. Confirm intended rule.

    @param ASA_values: ASA values, one row per residue, as calculated in
                       L{__read_residueASA}
    @type  ASA_values: array
    @param sidechainCut: cutoff ASA value for column 2 (default: 0.0)
    @type  sidechainCut: float
    @param backboneCut: cutoff ASA value for column 1 (default: 0.0)
    @type  backboneCut: float
    @param totalCut: cutoff ASA value for column 0 (default: 0.0)
    @type  totalCut: float

    @return: residue mask, where 0 = buried
    @rtype: [1|0]
    """
    ## transpose once: row k now holds column k of the input
    columns = N0.transpose(ASA_values)
    cutoffs = (totalCut, backboneCut, sidechainCut)

    passed = [N0.greater(columns[k], cutoffs[k]) for k in range(3)]

    ## one row per test
    stacked = N0.concatenate(tuple([flags] for flags in passed))

    return N0.greater(N0.sum(stacked), 0)
def __exposedResidues( self, ASA_values, sidechainCut=0.0,
                       backboneCut=0.0, totalCut=0.0 ):
    """
    Build a mask of surface exposed residues from per-residue ASA values.

    The first three columns of ASA_values are tested against totalCut,
    backboneCut and sidechainCut (in this order). The test results are
    stacked, summed with N0.sum (called without axis) and a residue counts
    as exposed if the sum is greater than 0.

    NOTE(review): this looks like 'any cutoff exceeded' rather than 'all
    three exceeded' (assuming N0.sum reduces over the first axis) -- confirm
    which rule is intended.

    @param ASA_values: array with ASA values, one row per residue,
                       calculated in L{__read_residueASA}
    @type  ASA_values: array
    @param sidechainCut: cutoff for the values in column 2 (default: 0.0)
    @type  sidechainCut: float
    @param backboneCut: cutoff for the values in column 1 (default: 0.0)
    @type  backboneCut: float
    @param totalCut: cutoff for the values in column 0 (default: 0.0)
    @type  totalCut: float

    @return: residue mask, where 0 = buried
    @rtype: [1|0]
    """
    by_column = N0.transpose( ASA_values )

    over_total = N0.greater( by_column[0], totalCut )
    over_backbone = N0.greater( by_column[1], backboneCut )
    over_sidechain = N0.greater( by_column[2], sidechainCut )

    ## 3 x N_residues table of test results
    tests = N0.concatenate(
        ([over_total], [over_backbone], [over_sidechain]) )

    n_passed = N0.sum( tests )
    return N0.greater( n_passed, 0 )
def squared_distance_matrix(x, y):
    """
    Squared Euclidean distances between every row of x and every row of y.

    Uses the expansion |a - b|^2 = |a|^2 + |b|^2 - 2 a.b

    @param x: first set of row vectors
    @type  x: array
    @param y: second set of row vectors (same number of columns as x)
    @type  y: array

    @return: array len(x) x len(y) with the squared distances
    @rtype: array
    """
    ## mixed term a.b for all pairs
    cross = N0.dot(x, N0.transpose(y))

    ## squared length of each row vector
    norms_x = N0.diagonal(N0.dot(x, N0.transpose(x)))
    norms_y = N0.diagonal(N0.dot(y, N0.transpose(y)))

    return N0.add.outer(norms_x, norms_y) - 2 * cross
def test_plot(self):
    """gnuplot.plot test"""
    ## Two curves in one plot; each curve is passed as a 2-D array
    ## holding one (x, y) pair per row.
    import Biskit.oldnumeric as N0

    xs = N0.arange(10)
    parabola_a = xs**2
    parabola_b = (10 - xs)**2

    curve_a = N0.transpose(N0.array([xs, parabola_a]))
    curve_b = N0.transpose(N0.array([xs, parabola_b]))

    plot(curve_a, curve_b)
def __pairwiseDistances(self, u, v):
    """
    Pairwise distances between two arrays of atom coordinates.

    @param u: coordinates, one row per atom
    @type  u: array
    @param v: coordinates, one row per atom
    @type  v: array

    @return: Numpy array len(u) x len(v) with the distance between
             u[i] and v[j] at position [i, j]
    @rtype: array

    @raise ComplexError: if u or v is not an array

    @author: Wolfgang Rieping.
    """
    ## check input
    if not (isinstance(u, arraytype) and isinstance(v, arraytype)):
        raise ComplexError('unsupported argument type ' +
                           str(type(u)) + ' or ' + str(type(v)))

    ## squared length of every coordinate vector (avoids building the
    ## full len(u) x len(u) dot product just to read its diagonal)
    sq_u = N0.sum(u * u, 1)
    sq_v = N0.sum(v * v, 1)

    ## |u_i - v_j|^2 = |u_i|^2 + |v_j|^2 - 2 u_i.v_j, for all pairs at once
    sq_dist = -2 * N0.dot(u, N0.transpose(v))
    sq_dist = sq_dist + sq_u[:, N0.NewAxis] + sq_v[N0.NewAxis, :]

    ## round-off can push the squared distance of (nearly) identical
    ## points slightly below zero; clamp so that sqrt never sees that
    return N0.sqrt(N0.maximum(sq_dist, 0))
def contactResDistribution(self, cm=None):
    """
    Count occurrence of residues in protein-protein interface.

    @param cm: pre-calculated contact matrix; if None, the result of
               self.resContacts() is used (default: None)
    @type  cm: matrix

    @return: dict mapping each residue letter returned by
             molUtils.allAA() to its number of occurrences among the
             contacting residues, e.g. {'A':3, 'C':1, .. }
    @rtype: dict
    """
    if cm is None:
        cm = self.resContacts()

    ## non-zero entry <=> residue takes part in at least one contact
    ligand_hits = N0.sum(cm)
    receptor_hits = N0.sum(N0.transpose(cm))

    ## reduce both sequences to the contacting residues
    ligand_letters = N0.compress(ligand_hits, self.lig().sequence())
    receptor_letters = N0.compress(receptor_hits, self.rec().sequence())

    ## back to one plain string
    contact_seq = ''.join(ligand_letters) + ''.join(receptor_letters)

    ## count occurrence of letters
    return dict([(aa, contact_seq.count(aa)) for aa in molUtils.allAA()])
def findTransformation(x, y):
    """
    Match two arrays by rotation and translation. Returns the
    rotation matrix and the translation vector.

    The matrix is built as the product of the two outer matrices of a
    singular value decomposition of the correlation matrix of the centered
    coordinates. No determinant check or reflection correction is applied
    here.

    @param x: first set of coordinates
    @type  x: array('f')
    @param y: second set of coordinates
    @type  y: array('f')

    @return: rotation matrix (3x3) and translation vector (1x3)
    @rtype:  array, array
    """
    ## centers of both configurations
    center_x = N0.average(x)
    center_y = N0.average(y)

    ## correlation matrix of the centered configurations
    correlation = N0.dot(N0.transpose(x - center_x), y - center_y)

    left, singular_values, right = svd(correlation)

    ## rotation, and the translation that puts the rotated center of y
    ## onto the center of x
    rotation = N0.dot(left, right)
    translation = center_x - N0.dot(rotation, center_y)

    return rotation, translation
def histogram(data, nbins, range=None):
    """
    Create a histogram.
    Comes from Konrad Hinsen: Scientific Python

    @param data: data list or array
    @type  data: [any]
    @param nbins: number of bins
    @type  nbins: int
    @param range: data range to create histogram from (min val, max val);
                  values outside of it are ignored. By default the full
                  range of the data is used.
    @type  range: (float, float) OR None

    @return: array (nbins x 2) with the center of each bin and the
             number of values that fall into it
    @rtype: array
    """
    data = N0.array(data, N0.Float)

    if range is None:
        lower = N0.minimum.reduce(data)
        upper = N0.maximum.reduce(data)
    else:
        lower, upper = range
        ## a 0/1 repeat count per value drops everything outside the range
        data = N0.repeat(data,
                         N0.logical_and(N0.less_equal(data, upper),
                                        N0.greater_equal(data, lower)))

    ## float() prevents a truncating integer division if range is
    ## given as a pair of ints
    bin_width = (upper - lower) / float(nbins)

    ## bin index of each value
    data = N0.floor((data - lower) / bin_width).astype(N0.Int)

    histo = N0.add.reduce(N0.equal(N0.arange(nbins)[:, N0.NewAxis], data), -1)

    ## values equal to the upper limit end up with index nbins;
    ## count them in the last bin
    histo[-1] = histo[-1] + N0.add.reduce(N0.equal(nbins, data))

    bins = lower + bin_width * (N0.arange(nbins) + 0.5)

    return N0.transpose(N0.array([bins, histo]))
def takeMembers(self, mIndices):
    """
    Take all frames belonging to the members in mIndices::
      takeMembers( mIndices ) -> EnsembleTraj with frames of given members

    @param mIndices: list of member indices
    @type  mIndices: [int] OR array('i')

    @return: EnsembleTraj with specified members
    @rtype: EnsembleTraj

    @raise EnsembleTrajError: if a TypeError occurs while collecting
                              the frames

    @todo: return self.__class__ instead of EnsembleTraj
    """
    try:
        ## one row of frame indices per requested member
        ## (assumes that each member traj has same number of frames)
        per_member = N0.array([self.memberIndices(m) for m in mIndices])

        ## reading the transposed table row by row takes the members in
        ## turn for each position along the rows
        frame_order = N0.ravel(N0.transpose(per_member))

        n_selected = len(mIndices)

        ## has wrong n_members and member order
        taken = self.takeFrames(frame_order)

        result = EnsembleTraj(n_members=n_selected)
        result.__dict__.update(taken.__dict__)
        result.n_members = n_selected
        result.resetFrameNames()

        return result

    except TypeError:
        raise EnsembleTrajError(
            'takeMembers TypeError ' + str(mIndices) +
            "\nlenFrames: %i; n_members: %i" % (len(self), self.n_members))
def __findTransformation(self, x, y):
    """
    Match two arrays by rotation and translation. Returns the
    rotation matrix and the translation vector.
    Back transformation:
    for atom i new coordinates will be::
        y_new[i] = N0.dot(r, y[i]) + t

    for all atoms in one step::
        y_new = N0.dot(y, N0.transpose(r)) + t

    @param x: coordinates
    @type  x: array
    @param y: coordinates
    @type  y: array

    @return: rotation matrix, translation vector
    @rtype: array, array

    @author: Michael Habeck
    """
    from numpy.linalg import svd

    ## centers of both configurations
    center_x = N0.sum(x) / len(x)
    center_y = N0.sum(y) / len(y)

    ## svd of the correlation matrix of the centered configurations
    correlation = N0.dot(N0.transpose(x - center_x), y - center_y)
    left, singular_values, right = svd(correlation)

    ## build rotation matrix and translation vector
    rotation = N0.dot(left, right)
    translation = center_x - N0.dot(rotation, center_y)

    return rotation, translation
def __parseBiomt(self, pdbFile, firstLine):
    """
    Extract BIOMT (biological unit) information from REMARK 350 lines
    Creates a 'BIOMT' dictionary.

    Lines are consumed (starting with firstLine, then via
    pdbFile.readLine()) as long as line[0] is 'REMARK' and line[1]
    starts with ' 350'. For each 'BIOMOLECULE:' record a tuple
    (targetChains, rtList) is collected, where rtList holds one 3 x 4
    array (rotation plus translation column) per three 'BIOMT' lines.

    NOTE(review): the part of the method visible here neither stores the
    last molecule in biomtDict nor returns anything -- the tail of the
    method may be missing; verify against the full source.

    @param pdbFile: line source offering readLine()
    @type  pdbFile: presumably a PDB file wrapper -- TODO confirm
    @param firstLine: first REMARK 350 record; indexed as line[0] (record
                      name) and line[1] (record content)
    @type  firstLine: sequence of str (assumed)
    """
    line = firstLine
    biomtDict = {}
    moleculeNum = -1

    while line[0] == 'REMARK' and line[1].startswith(' 350'):
        # 5 = len(' 350 ')
        biomtLine = line[1][5:].lstrip()
        if biomtLine.startswith('BIOMOLECULE:'): # start a new molecule
            if moleculeNum != -1:
                # lets update the dictionary with what we've got
                biomtDict[moleculeNum] = (targetChains, rtList)
            #12 = len('BIOMOLECULE:')
            moleculeNum = int(biomtLine[12:].strip())
            targetChains = []
            rotation = []
            translation = []
            rtList = []
            matrixLine = 0

        if biomtLine.startswith('APPLY THE FOLLOWING TO CHAINS:'):
            # parse targeted chains, we assume this comes after BIOMOLECULE line
            # 30 = len('APPLY THE FOLLOWING TO CHAINS:')
            targetChains.extend(c.strip() for c in biomtLine[30:].split(','))
        if biomtLine.startswith('AND CHAINS:'):
            # 11 = len('AND CHAINS:')
            targetChains.extend(c.strip() for c in biomtLine[11:].split(','))

        if biomtLine.startswith('BIOMT'):
            # parse rotate-translate matri{x/ces}, we assume this comes after BIOMOLECULE line
            matrixLine += 1
            # 6 = len('BIOMT#')
            rawCoords = biomtLine[6:].split()
            # field 0 is skipped; fields 1-3 form one matrix row, field 4
            # is the matching translation component
            rotation.append([float(x) for x in rawCoords[1:4]])
            translation.append(float(rawCoords[4]))
            if matrixLine % 3 == 0:
                # three rows collected: append the translation as fourth
                # column and store the resulting 3 x 4 array
                rotation = N0.array(rotation)
                translation = N0.transpose([translation])
                rotation = N0.concatenate((rotation, translation), axis=1)
                rtList.append(N0.array(rotation))
                ## rtList.append((rotation,translation))
                rotation = []
                translation = []

        try:
            line = pdbFile.readLine()
        except ValueError, what:
            # NOTE(review): `i` and `fname` are not assigned anywhere in
            # this method -- unless they are globals this handler raises a
            # NameError; also `line` is unchanged when we continue, so the
            # same record is processed again. Confirm intended behaviour.
            self.log.add('Warning: Error parsing line %i of %s' % (i, T.stripFilename(fname)))
            self.log.add('\tError: ' + str(what))
            continue
def error(self, msm, d2):
    """
    Weighted clustering error: the trace of dot( msm**w, transpose(d2) ),
    i.e. the sum over all elements of msm**w * d2.

    @param msm: membership matrix
    @type  msm: array('f')
    @param d2: distance from data to the centers
    @type  d2: array('f')

    @return: weighted error
    @rtype: float
    """
    ## memberships raised to the weighting exponent self.w
    weighted = N0.power(msm, self.w)

    return N0.trace(N0.dot(weighted, N0.transpose(d2)))
def create_membership_matrix(self):
    """
    Create a random membership matrix.

    The random generator is seeded with (seedx, seedy) unless one of the
    two is 0, in which case R.seed() is called without arguments.

    @return: random array; a (npoints x n_cluster) sample is divided by
             its N0.sum and returned transposed
    @rtype: array('f')
    """
    ## default signature has changed oldnumeric->numpy
    if self.seedx != 0 and self.seedy != 0:
        R.seed((self.seedx, self.seedy))
    else:
        R.seed()

    sample = R.random_sample((self.npoints, self.n_cluster))
    normalized = sample / N0.sum(sample)

    return N0.transpose(normalized)
def __random_matrix( self ):
    """
    Random rotation / translation matrix.

    @return: 4 x 4 array of float, random rotation and translation matrix
    @rtype: array
    """
    rotation = ma.randomRotation()
    shift = self.__random_translation()

    ## 3 x 4 matrix: columns 0..2 hold the rotation, column 3 the translation
    shift_column = N0.transpose( [ shift.tolist() ] )
    upper = N0.concatenate( (rotation, shift_column), 1 )

    ## make it square by appending the row 0 0 0 1
    bottom = N0.array( [[0,0,0,1]], N0.Float32 )

    return N0.concatenate( (upper, bottom), 0 )
def prepare( self ):
    """
    Write a xyzrn coordinate file to disc.
    Overrides Executor method.

    Each line of self.f_xyzrn holds the values of one row of
    (x, y, z, radius) as formatted by str(), followed by ' 1 ' and
    the matching entry of the name array.
    """
    ## get radii and name array
    p2x = Pdb2xyzrn(self.model, verbose=self.verbose, debug=self.debug )
    r, n = p2x.run()

    ## append the radii as fourth column to the coordinates
    xyz = self.model.xyz
    xyzr = N0.concatenate( ( xyz, N0.transpose([r]) ), axis=1 )

    f = open( self.f_xyzrn, 'w' )
    try:
        for i, line in enumerate( xyzr ):
            ## strip the enclosing brackets of the array string; do not
            ## rely on a padding blank after '[' -- it is not always
            ## there and a fixed [2:-1] slice would then cut off the
            ## first character (digit or sign) of the x coordinate
            values = str(line)[1:-1].strip()
            f.write( values + ' 1 ' + n[i] + '\n')
    finally:
        ## release the file handle even if writing fails
        f.close()
def transform( self, *rt ):
    """
    Apply given transformation to all frames (in place).

    Called with two arguments, they are taken as rotation matrix and
    translation vector; called with one argument, rotation and
    translation are cut out of it as [0:3, 0:3] and [0:3, 3].

    @param rt: rotation translation matrix
    @type  rt: array( 4 x 4 ) OR array(3 x 3), array(3 x 1)
    """
    if len(rt) == 2:
        rotation, shift = rt
    else:
        matrix = rt[0]
        rotation = matrix[0:3, 0:3]
        shift = matrix[0:3, 3]

    ## frames are multiplied from the left, hence the transposed rotation
    rotation_t = N0.transpose( rotation ).astype(N0.Float32)
    shift = shift.astype(N0.Float32)

    for idx, frame in enumerate( self.frames ):
        self.frames[ idx ] = N0.array( N0.dot( frame, rotation_t ) ) + shift
def rtTuple2matrix(self, r, t):
    """
    Put rotation and translation matrix into single 4x4 matrix.

    @param r: rotation matrix, array 3x3 of float
    @type  r: array
    @param t: translation vector, array 1x3 of float
    @type  t: vector

    @return: rotation/translation matrix, array 4x4 of float
    @rtype: array
    """
    ## translation as column vector
    t_column = N0.transpose([t.tolist()])

    ## 3 x 4 matrix: columns 0..2 hold the rotation, column 3 the translation
    upper = N0.concatenate((r, t_column), 1)

    ## make it square by appending the row 0 0 0 1
    bottom = N0.array([[0, 0, 0, 1]], N0.Float32)
    square = N0.concatenate((upper, bottom), 0)

    return square.astype(N0.Float32)
def rowDistances(x, y):
    """
    Calculate the distances between the items of two arrays (of same shape)
    after least-squares superpositioning.

    @param x: first set of coordinates
    @type  x: array('f')
    @param y: second set of coordinates
    @type  y: array('f')

    @return: array( len(x), 'f' ), distance between x[i] and y[i] for all i
    @rtype: array
    """
    ## transformation that best matches y onto x
    rotation, shift = findTransformation(x, y)

    ## superimposed copy of y
    fitted = N0.dot(y, N0.transpose(rotation)) + shift

    ## length of the difference vector of each row
    deviation = x - fitted
    return N0.sqrt(N0.sum(N0.power(deviation, 2), 1))
def reduceXyz(self, xyz, axis=0):
    """
    Reduce the number of atoms in the given coordinate set. The set must
    have the same length and order as the reference model. It may have
    an additional (time) dimension as first axis.

    Each group of atom indices in self.groups is replaced by the
    mass-weighted center of its atoms.

    @param xyz: coordinates (N_atoms x 3) or (N_frames x N_atoms x 3)
    @type  xyz: array
    @param axis: axis with atoms (default: 0)
    @type  axis: int

    @return: coordinate array (N_less_atoms x 3) or
             (N_frames x N_less_atoms x 3); None if there are no groups
    @rtype: array
    """
    masses = self.m.atoms.get('mass')
    centers = []

    for atom_indices in self.groups:
        x = N0.take(xyz, atom_indices, axis)
        m = N0.take(masses, atom_indices)

        ## mass-weighted average over the atoms of this group
        center = N0.sum(x * N0.transpose([ m, ]), axis=axis) / N0.sum(m)

        ## re-insert the atom axis that was summed away
        if axis == 0:
            center = center[N0.NewAxis, :]
        if axis == 1:
            center = center[:, N0.NewAxis, :]

        centers.append(center)

    if not centers:
        return None

    ## join all centers in one go instead of re-copying the growing
    ## result array for every group
    return N0.concatenate(centers, axis)
def thin(self, step=1):
    """
    Keep only each step'th frame of every ensemble member.

    @param step: 1..keep all frames, 2..skip first and every second, ..
                 (default: 1)
    @type  step: int

    @return: reduced EnsembleTraj
    @rtype: EnsembleTraj
    """
    T.ensure(step, int, forbidden=[0])

    ## n_members x frames-per-member table of frame indices
    member_frames = N0.array(
        [self.memberIndices(m) for m in range(self.n_members)])

    ## positions to keep within each member: step-1, 2*step-1, ..
    n_per_member = N0.shape(member_frames)[1]
    keep = range(-1, n_per_member, step)[1:]

    member_frames = N0.take(member_frames, keep, 1)

    ## flatten the transposed table (members take turns)
    return self.takeFrames(N0.ravel(N0.transpose(member_frames)))
def clusterEntropy(self):
    """
    Entropy-like measure per row of the membership matrix self.msm:
    -1/npoints * sum_k( msm[i,k] * log(msm[i,k]) ) for each row i.

    @return: one value per row of self.msm
    @rtype: array
    """
    log_msm = N0.log(self.msm)

    ## the diagonal of msm . log(msm)^T holds the row-wise sums
    row_sums = N0.diagonal(N0.dot(self.msm, N0.transpose(log_msm)))

    scale = -1 / float(self.npoints)
    return scale * row_sums
def calc_cluster_center(self, msm):
    """
    Weighted average of self.data for each row of the membership matrix.

    @param msm: membership matrix
    @type  msm: array

    @return: array with one row per row of msm: the average of self.data
             weighted with msm**w
    @rtype: array
    """
    ## memberships raised to the weighting exponent self.w
    weights = N0.power(msm, self.w)

    weighted_sum = N0.dot(weights, self.data)

    ## transpose so that row i gets divided by the total weight of row i
    scaled = N0.transpose(weighted_sum) / N0.sum(weights, 1)

    return N0.transpose(scaled)
def fit( self, mask=None, ref=None, n_it=1, prof='rms', verbose=1, fit=1,
         **profInfos ):
    """
    Superimpose all coordinate frames on reference coordinates. Put rms
    values in a profile. If n_it != 1, the fraction of atoms considered
    for the fit is put into a profile called |prof|_considered
    (i.e. by default 'rms_considered').

    @param mask: atom mask, atoms to consider default: [all]
    @type  mask: [1|0]
    @param ref: use as reference, default: None, average Structure
    @type  ref: PDBModel
    @param n_it: number of fit iterations, kicking out outliers on the way
                 1 -> classic single fit, 0 -> until convergence
                 (default: 1)
    @type  n_it: int
    @param prof: save rms per frame in profile of this name, ['rms']
    @type  prof: str
    @param verbose: print progress info to STDERR (default: 1)
    @type  verbose: 1|0
    @param fit: transform frames after match, otherwise just calc rms
                (default: 1)
    @type  fit: 1|0
    @param profInfos: additional key=value pairs for rms profile info []
    @type  profInfos: key=value
    """
    if ref is not None:
        refxyz = ref.getXyz()
    else:
        refxyz = N0.average( self.frames, 0 )

    if mask is None:
        mask = N0.ones( len( refxyz ), N0.Int32 )

    ## from here on the reference only holds the masked atoms
    refxyz = N0.compress( mask, refxyz, 0 )

    if verbose:
        T.errWrite( "rmsd fitting..." )

    rms = []           ## rms value of each frame
    non_outliers = []  ## fraction of atoms considered for rms and fit
    iterations = []    ## iterations per frame (collected, not used below)

    for i, xyz in enumerate( self.frames ):

        selection = N0.compress( mask, xyz, 0 )

        if n_it == 1:
            ## classic single fit; rms calculated over the masked atoms
            r, t = rmsFit.findTransformation( refxyz, selection )

            xyz_transformed = N0.dot( xyz, N0.transpose(r) ) + t

            delta = N0.compress( mask, xyz_transformed, 0 ) - refxyz
            d = N0.sqrt( N0.sum( N0.power( delta, 2 ), 1 ) )

            rms.append( N0.sqrt( N0.average( d**2 ) ) )

        else:
            ## iterative fit; the last trace entry describes the result
            (r, t), rmsdList = rmsFit.match( refxyz, selection, n_it )

            iterations.append( len( rmsdList ) )
            non_outliers.append( rmsdList[-1][0] )

            xyz_transformed = N0.dot( xyz, N0.transpose(r) ) + t

            rms.append( rmsdList[-1][1] )

        if fit:
            self.frames[i] = xyz_transformed.astype(N0.Float32)

        if verbose and i % 100 == 0:
            T.errWrite( '#' )

    self.setProfile( prof, rms, n_iterations=n_it, **profInfos )

    if non_outliers:
        self.setProfile( prof+'_considered', non_outliers,
                   n_iterations=n_it,
                   comment='fraction of atoms considered for iterative fit' )

    if verbose:
        T.errWrite( 'done\n' )
def parse_result(self):
    """
    Extract some information about the profile as well as the
    match state emission scores. Keys of the returned dictionary::
      'AA', 'name', 'NrSeq', 'emmScore', 'accession',
      'maxAllScale', 'seqNr', 'profLength', 'ent', 'absSum'

    @return: dictionary with various information about the profile
    @rtype: dict

    @raise HmmerError: if the result file is missing or T.fileLength
                       reports less than 10 for it
    """
    ## check that the output file is there and seems valid
    if not os.path.exists(self.f_out):
        raise HmmerError(
            'Hmmerfetch result file %s does not exist.'%self.f_out)

    if T.fileLength(self.f_out) < 10:
        raise HmmerError(
            'Hmmerfetch result file %s seems incomplete.'%self.f_out)

    ## read result
    hmm = open(self.f_out, 'r')
    out = hmm.read()
    hmm.close()

    def fields(pattern):
        ## whitespace-separated fields of the first match of pattern
        return re.findall(pattern, out)[0].split()

    profileDic = {}

    ## collect some data about the hmm profile
    profileDic['name'] = self.hmmName
    profileDic['profLength'] = int(fields('LENG\s+[0-9]+')[1])
    profileDic['accession'] = fields('ACC\s+PF[0-9]+')[1]
    profileDic['NrSeq'] = int(fields('NSEQ\s+[0-9]+')[1])
    profileDic['AA'] = fields('HMM[ ]+' + '[A-Y][ ]+'*20)[1:]

    ## collect null emission scores (first field is the tag itself)
    nullEmm = [float(j)
               for j in fields('NULE[ ]+' + '[-0-9]+[ ]+' * 20)[1:]]

    ## get emission scores: per position its number followed by 20 scores
    prob = []
    for position in range(1, profileDic['profLength'] + 1):
        row_pattern = "[ ]+%i" % position + "[ ]+[-0-9]+" * 20
        prob.append([float(j) for j in fields(row_pattern)])

    profileDic['seqNr'] = N0.transpose(N0.take(prob, (0, ), 1))
    profileDic['emmScore'] = N0.array(prob)[:, 1:]

    ## calculate emission probabilities
    emmProb, nullProb = self.hmmEmm2Prob(nullEmm, profileDic['emmScore'])

    profileDic['ent'] = N0.array(
        [N0.resize(self.entropy(e, nullProb), (1, 20))[0] for e in emmProb])

    ## scores without the leading position number
    proba = N0.array(prob)[:, 1:]

    ## each position: sum of absolute scores, repeated to the row's shape
    profileDic['absSum'] = [
        N0.resize(N0.sum(N0.absolute(row)), N0.shape(row)) for row in proba]

    ## each position: maximum of the scores scaled by average and SD,
    ## repeated to the row's shape
    scaled_max = []
    for row in proba:
        p_scale = (row - N0.average(row)) / math.SD(row)
        top = p_scale[N0.argmax(N0.array(p_scale))]
        scaled_max.append(N0.resize(top, N0.shape(row)))

    profileDic['maxAllScale'] = scaled_max

    return profileDic
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing outliers.
    Superimposed array y would be C{ N0.dot(y, N0.transpose(r)) + t }.

    Rows whose distance after the fit is not smaller than
    rmsd + z * stdv are excluded from the next fit. If the distances
    show no spread at all (stdv is 0, e.g. for a perfect match), there is
    nothing to reject: the iteration stops and no row is marked as outlier.

    @param x: first set of coordinates
    @type  x: array
    @param y: second set of coordinates
    @type  y: array
    @param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
    @type  n_iterations: 1|0
    @param z: number of standard deviations for outlier definition
              (default: 2)
    @type  z: float
    @param eps_rmsd: tolerance in rmsd (default: 0.5)
    @type  eps_rmsd: float
    @param eps_stdv: tolerance in standard deviations (default: 0.05)
    @type  eps_stdv: float

    @return: (r,t), [ [fraction_considered, rmsd_for_it, outliers] ]
             with one trace entry per iteration
    @rtype: (array, array), [float, float, int]
    """
    iter_trace = []

    rmsd_old = 0
    stdv_old = 0

    n = 0
    converged = 0

    mask = N0.ones(len(y), N0.Int32)

    while not converged:

        ## find transformation for best match of the rows still considered
        r, t = findTransformation(N0.compress(mask, x, 0),
                                  N0.compress(mask, y, 0))

        ## transform coordinates
        xt = N0.dot(y, N0.transpose(r)) + t

        ## calculate row distances (zero for excluded rows)
        d = N0.sqrt(N0.sum(N0.power(x - xt, 2), 1)) * mask

        ## calculate rmsd and stdv over the considered rows
        considered = N0.compress(mask, d)
        rmsd = N0.sqrt(N0.average(considered**2))
        stdv = MU.SD(considered)

        ## without spread the relative change of stdv is undefined
        ## (division by zero) and the strict '<' test below would
        ## throw out every single row
        has_spread = stdv > 0

        ## check conditions for convergence
        if has_spread:
            d_rmsd = abs(rmsd - rmsd_old)
            d_stdv = abs(1 - stdv_old / stdv)

            if d_rmsd < eps_rmsd and d_stdv < eps_stdv:
                converged = 1
            else:
                rmsd_old = rmsd
                stdv_old = stdv
        else:
            converged = 1

        ## store result
        perc = round(float(N0.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows
        if has_spread:
            mask = N0.logical_and(mask, N0.less(d, rmsd + z * stdv))

        outliers = N0.nonzero(N0.logical_not(mask))
        iter_trace.append([perc, round(rmsd, 3), outliers])

        n += 1

        if n_iterations and n >= n_iterations:
            break

    return (r, t), iter_trace
def parse_result( self ):
    """
    Extract some information about the profile as well as the
    match state emission scores. Keys of the returned dictionary::
      'AA', 'name', 'NrSeq', 'emmScore', 'accession',
      'maxAllScale', 'seqNr', 'profLength', 'ent', 'absSum'

    @return: dictionary with various information about the profile
    @rtype: dict

    @raise HmmerError: if the result file does not exist or
                       T.fileLength reports less than 10 for it
    """
    ## check that the output file is there and seems valid
    if not os.path.exists( self.f_out ):
        raise HmmerError(
            'Hmmerfetch result file %s does not exist.'%self.f_out )
    if T.fileLength( self.f_out ) < 10:
        raise HmmerError(
            'Hmmerfetch result file %s seems incomplete.'%self.f_out )

    ## read result
    hmm = open( self.f_out, 'r')
    out = hmm.read()
    hmm.close()

    ## collect some data about the hmm profile
    length_rec = re.findall('LENG\s+[0-9]+', out)[0].split()
    accession_rec = re.findall('ACC\s+PF[0-9]+', out)[0].split()
    nseq_rec = re.findall('NSEQ\s+[0-9]+', out)[0].split()
    header_rec = re.findall('HMM[ ]+' + '[A-Y][ ]+'*20, out)[0].split()

    profileDic = {}
    profileDic['name'] = self.hmmName
    profileDic['profLength'] = int( length_rec[1] )
    profileDic['accession'] = accession_rec[1]
    profileDic['NrSeq'] = int( nseq_rec[1] )
    profileDic['AA'] = header_rec[1:]

    ## collect null emission scores, skipping the leading tag
    null_rec = re.findall( 'NULE[ ]+' + '[-0-9]+[ ]+'*20, out )[0].split()
    nullEmm = [ float(j) for j in null_rec[1:] ]

    ## get emission scores; each row: position number plus 20 scores
    prob = []
    for nr in range( 1, profileDic['profLength']+1 ):
        row_rec = re.findall( "[ ]+%i"%nr + "[ ]+[-0-9]+"*20, out )[0]
        prob.append( [ float(j) for j in row_rec.split() ] )

    profileDic['seqNr'] = N0.transpose( N0.take( prob, (0,),1 ) )
    profileDic['emmScore'] = N0.array(prob)[:,1:]

    ## calculate emission probabilities
    emmProb, nullProb = self.hmmEmm2Prob( nullEmm, profileDic['emmScore'])

    ent = []
    for e in emmProb:
        ent.append( N0.resize( self.entropy(e, nullProb), (1,20) )[0] )
    profileDic['ent'] = N0.array(ent)

    ## scores only (position numbers removed)
    proba = N0.array(prob)[:,1:]

    abs_sums = []    ## N0.sum( abs( scores ) ), repeated along the row
    scaled_max = []  ## max of the normalized scores, repeated along the row

    for scores in proba:
        total = N0.sum( N0.absolute( scores ) )
        abs_sums.append( N0.resize( total, N0.shape( scores ) ) )

    for scores in proba:
        p_scale = (scores - N0.average(scores) )/ math.SD(scores)
        best = p_scale[ N0.argmax( N0.array(p_scale) ) ]
        scaled_max.append( N0.resize( best, N0.shape( scores ) ) )

    profileDic['absSum'] = abs_sums
    profileDic['maxAllScale'] = scaled_max

    return profileDic
def __alignMatrixDimension(self, cm, thisSeq, castSeq, axis=0):
    """
    Correct one dimension of contactMatrix by inserting and deleting
    columns, so that it can be later compared to contact matrices based
    on slightly different sequences.

    Only 'delete' and 'insert' operations of the sequence comparison are
    applied; all other operations leave the matrix untouched.

    @param cm: contact matrix, 2D matrix of residue contacts
               receptor x ligand sequence
    @type  cm: array
    @param thisSeq: AA sequence of this dimension of the contactMatrix
    @type  thisSeq: string
    @param castSeq: AA sequence of this dimension in the other contact
    @type  castSeq: string
    @param axis: which dimension to adapt (0=receptor, 1=ligand)
    @type  axis: 1|0

    @return: contact matrix with residue contacts compatible to castSeq.
    @rtype: 2D array
    """
    ## edit operations that turn thisSeq into castSeq
    opcodes = SequenceMatcher(None, thisSeq, castSeq).get_opcodes()

    ## the code below edits columns; flip the matrix if rows are meant
    if not axis:
        cm = N0.transpose(cm)

    ## keeps track of how far earlier edits have shifted the columns
    offset = 0

    for tag, this_from, this_to, cast_from, cast_to in opcodes:

        if str(tag) == 'delete':
            ## cut out the columns missing in castSeq
            before = cm[:, :this_from + offset]
            after = cm[:, this_to + offset:]
            cm = N0.concatenate((before, after), 1)
            offset = offset + this_from - this_to

        elif str(tag) == 'insert':
            ## put zero columns where castSeq has additional residues
            n_new = cast_to - cast_from
            zeros = N0.array([[0] * n_new] * N0.size(cm, 0))

            before = cm[:, :this_from + offset]
            after = cm[:, this_to + offset:]
            cm = N0.concatenate((before, zeros, after), 1)
            offset = offset + cast_to - cast_from

    if not axis:
        return N0.transpose(cm)
    return cm