def __atom2residueMatrix(self, m):
    """
    Collapse an atom x atom contact matrix into a residue x residue one.

    A residue pair is flagged (1) as soon as any atom pair belonging to
    the two residues is flagged in the atom matrix.

    @param m: atom contact matrix, array n x k with 1 (contact) or
              0 (no contact); first axis is sliced with the receptor
              residue index, second axis with the ligand residue index
    @type  m: array

    @return: residue contact matrix,
             2-D array (residues_receptor x residues_ligand) of 1 / 0
    @rtype: array
    """
    ## residue start positions, closed by the total number of atoms so that
    ## bounds[i] : bounds[i+1] always spans the atoms of residue i
    rec_bounds = N0.concatenate(
        (self.rec().resIndex(), [self.rec().lenAtoms()]))
    lig_bounds = N0.concatenate(
        (self.lig_model.resIndex(), [self.lig_model.lenAtoms()]))

    n_rec = len(rec_bounds) - 1
    n_lig = len(lig_bounds) - 1

    result = N0.zeros((n_rec, n_lig), N0.Int)

    for i_rec in range(n_rec):
        rec_from = int(rec_bounds[i_rec])
        rec_to = int(rec_bounds[i_rec + 1])

        for i_lig in range(n_lig):
            lig_from = int(lig_bounds[i_lig])
            lig_to = int(lig_bounds[i_lig + 1])

            ## any atom-atom contact makes it a residue-residue contact
            if N0.any(m[rec_from:rec_to, lig_from:lig_to]):
                result[i_rec, i_lig] = 1

    return result
def concat( self, *traj ):
    """
    Concatenate this with other trajectories. The ref model of the
    new Trajectory is a clone of this trajectory's ref model::

       concat( traj [, traj2, traj3, ..] ) -> Trajectory

    The trajectories are appended one after the other (first axis of
    the frames array). Called without arguments, this trajectory itself
    is returned (no copy).

    @param traj: one or more Trajectory with identical atoms as this one
    @type  traj: Trajectories

    @return: concatenated trajectories
    @rtype: Trajectory
    """
    if len( traj ) == 0:
        return self

    other = traj[0]

    r = self.__class__()

    r.frames = N0.concatenate( (self.frames, other.frames), 0 )

    r.setRef( self.ref.clone() )

    ## frame names are only kept if both trajectories have them
    if self.frameNames and other.frameNames:
        r.frameNames = self.frameNames + other.frameNames

    try:
        if self.pc is not None and other.pc is not None:
            r.pc['p'] = N0.concatenate( (self.pc['p'], other.pc['p']), 0 )
            r.pc['u'] = N0.concatenate( (self.pc['u'], other.pc['u']), 0 )
    except TypeError as why:
        ## best effort: PC data that cannot be merged is reported, not fatal
        EHandler.error('cannot concat PC '+str(why) )

    ## NOTE(review): unlike concatAtoms(), no profiles are transferred to
    ## the result here -- confirm whether that is intended.

    ## Previously the result was built but never returned and any further
    ## trajectories were ignored; append the remaining ones recursively.
    return r.concat( *traj[1:] )
def concatAtoms( self, *traj ):
    """
    Join trajectories 'horizontally': for every frame, the coordinates of
    the other trajectory are appended to the coordinates of this one
    (second axis of the frames array). The ref model of the result is
    this trajectory's ref model concatenated with the other's::

       concatAtoms( traj1 [traj2, traj3..]) -> Trajectory

    Profiles are cloned from, and frame names are shared with, this
    trajectory. Called without arguments, this trajectory itself is
    returned (no copy).

    @param traj: one or more Trajectory of the same number of frames
    @type  traj: Trajectories

    @return: trajectory with concatenated atoms
    @rtype: Trajectory
    """
    if not traj:
        return self

    head, rest = traj[0], traj[1:]

    result = self.__class__()

    result.frames = N0.concatenate( (self.frames, head.frames), 1 )
    result.setRef( self.ref.concat( head.getRef() ) )

    result.profiles = self.profiles.clone()
    result.frameNames = self.frameNames

    ## remaining trajectories are appended one at a time
    return result.concatAtoms( *rest )
def __exposedResidues( self, ASA_values, sidechainCut=0.0,
                       backboneCut=0.0, totalCut=0.0 ):
    """
    Decide which residues are surface exposed.

    Column 0 of ASA_values is compared against totalCut, column 1 against
    backboneCut and column 2 against sidechainCut (strictly greater).

    NOTE(review): the three comparison results are summed and the sum is
    tested for > 0. Assuming N0.sum reduces over the first axis, a residue
    is reported as exposed as soon as ANY of the three cutoffs is exceeded.
    Earlier documentation stated that all three have to pass -- confirm
    which behaviour is intended.

    @param ASA_values: array with one row per residue and three ASA
                       columns, calculated in L{__read_residueASA}
    @type  ASA_values: array
    @param sidechainCut: cutoff for the side chain ASA (default: 0.0)
    @type  sidechainCut: float
    @param backboneCut: cutoff for the backbone ASA (default: 0.0)
    @type  backboneCut: float
    @param totalCut: cutoff for the total ASA (default: 0.0)
    @type  totalCut: float

    @return: residue mask, where 0 = buried
    @rtype: [1|0]
    """
    columns = N0.transpose( ASA_values )

    over_total = N0.greater( columns[0], totalCut )
    over_backbone = N0.greater( columns[1], backboneCut )
    over_sidechain = N0.greater( columns[2], sidechainCut )

    ## one row per criterion, one column per residue
    passed = N0.concatenate(
        ([over_total], [over_backbone], [over_sidechain]) )

    return N0.greater( N0.sum( passed ), 0 )
def castHmmDic( self, hmmDic, repete, hmmGap, key ):
    """
    Expand one score profile of hmmDic to the number of repeats of the
    profile and drop the positions listed as gaps for each repeat.

    hmmDic is modified in place (hmmDic[key] is replaced) and returned.
    For repete < 1 no score is ever assigned and the method fails with
    an unbound-local NameError.

    @param hmmDic: dictionary from L{getHmmProfile}
    @type  hmmDic: dict
    @param repete: repeat information from L{align}
    @type  repete: int
    @param hmmGap: information about gaps from L{align},
                   one entry of positions per repeat
    @type  hmmGap: [int]
    @param key: name of scoring method to adjust for gaps and repeats
    @type  key: str

    @return: dictionary with information about the profile
    @rtype: dict
    """
    profile = hmmDic[key]

    for rep in range( repete ):
        ## 1 = keep position, 0 = position is a gap in this repeat
        keep = N0.ones( len(profile) )
        N0.put( keep, hmmGap[rep], 0 )

        segment = N0.compress( keep, profile, 0 )

        if rep == 0:
            score = segment
        else:
            ## each further repeat is put IN FRONT of what was collected
            score = N0.concatenate( ( segment, score ) )

    hmmDic[key] = score

    return hmmDic
def test_ComplexTraj(self):
    """Dock.ComplexTraj test"""
    import Biskit.tools as T

    ## The test folder holds no complex trajectory, so a fake one is
    ## assembled from five copies of the same complex structure.
    pdb_files = [T.testRoot() + '/com/1BGS.pdb'] * 5

    t = ComplexTraj(Trajectory(pdb_files, verbose=self.local),
                    recChains=[0])

    ## disabled: contact density plot
    ## if self.local:
    ##     print 'plotting contact density...'
    ##     t.plotContactDensity( step=2 )

    ## relabel the tail of the ligand so that it looks like a second chain
    for atom in range(1093 + 98, 1968):
        t.ref.atoms['chain_id'][atom] = 'B'

    t.ref.chainIndex(force=1, cache=1)
    t.cl = [1, 2]

    ## re-order the atoms: atoms 1093..1190 first, then 0..1092,
    ## then the relabelled tail 1191..1967
    new_order = N0.concatenate(
        (range(1093, 1191), range(0, 1093), range(1191, 1968)))

    reordered = t.takeAtoms(new_order)

    contactMat = reordered.atomContacts(1)

    if self.local:
        print('Receptor chains: %s Ligand chains: %s' % (t.cr, t.cl))

    n_contacts = N0.sum(N0.ravel(contactMat))
    self.assertEqual(n_contacts, 308)
def castHmmDic(self, hmmDic, repete, hmmGap, key):
    """
    Expand one score profile of hmmDic to the number of repeats of the
    profile and drop the positions listed as gaps for each repeat.

    hmmDic is modified in place (hmmDic[key] is replaced) and returned.
    For repete < 1 no score is ever assigned and the method fails with
    an unbound-local NameError.

    @param hmmDic: dictionary from L{getHmmProfile}
    @type  hmmDic: dict
    @param repete: repeat information from L{align}
    @type  repete: int
    @param hmmGap: information about gaps from L{align},
                   one entry of positions per repeat
    @type  hmmGap: [int]
    @param key: name of scoring method to adjust for gaps and repeats
    @type  key: str

    @return: dictionary with information about the profile
    @rtype: dict
    """
    profile = hmmDic[key]

    for rep in range(repete):
        ## 1 = keep position, 0 = position is a gap in this repeat
        keep = N0.ones(len(profile))
        N0.put(keep, hmmGap[rep], 0)

        segment = N0.compress(keep, profile, 0)

        if rep == 0:
            score = segment
        else:
            ## each further repeat is put IN FRONT of what was collected
            score = N0.concatenate((segment, score))

    hmmDic[key] = score

    return hmmDic
def __parseBiomt(self, pdbFile, firstLine):
    """
    Extract BIOMT (biological unit) information from REMARK 350 lines.
    Creates a 'BIOMT' dictionary.

    Records are consumed from pdbFile until the first one that is not a
    REMARK 350. For every BIOMOLECULE the targeted chains and the list of
    rotation/translation matrices (3 x 4; rotation in columns 0-2,
    translation in column 3) are collected. Chain and matrix records
    found before the first BIOMOLECULE header are ignored; an incomplete
    matrix (fewer than three BIOMT rows) is dropped.

    @param pdbFile: reader positioned right after firstLine; readLine()
                    is expected to return a (record type, content) pair
    @type  pdbFile: object with readLine()
    @param firstLine: first REMARK 350 record as returned by readLine()
    @type  firstLine: (str, str)

    @return: {'BIOMT': {molecule number: (chains, [matrix, ..])}} and the
             first record that does not belong to REMARK 350 any more; it
             has already been consumed from pdbFile, so the caller has to
             process it
    @rtype: (dict, (str, str))
    """
    line = firstLine
    biomtDict = {}
    moleculeNum = -1

    ## initialised up-front so that records preceding the first BIOMOLECULE
    ## header cannot trigger a NameError; the first header resets them
    targetChains = []
    rotation = []
    translation = []
    rtList = []
    matrixLine = 0

    while line[0] == 'REMARK' and line[1].startswith(' 350'):

        # 5 = len(' 350 ')
        biomtLine = line[1][5:].lstrip()

        if biomtLine.startswith('BIOMOLECULE:'):
            # start a new molecule; first store what we've got so far
            if moleculeNum != -1:
                biomtDict[moleculeNum] = (targetChains, rtList)

            # 12 = len('BIOMOLECULE:')
            moleculeNum = int(biomtLine[12:].strip())
            targetChains = []
            rotation = []
            translation = []
            rtList = []
            matrixLine = 0

        elif biomtLine.startswith('APPLY THE FOLLOWING TO CHAINS:'):
            # 30 = len('APPLY THE FOLLOWING TO CHAINS:')
            targetChains.extend(
                c.strip() for c in biomtLine[30:].split(','))

        elif biomtLine.startswith('AND CHAINS:'):
            # 11 = len('AND CHAINS:')
            targetChains.extend(
                c.strip() for c in biomtLine[11:].split(','))

        elif biomtLine.startswith('BIOMT'):
            # one row of a rotate-translate matrix
            matrixLine += 1

            # 6 = len('BIOMT#'); field 0 is the matrix serial number
            rawCoords = biomtLine[6:].split()
            rotation.append([float(x) for x in rawCoords[1:4]])
            translation.append(float(rawCoords[4]))

            if matrixLine % 3 == 0:
                # three rows collected: glue translation on as 4th column
                rt = N0.concatenate(
                    (N0.array(rotation), N0.transpose([translation])),
                    axis=1)
                rtList.append(rt)
                rotation = []
                translation = []

        ## Fetch the next record. A record the reader cannot parse is
        ## logged and skipped; the current one must not be processed twice.
        ## Assumes readLine() has consumed the offending line when it
        ## raises -- TODO confirm against the reader implementation.
        while True:
            try:
                line = pdbFile.readLine()
            except ValueError as what:
                self.log.add('Warning: Error parsing a line following '
                             'REMARK 350')
                self.log.add('\tError: ' + str(what))
            else:
                break

    # store the last molecule, which is not followed by another header
    if moleculeNum != -1:
        biomtDict[moleculeNum] = (targetChains, rtList)

    return {'BIOMT': biomtDict}, line
def __exposedResidues(self, ASA_values, sidechainCut=0.0,
                      backboneCut=0.0, totalCut=0.0):
    """
    Decide which residues are surface exposed.

    Column 0 of ASA_values is compared against totalCut, column 1 against
    backboneCut and column 2 against sidechainCut (strictly greater).

    NOTE(review): the three comparison results are summed and the sum is
    tested for > 0. Assuming N0.sum reduces over the first axis, a residue
    is reported as exposed as soon as ANY of the three cutoffs is exceeded.
    Earlier documentation stated that all three have to pass -- confirm
    which behaviour is intended.

    @param ASA_values: array with one row per residue and three ASA
                       columns, calculated in L{__read_residueASA}
    @type  ASA_values: array
    @param sidechainCut: cutoff for the side chain ASA (default: 0.0)
    @type  sidechainCut: float
    @param backboneCut: cutoff for the backbone ASA (default: 0.0)
    @type  backboneCut: float
    @param totalCut: cutoff for the total ASA (default: 0.0)
    @type  totalCut: float

    @return: residue mask, where 0 = buried
    @rtype: [1|0]
    """
    columns = N0.transpose(ASA_values)

    over_total = N0.greater(columns[0], totalCut)
    over_backbone = N0.greater(columns[1], backboneCut)
    over_sidechain = N0.greater(columns[2], sidechainCut)

    ## one row per criterion, one column per residue
    passed = N0.concatenate(
        ([over_total], [over_backbone], [over_sidechain]))

    return N0.greater(N0.sum(passed), 0)
def __random_matrix( self ):
    """
    Random rotation / translation matrix.

    @return: 4 x 4 array, random rotation (upper left 3 x 3) and
             random translation (first three rows of the last column);
             the last row is 0, 0, 0, 1
    @rtype: array
    """
    rotation = ma.randomRotation()
    translation = self.__random_translation()

    ## 3 x 4 matrix: [0:3, 0:3] holds the rotation, [0:3, 3] the translation
    trans_column = N0.transpose( [ translation.tolist() ] )
    rt = N0.concatenate( (rotation, trans_column), 1 )

    ## append the closing row to make it square
    bottom_row = N0.array( [[0,0,0,1]], N0.Float32 )

    return N0.concatenate( (rt, bottom_row), 0 )
def rtTuple2matrix(self, r, t):
    """
    Combine a rotation matrix and a translation vector into a single
    4 x 4 matrix.

    @param r: rotation matrix, array 3x3 of float
    @type  r: array
    @param t: translation vector, array 1x3 of float
    @type  t: vector

    @return: rotation/translation matrix, array 4x4 of Float32;
             rotation in [0:3, 0:3], translation in [0:3, 3],
             last row is 0, 0, 0, 1
    @rtype: array
    """
    ## translation as a 3 x 1 column that can be glued to the rotation
    trans_column = N0.transpose([t.tolist()])
    upper = N0.concatenate((r, trans_column), 1)

    ## closing row makes the matrix square
    bottom_row = N0.array([[0, 0, 0, 1]], N0.Float32)
    square = N0.concatenate((upper, bottom_row), 0)

    return square.astype(N0.Float32)
def rmsInterface(self, ref, cutoff=4.5, fit=1):
    """
    Rmsd between the interface of this and of a reference complex.
    The interface is defined on the reference: every residue that takes
    part in a residue contact (see resContacts) for the given cutoff.
    Only heavy atoms of these residues enter the rmsd.

    If the receptor or ligand models of the two complexes do not pass
    the equals() comparison, both complexes are first compressed to their
    common atoms (see equalAtoms).

    @param ref: reference complex
    @type  ref: Complex
    @param cutoff: atom distance cutoff for interface residue definition
                   (default: 4.5)
    @type  cutoff: float
    @param fit: least-squares fit before calculating the rms
                (default: 1)
    @type  fit: 1|0

    @return: interface rmsd
    @rtype: float
    """
    this = self

    ## make both complexes comparable atom by atom, if they are not yet
    if not (ref.rec_model.equals(self.rec_model)[1]
            and ref.lig_model.equals(self.lig_model)[1]):

        m_rec, m_rec_ref, m_lig, m_lig_ref = self.equalAtoms(ref)
        this = self.compress(m_rec, m_lig)
        ref = ref.compress(m_rec_ref, m_lig_ref)

    ## residues with at least one contact, expanded to atom masks
    contacts = ref.resContacts(cutoff)
    rec_atoms = ref.rec_model.res2atomMask(N0.sum(contacts, 1))
    lig_atoms = ref.lig_model.res2atomMask(N0.sum(contacts, 0))

    ## restrict the interface (receptor atoms first, then ligand atoms)
    ## to heavy atoms
    heavy = N0.concatenate(
        (ref.rec().maskHeavy(), ref.lig_model.maskHeavy()))
    interface_heavy = N0.concatenate((rec_atoms, lig_atoms)) * heavy

    return ref.model().rms(this.model(), interface_heavy, fit=fit)
def convertChainIdsCter(self, model, chains):
    """
    Convert normal chain ids to chain ids considering chain breaks.
    Each chain is looked up via its last atom.

    @param model: model the chain ids refer to
    @type  model: PDBModel
    @param chains: chain indices (without chain breaks)
    @type  chains: [int]

    @return: chain indices including chain breaks, as returned by
             model.atom2chainIndices(.., breaks=1); an empty input is
             returned unchanged
    @rtype: [int]
    """
    if len(chains) == 0:
        return chains

    ## chain start positions, closed by len(model) so that the entry
    ## following a chain always marks where that chain ends
    boundaries = N0.concatenate((model.chainIndex(), [len(model)]))

    ## last atom of a chain = position just before the next boundary
    next_starts = N0.take(boundaries, N0.array(chains) + 1)
    last_atoms = next_starts - 1

    ## map the atoms back to chains, this time including chain breaks
    return model.atom2chainIndices(last_atoms, breaks=1)
def prepare( self ):
    """
    Write a xyzrn coordinate file to disc.
    Overrides Executor method.

    One line is written per atom: the coordinates and the radius (taken
    from the string representation of the array row), the constant 1 and
    the atom name delivered by Pdb2xyzrn. The file is closed even if
    writing fails half way.
    """
    ## get radii and name array
    p2x = Pdb2xyzrn(self.model, verbose=self.verbose, debug=self.debug )
    r, n = p2x.run()

    ## append the radii as 4th column to the coordinates
    xyz = self.model.xyz
    xyzr = N0.concatenate( ( xyz, N0.transpose([r]) ), axis=1 )

    with open( self.f_xyzrn, 'w' ) as f:
        for i, line in enumerate( xyzr ):
            ## str(row) looks like '[ x y z r]'; [2:-1] strips the brackets
            f.write( str(line)[2:-1] + ' 1 ' + n[i] + '\n')
def test_FuzzyCluster( self):
    """FuzzyCluster test"""
    import gnuplot as G

    ## three clouds of 500 random 2-D points, shifted by 0, 1 and 2
    shape = (500, 2)
    clouds = ( R.random_sample(shape),
               R.random_sample(shape) + 1,
               R.random_sample(shape) + 2 )

    self.x = N0.concatenate( clouds )

    self.fuzzy = FuzzyCluster(self.x, n_cluster=5, weight=1.5)

    self.centers = self.fuzzy.go(1.e-30, n_iterations=50, nstep=10,
                                 verbose=self.local)

    if self.local:
        print("cluster centers are displayed in green")
        G.scatter( self.x, self.centers )

    ## one 2-D center per requested cluster
    self.assertEqual( N0.shape(self.centers), (5, 2) )
def reduceXyz(self, xyz, axis=0):
    """
    Reduce the number of atoms in the given coordinate set. Every atom
    group (self.groups) is replaced by its mass-weighted center. The set
    must have the same length and order as the reference model. It may
    have an additional (time) dimension as first axis.

    @param xyz: coordinates (N_atoms x 3) or (N_frames x N_atoms x 3)
    @type  xyz: array
    @param axis: axis with atoms (default: 0)
    @type  axis: int

    @return: coordinate array (N_less_atoms x 3) or
             (N_frames x N_less_atoms x 3); None if there are no groups
    @rtype: array
    """
    masses = self.m.atoms.get('mass')

    ## centers are collected and joined once at the end; growing the
    ## result by one concatenate per group copies it over and over
    centers = []

    for atom_indices in self.groups:
        x = N0.take(xyz, atom_indices, axis)
        m = N0.take(masses, atom_indices)

        ## masses as a column so that they scale all coordinates of an atom
        center = N0.sum(x * N0.transpose([m, ]), axis=axis) / N0.sum(m)

        ## summing removed the atom axis; restore it with length 1
        if axis == 0:
            center = center[N0.NewAxis, :]
        elif axis == 1:
            center = center[:, N0.NewAxis, :]

        centers.append(center)

    if not centers:
        return None

    return N0.concatenate(centers, axis)
def __alignMatrixDimension(self, cm, thisSeq, castSeq, axis=0):
    """
    Adapt one dimension of a contact matrix to a slightly different
    sequence by deleting columns and inserting empty (0) columns, so that
    it can later be compared to a contact matrix based on that sequence.

    The edits are taken from the 'delete' and 'insert' operations that
    SequenceMatcher reports for thisSeq -> castSeq; all other operations
    leave the matrix untouched.

    @param cm: contact matrix, 2D matrix of residue contacts
               receptor x ligand sequence
    @type  cm: array
    @param thisSeq: AA sequence of this dimension of the contactMatrix
    @type  thisSeq: string
    @param castSeq: AA sequence of this dimension in the other contact
    @type  castSeq: string
    @param axis: which dimension to adapt (0=receptor, 1=ligand)
    @type  axis: 1|0

    @return: contact matrix with residue contacts compatible to castSeq.
    @rtype: 2D array
    """
    opcodes = SequenceMatcher(None, thisSeq, castSeq).get_opcodes()

    ## all edits below act on columns; for the receptor dimension the
    ## matrix is flipped now and flipped back before returning
    flipped = not axis
    if flipped:
        cm = N0.transpose(cm)

    ## shift between positions in thisSeq and columns of the edited matrix
    offset = 0

    for tag, this_from, this_to, cast_from, cast_to in opcodes:

        if tag == 'delete':
            ## cut out the columns missing in the other sequence
            before = cm[:, :this_from + offset]
            after = cm[:, this_to + offset:]

            cm = N0.concatenate((before, after), 1)
            offset = offset + this_from - this_to

        elif tag == 'insert':
            ## put zero columns where the other sequence has extra residues
            n_new = cast_to - cast_from
            zero_columns = N0.array([[0] * n_new] * N0.size(cm, 0))

            before = cm[:, :this_from + offset]
            after = cm[:, this_to + offset:]

            cm = N0.concatenate((before, zero_columns, after), 1)
            offset = offset + cast_to - cast_from

    if flipped:
        return N0.transpose(cm)

    return cm