def __atom2residueMatrix(self, m):
    """
    Collapse a binary atom contact matrix into a binary residue contact
    matrix. A residue pair is marked as contact (1) as soon as any atom
    pair inside the corresponding block of m is non-zero.

    @param m: atom contact matrix, array n x k with 1(contact) or
              0(no contact)
    @type  m: array

    @return: residue contact matrix, 2-D numpy array
             (residues_receptor x residues_ligand)
    @rtype: array
    """
    ## block boundaries: the values from resIndex() plus the total atom
    ## count as closing bound (presumably residue start positions)
    recBounds = N0.concatenate((self.rec().resIndex(),
                                [self.rec().lenAtoms()]))
    ligBounds = N0.concatenate((self.lig_model.resIndex(),
                                [self.lig_model.lenAtoms()]))

    nRec = len(recBounds) - 1
    nLig = len(ligBounds) - 1
    result = N0.zeros((nRec, nLig), N0.Int)

    for iRec in range(nRec):
        recFrom, recTo = int(recBounds[iRec]), int(recBounds[iRec + 1])

        for iLig in range(nLig):
            ligFrom, ligTo = int(ligBounds[iLig]), int(ligBounds[iLig + 1])

            ## any atom-atom contact within this block flags the residue pair
            if N0.any(m[recFrom:recTo, ligFrom:ligTo]):
                result[iRec, iLig] = 1

    return result
def __atom2residueMatrix(self, m):
    """
    Collapse a binary atom contact matrix into a binary residue contact
    matrix. A residue pair is marked as contact (1) as soon as any atom
    pair inside the corresponding block of m is non-zero.

    @param m: atom contact matrix, array n x k with 1(contact) or
              0(no contact)
    @type  m: array

    @return: residue contact matrix, 2-D numpy array
             (residues_receptor x residues_ligand)
    @rtype: array
    """
    ## block boundaries: the values from resIndex() plus the total atom
    ## count as closing bound (presumably residue start positions)
    recBounds = N0.concatenate((self.rec().resIndex(),
                                [self.rec().lenAtoms()]))
    ligBounds = N0.concatenate((self.lig_model.resIndex(),
                                [self.lig_model.lenAtoms()]))

    nRec = len(recBounds) - 1
    nLig = len(ligBounds) - 1
    result = N0.zeros((nRec, nLig), N0.Int)

    for iRec in range(nRec):
        recFrom, recTo = int(recBounds[iRec]), int(recBounds[iRec + 1])

        for iLig in range(nLig):
            ligFrom, ligTo = int(ligBounds[iLig]), int(ligBounds[iLig + 1])

            ## any atom-atom contact within this block flags the residue pair
            if N0.any(m[recFrom:recTo, ligFrom:ligTo]):
                result[iRec, iLig] = 1

    return result
def test_ComplexTraj(self):
    """Dock.ComplexTraj test"""
    import biskit.tools as T

    ## The test folder holds no complex trajectory, so a fake one is
    ## built from five copies of the same complex structure.
    pdbFiles = [T.testRoot() + '/com/1BGS.pdb'] * 5
    traj = Trajectory(pdbFiles, verbose=self.local)
    traj = ComplexTraj(traj, recChains=[0])

    ## (disabled) contact density plot for local runs:
    ## traj.plotContactDensity( step=2 )

    ## fake a second ligand chain: atoms 1191 .. 1967 become chain 'B'
    for atom in range(1093 + 98, 1968):
        traj.ref.atoms['chain_id'][atom] = 'B'

    traj.ref.chainIndex(force=1, cache=1)
    traj.cl = [1, 2]

    ## new atom order: 1093..1190 first, then 0..1092, then 1191..1967
    order = N0.concatenate((list(range(1093, 1191)),
                            list(range(0, 1093)),
                            list(range(1191, 1968))))
    reordered = traj.takeAtoms(order)

    contacts = reordered.atomContacts(1)

    if self.local:
        print('Receptor chains: %s Ligand chains: %s' % (traj.cr, traj.cl))

    self.assertEqual(N0.sum(N0.ravel(contacts)), 308)
def concatAtoms(self, *traj):
    """
    Concatenate trajectories of same (frame) length 'horizontally', i.e.
    for each frame the coordinates of one are appended to the coordinates
    of the other. The ref model of the new trajectory is a 'semi-deep'
    copy of this trajectory's model (see :class:`PDBModel.take()` )::
      concatAtoms( traj1 [traj2, traj3..]) -> Trajectory

    Called without arguments, this trajectory itself is returned.
    Profiles and frame names of the result are taken from this
    trajectory only.

    :param traj: one or more Trajectory of the same number of frames
    :type  traj: Trajectories

    :return: trajectory with concatenated atoms
    :rtype: Trajectory
    """
    if not traj:
        return self

    head, rest = traj[0], traj[1:]

    joined = self.__class__()

    ## append the atoms of the first trajectory along axis 1 of the frames
    joined.frames = N0.concatenate((self.frames, head.frames), 1)
    joined.setRef(self.ref.concat(head.getRef()))

    joined.profiles = self.profiles.clone()
    joined.frameNames = self.frameNames

    ## recurse to append the remaining trajectories one at a time
    return joined.concatAtoms(*rest)
def test_ComplexTraj(self):
    """Dock.ComplexTraj test"""
    import biskit.tools as T

    ## The test folder holds no complex trajectory, so a fake one is
    ## built from five copies of the same complex structure.
    pdbFiles = [T.testRoot() + '/com/1BGS.pdb'] * 5
    traj = Trajectory(pdbFiles, verbose=self.local)
    traj = ComplexTraj(traj, recChains=[0])

    ## (disabled) contact density plot for local runs:
    ## traj.plotContactDensity( step=2 )

    ## fake a second ligand chain: atoms 1191 .. 1967 become chain 'B'
    for atom in range(1093 + 98, 1968):
        traj.ref.atoms['chain_id'][atom] = 'B'

    traj.ref.chainIndex(force=1, cache=1)
    traj.cl = [1, 2]

    ## new atom order: 1093..1190 first, then 0..1092, then 1191..1967
    order = N0.concatenate((list(range(1093, 1191)),
                            list(range(0, 1093)),
                            list(range(1191, 1968))))
    reordered = traj.takeAtoms(order)

    contacts = reordered.atomContacts(1)

    if self.local:
        print('Receptor chains: %s Ligand chains: %s' % (traj.cr, traj.cl))

    self.assertEqual(N0.sum(N0.ravel(contacts)), 308)
def rtTuple2matrix(self, r, t):
    """
    Combine a rotation matrix and a translation vector into a single
    4x4 matrix.

    @param r: rotation matrix, array 3x3 of float
    @type  r: array
    @param t: translation vector, array 1x3 of float
    @type  t: vector

    @return: rotation/translation matrix, array 4x4 of float
    @rtype: array
    """
    ## translation as a column so it can be appended right of the rotation
    tColumn = N0.transpose([t.tolist()])

    ## 3 x 4 block: columns 0-2 hold the rotation, column 3 the translation
    upper = N0.concatenate((r, tColumn), 1)

    ## closing row (0, 0, 0, 1) makes the matrix square
    lastRow = N0.array([[0, 0, 0, 1]], N0.Float32)

    return N0.concatenate((upper, lastRow), 0).astype(N0.Float32)
def __random_matrix(self):
    """
    Combine the result of ma.randomRotation() and of
    self.__random_translation() into one matrix.

    @return: 4 x 4 array of float, random rotation and translation matrix
    @rtype: array
    """
    rotation = ma.randomRotation()
    translation = self.__random_translation()

    ## translation as a column, appended right of the rotation -> 3 x 4
    ## (columns 0-2 hold the rotation, column 3 the translation)
    tColumn = N0.transpose([translation.tolist()])
    upper = N0.concatenate((rotation, tColumn), 1)

    ## closing row (0, 0, 0, 1) makes the matrix square
    lastRow = N0.array([[0, 0, 0, 1]], N0.Float32)

    return N0.concatenate((upper, lastRow), 0)
def rtTuple2matrix(self, r, t):
    """
    Combine a rotation matrix and a translation vector into a single
    4x4 matrix.

    @param r: rotation matrix, array 3x3 of float
    @type  r: array
    @param t: translation vector, array 1x3 of float
    @type  t: vector

    @return: rotation/translation matrix, array 4x4 of float
    @rtype: array
    """
    ## translation as a column so it can be appended right of the rotation
    tColumn = N0.transpose([t.tolist()])

    ## 3 x 4 block: columns 0-2 hold the rotation, column 3 the translation
    upper = N0.concatenate((r, tColumn), 1)

    ## closing row (0, 0, 0, 1) makes the matrix square
    lastRow = N0.array([[0, 0, 0, 1]], N0.Float32)

    return N0.concatenate((upper, lastRow), 0).astype(N0.Float32)
def rmsInterface(self, ref, cutoff=4.5, fit=1):
    """
    Rmsd between this and reference interface. The interface is
    defined as any residue that has an atom which is within the
    distance given by |cutoff| from its partner. The interface is
    determined on the reference complex and restricted to heavy atoms.

    @param ref: reference complex
    @type  ref: Complex
    @param cutoff: atom distance cutoff for interface residue definition
                   (default: 4.5)
    @type  cutoff: float
    @param fit: least-squares fit before calculating the rms
                (default: 1)
    @type  fit: 1|0

    @return: interface rmsd
    @rtype: float
    """
    this = self

    ## if receptor or ligand differ from the reference, reduce both
    ## complexes to the atoms reported by equalAtoms()
    if not (ref.rec_model.equals(self.rec_model)[1]
            and ref.lig_model.equals(self.lig_model)[1]):

        mRec, mRecRef, mLig, mLigRef = self.equalAtoms(ref)
        this = self.compress(mRec, mLig)
        ref = ref.compress(mRecRef, mLigRef)

    ## interface atoms: residues with at least one contact in the reference
    contacts = ref.resContacts(cutoff)

    recMask = ref.rec_model.res2atomMask(N0.sum(contacts, 1))
    ligMask = ref.lig_model.res2atomMask(N0.sum(contacts, 0))
    interface = N0.concatenate((recMask, ligMask))

    heavy = N0.concatenate((ref.rec().maskHeavy(),
                            ref.lig_model.maskHeavy()))

    ## keep only heavy atoms of the interface
    heavyInterface = interface * heavy

    return ref.model().rms(this.model(), heavyInterface, fit=fit)
def rmsInterface(self, ref, cutoff=4.5, fit=1):
    """
    Rmsd between this and reference interface. The interface is
    defined as any residue that has an atom which is within the
    distance given by |cutoff| from its partner. The interface is
    determined on the reference complex and restricted to heavy atoms.

    @param ref: reference complex
    @type  ref: Complex
    @param cutoff: atom distance cutoff for interface residue definition
                   (default: 4.5)
    @type  cutoff: float
    @param fit: least-squares fit before calculating the rms
                (default: 1)
    @type  fit: 1|0

    @return: interface rmsd
    @rtype: float
    """
    this = self

    ## if receptor or ligand differ from the reference, reduce both
    ## complexes to the atoms reported by equalAtoms()
    if not (ref.rec_model.equals(self.rec_model)[1]
            and ref.lig_model.equals(self.lig_model)[1]):

        mRec, mRecRef, mLig, mLigRef = self.equalAtoms(ref)
        this = self.compress(mRec, mLig)
        ref = ref.compress(mRecRef, mLigRef)

    ## interface atoms: residues with at least one contact in the reference
    contacts = ref.resContacts(cutoff)

    recMask = ref.rec_model.res2atomMask(N0.sum(contacts, 1))
    ligMask = ref.lig_model.res2atomMask(N0.sum(contacts, 0))
    interface = N0.concatenate((recMask, ligMask))

    heavy = N0.concatenate((ref.rec().maskHeavy(),
                            ref.lig_model.maskHeavy()))

    ## keep only heavy atoms of the interface
    heavyInterface = interface * heavy

    return ref.model().rms(this.model(), heavyInterface, fit=fit)
def convertChainIdsCter(self, model, chains):
    """
    Convert normal chain ids to chain ids considering chain breaks.

    For each given chain the position of its last atom is looked up and
    handed to model.atom2chainIndices( .., breaks=1 ). An empty chain
    list is returned unchanged.

    @param model: object offering chainIndex(), len() and
                  atom2chainIndices() (presumably a PDBModel -- confirm)
    @param chains: chain indices (without chain breaks)

    @return: result of model.atom2chainIndices() for the last atom of
             every given chain
    """
    if len(chains) == 0:
        return chains

    ## the values from chainIndex() plus the atom count as closing bound
    bounds = N0.concatenate((model.chainIndex(), [len(model)]))

    ## last atom of a chain = entry following the chain's own, minus one
    followingEntry = N0.take(bounds, N0.array(chains) + 1)
    lastAtoms = followingEntry - 1

    ## convert back to chain indices but this time including chain breaks
    return model.atom2chainIndices(lastAtoms, breaks=1)
def convertChainIdsCter(self, model, chains):
    """
    Convert normal chain ids to chain ids considering chain breaks.

    For each given chain the position of its last atom is looked up and
    handed to model.atom2chainIndices( .., breaks=1 ). An empty chain
    list is returned unchanged.

    @param model: object offering chainIndex(), len() and
                  atom2chainIndices() (presumably a PDBModel -- confirm)
    @param chains: chain indices (without chain breaks)

    @return: result of model.atom2chainIndices() for the last atom of
             every given chain
    """
    if len(chains) == 0:
        return chains

    ## the values from chainIndex() plus the atom count as closing bound
    bounds = N0.concatenate((model.chainIndex(), [len(model)]))

    ## last atom of a chain = entry following the chain's own, minus one
    followingEntry = N0.take(bounds, N0.array(chains) + 1)
    lastAtoms = followingEntry - 1

    ## convert back to chain indices but this time including chain breaks
    return model.atom2chainIndices(lastAtoms, breaks=1)
def concat(self, *traj):
    """
    Concatenate this with other trajectories. The ref model of the
    new Trajectory is a 'semi-deep' copy of this trajectory's model.
    (see :class:`PDBModel.take()` )::
       concat( traj [, traj2, traj3, ..] ) -> Trajectory

    Called without arguments, this trajectory itself is returned.
    Frame names are only combined if both trajectories have them.

    :param traj: one or more Trajectory with identical atoms as this one
    :type  traj: Trajectories

    :return: concatenated trajectories
    :rtype: Trajectory
    """
    if not traj:
        return self

    other = traj[0]

    r = self.__class__()

    ## append the frames of the first trajectory along axis 0
    r.frames = N0.concatenate((self.frames, other.frames), 0)
    r.setRef(self.ref.clone())

    if self.frameNames and other.frameNames:
        r.frameNames = self.frameNames + other.frameNames

    try:
        if not (self.pc is None or other.pc is None):
            ## NOTE(review): this writes into r.pc of the freshly created
            ## instance -- confirm that the constructor provides a dict
            for key in ('p', 'u'):
                r.pc[key] = N0.concatenate((self.pc[key], other.pc[key]), 0)
    except TypeError as why:
        EHandler.error('cannot concat PC ' + str(why))

    r.profiles = self.profiles.concat(other.profiles)

    ## recursively add other trajectories
    return r.concat(*traj[1:])
def polarToCartesian(rtp):
    """
    Convert polar coordinate array to cartesian coordinate array:
    C{ r, S{theta}, S{phi} -> x,y,z }

    :param rtp: array of polar coordinates (r, theta, phi), one point
                per row
    :type  rtp: array

    :return: array of cartesian coordinates (x, y, z), one point per row
    :rtype: array
    """
    radius, theta, phi = rtp[:, 0], rtp[:, 1], rtp[:, 2]

    sinPhi = N0.sin(phi)

    x = radius * N0.cos(theta) * sinPhi
    y = radius * N0.sin(theta) * sinPhi
    z = radius * N0.cos(phi)

    ## stack as three rows (3 x N), then flip to one point per row (N x 3)
    return N0.transpose(N0.concatenate(([x], [y], [z])))
def polarToCartesian(rtp):
    """
    Convert polar coordinate array to cartesian coordinate array:
    C{ r, S{theta}, S{phi} -> x,y,z }

    :param rtp: array of polar coordinates (r, theta, phi), one point
                per row
    :type  rtp: array

    :return: array of cartesian coordinates (x, y, z), one point per row
    :rtype: array
    """
    radius, theta, phi = rtp[:, 0], rtp[:, 1], rtp[:, 2]

    sinPhi = N0.sin(phi)

    x = radius * N0.cos(theta) * sinPhi
    y = radius * N0.sin(theta) * sinPhi
    z = radius * N0.cos(phi)

    ## stack as three rows (3 x N), then flip to one point per row (N x 3)
    return N0.transpose(N0.concatenate(([x], [y], [z])))
def area(curve, start=0.0, stop=1.0):
    """
    Numerically add up the area under the given curve.
    The curve is a 2-D array or list of tuples. The x-axis is the first
    column of this array (curve[:,0]), the y-axis the second (curve[:,1]).
    (originally taken from biskit.Statistics.ROCalyzer)

    Points outside start..stop are dropped. The first and last remaining
    point are then extended horizontally to start and stop; there is no
    interpolation towards the dropped neighbors.

    :param curve: a list of x,y coordinates
    :type  curve: [ (x,y), ] or N0.array
    :param start: lower boundary (in x) (default: 0.0)
    :type  start: float
    :param stop: upper boundary (in x) (default: 1.0)
    :type  stop: float

    :return: the area underneath the curve between start and stop.
    :rtype: float
    """
    curve = N0.array(curve)

    ## work on a copy with swapped columns: column 0 = y, column 1 = x
    swapped = N0.zeros(N0.shape(curve), curve.dtype)
    swapped[:, 0] = curve[:, 1]
    swapped[:, 1] = curve[:, 0]

    assert len(N0.shape(swapped)) == 2

    ## keep only the points with start <= x <= stop
    ## (flat curves are a known problem here)
    inRange = N0.greater_equal(swapped[:, 1], start)
    inRange *= N0.less_equal(swapped[:, 1], stop)
    swapped = N0.compress(inRange, swapped, axis=0)

    ## fill to boundaries -- not absolutely accurate: we actually should
    ## interpolate to the neighboring points instead
    firstPoint = N0.array([[swapped[0, 0], start], ])
    lastPoint = N0.array([[swapped[-1, 0], stop], ])
    swapped = N0.concatenate((firstPoint, swapped, lastPoint))

    x = swapped[:, 1]
    y = swapped[:, 0]

    dx = x[1:] - x[:-1]  # distance on x between points
    dy = y[1:] - y[:-1]  # distance on y between points

    rectangles = y[:-1] * dx      # the rectangles between all points
    triangles = dx * dy / 2.0     # the triangles between all points

    return N0.sum(rectangles) + N0.sum(triangles)
def area(curve, start=0.0, stop=1.0):
    """
    Numerically add up the area under the given curve.
    The curve is a 2-D array or list of tuples. The x-axis is the first
    column of this array (curve[:,0]), the y-axis the second (curve[:,1]).
    (originally taken from biskit.Statistics.ROCalyzer)

    Points outside start..stop are dropped. The first and last remaining
    point are then extended horizontally to start and stop; there is no
    interpolation towards the dropped neighbors.

    :param curve: a list of x,y coordinates
    :type  curve: [ (x,y), ] or N0.array
    :param start: lower boundary (in x) (default: 0.0)
    :type  start: float
    :param stop: upper boundary (in x) (default: 1.0)
    :type  stop: float

    :return: the area underneath the curve between start and stop.
    :rtype: float
    """
    curve = N0.array(curve)

    ## work on a copy with swapped columns: column 0 = y, column 1 = x
    swapped = N0.zeros(N0.shape(curve), curve.dtype)
    swapped[:, 0] = curve[:, 1]
    swapped[:, 1] = curve[:, 0]

    assert len(N0.shape(swapped)) == 2

    ## keep only the points with start <= x <= stop
    ## (flat curves are a known problem here)
    inRange = N0.greater_equal(swapped[:, 1], start)
    inRange *= N0.less_equal(swapped[:, 1], stop)
    swapped = N0.compress(inRange, swapped, axis=0)

    ## fill to boundaries -- not absolutely accurate: we actually should
    ## interpolate to the neighboring points instead
    firstPoint = N0.array([[swapped[0, 0], start], ])
    lastPoint = N0.array([[swapped[-1, 0], stop], ])
    swapped = N0.concatenate((firstPoint, swapped, lastPoint))

    x = swapped[:, 1]
    y = swapped[:, 0]

    dx = x[1:] - x[:-1]  # distance on x between points
    dy = y[1:] - y[:-1]  # distance on y between points

    rectangles = y[:-1] * dx      # the rectangles between all points
    triangles = dx * dy / 2.0     # the triangles between all points

    return N0.sum(rectangles) + N0.sum(triangles)
def test_FuzzyCluster(self):
    """FuzzyCluster test"""
    import biskit.gnuplot as G

    ## three sets of 500 random 2-D points; the 2nd and 3rd set are
    ## shifted by 1 and 2
    clouds = (R.random_sample((500, 2)),
              R.random_sample((500, 2)) + 1,
              R.random_sample((500, 2)) + 2)
    self.x = N0.concatenate(clouds)

    self.fuzzy = FuzzyCluster(self.x, n_cluster=5, weight=1.5)

    self.centers = self.fuzzy.go(1.e-30,
                                 n_iterations=50,
                                 nstep=10,
                                 verbose=self.local)

    ## plotting only for interactive (local) runs
    if self.local:
        print("cluster centers are displayed in green")
        G.scatter(self.x, self.centers)

    self.assertEqual(N0.shape(self.centers), (5, 2))
def cartesianToPolar(xyz):
    """
    Convert cartesian coordinate array to polar coordinate array:
    C{ x,y,z -> r, S{theta}, S{phi} }

    theta is the angle in the x-y plane (atan2 of y and x), phi the
    angle to the z axis (arccos of z / r).

    :param xyz: array of cartesian coordinates (x, y, z), one point per row
    :type  xyz: array

    :return: array of polar coordinates (r, theta, phi), one point per row
    :rtype: array
    """
    r = N0.sqrt(N0.sum(xyz**2, 1))
    ## NOTE(review): a point at the origin has r = 0, so z / r (and with
    ## it phi) is not defined for it
    p = N0.arccos(xyz[:, 2] / r)

    ## atan2 looks at the signs of both y and x and so puts theta into
    ## the correct quadrant
    t = [math.atan2(y, x) for x, y in zip(xyz[:, 0], xyz[:, 1])]

    ## stack as three rows (3 x N), then flip to one point per row (N x 3)
    return N0.transpose(N0.concatenate(([r], [t], [p])))
def cartesianToPolar(xyz):
    """
    Convert cartesian coordinate array to polar coordinate array:
    C{ x,y,z -> r, S{theta}, S{phi} }

    theta is the angle in the x-y plane (atan2 of y and x), phi the
    angle to the z axis (arccos of z / r).

    :param xyz: array of cartesian coordinates (x, y, z), one point per row
    :type  xyz: array

    :return: array of polar coordinates (r, theta, phi), one point per row
    :rtype: array
    """
    r = N0.sqrt(N0.sum(xyz**2, 1))
    ## NOTE(review): a point at the origin has r = 0, so z / r (and with
    ## it phi) is not defined for it
    p = N0.arccos(xyz[:, 2] / r)

    ## atan2 looks at the signs of both y and x and so puts theta into
    ## the correct quadrant
    t = [math.atan2(y, x) for x, y in zip(xyz[:, 0], xyz[:, 1])]

    ## stack as three rows (3 x N), then flip to one point per row (N x 3)
    return N0.transpose(N0.concatenate(([r], [t], [p])))
def test_FuzzyCluster(self):
    """FuzzyCluster test"""
    import biskit.gnuplot as G

    ## three sets of 500 random 2-D points; the 2nd and 3rd set are
    ## shifted by 1 and 2
    clouds = (R.random_sample((500, 2)),
              R.random_sample((500, 2)) + 1,
              R.random_sample((500, 2)) + 2)
    self.x = N0.concatenate(clouds)

    self.fuzzy = FuzzyCluster(self.x, n_cluster=5, weight=1.5)

    self.centers = self.fuzzy.go(1.e-30,
                                 n_iterations=50,
                                 nstep=10,
                                 verbose=self.local)

    ## plotting only for interactive (local) runs
    if self.local:
        print("cluster centers are displayed in green")
        G.scatter(self.x, self.centers)

    self.assertEqual(N0.shape(self.centers), (5, 2))
def reduceXyz(self, xyz, axis=0):
    """
    Reduce the number of atoms in the given coordinate set. The set must
    have the same length and order as the reference model. It may have
    an additional (time) dimension as first axis.

    Every entry of self.groups (a set of atom indices) is replaced by a
    single position: the mass-weighted average of its atoms.

    @param xyz: coordinates (N_atoms x 3) or (N_frames x N_atoms x 3)
    @type  xyz: array
    @param axis: axis with atoms (default: 0)
    @type  axis: int

    @return: coordinate array (N_less_atoms x 3) or
             (N_frames x N_less_atoms x 3); None if there are no groups
    @rtype: array
    """
    masses = self.m.atoms.get('mass')

    ## Collect the group centers and join them once at the end; joining
    ## inside the loop would re-copy the growing result for every group.
    centers = []

    for atom_indices in self.groups:
        x = N0.take(xyz, atom_indices, axis)
        m = N0.take(masses, atom_indices)

        ## masses as a column so that they scale x, y and z of each atom
        center = N0.sum(x * N0.transpose([m, ]), axis=axis) / N0.sum(m)

        ## re-insert the atom axis that the summation removed
        if axis == 0:
            center = center[N0.NewAxis, :]

        if axis == 1:
            center = center[:, N0.NewAxis, :]

        centers.append(center)

    if not centers:
        return None

    if len(centers) == 1:
        return centers[0]

    return N0.concatenate(centers, axis)
def __parseBiomt(self, pdbFile, firstLine):
    """
    Extract BIOMT (biological unit) information from REMARK 350 lines.
    Creates a 'BIOMT' dictionary.

    Parsing starts with firstLine and goes on with lines fetched from
    pdbFile.readLine() until a line is met that is not a REMARK 350
    record. Every three BIOMT rows are combined into one 3 x 4 array
    (rotation in columns 0-2, translation in column 3).

    @param pdbFile: source of the following lines, must offer readLine()
    @param firstLine: first line to examine; lines are indexed as
                      line[0] (record name) and line[1] (remaining text)

    @return: ( {'BIOMT': {molecule number: (chains, [3x4 arrays])}},
               first line that is not a REMARK 350 record )
    @rtype: (dict, line)
    """
    line = firstLine
    biomtDict = {}
    moleculeNum = -1

    ## Start with empty per-molecule state so that input without a
    ## 'BIOMOLECULE:' line (or without any REMARK 350 line at all)
    ## cannot run into unassigned variables.
    targetChains = []
    rotation = []
    translation = []
    rtList = []
    matrixLine = 0

    while line[0] == 'REMARK' and line[1].startswith(' 350'):

        # 5 = len(' 350 ')
        biomtLine = line[1][5:].lstrip()

        if biomtLine.startswith('BIOMOLECULE:'):  # start a new molecule

            # store what has been collected so far; data found before
            # the first BIOMOLECULE line ends up under key -1
            if moleculeNum != -1 or targetChains or rtList:
                biomtDict[moleculeNum] = (targetChains, rtList)

            # 12 = len('BIOMOLECULE:')
            moleculeNum = int(biomtLine[12:].strip())
            targetChains = []
            rotation = []
            translation = []
            rtList = []
            matrixLine = 0

        if biomtLine.startswith('APPLY THE FOLLOWING TO CHAINS:'):
            # 30 = len('APPLY THE FOLLOWING TO CHAINS:')
            targetChains.extend(c.strip() for c in biomtLine[30:].split(','))

        if biomtLine.startswith('AND CHAINS:'):
            # 11 = len('AND CHAINS:')
            targetChains.extend(c.strip() for c in biomtLine[11:].split(','))

        if biomtLine.startswith('BIOMT'):
            # one row of a rotate-translate matrix
            matrixLine += 1

            # 6 = len('BIOMT#')
            rawCoords = biomtLine[6:].split()
            rotation.append([float(x) for x in rawCoords[1:4]])
            translation.append(float(rawCoords[4]))

            # three rows make one complete transformation
            if matrixLine % 3 == 0:
                rotation = N0.array(rotation)
                translation = N0.transpose([translation])
                rotation = N0.concatenate((rotation, translation), axis=1)
                rtList.append(N0.array(rotation))
                rotation = []
                translation = []

        # Fetch the next line. A line that cannot be parsed is reported
        # and skipped instead of processing the previous line again.
        # NOTE(review): assumes readLine() has consumed the offending
        # line when it raises ValueError -- confirm.
        while True:
            try:
                line = pdbFile.readLine()
                break
            except ValueError as what:
                self.log.add('Warning: Error parsing a REMARK 350 line')
                self.log.add('\tError: ' + str(what))

    # process last molecule group (if anything was collected at all)
    if moleculeNum != -1 or targetChains or rtList:
        biomtDict[moleculeNum] = (targetChains, rtList)

    # return (indexed transformation dictionary, last line which isn't ours)
    return {'BIOMT': biomtDict}, line
def __parseBiomt(self, pdbFile, firstLine):
    """
    Extract BIOMT (biological unit) information from REMARK 350 lines.
    Creates a 'BIOMT' dictionary.

    Parsing starts with firstLine and goes on with lines fetched from
    pdbFile.readLine() until a line is met that is not a REMARK 350
    record. Every three BIOMT rows are combined into one 3 x 4 array
    (rotation in columns 0-2, translation in column 3).

    @param pdbFile: source of the following lines, must offer readLine()
    @param firstLine: first line to examine; lines are indexed as
                      line[0] (record name) and line[1] (remaining text)

    @return: ( {'BIOMT': {molecule number: (chains, [3x4 arrays])}},
               first line that is not a REMARK 350 record )
    @rtype: (dict, line)
    """
    line = firstLine
    biomtDict = {}
    moleculeNum = -1

    ## Start with empty per-molecule state so that input without a
    ## 'BIOMOLECULE:' line (or without any REMARK 350 line at all)
    ## cannot run into unassigned variables.
    targetChains = []
    rotation = []
    translation = []
    rtList = []
    matrixLine = 0

    while line[0] == 'REMARK' and line[1].startswith(' 350'):

        # 5 = len(' 350 ')
        biomtLine = line[1][5:].lstrip()

        if biomtLine.startswith('BIOMOLECULE:'):  # start a new molecule

            # store what has been collected so far; data found before
            # the first BIOMOLECULE line ends up under key -1
            if moleculeNum != -1 or targetChains or rtList:
                biomtDict[moleculeNum] = (targetChains, rtList)

            # 12 = len('BIOMOLECULE:')
            moleculeNum = int(biomtLine[12:].strip())
            targetChains = []
            rotation = []
            translation = []
            rtList = []
            matrixLine = 0

        if biomtLine.startswith('APPLY THE FOLLOWING TO CHAINS:'):
            # 30 = len('APPLY THE FOLLOWING TO CHAINS:')
            targetChains.extend(c.strip() for c in biomtLine[30:].split(','))

        if biomtLine.startswith('AND CHAINS:'):
            # 11 = len('AND CHAINS:')
            targetChains.extend(c.strip() for c in biomtLine[11:].split(','))

        if biomtLine.startswith('BIOMT'):
            # one row of a rotate-translate matrix
            matrixLine += 1

            # 6 = len('BIOMT#')
            rawCoords = biomtLine[6:].split()
            rotation.append([float(x) for x in rawCoords[1:4]])
            translation.append(float(rawCoords[4]))

            # three rows make one complete transformation
            if matrixLine % 3 == 0:
                rotation = N0.array(rotation)
                translation = N0.transpose([translation])
                rotation = N0.concatenate((rotation, translation), axis=1)
                rtList.append(N0.array(rotation))
                rotation = []
                translation = []

        # Fetch the next line. A line that cannot be parsed is reported
        # and skipped instead of processing the previous line again.
        # NOTE(review): assumes readLine() has consumed the offending
        # line when it raises ValueError -- confirm.
        while True:
            try:
                line = pdbFile.readLine()
                break
            except ValueError as what:
                self.log.add('Warning: Error parsing a REMARK 350 line')
                self.log.add('\tError: ' + str(what))

    # process last molecule group (if anything was collected at all)
    if moleculeNum != -1 or targetChains or rtList:
        biomtDict[moleculeNum] = (targetChains, rtList)

    # return (indexed transformation dictionary, last line which isn't ours)
    return {'BIOMT': biomtDict}, line
def __alignMatrixDimension(self, cm, thisSeq, castSeq, axis=0):
    """
    Correct one dimension of contactMatrix by inserting and deleting
    columns, so that it can be later compared to contact matrices based
    on slightly different sequences.

    Positions that exist only in thisSeq are removed, positions that
    exist only in castSeq are filled with zeros.

    @param cm: contact matrix, 2D matrix of residue contacts
               receptor x ligand sequence
    @type  cm: array
    @param thisSeq: AA sequence of this dimension of the contactMatrix
    @type  thisSeq: string
    @param castSeq: AA sequence of this dimension in the other contact
    @type  castSeq: string
    @param axis: which dimension to adapt (0=receptor, 1=ligand)
    @type  axis: 1|0

    @return: contact matrix with residue contacts compatible to castSeq.
    @rtype: 2D array
    """
    # compare the two sequences
    opcodes = SequenceMatcher(None, thisSeq, castSeq).get_opcodes()

    # the edits below always act on columns; for the receptor dimension
    # the matrix is therefore transposed first and transposed back at the end
    if not axis:
        cm = N0.transpose(cm)

    seqCount = 0  # net number of columns inserted (+) / removed (-) so far

    # NOTE(review): only 'delete' and 'insert' are handled. A 'replace'
    # with segments of different length leaves the columns untouched, so
    # the result would not match the length of castSeq in that case.
    for tag, i1, i2, j1, j2 in opcodes:

        if tag == 'delete':
            # drop the columns of the residues missing in castSeq
            before = cm[:, :i1 + seqCount]
            after = cm[:, i2 + seqCount:]
            cm = N0.concatenate((before, after), 1)
            seqCount = seqCount + i1 - i2

        elif tag == 'insert':
            # zero-filled columns for the residues that only castSeq has
            insertZeros = j2 - j1
            insertColumns = N0.array([[0] * insertZeros] * N0.size(cm, 0))

            before = cm[:, :i1 + seqCount]
            after = cm[:, i2 + seqCount:]
            cm = N0.concatenate((before, insertColumns, after), 1)
            seqCount = seqCount + insertZeros

    if not axis:
        return N0.transpose(cm)

    return cm
def __alignMatrixDimension(self, cm, thisSeq, castSeq, axis=0):
    """
    Correct one dimension of contactMatrix by inserting and deleting
    columns, so that it can be later compared to contact matrices based
    on slightly different sequences.

    Positions that exist only in thisSeq are removed, positions that
    exist only in castSeq are filled with zeros.

    @param cm: contact matrix, 2D matrix of residue contacts
               receptor x ligand sequence
    @type  cm: array
    @param thisSeq: AA sequence of this dimension of the contactMatrix
    @type  thisSeq: string
    @param castSeq: AA sequence of this dimension in the other contact
    @type  castSeq: string
    @param axis: which dimension to adapt (0=receptor, 1=ligand)
    @type  axis: 1|0

    @return: contact matrix with residue contacts compatible to castSeq.
    @rtype: 2D array
    """
    # compare the two sequences
    opcodes = SequenceMatcher(None, thisSeq, castSeq).get_opcodes()

    # the edits below always act on columns; for the receptor dimension
    # the matrix is therefore transposed first and transposed back at the end
    if not axis:
        cm = N0.transpose(cm)

    seqCount = 0  # net number of columns inserted (+) / removed (-) so far

    # NOTE(review): only 'delete' and 'insert' are handled. A 'replace'
    # with segments of different length leaves the columns untouched, so
    # the result would not match the length of castSeq in that case.
    for tag, i1, i2, j1, j2 in opcodes:

        if tag == 'delete':
            # drop the columns of the residues missing in castSeq
            before = cm[:, :i1 + seqCount]
            after = cm[:, i2 + seqCount:]
            cm = N0.concatenate((before, after), 1)
            seqCount = seqCount + i1 - i2

        elif tag == 'insert':
            # zero-filled columns for the residues that only castSeq has
            insertZeros = j2 - j1
            insertColumns = N0.array([[0] * insertZeros] * N0.size(cm, 0))

            before = cm[:, :i1 + seqCount]
            after = cm[:, i2 + seqCount:]
            cm = N0.concatenate((before, insertColumns, after), 1)
            seqCount = seqCount + insertZeros

    if not axis:
        return N0.transpose(cm)

    return cm