def shuffledLists( self, n, lst, mask=None ):
    """
    Create n shuffled copies of a list, leaving masked (0) elements
    untouched.

    @param n: number of shuffled copies to create
    @type  n: int
    @param lst: list to shuffle
    @type  lst: [any]
    @param mask: mask to be applied to lst; only positions with a
                 non-zero mask value change place (default: None, all)
    @type  mask: [1|0]

    @return: list of shuffled lists
    @rtype: [[any]]
    """
    ## 'not mask' raises for multi-element numpy arrays (ambiguous truth
    ## value); test explicitly for a missing or empty mask instead
    if mask is None or len( mask ) == 0:
        mask = N0.ones( len(lst) )

    if isinstance( lst, list ):
        lst = N0.array( lst )

    ## positions that take part in the shuffling
    pos = N0.nonzero( mask )

    ## one random ordering of these positions per requested copy
    rand_pos = N0.array( [ self.__shuffleList( pos ) for i in range(n) ] )

    result = []
    for p in rand_pos:
        r = copy.copy( lst )
        ## write the unmasked values back in their new order
        N0.put( r, p, N0.take( lst, pos ) )
        result.append( r )

    return result
def calc(self, models):
    """
    Collect profiles, phi/psi angles and the residue indices of all
    GLY and PRO residues for a list of models.

    The GLY / PRO residue indices of each model are shifted by the number
    of residues of all models before it.

    @param models: List of models
    @type  models: [ PDBModel ]
    """
    offset = 0   ## residues counted in the models processed so far

    for model in models:

        def residues_named(name):
            """residue indices of all atoms with the given residue name"""
            atom_idx = model.indices(lambda a: a['residue_name'] == name)
            return N0.array(model.atom2resIndices(atom_idx))

        ## profiles are only collected if a profile name was given
        if self.profileName:
            self.prof += [self.calcProfiles(model)]

        ## phi and psi angles of this model
        self.phi_and_psi(model)

        glycines = residues_named('GLY')
        prolines = residues_named('PRO')

        self.gly.append(glycines + offset)
        self.pro.append(prolines + offset)

        offset += model.lenResidues()
def histogram(data, nbins, range=None):
    """
    Create a histogram.
    Comes from Konrad Hinsen: Scientific Python

    @param data: data list or array
    @type  data: [any]
    @param nbins: number of bins
    @type  nbins: int
    @param range: data range to create histogram from (min val, max val);
                  values outside of it are discarded (default: None, the
                  range is taken from the data)
    @type  range: (float, float) OR None

    @return: array (nbins x 2) with the center of each bin and the number
             of values counted for it
    @rtype: array
    """
    data = N0.array(data, N0.Float)

    if range is None:
        lower = N0.minimum.reduce(data)
        upper = N0.maximum.reduce(data)
    else:
        lower, upper = range
        ## keep only the values within the requested range
        data = N0.repeat(data,
                         N0.logical_and(N0.less_equal(data, upper),
                                        N0.greater_equal(data, lower)))

    ## '* 1.0' forces a float division; with an integer range the bin
    ## width would otherwise be truncated
    bin_width = (upper - lower) * 1.0 / nbins

    ## bin number of every value
    data = N0.floor((data - lower) / bin_width).astype(N0.Int)

    histo = N0.add.reduce(N0.equal(N0.arange(nbins)[:, N0.NewAxis], data), -1)

    ## values equal to the upper limit get bin number nbins; count them
    ## for the last bin
    histo[-1] = histo[-1] + N0.add.reduce(N0.equal(nbins, data))

    bins = lower + bin_width * (N0.arange(nbins) + 0.5)

    return N0.transpose(N0.array([bins, histo]))
def __parseBiomt(self, pdbFile, firstLine):
    """
    Extract BIOMT (biological unit) information from REMARK 350 lines
    Creates a 'BIOMT' dictionary.

    Records are consumed from pdbFile for as long as they are REMARK 350
    records. The dictionary maps each biomolecule number to a tuple of
    (list of target chain ids, list of 3 x 4 arrays), where each array
    holds a rotation matrix with the translation as additional column.

    @param pdbFile: source of the following records; readLine() is called
                    on it
    @type  pdbFile: any
    @param firstLine: first record to look at; presumably a pair of
                      (record name, remaining text) -- confirm with caller
    @type  firstLine: (str, str)
    """
    line = firstLine
    biomtDict = {}
    ## -1 marks that no BIOMOLECULE line has been seen yet
    moleculeNum = -1

    while line[0] == 'REMARK' and line[1].startswith(' 350'):
        # 5 = len(' 350 ')
        biomtLine = line[1][5:].lstrip()

        if biomtLine.startswith('BIOMOLECULE:'):   # start a new molecule
            if moleculeNum != -1:
                # lets update the dictionary with what we've got
                biomtDict[moleculeNum] = (targetChains, rtList)

            #12 = len('BIOMOLECULE:')
            moleculeNum = int(biomtLine[12:].strip())
            ## reset everything that is collected per molecule
            targetChains = []
            rotation = []
            translation = []
            rtList = []
            matrixLine = 0

        if biomtLine.startswith('APPLY THE FOLLOWING TO CHAINS:'):
            # parse targeted chains, we assume this comes after BIOMOLECULE line
            # 30 = len('APPLY THE FOLLOWING TO CHAINS:')
            targetChains.extend(c.strip() for c in biomtLine[30:].split(','))

        if biomtLine.startswith('AND CHAINS:'):
            # 11 = len('AND CHAINS:')
            targetChains.extend(c.strip() for c in biomtLine[11:].split(','))

        if biomtLine.startswith('BIOMT'):
            # parse rotate-translate matri{x/ces}, we assume this comes after BIOMOLECULE line
            matrixLine += 1
            # 6 = len('BIOMT#')
            rawCoords = biomtLine[6:].split()
            ## fields 1..3 are one row of the rotation, field 4 the
            ## matching translation component
            rotation.append([float(x) for x in rawCoords[1:4]])
            translation.append(float(rawCoords[4]))

            ## every third BIOMT line completes one matrix
            if matrixLine % 3 == 0:
                rotation = N0.array(rotation)
                ## turn the three translation values into a column
                translation = N0.transpose([translation])
                rotation = N0.concatenate((rotation, translation), axis=1)
                rtList.append(N0.array(rotation))
                ## rtList.append((rotation,translation))
                rotation = []
                translation = []

        try:
            line = pdbFile.readLine()
        except ValueError, what:
            ## NOTE(review): 'i' and 'fname' are not defined anywhere in the
            ## visible code; this handler would presumably fail with a
            ## NameError -- confirm. 'line' is also left unchanged before
            ## 'continue', so the same record would be looked at again.
            self.log.add('Warning: Error parsing line %i of %s' % (i, T.stripFilename(fname)))
            self.log.add('\tError: ' + str(what))
            continue

    ## NOTE(review): the visible code neither stores the last molecule in
    ## biomtDict nor returns anything -- the method may continue beyond
    ## this point; confirm against the complete source.
def test_plot(self):
    """gnuplot.plot test"""
    import Biskit.oldnumeric as N0

    ## two curves over the same x values
    x = N0.arange(10)
    rising = x**2
    falling = (10 - x)**2

    ## each curve is handed over as its own 2D array of (x, y) rows
    curves = [N0.transpose(N0.array([x, y])) for y in (rising, falling)]

    plot(*curves)
def __defaults(self ):
    """
    Backwards compatibility to earlier pickled trajectories: add fields
    that may be missing and make sure frames and resIndex are arrays.
    """
    ## fields are re-assigned with their current value or, if missing,
    ## with a default
    self.pc = getattr( self, 'pc', None )
    self.frameNames = getattr( self, 'frameNames', None)
    self.profiles = getattr( self, 'profiles', TrajProfiles() )

    ## convert anything that is not exactly an ndarray
    for field in ( 'frames', 'resIndex' ):
        current = getattr( self, field )
        if type( current ) is not N0.ndarray:
            setattr( self, field, N0.array( current ) )
def toIntArray(o):
    """
    Convert single value or list of values to Numeric array of int.

    @param o: value or list (tuples and arrays are treated like lists)
    @type  o: int or [int]

    @return: array of integer
    @rtype: N0.array('i')
    """
    array_type = type(N0.array([]))

    ## isinstance also accepts tuples and subclasses, which the exact
    ## type comparison sent to the single-value branch
    if isinstance(o, (list, tuple, array_type)):
        return N0.array([int(value) for value in o])

    return N0.array([int(o)])
def parse_result( self):
    """
    Parse the SurfaceRacer output file which has the same name as the
    input pdb, but with a txt extension. The output ends up in the same
    folder as the input. In addition a file called result.txt is created
    in the same directory as the binary.

    @return: dictionary with curvature and surface data; keys are
             'curvature', 'MS', 'AS' and 'surfaceRacerInfo'
    @rtype: dict

    @raise SurfaceRacer_Error: if the result file cannot be read or
                               is empty
    """
    curv = [] ## average curvature
    ms   = [] ## molecular surface area
    asa  = [] ## accessible surface area

    ## only I/O problems are reported as missing result; the file is
    ## closed even if reading fails
    try:
        out_file = open( self.f_out_name )
        try:
            lines = out_file.readlines()
        finally:
            out_file.close()
    except (IOError, OSError):
        raise SurfaceRacer_Error(
            'SurfaceRacer result file %s does not exist. You have probably '
            'encountered a very rare SurfaceRacer round off error that have '
            'caused the program to terminate. The simplest remedy to this '
            'problem is to increase the probe radii with a very small number, '
            'for example from %.3f to %.3f.'
            % (self.f_out_name, self.probe, self.probe+0.001 ) )

    if len(lines) == 0:
        raise SurfaceRacer_Error(
            'SurfaceRacer result file %s empty'%self.f_out_name )

    ## don't parse cavity information, find first occurrence of 'CAVITY'
    ## (searching backwards, the earliest match is assigned last)
    end = len(lines)
    for i in range( len(lines)-1, 0, -1 ):
        if lines[i][:6]=='CAVITY':
            end = i

    ## the values sit in fixed columns counted from the end of each line
    for line in lines[:end]:
        curv.append( float( line[-11:-1].strip() ) )
        ms.append( float( line[-17:-11].strip() ) )
        asa.append( float( line[-24:-17].strip() ) )

    result = {'curvature':N0.array(curv),
              'MS':N0.array(ms),
              'AS':N0.array(asa),
              'surfaceRacerInfo':{'probe_radius':self.probe,
                                  'vdw_set':self.vdw_set}
              }

    ## check curvature profile integrity
    result['curvature'] = \
        self.__checkProfileIntegrity( result['curvature'], 1.0, -1.0 )

    return result
def residusMaximus( self, atomValues, mask=None ):
    """
    Take list of value per atom, return list where all atoms of any
    residue are set to the highest value of any atom in that residue.
    (after applying mask)

    @param atomValues: list 1 x N, values per atom
    @type  atomValues: [ float ]
    @param mask: list 1 x N, 0|1, 'master' atoms of each residue
                 (default: None, all atoms)
    @type  mask: [1|0]

    @return: Numpy array 1 x N of float
    @rtype: array
    """
    if mask is None:
        mask = N0.ones( len( self.frames[0] ), N0.Int32 )

    ## eliminate all values that do not belong to the selected atoms
    masked = atomValues * mask

    ## fetch the residue map once instead of twice per residue
    resMap = self.resMap()

    result = []

    ## set all atoms of each residue to uniform value
    for res in range( 0, resMap[-1]+1 ):

        ## get atom entries for this residue
        resAtoms = N0.compress( N0.equal( resMap, res ), masked )

        ## get maximum value
        masterValue = max( resAtoms )

        ## one entry per atom of the residue, all set to the maximum
        result.extend( resAtoms * 0.0 + masterValue )

    return N0.array( result )
class Test(BT.BiskitTest):
    """Test"""

    def test_SparseArray(self):
        """SparseArray test"""
        ## 1D sparse array created from a shape and filled by index
        template = N0.zeros((6, ), N0.Float32)
        self.sa = SparseArray(template.shape)
        self.sa[3] = 1.
        self.sa[5] = 2.

        ## 2D sparse array created from a dense array
        dense = N0.zeros((5, 6), N0.Float32)
        dense[0, 1] = 3.
        dense[0, 2] = 4
        dense[4, 2] = 5
        dense[3, 0] = 6
        self.sb = SparseArray(dense)

        ## the 1D array becomes the last row of the 2D one
        self.sb.append(self.sa)

        if self.local:
            print(self.sa.toarray())

        matches = self.sb.toarray() == self.EXPECTED
        self.assert_(N.all(matches))

    ## dense version of the 2D array after appending the 1D array
    EXPECTED = N0.array([[0., 3., 4., 0., 0., 0.],
                         [0., 0., 0., 0., 0., 0.],
                         [0., 0., 0., 0., 0., 0.],
                         [6., 0., 0., 0., 0., 0.],
                         [0., 0., 5., 0., 0., 0.],
                         [0., 0., 0., 1., 0., 2.]])
def valuesOf(self, infoKey, default=None, indices=None, unique=0 ):
    """
    Get all values of a certain info record of all or some Complexes.

    @param infoKey: key for info dict
    @type  infoKey: str
    @param default: default value if infoKey is not found (None)
    @type  default: any
    @param indices: list of int OR None(=all), indices of Complexes (None)
    @type  indices: [int] OR None
    @param unique: report each value only once (set union), (default 0)
    @type  unique: 1|0

    @return: list of values
    @rtype: [any]
    """
    l = self
    if indices is not None:
        l = N0.take( N0.array(l,'O'), indices )

    values = [ c.info.get(infoKey, default) for c in l ]

    if not unique:
        return values

    ## keep the first occurrence of each value; the default is reported
    ## for missing keys here as well (the lookup used to drop it).
    ## A list is searched instead of a set as values may be unhashable.
    r = []
    for value in values:
        if value not in r:
            r.append( value )

    return r
def takeMembers(self, mIndices):
    """
    Take all frames belonging to the members in mIndices::
      takeMembers( mIndices ) -> EnsembleTraj with frames of given members

    @param mIndices: list of member indices
    @type  mIndices: [int] OR array('i')

    @return: EnsembleTraj with specified members
    @rtype: EnsembleTraj

    @raise EnsembleTrajError: if a TypeError occurs while the frames
                              are collected

    @todo: return self.__class__ instead of EnsembleTraj
    """
    try:
        ## one row of frame indices per member
        ## (assumes that each member traj has same number of frames)
        per_member = N0.array([self.memberIndices(i) for i in mIndices])

        ## read column by column to get a flat list of frame indices
        frame_idx = N0.ravel(N0.transpose(per_member))

        count = len(mIndices)

        ## has wrong n_members and member order
        frames_only = self.takeFrames(frame_idx)

        ## copy everything into a fresh ensemble and correct n_members
        result = EnsembleTraj(n_members=count)
        result.__dict__.update(frames_only.__dict__)
        result.n_members = count
        result.resetFrameNames()

        return result

    except TypeError:
        sizes = "\nlenFrames: %i; n_members: %i" %(len(self), self.n_members)
        raise EnsembleTrajError('takeMembers TypeError ' + str(mIndices)
                                + sizes)
def test_lognormal(self):
    """Statistics.lognormal test"""
    import random
    import Biskit.gnuplot as gnuplot
    import Biskit.hist as H

    ## first value returned by logConfidence for each random sample
    cr = []

    for i in range( 10000 ):
        ## Some random values drawn from the same lognormal distribution
        alpha = 1.5
        beta = .7
        x = 10.

        ## sample of 10 values
        R = [ random.lognormvariate( alpha, beta ) for j in range( 10 ) ]

        cr += [ logConfidence( x, R )[0] ]

    ## reference value calculated from the distribution parameters
    ca = logArea( x, alpha, beta )

    if self.local:
        ## plot the distribution of the deviations from the reference
        gnuplot.plot( H.density( N0.array(cr) - ca, 100 ) )

        ## publish all local variables as module globals (the local names
        ## are therefore visible from outside -- do not rename them)
        globals().update( locals() )

    ## only the reference value is checked; it does not depend on the
    ## random samples
    self.assertAlmostEqual( ca, 0.86877651432955771, 7)
class Test(BT.BiskitTest):
    """Test case"""

    def test_rmsFit(self):
        """rmsFit test"""
        import Biskit.tools as T

        self.traj = T.load(T.testRoot() + '/lig_pcr_00/traj.dat')

        ## fit the last frame onto the reference coordinates
        reference = self.traj.ref.xyz
        last_frame = self.traj[-1].xyz
        rt, rmsdLst = match(reference, last_frame)

        if self.local:
            print('RMSD: %.2f' % rmsdLst[0][1])

        ## compare the absolute sum over all elements of the returned
        ## rotation matrix with the one of the expected matrix
        found = abs(N0.sum(N0.ravel(rt[0])))
        expected = abs(N0.sum(N0.ravel(self.EXPECT)))

        self.assertAlmostEqual(found, expected, 6)

    ## expected rotation matrix
    EXPECT = N0.array([[ 0.9999011, 0.01311352, 0.00508244, ],
                       [ -0.01310219, 0.99991162, -0.00225578, ],
                       [-0.00511157, 0.00218896, 0.99998454]])
def relExposure(model, absSurf, key='AS', clip=1):
    """
    Calculate how exposed an atom is relative to the same atom in a
    GLY-XXX-GLY tripeptide, an approximation of the unfolded state.

    @param model: model the surface values belong to; chains and atoms
                  are taken from it
    @type  model: PDBModel (presumably -- not visible from here)
    @param absSurf: Absolute MS OR AS values
    @type  absSurf: [float]
    @param key: MS or AS
    @type  key: MS|AS
    @param clip: clip values above 100% (default: 1)
    @type  clip: 1|0

    @return: rel - list of relative accessible surfaces
    @rtype: [float]

    @raise Exception: if key is neither 'MS' nor 'AS'
    """
    if key not in ('MS', 'AS'):
        raise Exception('Incorrect key for relative exposiure: %s '%key)

    rel = []
    atom_pos = 0   ## running atom position over all chains

    ## loop over chains
    for chain_no in range(model.lenChains()):
        chain = model.takeChains([chain_no])
        res_starts = chain.resIndex()

        ## and loop over atoms in chain (k: atom position within chain)
        for k, atom in enumerate(chain.atoms.iterDicts()):

            ## N-terminal residue: atoms before the second residue starts
            in_nter = k < res_starts[1]
            if in_nter:
                rel = __Nter(atom, rel, absSurf, key, atom_pos)

            ## C-terminal residue: atoms from the last residue start on
            in_cter = k >= res_starts[-1]
            if in_cter:
                rel = __Cter(atom, rel, absSurf, key, atom_pos)

            ## everything but N- and C termini
            if not in_nter and not in_cter:
                rel = __bulk(atom, rel, absSurf, key, atom_pos)

            atom_pos += 1

    rel = N0.array(rel)
    if clip:
        return N0.clip(rel, 0.0, 100.0)
    return rel
def __setstate__(self, state):
    """
    Called for unpickling the object: take over the pickled state,
    convert ligandMatrix to a Float32 array and add missing defaults.

    @param state: pickled content of the object
    @type  state: dict
    """
    self.__dict__ = state

    as_float = N0.array(self.ligandMatrix, N0.Float32)
    self.ligandMatrix = as_float

    ## backwards compability
    self.__defaults()
def __collectFrames( self, pdbs, castAll=0 ):
    """
    Read coordinates from list of pdb files.

    @param pdbs: list of file names
    @type  pdbs: [str]
    @param castAll: analyze atom content of each frame for casting
                    (default: 0, only the first frame is analyzed)
    @type  castAll: 0|1

    @return: frames x (N x 3) Numpy array (of float)
    @rtype: array

    @raise TrajError: if a frame does not match the reference
    """
    coordinates = []
    atomCast = None

    if self.verbose:
        T.errWrite('reading %i pdbs...' % len(pdbs) )

    ## cache for atom checking
    refNames = self.ref.atomNames()

    for count, fname in enumerate( pdbs ):

        ## Load
        m = PDBModel( fname )

        ## compare atom order & content of first frame to reference pdb
        if castAll or count == 0:
            atomCast, castRef = m.compareAtoms( self.ref )

            ## we can take away atoms from each frame but not from ref
            if castRef != range( len( self.ref ) ):
                raise TrajError("Reference PDB doesn't match %s."
                                %m.fileName)

            if N0.all( atomCast == range( len( m ) ) ):
                atomCast = None   ## no casting necessary
            elif self.verbose:
                T.errWrite(' casting ')

        ## assert that frame fits reference
        if atomCast:
            m = m.take( atomCast )

        ## additional check on each 100st frame
        if count % 100 == 0 and m.atomNames() != refNames:
            raise TrajError("%s doesn't match reference pdb."%m.fileName )

        coordinates.append( m.xyz )

        ## progress mark after every 10th frame
        if (count + 1) % 10 == 0 and self.verbose:
            T.errWrite('#')

    if self.verbose:
        T.errWrite( 'done\n' )

    ## convert to 3-D Numpy Array
    return N0.array( coordinates ).astype( N0.Float32 )
class Test(BT.BiskitTest):
    """Test case"""

    def test_match2seq(self):
        """match2seq test"""
        ## Reading pdb files
        ## (only the first two entries of the loaded trajectory are used)
        lig_traj = T.load(T.testRoot() + '/lig_pcr_00/traj.dat')[:2]

        ## reduce both models to what maskProtein() selects
        m = [m.compress(m.maskProtein()) for m in lig_traj]

        ## make the models different
        m[1].removeRes(['ALA'])

        mask1, mask2 = compareModels(m[0], m[1])

        if self.local:
            print 'Reading and comparing two models'
            print '\nResidue masks to make the two maodels equal'
            print 'mask1\n', mask1
            print 'mask2\n', mask2
            ## publish all local variables as module globals (the local
            ## names are therefore visible from outside)
            globals().update(locals())

        ## both masks have to match the stored reference masks
        self.assert_(N.all(mask1 == self.EXPECT[0]))
        self.assert_(N.all(mask2 == self.EXPECT[1]))

    def test_sequenceRepeats(self):
        """match2seq sequence repeat test"""
        seq1 = 'ABCDEFG~~~~~~~~~~~~~~~'
        seq2 = '~~~~~'

        mask1, mask2 = compareSequences(seq1, seq2)

        ## both masks are expected to consist of zeros only
        self.assert_(N.all(mask1 == N0.zeros(len(seq1))))
        self.assert_(N.all(mask2 == N0.zeros(len(seq2))))

    ## reference masks for test_match2seq: (mask for model 0, mask for model 1)
    EXPECT = N0.array([1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1],N0.Int),\
             N0.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],N0.Int)
def removeFrames( self, indices ):
    """
    Remove given frames from this trajectory object.

    @param indices: frame numbers
    @type  indices: [int]
    """
    ## list.remove() drops at most one item equal to its argument and
    ## cannot take a whole array of indices, so the frames to keep are
    ## selected explicitly
    discard = set( [ int( i ) for i in indices ] )

    keep = [ i for i in range( self.lenFrames() ) if i not in discard ]

    self.keepFrames( keep )
def removeMembers(self, indices):
    """
    Remove given member trajectories from this ensemble.

    @param indices: trajectory (member) numbers
    @type  indices: [int]
    """
    ## list.remove() drops at most one item equal to its argument and
    ## cannot take a whole array of indices, so the members to keep are
    ## selected explicitly
    discard = set([int(i) for i in indices])

    keep = [i for i in range(self.n_members) if i not in discard]

    self.keepMembers(keep)
def convertChainIdsCter(self, model, chains):
    """
    Convert normal chain ids to chain ids considering chain breaks.

    The conversion goes via the last atom of each chain. An empty
    list of chains is returned as it is.

    @param model: model the chain ids refer to
    @type  model: PDBModel (presumably -- not visible from here)
    @param chains: chain indices (without chain breaks)
    @type  chains: [int]

    @return: result of model.atom2chainIndices( ..., breaks=1 ) for the
             last atom of each chain
    """
    if len(chains) == 0:
        return chains

    ## chain starts followed by the atom count, so that each chain has a
    ## 'start of next chain' entry
    boundaries = N0.concatenate((model.chainIndex(), [len(model)]))

    ## fetch last atom of given chains: the atom before the next start
    following = N0.array(chains) + 1
    last_atoms = N0.take(boundaries, following) - 1

    ## convert back to chain indices but this time including chain breaks
    return model.atom2chainIndices(last_atoms, breaks=1)
def __getstate__(self):
    """
    Called before pickling the object. Frames given as list or as array
    of double precision are converted to a Float32 array first.

    @return: content of the object
    @rtype: dict
    """
    try:
        if isinstance( self.frames, list ) or self.frames.dtype.char == 'd':
            EHandler.warning("Converting coordinates to float array.")
            self.frames = N0.array( self.frames ).astype(N0.Float32)

    ## a failed conversion must not prevent pickling, but interrupts
    ## (KeyboardInterrupt, SystemExit) are no longer swallowed
    except Exception:
        EHandler.warning('Could not convert frames to float array.', 1)

    return self.__dict__
def markOutliers( traj, z, page ):
    """
    Cross out the plots of all outlier member trajectories.

    @param traj: ensemble trajectory, provides outliers() and takeMember()
    @param z: z value handed on to traj.outliers()
    @param page: plot container indexed by [ row, column ] with two
                 plots per row
    """
    ca_mask = traj.ref.maskCA()
    outliers = N0.nonzero( traj.outliers( z=z, mask=ca_mask ) )

    for o in outliers:
        member = traj.takeMember( o )

        ## highest value of both rms profiles of this member
        values = N0.array( member.profiles['rmsCA_ref'] ).tolist()
        values.extend( member.profiles['rmsCA_last'] )
        top = max( values )

        ## cross out outliers in plot: diagonal from origin to
        ## (number of frames, highest value)
        diagonal = biggles.Line( (0,0), (len(member), top) )

        row, column = divmod( o, 2 )
        page[ row, column ].add( diagonal )
def markOutliers(traj, z, page):
    """
    Draw a diagonal line through the plot of each outlier member.

    @param traj: ensemble trajectory, provides outliers() and takeMember()
    @param z: z value handed on to traj.outliers()
    @param page: plot container indexed by [ row, column ] with two
                 plots per row
    """
    flagged = traj.outliers(z=z, mask=traj.ref.maskCA())

    for o in N0.nonzero(flagged):
        t = traj.takeMember(o)

        ## collect both rms profiles to find the highest plotted value
        combined = N0.array(t.profiles['rmsCA_ref']).tolist()
        combined.extend(t.profiles['rmsCA_last'])

        ## cross out outliers in plot
        start = (0, 0)
        end = (len(t), max(combined))
        line = biggles.Line(start, end)

        ## two plots per row: member o sits in row o // 2, column o % 2
        position = divmod(o, 2)
        page[position[0], position[1]].add(line)
def __getslice__( self, a, b ):
    """
    Slice a sparse array::
      this[ a : b ] -> SparseArray

    @param a: start of slice
    @type  a: int
    @param b: end of slice
    @type  b: int

    @return: sliced sparse array
    @rtype: SparseArray
    """
    ## same type code and default, first dimension cut to the slice
    new_shape = ( abs( b - a ), ) + self.shape[1:]
    result = self.__class__( new_shape, self.__typecode, self.__default )

    ## range of stored entries that fall into the slice
    first = self.__pos( a )
    last = self.__pos( b )
    stored = slice( first, last )

    ## indices are shifted so that the slice starts at 0
    shifted = N0.array( self.indices[ stored ] ) - a
    result.put( shifted, self.values[ stored ] )

    return result
def __read_residueASA(self):
    """
    Read solvent accessibility calculated with WHATIF and return array
    of ASA-values. First column is the total ASA, second column the ASA
    of the backbone, the third column is the ASA of the side-chain.

    @return: array with one row per parsed line and the three ASA
             values as columns
    @rtype: array
    """
    ## close the file even if reading fails
    asa_file = open(self.f_residueASA)
    try:
        ## [14:-27] skip header and tail lines
        lines = asa_file.readlines()[14:-27]
    finally:
        asa_file.close()

    ## the ASA values are the last three fields of each line
    ASA_values = [[float(field) for field in line.split()[-3:]]
                  for line in lines]

    return N0.array(ASA_values)
def __read_residueASA( self ):
    """
    Read solvent accessibility calculated with WHATIF and return array
    of ASA-values. First column is the total ASA, second column the ASA
    of the backbone, the third column is the ASA of the side-chain.

    @return: array with one row per parsed line and the three ASA
             values as columns
    @rtype: array
    """
    ## close the file even if reading fails
    f = open( self.f_residueASA )
    try:
        ## [14:-27] skip header and tail lines
        lines = f.readlines()[14:-27]
    finally:
        f.close()

    ASA_values = []
    for line in lines:
        ## the ASA values are the last three fields of the line
        ASA_values.append( [ float( v ) for v in line.split()[-3:] ] )

    return N0.array( ASA_values )
def store(self, val):
    """
    Analyze distribution data.

    The first column of val is stored as x, the second one as p. p is
    normalized in place so that sum( p ) * delta_x becomes 1, where
    delta_x is the distance between the first two x values.

    @param val: array (N x 2) with x value and value of the distribution
                per row
    @type  val: array
    """
    self.val = N0.array(val, N0.Float32)

    ## both columns are taken from the stored array
    self.x, self.p = self.val[:, 0], self.val[:, 1]

    ## spacing of the x values
    self.delta_x = abs(self.x[0] - self.x[1])

    ## normalization constant
    Z = N0.sum(self.p) * self.delta_x

    self.p /= Z
def __getslice__( self, a, b ):
    """
    Slice a sparse array::
      this[ a : b ] -> SparseArray

    @param a: start of slice
    @type  a: int
    @param b: end of slice
    @type  b: int

    @return: sliced sparse array
    @rtype: SparseArray
    """
    length = abs( b - a )
    result = self.__class__( ( length, ) + self.shape[1:],
                             self.__typecode, self.__default )

    ## positions of the slice borders within the stored entries
    lower = self.__pos( a )
    upper = self.__pos( b )

    ## stored indices within the slice, counted from the slice start
    new_indices = N0.array( self.indices[ lower : upper ] ) - a
    new_values = self.values[ lower : upper ]

    result.put( new_indices, new_values )

    return result
def __random_matrix( self ):
    """
    Random rotation matrix.

    @return: 4 x 4 array of float, random rotation and translation matrix
    @rtype: array
    """
    rotation = ma.randomRotation()
    translation = self.__random_translation()

    ## the translation becomes a column to the right of the rotation
    column = N0.transpose( [ translation.tolist() ] )
    upper = N0.concatenate( (rotation, column), 1 )

    ## make it square
    last_row = N0.array( [[0,0,0,1]], N0.Float32 )

    return N0.concatenate( (upper, last_row), 0 )
def random_translations( self, n=1, center=None ):
    """
    n Random translations on a sphere around center with fixed radius.
    The radius must be given as orbit to __init__.

    @param n: number of random coordinates to generate (default: 1)
    @type  n: int
    @param center: center of the sphere (default: None, self.center)
    @type  center: 3 array of float

    @return: array n x 3 of float
    @rtype: array
    """
    if center is None:
        center = self.center

    ## random vectors with components between -0.5 and 0.5
    vectors = ra.random( (n,3) ) - 0.5

    ## factor that stretches each vector to the length of the orbit
    stretch = self.orbit*1.0 / N0.sqrt( N0.sum( vectors**2, 1 ) )

    scaled = N0.array( [ s*v for s, v in zip( stretch, vectors ) ] )

    return scaled + center
def random_translations(self, n=1, center=None):
    """
    n Random translations on a sphere around center with fixed radius.
    The radius must be given as orbit to __init__.

    @param n: number of random coordinates to generate (default: 1)
    @type  n: int
    @param center: center of the sphere (default: None, self.center)
    @type  center: 3 array of float

    @return: array n x 3 of float
    @rtype: array
    """
    if center is None:
        center = self.center

    ## random directions, each component between -0.5 and 0.5
    directions = ra.random((n, 3)) - 0.5

    ## orbit divided by the length of each direction vector
    lengths = N0.sqrt(N0.sum(directions**2, 1))
    factors = self.orbit * 1.0 / lengths

    ## stretch every direction to the length of the orbit
    points = []
    for k in range(n):
        points.append(factors[k] * directions[k])

    return N0.array(points) + center
def averageContacts(self, step=10, cutoff=4.5):
    """
    Use::
      averageContacts( step=10, cutoff=4.5 )

    @param step: take only each |step|th frame (default: 10)
    @type  step: int
    @param cutoff: distance cutoff in Angstrom (default: 4.5)
    @type  cutoff: float

    @return: contact matrix with frequency of each contact in
             (thinned) traj.
    @rtype: matrix
    """
    ## contact matrices of every step-th frame
    frames = range(0, len(self), step)
    matrices = [self.atomContacts(i, cutoff=cutoff) for i in frames]

    ## sum over the frames, divided by the number of frames
    total = N0.sum(N0.array(matrices))

    return total / float(len(matrices))
def __setAll_1D(self, a):
    """
    Replace content of this sparseArray with values from Numeric array
    or list of numbers -- only for 1-dimensional arrays.

    @param a: array OR list
    @type  a: array OR [ number ]

    @raise SparseArrayError: if the shape of a differs from the shape
                             of this array
    """
    if type(a) is list:
        a = N0.array(a, self.__typecode)

    if self.shape != a.shape:
        raise SparseArrayError('dimensions not aligned')

    ## only positions that differ from the default value are stored
    differs = N0.logical_not(N0.equal(a, self.__default))

    self.indices = N0.nonzero(differs).tolist()
    self.values = N0.take(a, self.indices).tolist()
def __setAll_1D( self, a ):
    """
    Replace content of this sparseArray with values from Numeric array
    or list of numbers -- only for 1-dimensional arrays.

    @param a: array OR list
    @type  a: array OR [ number ]

    @raise SparseArrayError: if the shape of a differs from the shape
                             of this array
    """
    if type( a ) is list:
        a = N0.array( a, self.__typecode )

    if self.shape != a.shape:
        raise SparseArrayError( 'dimensions not aligned' )

    ## positions holding the default value are not stored
    is_default = N0.equal( a, self.__default )
    positions = N0.nonzero( N0.logical_not( is_default ) )

    self.indices = positions.tolist()
    self.values = N0.take( a, self.indices ).tolist()
def nondefault( self ):
    """
    Get a 1D list of indices that have a non-default value in a raveled
    version of this array. If L.default()==0 this would be equivalent to
    nonzero( ravel( L.toarray() ) ) (except that the Numeric array is
    never constructed).

    @return: list of indices with none default values
    @rtype: [ int ]
    """
    if self.is1D:
        return self.indices

    ## multidimensional: each entry along the first axis covers as many
    ## raveled positions as the product of ALL remaining dimensions
    ## (shape[1] alone is only correct for 2D arrays)
    sub_size = 1
    for dim in self.shape[1:]:
        sub_size *= dim

    r = []
    for (i, a) in zip( self.indices, self.values ):
        ## shift the raveled indices of the sub-array to its position
        offset = sub_size * i
        r.extend( [ int( j + offset ) for j in a.nondefault() ] )

    return r
def test( model, center2center, nAtoms=10, exclude=None ):
    """
    Generate 500 random patches and put them into a Pymoler movie.

    @param model: model to create patches from
    @param center2center: handed on to PatchGeneratorFromOrbit
    @param nAtoms: handed on to randomPatches() as first argument;
                   a quarter of it is the allowed overlap (default: 10)
    @type  nAtoms: int
    @param exclude: handed on to randomPatches() (default: None)

    @return: Pymoler instance with the model and one movie frame per patch
    @rtype: Pymoler
    """
    from Biskit import Pymoler, PDBModel

    generator = PatchGeneratorFromOrbit( model, center2center )

    max_overlap = int( round( nAtoms / 4.0 ) )
    masks = generator.randomPatches( nAtoms, 500, max_overlap=max_overlap,
                                     exclude=exclude )

    ## sum over all patch masks (not used any further)
    profile = N0.sum( N0.array( masks ) )

    viewer = Pymoler()
    viewer.addPdb( model, 'all' )

    ## one sub-model per patch
    patches = [ model.take( N0.nonzero( mask ) ) for mask in masks ]
    viewer.addMovie( patches )

    return viewer
def hmmEmm2Prob( self, nullEmm, emmScore ):
    """
    Convert HMM profile emission scores into emission probabilities

    @param nullEmm: null scores
    @type  nullEmm: array
    @param emmScore: emission scores
    @type  emmScore: array

    @return: null and emission probabilities, for each amino acid
             in each position
    @rtype:  array( len_seq x 20 ), array( 1 x 20 )
    """
    ## Null probabilities: prob = 2 ^ (nullEmm / 1000) * 1/len(alphabet)
    null_exponent = N0.array( nullEmm )/1000.0
    nullProb = N0.power( 2, null_exponent )*(1./20)

    ## Emission probabilities: prob = nullProb * 2 ^ (emmScore / 1000)
    ## see http://www.ebc.ee/WWW/hmmer2-html/node26.html
    emm_exponent = emmScore/1000.0
    emmProb = nullProb * N0.power( 2, emm_exponent )

    return emmProb, nullProb
def parse_result( self ):
    """
    Extract some information about the profile as well as the
    match state emission scores. Keys of the returned dictionary::
      'AA', 'name', 'NrSeq', 'emmScore', 'accession',
      'maxAllScale', 'seqNr', 'profLength', 'ent', 'absSum'

    @return: dictionary with various information about the profile
    @rtype: dict

    @raise HmmerError: if the result file is missing or seems incomplete
    """
    ## check that the output file is there and seems valid
    if not os.path.exists( self.f_out ):
        raise HmmerError(
            'Hmmerfetch result file %s does not exist.'%self.f_out )

    if T.fileLength( self.f_out ) < 10:
        raise HmmerError(
            'Hmmerfetch result file %s seems incomplete.'%self.f_out )

    profileDic = {}

    ## read result
    hmm = open( self.f_out, 'r')
    out = hmm.read()
    hmm.close()

    def fields( pattern ):
        """whitespace separated fields of the first match of pattern"""
        return re.findall( pattern, out )[0].split()

    ## collect some data about the hmm profile
    profileDic['name'] = self.hmmName
    profileDic['profLength'] = int( fields( r'LENG\s+[0-9]+' )[1] )
    profileDic['accession'] = fields( r'ACC\s+PF[0-9]+' )[1]
    profileDic['NrSeq'] = int( fields( r'NSEQ\s+[0-9]+' )[1] )
    profileDic['AA'] = fields( 'HMM[ ]+' + '[A-Y][ ]+'*20 )[1:]

    ## collect null emission scores
    pattern = 'NULE[ ]+' + '[-0-9]+[ ]+'*20
    nullEmm = [ float(j) for j in fields( pattern )[1:] ]

    ## get emission scores: one row per profile position, the first
    ## value of each row is the position number
    prob = []
    for i in range( 1, profileDic['profLength']+1 ):
        pattern = "[ ]+%i"%i + "[ ]+[-0-9]+"*20
        prob.append( [ float(j) for j in fields( pattern ) ] )

    profileDic['seqNr'] = N0.transpose( N0.take( prob, (0,),1 ) )
    profileDic['emmScore'] = N0.array(prob)[:,1:]

    ## calculate emission probabilities
    emmProb, nullProb = self.hmmEmm2Prob( nullEmm, profileDic['emmScore'])

    ent = [ N0.resize( self.entropy(e, nullProb), (1,20) )[0]
            for e in emmProb ]
    profileDic['ent'] = N0.array(ent)

    ## scores without the position column
    proba = N0.array(prob)[:,1:]

    ## set all to N0.sum( abs( scores ) ) of the position
    absSum = []
    for row in proba:
        total = N0.sum( N0.absolute( row ) )
        absSum.append( N0.resize( total, N0.shape( row ) ) )
    profileDic['absSum'] = absSum

    ## set all to normalized max score of the position
    maxAllScale = []
    for row in proba:
        scaled = ( row - N0.average( row ) ) / math.SD( row )
        highest = scaled[ N0.argmax( N0.array( scaled ) ) ]
        maxAllScale.append( N0.resize( highest, N0.shape( row ) ) )
    profileDic['maxAllScale'] = maxAllScale

    return profileDic
def __list2array( self, lstOrAr ):
    """
    Convert a list into an array; anything else is returned unchanged.

    @param lstOrAr: list or array
    @type  lstOrAr: [any] OR array

    @return: array for a list, otherwise the input itself
    @rtype: array
    """
    if type( lstOrAr ) != list:
        return lstOrAr

    return N0.array( lstOrAr )
for xyz in t.frames: result_xyz.append( xyz.astype('f') ) for fname in t.frameNames: result_frameNames.append( fname ) T.flushPrint('#') print " Done" result = Trajectory() result.ref = result_ref result.ref.disconnect() if 'pdb' in o: result.ref.pdbCode = o['pdb'] result.frames = N0.array( result_xyz, 'f' ) result.frameNames = result_frameNames del result_xyz ## too much memory required for this ## result = trajLst[0].concat( *trajLst[1:] ) T.flushPrint("Converting to EnsembleTraj...") result = traj2ensemble( result, len(inLst)) T.flushPrint( "Done\nDumping ensemble traj to " + o['o'] ) T.dump( result, T.absfile( o['o'] ) )