def rmsMatrixByMember( self, mirror=0, step=1 ):
    """
    Return the pairwise rms matrix re-sorted by ensemble member first and
    by time second (requires EnsembleTraj).

    @param mirror: keep both halves of the matrix; only relevant for the
                   intra-trajectory case (default: 0)
    @type  mirror: 0|1
    @param step: take only every step'th frame (default: 1)
    @type  step: int

    @return: re-ordered rms matrix
    @rtype: array
    """
    single_traj = self.traj_2 is None

    ## the intra-traj. matrix is always fetched mirrored, so that the
    ## re-sorting below moves complete rows and columns
    full = self.getResult( mirror=single_traj )

    rows = self.traj_1.argsortMember( step=step )
    cols = rows
    if not single_traj:
        cols = self.traj_2.argsortMember( step=step )

    a = N0.take( N0.take( full, rows, 0 ), cols, 1 )

    if single_traj and not mirror:
        ## blank the diagonal and everything below it
        n_rows, n_cols = N0.shape( a )[0], N0.shape( a )[1]
        for col in range( n_rows ):
            for row in range( col, n_cols ):
                a[row, col] = 0.

    return a
def rmsMatrixByMember(self, mirror=0, step=1):
    """
    Return the pairwise rms matrix re-sorted by ensemble member first and
    by time second (requires EnsembleTraj).

    @param mirror: keep both halves of the matrix; only relevant for the
                   intra-trajectory case (default: 0)
    @type  mirror: 0|1
    @param step: take only every step'th frame (default: 1)
    @type  step: int

    @return: re-ordered rms matrix
    @rtype: array
    """
    single_traj = self.traj_2 is None

    ## the intra-traj. matrix is always fetched mirrored, so that the
    ## re-sorting below moves complete rows and columns
    full = self.getResult(mirror=single_traj)

    rows = self.traj_1.argsortMember(step=step)
    cols = rows
    if not single_traj:
        cols = self.traj_2.argsortMember(step=step)

    a = N0.take(N0.take(full, rows, 0), cols, 1)

    if single_traj and not mirror:
        ## blank the diagonal and everything below it
        n_rows, n_cols = N0.shape(a)[0], N0.shape(a)[1]
        for col in range(n_rows):
            for row in range(col, n_cols):
                a[row, col] = 0.

    return a
def getResult(self, mirror=0):
    """
    Assemble the complete rms matrix from the windows stored in
    self.result, ordered like the input trajectory (or trajectories).

    @param mirror: keep both halves of the matrix; only used for the
                   intra-trajectory case (default: 0)
    @type  mirror: 1|0

    @return: array( (n_frames, n_frames), 'f'), matrix of pairwise rms
    @rtype: array
    """
    if self.verbose:
        self.log.write('assembling result matrix...')

    single_traj = self.traj_2 is None

    n_rows = len(self.traj_1)
    n_cols = n_rows
    if not single_traj:
        n_cols = len(self.traj_2)

    a = N0.zeros((n_rows, n_cols), N0.Float32)

    if self.verbose:
        self.log.write('#')

    ## paste every calculated window into its place
    for window_key, values in self.result.items():
        (i_start, i_stop), (j_start, j_stop) = window_key[0], window_key[1]

        block = N0.reshape(values, (i_stop - i_start, j_stop - j_start))
        a[i_start:i_stop, j_start:j_stop] = block.astype(N0.Float32)

    if self.verbose:
        self.log.write('#')

    if single_traj:
        ## make the matrix symmetric; a non-zero value below the diagonal
        ## takes precedence over its counterpart above
        for i in range(N0.shape(a)[0]):
            for j in range(i, N0.shape(a)[1]):
                if a[j, i] != 0:
                    a[i, j] = a[j, i]
                else:
                    a[j, i] = a[i, j]

    if self.verbose:
        self.log.write('#')

    if single_traj and not mirror:
        ## blank the diagonal and everything below it
        for i in range(N0.shape(a)[0]):
            for j in range(i, N0.shape(a)[1]):
                a[j, i] = 0.

    if self.verbose:
        self.log.add('done')

    return a
def getResult( self, mirror=0 ):
    """
    Assemble the complete rms matrix from the windows stored in
    self.result, ordered like the input trajectory (or trajectories).

    @param mirror: keep both halves of the matrix; only used for the
                   intra-trajectory case (default: 0)
    @type  mirror: 1|0

    @return: array( (n_frames, n_frames), 'f'), matrix of pairwise rms
    @rtype: array
    """
    if self.verbose:
        self.log.write('assembling result matrix...')

    single_traj = self.traj_2 is None

    n_rows = len( self.traj_1 )
    n_cols = n_rows
    if not single_traj:
        n_cols = len( self.traj_2 )

    a = N0.zeros( (n_rows, n_cols), N0.Float32 )

    if self.verbose:
        self.log.write('#')

    ## paste every calculated window into its place
    for window_key, values in self.result.items():
        (i_start, i_stop), (j_start, j_stop) = window_key[0], window_key[1]

        block = N0.reshape( values, (i_stop-i_start, j_stop-j_start) )
        a[i_start:i_stop, j_start:j_stop] = block.astype(N0.Float32)

    if self.verbose:
        self.log.write('#')

    if single_traj:
        ## make the matrix symmetric; a non-zero value below the diagonal
        ## takes precedence over its counterpart above
        for i in range( N0.shape(a)[0] ):
            for j in range( i, N0.shape(a)[1] ):
                if a[j,i] != 0:
                    a[i,j] = a[j,i]
                else:
                    a[j,i] = a[i,j]

    if self.verbose:
        self.log.write('#')

    if single_traj and not mirror:
        ## blank the diagonal and everything below it
        for i in range( N0.shape(a)[0] ):
            for j in range( i, N0.shape(a)[1] ):
                a[j,i] = 0.

    if self.verbose:
        self.log.add('done')

    return a
def test_MatrixPlot(self):
    """MatrixPlot test"""
    size = 30
    center = size / 2

    ## fill the matrix with a bell-shaped bump around its center
    z = N0.zeros((size, size), N0.Float)
    n_rows, n_cols = N0.shape(z)[0], N0.shape(z)[1]

    for i in range(n_rows):
        for j in range(n_cols):
            dist2 = (i - center)**2 + (j - center)**2
            z[i, j] = N0.exp(-0.01 * dist2)

    self.p = MatrixPlot(z, palette='sausage', legend=1)

    ## only pop up the plot in interactive / very verbose runs
    if self.local or self.VERBOSITY > 2:
        self.p.show()

    self.assert_(self.p is not None)
def __atomContacts(self, cutoff, rec_mask, lig_mask, cache):
    """
    Matrix flagging all receptor / ligand atom pairs that are closer than
    cutoff, considering only the atoms selected by the two masks.

    @param cutoff: cutoff for B{atom-atom} contact in Angstrom
    @type  cutoff: float
    @param rec_mask: atom mask
    @type  rec_mask: [1|0]
    @param lig_mask: atom mask
    @type  lig_mask: [1|0]
    @param cache: cache pairwise atom distance matrix
    @type  cache: 1|0

    @return: atom contact matrix, array sum_rec_mask x sum_lig_mask
    @rtype: array
    """
    ## coordinates of all receptor and ligand atoms
    rec_xyz = self.rec().getXyz()
    lig_xyz = self.lig().getXyz()

    ## a previously cached distance matrix is only re-used if its shape
    ## fits the current masks
    dist = getattr(self, 'pw_dist', None)

    cache_fits = dist is not None and \
        not (N0.shape(dist) != (N0.sum(rec_mask), N0.sum(lig_mask)))

    if not cache_fits:
        rec_selected = N0.compress(rec_mask, rec_xyz, 0)
        lig_selected = N0.compress(lig_mask, lig_xyz, 0)
        dist = self.__pairwiseDistances(rec_selected, lig_selected)

    if cache:
        self.pw_dist = dist

    ## 1 where distance < cutoff, 0 elsewhere -> n_atoms_rec x n_atoms_lig
    return N0.less(dist, cutoff)
def loadResContacts(self):
    """
    Uncompress residue contact matrix if necessary.

    Three cases are handled:
      - self.contacts is a string: it is passed to t.load and replaced by
        the loaded object (old-style pickled contacts)
      - self.contacts['result'] is 1-D: it is taken as a list of indices
        into the raveled contact matrix and expanded back into a 2-D
        matrix of 0 and 1
      - anything else (including None) is returned unchanged

    @return: dict with contact matrix and parameters OR None
    @rtype: dict OR None
    """
    ## Backwards compatibility
    if isinstance(self.contacts, str):
        self.contacts = t.load(self.contacts)
        EHandler.warning("loading old-style pickled contacts.")
        return self.contacts

    ## New, uncompression from list of indices into raveled array
    if self.contacts is not None and \
            len(N0.shape(self.contacts['result'])) == 1:

        try:
            lenRec, lenLig = self.contacts['shape']
        except (KeyError, TypeError, ValueError):
            ## shape record missing or unusable -- fall back to the
            ## residue counts of receptor and ligand
            EHandler.warning("uncompressing contacts without shape")
            lenRec = self.rec().lenResidues()
            lenLig = self.lig().lenResidues()

        m = N0.zeros(lenRec * lenLig)
        N0.put(m, self.contacts['result'], 1)

        self.contacts['result'] = N0.reshape(m, (lenRec, lenLig))

    return self.contacts
def test_Analyzer( self):
    """Dock.Analyzer test """
    from Biskit import Trajectory
    from Biskit.Dock import ComplexList

    root = t.testRoot()

    ## a minimal 1-frame receptor trajectory built from a pdb file
    self.t_rec = Trajectory( [ root + '/rec/1A2P.pdb' ],
                             verbose=self.local )
    t.dump( self.t_rec, self.f_out )

    ## the complex list providing the template contact matrix
    complexes = t.load( root + '/dock/hex/complexes.cl' )

    self.a = Analyzer( rec=self.f_out,
                       lig=root + '/lig_pcr_00/traj.dat',
                       ref=root + '/com/ref.complex',
                       verbose=self.local )

    ## shuffle this list five times (result is not checked)
    shuff_lst = self.a.shuffledLists( 5, range(8) )

    ## create two random contact matrices
    template = complexes[0].atomContacts()
    rand_mat = self.a.random_contacts( template, 2 )

    self.assertEqual( N0.shape( rand_mat[1] ), (1075, 876) )
def random_contacts( self, contMat, n, maskRec=None, maskLig=None ):
    """
    Create randomized surface contact matrix with same number of
    contacts and same shape as given contact matrix.

    Each mask defaults independently to 'all positions allowed'; random
    contacts are only placed where both masks are non-zero.

    @param contMat: template contact matrix
    @type  contMat: matrix
    @param n: number of matrices to generate
    @type  n: int
    @param maskRec: surface masks (or something similar),
                    default: all ones
    @type  maskRec: [1|0]
    @param maskLig: surface masks (or something similar),
                    default: all ones
    @type  maskLig: [1|0]

    @return: list of [n] random contact matricies
    @rtype: [matrix]
    """
    a, b = N0.shape( contMat )
    nContacts = N0.sum( N0.sum( contMat ))

    ## test against None explicitly: the truth value of an array mask is
    ## ambiguous, and one mask must not reset or invalidate the other
    if maskRec is None:
        maskRec = N0.ones( a )
    if maskLig is None:
        maskLig = N0.ones( b )

    ## flat positions where both receptor and ligand mask are set
    c_mask = N0.ravel( N0.outerproduct( maskRec, maskLig ) )
    c_pos = N0.nonzero( c_mask )

    length = len( c_pos )

    result = []

    for i in range( n ):
        ## random 0/1 array over the allowed positions
        ranCont = mathUtils.randomMask( nContacts, length )

        ## blow up to size of original matrix
        r = N0.zeros( a*b )
        N0.put( r, c_pos, ranCont )

        result.append( N0.reshape( r, (a,b) ) )

    return result
def centers(self):
    """
    Get 'center structure' for each cluster.

    The second dimension of self.fcCenters is split into triplets
    (one per atom).

    @return: N0.array( n_clusters x n_atoms_masked x 3 )
    @rtype: array
    """
    ## floor division keeps the atom count an integer even where '/'
    ## performs true division
    lenAtoms = N0.shape(self.fcCenters)[1] // 3

    return N0.reshape(self.fcCenters, (self.n_clusters, lenAtoms, 3))
def slim(self):
    """
    Drop cached data and compress the contact matrix to save memory.

    Resets the transformed ligand and the cached pairwise distances,
    removes the 'matrix' entry from self.info and replaces a 2-D contact
    matrix by the indices of its non-zero positions (plus its shape).

    @note: CALLED BEFORE PICKLING
    """
    self.lig_transformed = None
    self.pw_dist = None

    try:
        del self.info['matrix']
    except KeyError:
        pass

    ## nothing to compress without contacts or if already compressed
    if self.contacts is None:
        return
    if len(N0.shape(self.contacts['result'])) != 2:
        return

    m = self.contacts['result']

    ## remember the shape so that the matrix can be restored later
    self.contacts['shape'] = N0.shape(m)

    flat = N0.ravel(m)
    self.contacts['result'] = N0.nonzero(flat).astype(N0.Int32)
def test_FlexMaster(self):
    """TrajFlexMaster test"""
    from Biskit.MatrixPlot import MatrixPlot
    from numpy.random.mtrand import random_sample as random

    assert len(hosts.cpus_all) > 0,\
        'Master requires at least 1 PVM node for initialisation.'

    traj_1 = traj2ensemble(T.load(T.testRoot() + '/lig_pcr_00/traj.dat'))

    ## fake a second trajectory: copy of the first with random noise
    ## that grows with the frame index (first frame stays untouched)
    frames = []
    for i in range(len(traj_1)):
        f = traj_1.frames[i]

        if i > 0:
            noise = random(N0.shape(f)) * ((i / 10) + 1)
        else:
            noise = N0.zeros(N0.shape(f), N0.Float32)

        frames.append(f + noise)

    traj_2 = traj_1.clone()
    traj_2.frames = frames

    master = TrajFlexMaster(traj_1,
                            traj_2,
                            hosts=hosts.cpus_all,
                            show_output=self.local,
                            add_hosts=1,
                            log=None,
                            slaveLog=None,
                            verbose=self.local,
                            only_cross_member=0)

    r = master.calculateResult(mirror=0)

    if self.local:
        p = MatrixPlot(r, palette='plasma2', legend=1)
        p.show()
def test_FlexMaster(self):
    """TrajFlexMaster test"""
    from Biskit.MatrixPlot import MatrixPlot
    from numpy.random.mtrand import random_sample as random

    assert len(hosts.cpus_all) > 0,\
        'Master requires at least 1 PVM node for initialisation.'

    traj_1 = traj2ensemble( T.load( T.testRoot() + '/lig_pcr_00/traj.dat' ) )

    ## fake a second trajectory: copy of the first with random noise
    ## that grows with the frame index (first frame stays untouched)
    frames = []
    for i in range( len( traj_1 ) ):
        f = traj_1.frames[i]

        if i > 0:
            noise = random( N0.shape( f ) ) * ((i / 10) + 1)
        else:
            noise = N0.zeros( N0.shape( f ), N0.Float32)

        frames.append( f + noise )

    traj_2 = traj_1.clone()
    traj_2.frames = frames

    master = TrajFlexMaster( traj_1,
                             traj_2,
                             hosts=hosts.cpus_all,
                             show_output= self.local,
                             add_hosts=1,
                             log=None,
                             slaveLog=None,
                             verbose= self.local,
                             only_cross_member=0 )

    r = master.calculateResult( mirror=0 )

    if self.local:
        p = MatrixPlot( r, palette='plasma2', legend=1 )
        p.show()
def __init__( self, array_or_shape=0, typecode='f', default=0. ):
    """
    Build a sparse array, either empty with a given shape or filled from
    a dense Numeric array / list.

    @param array_or_shape: create sparse array from::
                             ( int, ),  shape of array
                              OR int,   length of array
                              OR array, numeric (dense) array
    @type  array_or_shape: ( int, ) OR int OR array
    @param typecode: single char type of values ['f' OR input type]
    @type  typecode: str
    @param default: value of majority of array content (default: 0.)
    @type  default: any

    @raise SparseArrayError: if array_or_shape is neither tuple, int,
                             array nor list
    """
    self.values  = []
    self.indices = []
    self.__default = default
    self.__typecode= typecode

    self.__last_pos = 0  ## cache last position manipulated
    self.__last_i = 0    ## cache last index manipulated

    a = array_or_shape
    atype = type( a )
    from_data = atype is N0.arraytype or atype is list

    if atype is tuple:
        self.shape = a
    elif atype is int:
        self.shape = ( a, )
    elif from_data:
        self.shape = N0.shape( a )
    else:
        raise SparseArrayError( '%s argument not allowed.' %\
                                str(atype) )

    self.is1D = len( self.shape ) == 1

    if not self.is1D:
        ## multidimensional: every position defaults to a sparse array
        ## of the remaining dimensions
        self.__default = SparseArray( self.shape[1:], typecode, default )
        self.__typecode = 'SA'

    if from_data:
        self.__setAll( a )
def __init__(self, array_or_shape=0, typecode='f', default=0.):
    """
    Build a sparse array, either empty with a given shape or filled from
    a dense Numeric array / list.

    @param array_or_shape: create sparse array from::
                             ( int, ),  shape of array
                              OR int,   length of array
                              OR array, numeric (dense) array
    @type  array_or_shape: ( int, ) OR int OR array
    @param typecode: single char type of values ['f' OR input type]
    @type  typecode: str
    @param default: value of majority of array content (default: 0.)
    @type  default: any

    @raise SparseArrayError: if array_or_shape is neither tuple, int,
                             array nor list
    """
    self.values = []
    self.indices = []
    self.__default = default
    self.__typecode = typecode

    self.__last_pos = 0  ## cache last position manipulated
    self.__last_i = 0  ## cache last index manipulated

    a = array_or_shape
    atype = type(a)
    from_data = atype is N0.arraytype or atype is list

    if atype is tuple:
        self.shape = a
    elif atype is int:
        self.shape = (a, )
    elif from_data:
        self.shape = N0.shape(a)
    else:
        raise SparseArrayError('%s argument not allowed.' %\
                               str(atype))

    self.is1D = len(self.shape) == 1

    if not self.is1D:
        ## multidimensional: every position defaults to a sparse array
        ## of the remaining dimensions
        self.__default = SparseArray(self.shape[1:], typecode, default)
        self.__typecode = 'SA'

    if from_data:
        self.__setAll(a)
def ramachandran_background(self):
    """
    Build the background (favoured regions) of a ramachandran plot from
    the matrix stored in biggles/ramachandran_bg.dat.

    @return: list of biggles.Point objects
    @rtype: [ biggles.Point ]
    """
    f_matrix = T.dataRoot() + '/biggles/ramachandran_bg.dat'
    mat = biggles.read_matrix(f_matrix)

    n_rows, n_cols = N0.shape(mat)

    points = []
    for i in range(n_rows):
        for j in range(n_cols):

            ## only matrix values below 200 yield a dot
            if not mat[i, j] < 200:
                continue

            ## map matrix position onto the -180 .. 180 range
            a = (360. / n_cols) * j - 180
            b = (360. / n_rows) * (n_rows - i) - 180

            points.append(biggles.Point(a, b, type="dot"))

    return points
def color_array( self, a, resetLimits=1 ):
    """
    Translate an array of values into color codes; the array is
    flattened, passed through self.colors and brought back into its
    original shape.

    @param a: array of float
    @type  a: array of float
    @param resetLimits: re-define color range on max and min of values
                        (default: 1)
    @type  resetLimits: 1|0

    @return: matrix of color codes with same dimensions as a
    @rtype: array of float
    """
    original_shape = N0.shape( a )

    flat_colors = self.colors( N0.ravel( a ), resetLimits=resetLimits )

    return N0.reshape( flat_colors, original_shape )
def test_FuzzyCluster( self):
    """FuzzyCluster test"""
    import gnuplot as G

    ## three clouds of 500 random 2-D points, shifted by 0, 1 and 2
    cloud_a = R.random_sample((500,2))
    cloud_b = R.random_sample((500,2)) + 1
    cloud_c = R.random_sample((500,2)) + 2

    self.x = N0.concatenate((cloud_a, cloud_b, cloud_c))

    self.fuzzy = FuzzyCluster(self.x, n_cluster=5, weight=1.5)

    self.centers = self.fuzzy.go(1.e-30,
                                 n_iterations=50, nstep=10,
                                 verbose=self.local)

    ## NOTE(review): plotting is assumed to belong to the interactive
    ## branch together with the message -- confirm against original layout
    if self.local:
        print( "cluster centers are displayed in green" )
        G.scatter( self.x, self.centers )

    self.assertEqual( N0.shape(self.centers), (5, 2) )
def __init__(self, data, n_cluster, weight, seedx = 0, seedy = 0):
    """
    Store the data (converted to a float array) and the clustering
    parameters.

    @param data: cluster this
    @type  data: [float] OR array
    @param n_cluster: number of clusters
    @type  n_cluster: int
    @param weight: fuzziness weigth
    @type  weight: float
    @param seedx: random seed value for RandomArray.seed (default: 0)
    @type  seedx: int OR 0
    @param seedy: random seed value for RandomArray.seed
                  (default: 0, set seed from clock)
    @type  seedy: int OR 0
    """
    self.data = N0.array(data, N0.Float)

    self.w, self.n_cluster = weight, n_cluster

    ## data is expected to be 2-D: points x dimensions
    n_points, n_dim = N0.shape(data)
    self.npoints = n_points
    self.dimension = n_dim

    self.seedx, self.seedy = seedx, seedy
def thin(self, step=1):
    """
    Keep only each step'th frame of every ensemble member.

    @param step: 1..keep all frames, 2..skip first and every second, ..
                 (default: 1)
    @type  step: int

    @return: reduced EnsembleTraj
    @rtype: EnsembleTraj
    """
    T.ensure(step, int, forbidden=[0])

    ## n_members x frames_per_member, frame indices of each member
    member_frames = N0.array(
        [self.memberIndices(m) for m in range(self.n_members)])

    ## counting from -1 and dropping that first item selects the
    ## positions step-1, 2*step-1, ...
    frames_per_member = N0.shape(member_frames)[1]
    keep = range(-1, frames_per_member, step)[1:]

    selected = N0.take(member_frames, keep, 1)

    ## transpose so that raveling interleaves the members again
    return self.takeFrames(N0.ravel(N0.transpose(selected)))
def parse_result( self ):
    """
    Extract some information about the profile as well as the
    match state emmission scores. Keys of the returned dictionary::
      'AA', 'name', 'NrSeq', 'emmScore', 'accession',
      'maxAllScale', 'seqNr', 'profLength', 'ent', 'absSum'

    @return: dictionary with various information about the profile
    @rtype: dict

    @raise HmmerError: if the result file is missing or shorter than
                       10 (as reported by T.fileLength)
    """
    ## check that the output file is there and seems valid
    if not os.path.exists( self.f_out ):
        raise HmmerError(
            'Hmmerfetch result file %s does not exist.'%self.f_out )

    if T.fileLength( self.f_out ) < 10:
        raise HmmerError(
            'Hmmerfetch result file %s seems incomplete.'%self.f_out )

    ## read result; the file is closed even if reading fails
    with open( self.f_out, 'r' ) as hmm:
        out = hmm.read()

    profileDic = {}

    ## collect some data about the hmm profile
    profileDic['name'] = self.hmmName
    profileDic['profLength'] = \
        int( re.findall( r'LENG\s+[0-9]+', out )[0].split()[1] )
    profileDic['accession'] = \
        re.findall( r'ACC\s+PF[0-9]+', out )[0].split()[1]
    profileDic['NrSeq'] = \
        int( re.findall( r'NSEQ\s+[0-9]+', out )[0].split()[1] )
    profileDic['AA'] = \
        re.findall( 'HMM[ ]+' + '[A-Y][ ]+'*20, out )[0].split()[1:]

    ## collect null emmission scores
    pattern = 'NULE[ ]+' + '[-0-9]+[ ]+'*20
    nullEmm = [ float(j) for j in
                re.findall( pattern, out )[0].split()[1:] ]

    ## get emmision scores; each row starts with the position number
    ## followed by 20 scores
    prob = []
    for i in range( 1, profileDic['profLength']+1 ):
        pattern = "[ ]+%i"%i + "[ ]+[-0-9]+"*20
        e = [ float(j) for j in re.findall( pattern, out )[0].split() ]
        prob.append( e )

    profileDic['seqNr'] = N0.transpose( N0.take( prob, (0,),1 ) )
    profileDic['emmScore'] = N0.array(prob)[:,1:]

    ## calculate emission probablitities
    emmProb, nullProb = self.hmmEmm2Prob( nullEmm, profileDic['emmScore'])

    ent = [ N0.resize( self.entropy(e, nullProb), (1,20) )[0]
            for e in emmProb ]
    profileDic['ent'] = N0.array(ent)

    ## scores without the leading position column (fresh array,
    ## independent of the one handed to hmmEmm2Prob)
    proba = N0.array(prob)[:,1:]

    ## every position of a row set to N0.sum( abs( scores of the row ) )
    profileDic['absSum'] = [
        N0.resize( N0.sum( N0.absolute( row )), N0.shape( row ) )
        for row in proba ]

    ## every position of a row set to the row's normalized max score
    p4 = []
    for row in proba:
        p_scale = (row - N0.average(row) )/ math.SD(row)
        p4.append( N0.resize( p_scale[N0.argmax( N0.array(p_scale) )],
                              N0.shape( row ) ) )
    profileDic['maxAllScale'] = p4

    return profileDic
def __init__(self, matrix, mesh=0, palette="plasma", legend=0, step=1,
             vmin=None, vmax=None):
    """
    Set up a biggles plot showing one colored cell per matrix position.
    View it with biggles.FramedPlot.show() or save it with
    biggles.FramedPlot.write_eps(file_name).

    @param matrix: the 2-D array to plot
    @type  matrix: array
    @param mesh: create a plot with a dotted mesh
    @type  mesh: 1|0
    @param palette: color palette name see L{Biskit.ColorSpectrum}
    @type  palette: str
    @param legend: create a legend (scale) showing the values of the
                   different colors in the plot.
    @type  legend: 1|0
    @param step: reduce matrix -- take only each step position in x and y
    @type  step: int
    @param vmin: override minimal value (default: minimum of matrix)
    @param vmax: override maximal value (default: maximum of matrix)

    @raise ImportError: if the biggles module is not available
    """
    if not biggles:
        raise ImportError('biggles module could not be imported.')

    FramedPlot.__init__(self)

    if step != 1:
        matrix = self.__thinarray(matrix, step)

    ## color range defaults to the value range of the (thinned) matrix
    if vmin is None:
        vmin = N0.amin(matrix)
    if vmax is None:
        vmax = N0.amax(matrix)

    self.palette = ColorSpectrum(palette, vmin=vmin, vmax=vmax)
    self.matrix = self.palette.color_array(matrix, resetLimits=0)

    dims = N0.shape(self.matrix)
    n_rows, n_cols = dims[0], dims[1]

    ## one filled unit square per matrix position
    for i in range(n_rows):
        for j in range(n_cols):
            x_span = (j, j + 1)
            lower = (i, i)
            upper = (i + 1, i + 1)

            self.add(biggles.FillBetween(x_span, lower, x_span, upper,
                                         color=self.matrix[i, j]))

    if mesh:
        for i in range(n_rows + 1):
            self.add(biggles.LineY(i, linetype='dotted'))

        for i in range(n_cols + 1):
            self.add(biggles.LineX(i, linetype='dotted'))

    ## NOTE(review): the plot box is assumed to be added only together
    ## with the legend -- confirm against the original layout
    if legend:
        legend = self.__make_legend()
        self.add(legend)
        self.add(biggles.PlotBox((-0.17, -0.1), (1.25, 1.1)))

    self.aspect_ratio = 1.0
def parse_result(self):
    """
    Extract some information about the profile as well as the
    match state emmission scores. Keys of the returned dictionary::
      'AA', 'name', 'NrSeq', 'emmScore', 'accession',
      'maxAllScale', 'seqNr', 'profLength', 'ent', 'absSum'

    @return: dictionary with various information about the profile
    @rtype: dict

    @raise HmmerError: if the result file is missing or shorter than
                       10 (as reported by T.fileLength)
    """
    ## check that the output file is there and seems valid
    if not os.path.exists(self.f_out):
        raise HmmerError(
            'Hmmerfetch result file %s does not exist.'%self.f_out)

    if T.fileLength(self.f_out) < 10:
        raise HmmerError(
            'Hmmerfetch result file %s seems incomplete.'%self.f_out)

    ## read result; the file is closed even if reading fails
    with open(self.f_out, 'r') as hmm:
        out = hmm.read()

    profileDic = {}

    ## collect some data about the hmm profile
    profileDic['name'] = self.hmmName
    profileDic['profLength'] = \
        int(re.findall(r'LENG\s+[0-9]+', out)[0].split()[1])
    profileDic['accession'] = \
        re.findall(r'ACC\s+PF[0-9]+', out)[0].split()[1]
    profileDic['NrSeq'] = \
        int(re.findall(r'NSEQ\s+[0-9]+', out)[0].split()[1])
    profileDic['AA'] = \
        re.findall('HMM[ ]+' + '[A-Y][ ]+'*20, out)[0].split()[1:]

    ## collect null emmission scores
    pattern = 'NULE[ ]+' + '[-0-9]+[ ]+' * 20
    nullEmm = [float(j) for j in
               re.findall(pattern, out)[0].split()[1:]]

    ## get emmision scores; each row starts with the position number
    ## followed by 20 scores
    prob = []
    for i in range(1, profileDic['profLength'] + 1):
        pattern = "[ ]+%i" % i + "[ ]+[-0-9]+" * 20
        e = [float(j) for j in re.findall(pattern, out)[0].split()]
        prob.append(e)

    profileDic['seqNr'] = N0.transpose(N0.take(prob, (0, ), 1))
    profileDic['emmScore'] = N0.array(prob)[:, 1:]

    ## calculate emission probablitities
    emmProb, nullProb = self.hmmEmm2Prob(nullEmm, profileDic['emmScore'])

    ent = [N0.resize(self.entropy(e, nullProb), (1, 20))[0]
           for e in emmProb]
    profileDic['ent'] = N0.array(ent)

    ## scores without the leading position column (fresh array,
    ## independent of the one handed to hmmEmm2Prob)
    proba = N0.array(prob)[:, 1:]

    ## every position of a row set to N0.sum( abs( scores of the row ) )
    profileDic['absSum'] = [
        N0.resize(N0.sum(N0.absolute(row)), N0.shape(row))
        for row in proba]

    ## every position of a row set to the row's normalized max score
    p4 = []
    for row in proba:
        p_scale = (row - N0.average(row)) / math.SD(row)
        p4.append(N0.resize(p_scale[N0.argmax(N0.array(p_scale))],
                            N0.shape(row)))
    profileDic['maxAllScale'] = p4

    return profileDic
def test_getXyz(self):
    """AmberRstParser.getXyz test"""
    self.xyz = self.p.getXyz()

    ## the parsed coordinates are expected as 11200 rows of x, y, z
    expected_shape = (11200, 3)
    self.assertEqual(N0.shape(self.xyz), expected_shape)
def lenAtoms( self ):
    """
    Length of the second dimension of self.frames.

    @return: number of atoms in frames
    @rtype: int
    """
    frame_dims = N0.shape( self.frames )
    return frame_dims[1]