def histogram(data, nbins, range=None):
    """
    Create a histogram.
    Comes from Konrad Hinsen: Scientific Python

    @param data: data list or array
    @type  data: [any]
    @param nbins: number of bins
    @type  nbins: int
    @param range: data range to create histogram from (min val, max val);
                  values outside of it are discarded. If None, the minimum
                  and maximum of data are used.
    @type  range: (float, float) OR None

    @return: array (nbins x 2); column 0 holds the center of each bin,
             column 1 the number of values that fall into it
    @rtype: array
    """
    data = Numeric.array(data, Numeric.Float)

    if range is None:
        lower = Numeric.minimum.reduce(data)
        upper = Numeric.maximum.reduce(data)
    else:
        lower, upper = range
        # repeat() with a 0|1 mask keeps exactly the values inside the range
        inside = Numeric.logical_and(Numeric.less_equal(data, upper),
                                     Numeric.greater_equal(data, lower))
        data = Numeric.repeat(data, inside)

    # float(nbins) forces true division: an integer range with an integer
    # nbins would otherwise be truncated under Python 2
    bin_width = (upper - lower) / float(nbins)

    # index of the bin each value belongs to
    data = Numeric.floor((data - lower) / bin_width).astype(Numeric.Int)

    histo = Numeric.add.reduce(
        Numeric.equal(Numeric.arange(nbins)[:, Numeric.NewAxis], data), -1)

    # values equal to the upper limit get index nbins; count them in the
    # last bin
    histo[-1] = histo[-1] + Numeric.add.reduce(Numeric.equal(nbins, data))

    bins = lower + bin_width * (Numeric.arange(nbins) + 0.5)

    return Numeric.transpose(Numeric.array([bins, histo]))
def histogram(data, nbins, range=None):
    """
    Create a histogram.
    Comes from Konrad Hinsen: Scientific Python

    @param data: data list or array
    @type  data: [any]
    @param nbins: number of bins
    @type  nbins: int
    @param range: data range to create histogram from (min val, max val);
                  values outside of it are discarded. If None, the minimum
                  and maximum of data are used.
    @type  range: (float, float) OR None

    @return: array (nbins x 2); column 0 holds the center of each bin,
             column 1 the number of values that fall into it
    @rtype: array
    """
    data = Numeric.array(data, Numeric.Float)

    if range is None:
        lower = Numeric.minimum.reduce(data)
        upper = Numeric.maximum.reduce(data)
    else:
        lower, upper = range
        # repeat() with a 0|1 mask keeps exactly the values inside the range
        inside = Numeric.logical_and(Numeric.less_equal(data, upper),
                                     Numeric.greater_equal(data, lower))
        data = Numeric.repeat(data, inside)

    # float(nbins) forces true division: an integer range with an integer
    # nbins would otherwise be truncated under Python 2
    bin_width = (upper - lower) / float(nbins)

    # index of the bin each value belongs to
    data = Numeric.floor((data - lower) / bin_width).astype(Numeric.Int)

    histo = Numeric.add.reduce(
        Numeric.equal(Numeric.arange(nbins)[:, Numeric.NewAxis], data), -1)

    # values equal to the upper limit get index nbins; count them in the
    # last bin
    histo[-1] = histo[-1] + Numeric.add.reduce(Numeric.equal(nbins, data))

    bins = lower + bin_width * (Numeric.arange(nbins) + 0.5)

    return Numeric.transpose(Numeric.array([bins, histo]))
def histogram(data, nbins, range=None):
    """
    Create a histogram.
    Comes from Konrad Hinsen: Scientific Python

    @param data: data list or array
    @type  data: [any]
    @param nbins: number of bins
    @type  nbins: int
    @param range: data range to create histogram from (min val, max val);
                  values outside of it are discarded. If None, the minimum
                  and maximum of data are used.
    @type  range: (float, float) OR None

    @return: array (nbins x 2); column 0 holds the center of each bin,
             column 1 the number of values that fall into it
    @rtype: array
    """
    data = Numeric.array(data, Numeric.Float)

    if range is None:
        lower = Numeric.minimum.reduce(data)
        upper = Numeric.maximum.reduce(data)
    else:
        lower, upper = range
        # repeat() with a 0|1 mask keeps exactly the values inside the range
        inside = Numeric.logical_and(Numeric.less_equal(data, upper),
                                     Numeric.greater_equal(data, lower))
        data = Numeric.repeat(data, inside)

    # float(nbins) forces true division: an integer range with an integer
    # nbins would otherwise be truncated under Python 2
    bin_width = (upper - lower) / float(nbins)

    # index of the bin each value belongs to
    data = Numeric.floor((data - lower) / bin_width).astype(Numeric.Int)

    histo = Numeric.add.reduce(
        Numeric.equal(Numeric.arange(nbins)[:, Numeric.NewAxis], data), -1)

    # values equal to the upper limit get index nbins; count them in the
    # last bin
    histo[-1] = histo[-1] + Numeric.add.reduce(Numeric.equal(nbins, data))

    bins = lower + bin_width * (Numeric.arange(nbins) + 0.5)

    return Numeric.transpose(Numeric.array([bins, histo]))
def histogram(data, nbins, range=None):
    """
    Create a histogram.
    Comes from Konrad Hinsen: Scientific Python

    @param data: data list or array
    @type  data: [any]
    @param nbins: number of bins
    @type  nbins: int
    @param range: data range to create histogram from (min val, max val);
                  values outside of it are discarded. If None, the minimum
                  and maximum of data are used.
    @type  range: (float, float) OR None

    @return: array (nbins x 2); column 0 holds the center of each bin,
             column 1 the number of values that fall into it
    @rtype: array
    """
    data = Numeric.array(data, Numeric.Float)

    if range is None:
        lower = Numeric.minimum.reduce(data)
        upper = Numeric.maximum.reduce(data)
    else:
        lower, upper = range
        # repeat() with a 0|1 mask keeps exactly the values inside the range
        inside = Numeric.logical_and(Numeric.less_equal(data, upper),
                                     Numeric.greater_equal(data, lower))
        data = Numeric.repeat(data, inside)

    # float(nbins) forces true division: an integer range with an integer
    # nbins would otherwise be truncated under Python 2
    bin_width = (upper - lower) / float(nbins)

    # index of the bin each value belongs to
    data = Numeric.floor((data - lower) / bin_width).astype(Numeric.Int)

    histo = Numeric.add.reduce(
        Numeric.equal(Numeric.arange(nbins)[:, Numeric.NewAxis], data), -1)

    # values equal to the upper limit get index nbins; count them in the
    # last bin
    histo[-1] = histo[-1] + Numeric.add.reduce(Numeric.equal(nbins, data))

    bins = lower + bin_width * (Numeric.arange(nbins) + 0.5)

    return Numeric.transpose(Numeric.array([bins, histo]))
def onDataInput(self, structuredData):
    """Handle a new input signal.

    Stores the input in self.dataStructure, resets self.numExamples,
    self.numVariables and self.ps, refreshes the three info labels, the
    ANOVA-type box and the selector box and, if self.commitOnChange is set,
    runs ANOVA and sends out new data.

    structuredData is iterated as (name, list of example tables) pairs; an
    empty / None input only clears the info labels.
    """
    def plural(count):
        # "" for exactly one item, "s" otherwise
        return ["", "s"][count != 1]

    def allSame(values):
        # True only for a non-empty list whose items all equal the first one
        return len(values) > 0 and values.count(values[0]) == len(values)

    self.dataStructure = structuredData
    self.numExamples = 0
    self.numVariables = 0
    self.ps = Numeric.ones((3,0), Numeric.Float)

    if not structuredData:
        self.infoa.setText('No data on input.')
        self.infob.setText('')
        self.infoc.setText('')
    else:
        lenSD = len(structuredData)
        numFiles = sum([len(entry[1]) for entry in structuredData])
        self.infoa.setText("%d set%s, total of %d data file%s."
                           % (lenSD, plural(lenSD), numFiles, plural(numFiles)))

        # number of examples and number of variables of every example table
        numExamplesList = []
        numVariablesList = []
        for (name, etList) in structuredData:
            for et in etList:
                numExamplesList.append(len(et))
                numVariablesList.append(len(et.domain.variables))

        # all tables must agree in both numbers, otherwise the input is dropped
        if not allSame(numExamplesList):
            self.dataStructure = None
            self.numExamples = -1
            self.infob.setText("Error: data files contain unequal number of examples, aborting ANOVA computation.")
            self.infoc.setText('')
        elif not allSame(numVariablesList):
            self.dataStructure = None
            self.numVariables = -1
            self.infob.setText("Error: data files contain unequal number of variables, aborting ANOVA computation.")
            self.infoc.setText('')
        else:
            self.numExamples = numExamplesList[0]
            self.numVariables = numVariablesList[0]
            self.infob.setText("%d variable%s, %d example%s in each file."
                               % (self.numVariables, plural(self.numVariables),
                                  self.numExamples, plural(self.numExamples)))
            if self.numExamples > 0:
                self.infoc.setText('Press Commit button to start ANOVA computation.')
            else:
                self.infoc.setText('')
            self.boxAnovaType.setEnabled(1)
            self.boxSelection.setEnabled(1)
            self.btnCommit.setEnabled(True)

    # enable/disable anova type selection depending on the type of input data
    self.updateAnovaTypeBox()
    self.updateSelectorBox()
    if self.autoUpdateSelName:
        self.updateSelectorName()

    # run ANOVA
    if self.commitOnChange:
        self.runANOVA()
        self.senddata()
    self.updateSelectorInfos()
def residusMaximus(self, atomValues, mask=None):
    """
    Take list of value per atom, return list where all atoms of any
    residue are set to the highest value of any atom in that residue.
    (after applying mask)

    Values of atoms that are switched off by the mask count as 0. The
    result is assembled residue by residue in ascending residue index.

    @param atomValues: list 1 x N, values per atom
    @type  atomValues: [ float ]
    @param mask: list 1 x N, 0|1, 'master' atoms of each residue
                 (default: None, all atoms)
    @type  mask: [1|0]

    @return: Numpy array 1 x N of float
    @rtype: array
    """
    if mask is None:
        mask = N.ones(len(self.frames[0]), N.int32)

    ## eliminate all values that do not belong to the selected atoms
    masked = atomValues * mask

    ## fetch the atom -> residue index map once, not once per residue
    resMap = self.resMap()

    result = []

    ## set all atoms of each residue to uniform value
    for res in range(0, resMap[-1] + 1):

        ## get atom entries for this residue
        resAtoms = N.compress(N.equal(resMap, res), masked)

        ## get maximum value
        masterValue = max(resAtoms)

        ## one copy of the maximum for every atom of the residue
        result.extend(resAtoms * 0.0 + masterValue)

    return N.array(result)
def __init__(self, elements, nocheck = None):
    """
    Store elements as array and record its rank.

    Unless nocheck is given (anything but None), every axis of the array
    must have length 3.

    @raise ValueError: if the check is active and any axis length is not 3
    """
    self.array = Numeric.array(elements)
    shape = self.array.shape

    if nocheck is None:
        # 1 only if each axis length equals 3
        axesOk = Numeric.logical_and.reduce(
            Numeric.equal(Numeric.array(shape), 3))
        if not axesOk:
            raise ValueError('Tensor must have length 3 along any axis')

    self.rank = len(shape)
def residusMaximus( self, atomValues, mask=None ):
    """
    Take list of value per atom, return list where all atoms of any
    residue are set to the highest value of any atom in that residue.
    (after applying mask)

    Values of atoms that are switched off by the mask count as 0. The
    result is assembled residue by residue in ascending residue index.

    @param atomValues: list 1 x N, values per atom
    @type  atomValues: [ float ]
    @param mask: list 1 x N, 0|1, 'master' atoms of each residue
                 (default: None, all atoms)
    @type  mask: [1|0]

    @return: Numpy array 1 x N of float
    @rtype: array
    """
    if mask is None:
        mask = N.ones( len( self.frames[0] ), N.int32 )

    ## eliminate all values that do not belong to the selected atoms
    masked = atomValues * mask

    ## fetch the atom -> residue index map once, not once per residue
    resMap = self.resMap()

    result = []

    ## set all atoms of each residue to uniform value
    for res in range( 0, resMap[-1] + 1 ):

        ## get atom entries for this residue
        resAtoms = N.compress( N.equal( resMap, res ), masked )

        ## get maximum value
        masterValue = max( resAtoms )

        ## one copy of the maximum for every atom of the residue
        result.extend( resAtoms * 0.0 + masterValue )

    return N.array( result )
def permutInverse(n):
    """Returns inverse permutation given integers in range(len(n)),
    such that permutInverse(permutInverse(range(4)))==range(4).

    The assertion fails if inverting the result does not give back the
    input, i.e. if the input is not a permutation of range(len(n)).
    """
    perm = Numeric.asarray(n)
    # position i of the inverse holds the index at which value i sits in perm
    inverse = Numeric.argsort(perm)
    assert Numeric.all(Numeric.equal(perm, Numeric.argsort(inverse))), \
        "Inverse not successful; input should be permutation of range(len(input))."
    return inverse
def dotMA(a, b):
    """Returns dot-product for MA arrays; fixed masked values.

    An element of the result is masked when not a single pair of non-masked
    values contributed to it.
    """
    left = MA.asarray(a)
    right = MA.asarray(b)
    product = MA.dot(left, right)

    # MA.dot returns 0 instead of MA.masked: count, per result element, the
    # pairs in which both factors are known (1 = known, 0 = masked)
    knownLeft = 1 - MA.getmaskarray(left).astype(Numeric.Int)
    knownRight = 1 - MA.getmaskarray(right).astype(Numeric.Int)
    numKnownPairs = Numeric.dot(knownLeft, knownRight)

    return MA.where(Numeric.equal(numKnownPairs, 0), MA.masked, product)
def __init__(self, crv1, crv2):
    """
    Initialise the surface from two curves.

    Both curves are raised to a common degree and refined to a common knot
    vector (degelev / kntins are called on the curves that were passed in).
    Their control points then form the two rows of the surface in v
    direction, with v knot vector [0, 0, 1, 1].

    @param crv1: first curve
    @type  crv1: Crv.Crv
    @param crv2: second curve
    @type  crv2: Crv.Crv

    @raise NURBSError: if crv1 or crv2 is not a Crv.Crv instance
    """
    def knots_to_insert(knot_vectors):
        # For each knot vector, return the knots it lacks to reach, for
        # every distinct knot value, the highest multiplicity that value
        # has in any of the vectors.
        merged = []
        for knots in knot_vectors:
            for item in knots:
                if item not in merged:
                    merged.append(item)
        merged = numerix.sort(numerix.asarray(merged, numerix.Float))

        missing = [numerix.array([], numerix.Float) for knots in knot_vectors]
        for value in merged:
            counts = [numerix.compress(numerix.equal(knots, value),
                                       knots).shape[0]
                      for knots in knot_vectors]
            target = max(counts)
            for idx in range(len(counts)):
                fill = value * numerix.ones((target - counts[idx],),
                                            numerix.Float)
                missing[idx] = numerix.concatenate((missing[idx], fill))
        return missing

    if not isinstance(crv1, Crv.Crv):
        raise NURBSError('Parameter crv1 not derived from Crv class!')
    if not isinstance(crv2, Crv.Crv):
        raise NURBSError('Parameter crv2 not derived from Crv class!')

    # ensure both curves have a common degree
    d = max(crv1.degree, crv2.degree)
    crv1.degelev(d - crv1.degree)
    crv2.degelev(d - crv2.degree)

    # merge the knot vectors, to obtain a common knot vector; knots shared
    # by both curves are compared by multiplicity as well, otherwise the
    # control point arrays below would not have matching shapes
    ka, kb = knots_to_insert([crv1.uknots, crv2.uknots])
    crv1.kntins(ka)
    crv2.kntins(kb)

    coefs = numerix.zeros((4, crv1.cntrl.shape[1], 2), numerix.Float)
    coefs[:, :, 0] = crv1.cntrl
    coefs[:, :, 1] = crv2.cntrl
    Srf.__init__(self, coefs, crv1.uknots, [0., 0., 1., 1.])
def kNNimputeMA(arr2d, K=20, callback=None):
    """Returns a new 2D MA.array with missing values imputed from K nearest neighbours.
    Find K rows (axis 0) with the most similar values where similarity measure corresponds to weighted Euclidean distance.
    Imputed value = weighted average of the corresponding values of K nearest neighbours,
    where weights equal to tricubic distribution of distances to all rows.
    Impute missing rows by average over all rows.
    Version: 30.8.2005

    arr2d:    2D (masked) array; masked elements are the missing values
    K:        maximum number of neighbour values averaged per imputed element
    callback: optional function without arguments, called once for every
              row that gets imputed

    Columns without any known value are left untouched. The input is not
    modified; imputation is done on a copy.
    """
    arr2d = MA.asarray(arr2d)
    assert len(arr2d.shape) == 2, "2D array expected"
    # make a copy for imputation
    aImp2 = MA.array(arr2d)
    # leave out columns with 0 known values (columnInd: non-zero columns)
    columnCond = Numeric.greater(MA.count(arr2d, axis=0), 0)
    columnIndAll = Numeric.arange(arr2d.shape[1])
    columnInd = Numeric.compress(columnCond, columnIndAll)
    # impute the rows where 0 < #known_values < #non_zero_columns, i.e. exclude the rows with 0 and all (non-zero-column) values
    countByRows = MA.count(arr2d, axis=1)
    for rowIdx in Numeric.compress(Numeric.logical_and(Numeric.greater(countByRows, 0), Numeric.less(countByRows, columnInd.shape[0])), Numeric.arange(arr2d.shape[0])):
        # distance of every row to the current one: root of the mean squared
        # difference over the columns known in both rows
        rowResized = MA.resize(arr2d[rowIdx], arr2d.shape)
        diff = arr2d - rowResized
        distances = MA.sqrt(MA.add.reduce((diff)**2, 1) / MA.count(diff, axis=1))
        # nearest neighbours row indices (without the current row index)
        # NOTE(review): dropping the first sorted index presumes the current
        # row (distance 0) sorts first -- confirm for ties at distance 0
        indSorted = MA.argsort(distances)[1:]
        distSorted = distances.take(indSorted)
        # number of distances different from MA.masked
        numNonMasked = distSorted.shape[0] - Numeric.add.reduce(Numeric.asarray(MA.getmaskarray(distSorted), Numeric.Int))
        # number of distances to account for (K or less)
        if numNonMasked > 1:
            # distances are scaled by distSorted[numNonMasked-1]; presumably
            # the largest known distance (masked ones sorted last) -- TODO confirm
            weightsSorted = MA.power(1-MA.power(distSorted/distSorted[numNonMasked-1],3),3) # tricubic distribution of all weights
        else:
            weightsSorted = Numeric.ones(distSorted.shape[0])
        # compute average for each column separately in order to account for K non-masked values
        # (only columns that are missing in this row and known in some other row)
        colInd4CurrRow = Numeric.compress(Numeric.logical_and(MA.getmaskarray(arr2d[rowIdx]), columnCond), columnIndAll)
        for colIdx in colInd4CurrRow:
            # column values sorted by distances
            columnVals = arr2d[:,colIdx].take(indSorted)
            # take only those weights where columnVals does not equal MA.masked
            weightsSortedCompressed = MA.compress(1-MA.getmaskarray(columnVals), weightsSorted)
            # impute from K (or possibly less) values
            aImp2[rowIdx,colIdx] = MA.average(columnVals.compressed()[:K], weights=weightsSortedCompressed[:K])
        if callback:
            callback()
    # impute the unknown rows with average profile
    avrgRow = MA.average(arr2d, 0)
    for rowIdx in Numeric.compress(Numeric.equal(countByRows, 0), Numeric.arange(arr2d.shape[0])):
        aImp2[rowIdx] = avrgRow
        if callback:
            callback()
    return aImp2
def __init__(self, crv1, crv2):
    """
    Initialise the surface from two curves.

    Both curves are raised to a common degree and refined to a common knot
    vector (degelev / kntins are called on the curves that were passed in).
    Their control points then form the two rows of the surface in v
    direction, with v knot vector [0, 0, 1, 1].

    @param crv1: first curve
    @type  crv1: Crv.Crv
    @param crv2: second curve
    @type  crv2: Crv.Crv

    @raise NURBSError: if crv1 or crv2 is not a Crv.Crv instance
    """
    def knots_to_insert(knot_vectors):
        # For each knot vector, return the knots it lacks to reach, for
        # every distinct knot value, the highest multiplicity that value
        # has in any of the vectors.
        merged = []
        for knots in knot_vectors:
            for item in knots:
                if item not in merged:
                    merged.append(item)
        merged = numerix.sort(numerix.asarray(merged, numerix.Float))

        missing = [numerix.array([], numerix.Float) for knots in knot_vectors]
        for value in merged:
            counts = [numerix.compress(numerix.equal(knots, value),
                                       knots).shape[0]
                      for knots in knot_vectors]
            target = max(counts)
            for idx in range(len(counts)):
                fill = value * numerix.ones((target - counts[idx],),
                                            numerix.Float)
                missing[idx] = numerix.concatenate((missing[idx], fill))
        return missing

    if not isinstance(crv1, Crv.Crv):
        raise NURBSError('Parameter crv1 not derived from Crv class!')
    if not isinstance(crv2, Crv.Crv):
        raise NURBSError('Parameter crv2 not derived from Crv class!')

    # ensure both curves have a common degree
    d = max(crv1.degree, crv2.degree)
    crv1.degelev(d - crv1.degree)
    crv2.degelev(d - crv2.degree)

    # merge the knot vectors, to obtain a common knot vector; knots shared
    # by both curves are compared by multiplicity as well, otherwise the
    # control point arrays below would not have matching shapes
    ka, kb = knots_to_insert([crv1.uknots, crv2.uknots])
    crv1.kntins(ka)
    crv2.kntins(kb)

    coefs = numerix.zeros((4, crv1.cntrl.shape[1], 2), numerix.Float)
    coefs[:,:,0] = crv1.cntrl
    coefs[:,:,1] = crv2.cntrl
    Srf.__init__(self, coefs, crv1.uknots, [0., 0., 1., 1.])
def anova2(self, ma3d, groupLens, addInteraction, repMeasuresOnA, callback):
    """Conducts two-way ANOVA on individual examples;
    returns a Numeric array of p-values in shape (2, numExamples) or
    (3, numExamples), depending whether we test for interaction.

    Axis 0 of ma3d runs over examples (genes), axis 1 over the levels that
    may be removed (time indices), axis 2 over the replicates, which
    groupLens partitions into consecutive groups. callback is called,
    without arguments, after each example.

    Note: levels of factors A and B that cause empty cells are removed
    prior to conducting ANOVA.
    """
    groupLens = Numeric.asarray(groupLens)
    numGroups = groupLens.shape[0]

    # p-values: two rows, or three when the interaction is tested as well
    if addInteraction:
        numEffects = 3
    else:
        numEffects = 2
    ps = Numeric.ones((numEffects, ma3d.shape[0]), Numeric.Float)

    # repeated measures on factor time, or ordinary two-way ANOVA
    if repMeasuresOnA:
        fAnova = Anova.AnovaRM12LR
    else:
        fAnova = Anova.Anova2wayLR

    # (start, stop) index pairs of the replicate groups along the last axis
    bounds = Numeric.concatenate(([0], Numeric.add.accumulate(groupLens)))
    groupSlices = list(zip(bounds[:-1], bounds[1:]))

    # time indices having, for all genes at once, a cell without any value
    emptyForAll = []
    for timeIdx in range(ma3d.shape[1]):
        for lo, hi in groupSlices:
            if Numeric.add.reduce(MA.count(ma3d[:, timeIdx, lo:hi], 1)) == 0:
                emptyForAll.append(timeIdx)
                break
    if emptyForAll:
        print("Warning: removing time indices %s for all genes" % (str(emptyForAll)))
        keepInd = [idx for idx in range(ma3d.shape[1]) if idx not in emptyForAll]
        ma3d = ma3d.take(keepInd, 1)

    # for each gene...
    for geneIdx in range(ma3d.shape[0]):
        ma2d = ma3d[geneIdx]
        # number of known values in every (time index, group) cell
        cellCount = MA.zeros((ma2d.shape[0], numGroups), Numeric.Int)
        for col, (lo, hi) in enumerate(groupSlices):
            cellCount[:, col] = MA.count(ma2d[:, lo:hi], 1)
        # 1 for time indices without an empty cell (keep), 0 otherwise (drop)
        takeInd = Numeric.logical_not(
            Numeric.add.reduce(Numeric.equal(cellCount, 0), 1))
        if Numeric.add.reduce(takeInd) != takeInd.shape[0]:
            dropped = Numeric.compress(takeInd == 0,
                                       Numeric.arange(takeInd.shape[0]))
            print("Warning: removing time indices %s for gene %i" % (str(dropped), geneIdx))
            ma2d = MA.compress(takeInd, ma2d, 0)
        an = fAnova(ma2d, groupLens, addInteraction,
                    allowReductA=True, allowReductB=True)
        ps[:, geneIdx] = an.ps
        callback()
    return ps
def __setAll_1D(self, a):
    """
    Replace content of this sparseArray with values from Numeric array
    or list of numbers -- only for 1-dimensional arrays.

    Only the positions whose value differs from the default value are
    stored (as parallel lists self.indices and self.values).

    @param a: array OR list
    @type  a: array OR [ number ]

    @raise SparseArrayError: if the shape of a differs from self.shape
    """
    ## sequences (including tuples and list subclasses) are converted
    if isinstance(a, (list, tuple)):
        a = N.array(a, self.__typecode)

    if self.shape != a.shape:
        raise SparseArrayError('dimensions not aligned')

    nonDefault = N.nonzero(N.logical_not(N.equal(a, self.__default)))

    self.indices = nonDefault.tolist()
    self.values = N.take(a, self.indices).tolist()
def __setAll_1D( self, a ):
    """
    Replace content of this sparseArray with values from Numeric array
    or list of numbers -- only for 1-dimensional arrays.

    Only the positions whose value differs from the default value are
    stored (as parallel lists self.indices and self.values).

    @param a: array OR list
    @type  a: array OR [ number ]

    @raise SparseArrayError: if the shape of a differs from self.shape
    """
    ## sequences (including tuples and list subclasses) are converted
    if isinstance( a, (list, tuple) ):
        a = N.array( a, self.__typecode )

    if self.shape != a.shape:
        raise SparseArrayError( 'dimensions not aligned' )

    nonDefault = N.nonzero( N.logical_not( N.equal(a, self.__default) ) )

    self.indices = nonDefault.tolist()
    self.values = N.take( a, self.indices ).tolist()
def test_Ramachandran(self):
    """Ramachandran test"""
    ## load test trajectory and store the masses as atom profile 'mass'
    trajFile = T.testRoot() + '/lig_pcr_00/traj.dat'
    self.traj = T.load(trajFile)

    reference = self.traj.ref
    reference.atoms.set('mass', reference.masses())

    ## protein atoms of two frames
    frames = [self.traj[0], self.traj[11]]
    self.mdl = [frame.compress(frame.maskProtein()) for frame in frames]

    self.rama = Ramachandran(self.mdl, name='test', profileName='mass',
                             verbose=self.local)

    self.psi = N.array(self.rama.psi)

    if self.local:
        self.rama.show()

    ## sum over all psi entries that are not None
    defined = N.logical_not(N.equal(self.psi, None))
    r = N.sum(N.compress(defined, self.psi))

    self.assertAlmostEqual(r, -11717.909796797909, 2)
def memberFrames( self, threshold=0. ):
    """
    Get indices of all frames belonging to each cluster. Each frame
    is guaranteed to belong, at least, to the cluster for which it has
    its maximum membership. If threshold > 0, it can additionally pop
    up in other clusters.

    @param threshold: minimal cluster membership or 0 to consider
                      only max membership (default: 0)
    @type  threshold: float

    @return: n_cluster, lst of lst of int, frame indices
    @rtype: [[int]]
    """
    ## best cluster for each frame
    msm = self.memberships()
    maxMemb = N.argmax( msm, 0 )

    r = [ N.nonzero( N.equal(maxMemb, i) ).tolist()
          for i in range(0, self.n_clusters) ]

    ## same thing but now taking all above threshold
    ## -> same frame can end up in several clusters
    if threshold > 0.:
        r2 = [ N.nonzero( N.greater( l, threshold) ).tolist() for l in msm ]

        ## add only additional frames
        for i in range(0, len( r ) ):
            known = set( r[i] )
            r[i] = r[i] + [ fr for fr in r2[i] if fr not in known ]

    ## sort frames within each cluster by their membership
    r = [ self.membershipSort( r[i], i) for i in range(0, len(r) )]

    return r
def memberFrames(self, threshold=0.):
    """
    Get indices of all frames belonging to each cluster. Each frame
    is guaranteed to belong, at least, to the cluster for which it has
    its maximum membership. If threshold > 0, it can additionally pop
    up in other clusters.

    @param threshold: minimal cluster membership or 0 to consider
                      only max membership (default: 0)
    @type  threshold: float

    @return: n_cluster, lst of lst of int, frame indices
    @rtype: [[int]]
    """
    ## best cluster for each frame
    msm = self.memberships()
    maxMemb = N.argmax(msm, 0)

    r = [N.nonzero(N.equal(maxMemb, i)).tolist()
         for i in range(0, self.n_clusters)]

    ## same thing but now taking all above threshold
    ## -> same frame can end up in several clusters
    if threshold > 0.:
        r2 = [N.nonzero(N.greater(l, threshold)).tolist() for l in msm]

        ## add only additional frames
        for i in range(0, len(r)):
            known = set(r[i])
            r[i] = r[i] + [fr for fr in r2[i] if fr not in known]

    ## sort frames within each cluster by their membership
    r = [self.membershipSort(r[i], i) for i in range(0, len(r))]

    return r
def __init__(self, u1, u2, v1, v2):
    """
    Initialise the surface from four curves.

    Three auxiliary surfaces are built: r1 = Ruled(u1, u2),
    r2 = Ruled(v1, v2) with u and v swapped, and t = Bilinear through the
    first and last control points of u1 and u2. They are raised to a common
    degree and refined to common knot vectors; the control points of the
    new surface are r1 + r2 - t.

    @param u1: first curve of the u pair
    @type  u1: Crv.Crv
    @param u2: second curve of the u pair
    @type  u2: Crv.Crv
    @param v1: first curve of the v pair
    @type  v1: Crv.Crv
    @param v2: second curve of the v pair
    @type  v2: Crv.Crv

    @raise NURBSError: if any argument is not a Crv.Crv instance
    """
    def knots_to_insert(knot_vectors):
        # For each knot vector, return the knots it lacks to reach, for
        # every distinct knot value, the highest multiplicity that value
        # has in any of the vectors.
        merged = []
        for knots in knot_vectors:
            for item in knots:
                if item not in merged:
                    merged.append(item)
        merged = numerix.sort(numerix.asarray(merged, numerix.Float))

        missing = [numerix.array([], numerix.Float) for knots in knot_vectors]
        for value in merged:
            counts = [numerix.compress(numerix.equal(knots, value),
                                       knots).shape[0]
                      for knots in knot_vectors]
            target = max(counts)
            for idx in range(len(counts)):
                fill = value * numerix.ones((target - counts[idx], ),
                                            numerix.Float)
                missing[idx] = numerix.concatenate((missing[idx], fill))
        return missing

    if not isinstance(u1, Crv.Crv):
        raise NURBSError('Parameter u1 not derived from Crv class!')
    if not isinstance(u2, Crv.Crv):
        raise NURBSError('Parameter u2 not derived from Crv class!')
    if not isinstance(v1, Crv.Crv):
        raise NURBSError('Parameter v1 not derived from Crv class!')
    if not isinstance(v2, Crv.Crv):
        raise NURBSError('Parameter v2 not derived from Crv class!')

    r1 = Ruled(u1, u2)
    r2 = Ruled(v1, v2)
    r2.swapuv()
    t = Bilinear(u1.cntrl[:, 0], u1.cntrl[:, -1],
                 u2.cntrl[:, 0], u2.cntrl[:, -1])

    # Raise all surfaces to a common degree
    du = max(r1.degree[0], r2.degree[0], t.degree[0])
    dv = max(r1.degree[1], r2.degree[1], t.degree[1])
    r1.degelev(du - r1.degree[0], dv - r1.degree[1])
    r2.degelev(du - r2.degree[0], dv - r2.degree[1])
    t.degelev(du - t.degree[0], dv - t.degree[1])

    # Merge the knot vectors, to obtain a common knot vector in each
    # direction; every knot value of any surface is considered, so knots
    # shared by only two surfaces or with differing multiplicity are
    # brought into line as well
    kua, kub, kuc = knots_to_insert([r1.uknots, r2.uknots, t.uknots])
    kva, kvb, kvc = knots_to_insert([r1.vknots, r2.vknots, t.vknots])

    r1.kntins(kua, kva)
    r2.kntins(kub, kvb)
    t.kntins(kuc, kvc)

    coefs = numerix.zeros((4, t.cntrl.shape[1], t.cntrl.shape[2]),
                          numerix.Float)
    for row in range(4):
        coefs[row, :, :] = (r1.cntrl[row, :, :] + r2.cntrl[row, :, :]
                            - t.cntrl[row, :, :])
    Srf.__init__(self, coefs, r1.uknots, r1.vknots)
def rgrd(self, dataIn, missingValueIn, missingMatch, logYes = 'yes', positionIn = None, missingValueOut = None):
    """
    #---------------------------------------------------------------------------------
    #
    #    PURPOSE: To perform all the tasks required to regrid the input data, dataIn, into the output data,
    #             dataOut, along the level dimension only.
    #
    #    DEFINITION:
    #
    #        def rgrd(self, dataIn, missingValueIn, missingMatch, logYes = 'yes', positionIn = None,
    #                 missingValueOut = None):
    #
    #
    #    PASSED :  dataIn -- data to regrid; an array with at least 2 dimensions. It is converted to
    #                        32 bit float if it has another type.
    #
    #              missingValueIn -- the missing data value to use in setting missing in the mask. It is required
    #                                and there are two choices:
    #                                    None -- there is no missing data
    #                                    A number -- the value to use in the search for possible missing data.
    #                                The presence of missing data at a grid point leads to recording 0.0 in the mask.
    #
    #              missingMatch -- the comparison scheme used in searching for missing data in dataIn using the value
    #                              passed in as missingValueIn. The choices are:
    #                                  None -- used if None is the entry for missingValueIn
    #                                  'equal' -- used if missingValueIn is the exact value from the file
    #                                  'greater' -- the missing data value is equal to or greater than missingValueIn
    #                                  'less' -- the missing data value is equal to or less than missingValueIn
    #                              Any other entry raises ValueError.
    #
    #              logYes -- choose the level regrid as linear in log of level or linear in level. Set to
    #                        'yes' for log. Anything else is linear in level.
    #
    #              positionIn -- a tuple with the numerical position of the dimensions
    #                            in C or Python order specified in the sequence longitude,
    #                            latitude, level and time. If a dimension is missing submit
    #                            None in its slot in the tuple. Notice that the length of the
    #                            tuple is always four.
    #
    #                            Explicitly, in terms of the shape of dataIn as returned by Python's shape function
    #
    #                                positionIn[0] contains the position of longitude in dataIn or None
    #                                positionIn[1] contains the position of latitude in dataIn or None
    #                                positionIn[2] contains the position of level in dataIn or None
    #                                positionIn[3] contains the position of time in dataIn or None
    #
    #                            As examples:
    #                                If the C order shape of 4D data is
    #                                    (number of longitudes, number of times, number of levels, number of latitudes)
    #                                submit
    #                                    (0, 3, 2, 1)
    #
    #                                If the C order shape of 3D data is
    #                                    (number of longitudes, number of times, number of latitudes)
    #                                submit
    #                                    (0, 2, None, 1)
    #
    #                            Send in None if the shape is a subset of (time, level,
    #                            latitude, longitude) which is evaluated as follows:
    #                                2D -- code assumes (1,0,None,None)
    #                                3D -- code assumes (2,1,0,None)
    #                                4D -- code assumes (3,2,1,0)
    #
    #              missingValueOut -- the value for the missing data used in writing the output data. If left at the
    #                                 default entry, None, no missing value substitution is done on the output
    #                                 in this method.
    #
    #
    #    RETURNED : dataOut -- the regridded data (32 bit float), in the dimension order of dataIn
    #
    #    RAISES : TypeError -- dataIn has no length or fewer than 2 dimensions, missingValueIn is neither None
    #                          nor a number, or positionIn does not have length 4
    #             ValueError -- missingMatch is not one of the accepted choices, or the level size of dataIn
    #                           differs from self.nlevi
    #
    #    SIDE EFFECTS : sets self.nlon, self.nlat and self.ntime from the shape of dataIn
    #
    #
    #    USAGE:
    #
    #         Example 1.  To regrid dataIn into dataOut using all the defaults where None, None signifies no
    #                     missing data.
    #                         dataOut = x.rgrd(dataIn, None, None)
    #
    #         Example 2.  To regrid dataIn into dataOut using 1.0e20 and greater as the missing data
    #
    #                         dataOut = x.rgrd(dataIn, 1.e20, 'greater')
    #
    #---------------------------------------------------------------------------------------------------------------------"""

    # check the required input -- dataIn, missingValueIn and missingMatch

    # make sure that dataIn is an array
    try:
        z = len(dataIn)
    except TypeError:
        sendmsg('Error in calling the rgrd method -- dataIn must be an array')
        raise TypeError

    # check the missingValueIn pass
    if missingValueIn != None:
        try:
            z = abs(missingValueIn)
        except TypeError:
            sendmsg('Error in calling the rgrd method -- missingvalueIn must be None or a number. Now it is ', missingValueIn)
            raise TypeError

    # check the missingMatch pass
    missingPossibilities = ['greater', 'equal', 'less', None]
    if missingMatch not in missingPossibilities:
        msg = 'Error in missingMatch -- it must be None or the string greater, equal, or less. Now it is '
        sendmsg(msg, missingMatch)
        raise ValueError

    # --- Check data type and change to float if necessary ----
    if dataIn.dtype.char != 'f':
        dataIn = dataIn.astype(Numeric.Float32)

    # shape in the caller's dimension order; positionIn indexes into it below
    dataShape = dataIn.shape
    numberDim = len(dataShape)

    if numberDim < 2:
        msg = 'Error in call to rgrd -- data must have at least 2 dimensions'
        sendmsg(msg)
        raise TypeError

    # --- evaluate positionIn ----

    # --- make standard positionIn as a check----
    positionList =[]
    for n in range(numberDim): # insert a sequence of numbers
        positionList.append(n)
    positionList.reverse()

    for n in range(numberDim, 4): # fill end of list with Nones
        positionList.append(None)

    positionCheck = tuple(positionList)

    standardPosition = 0 # transpose required

    if positionIn == None: # construct the default positionIn tuple
        positionIn = positionCheck
        standardPosition = 1 # no need for a transpose with this data
    else:
        if positionIn == positionCheck: # compare to the standard
            standardPosition = 1 # no need for a transpose with this data

    if len(positionIn) != 4:
        msg = 'Error in call to rgrd -- positionIn must be a tuple of length 4'
        sendmsg(msg)
        raise TypeError

    if standardPosition == 0: # transpose data to the standard order (t,z,y,x)

        newOrder, inverseOrder = checkorder(positionIn)

        dataIn = Numeric.transpose(dataIn, newOrder) # transpose data to standard order (t,z,y,x)
        dataIn = Numeric.array(dataIn.astype(Numeric.Float32), Numeric.Float32) # make contiguous

    # set dimension sizes and check for consistency

    if positionIn[0] != None:
        self.nlon = (dataShape[ positionIn[0] ])
    else:
        self.nlon = 0
    if positionIn[1] != None:
        self.nlat = (dataShape[ positionIn[1] ])
    else:
        self.nlat = 0
    if positionIn[2] != None:
        if self.nlevi != (dataShape[ positionIn[2] ]):
            msg = 'Level size is inconsistent with input data'
            sendmsg(msg)
            raise ValueError
    if positionIn[3] != None:
        self.ntime = (dataShape[ positionIn[3] ])
    else:
        self.ntime = 0

    # allocate memory for dataOut -- the array with new number of levels
    # NOTE(review): the first dimension whose size equals self.nlevi is taken
    # to be the level dimension; a leading dimension (e.g. time) of the same
    # size would be resized instead -- confirm this cannot happen
    outList = list(dataIn.shape)

    for i in range(len(outList)):
        if outList[i] == self.nlevi:
            outList[i] = self.nlevo
            break

    dataOut = Numeric.zeros(tuple(outList), Numeric.Float32) # memory for aout

    # None entries are replaced by stand-in values for the call into _regrid
    if missingMatch == None: # if no missing do not pass None
        missingMatch = 'none'

    if missingValueIn == None: # if no missing do not pass None
        missingValueIn = 1.333e33

    if logYes != 'yes':
        logYes = 'no'

    levIn = self.axisIn[:].astype(Numeric.Float64)
    levOut = self.axisOut[:].astype(Numeric.Float64)
    # presumably fills dataOut in place (the return value is not used) -- TODO confirm
    _regrid.rgdpressure(self.nlevi, self.nlevo, self.nlat, self.nlon, self.ntime, missingValueIn, missingMatch, logYes, levIn, levOut, dataIn, dataOut)

    # restore the None entries (a caller-supplied 1.333e33 is reset to None as well)
    if missingMatch == 'none': # if no missing do not pass None
        missingMatch = None
    if missingValueIn == 1.333e33:
        missingValueIn = None

    if standardPosition == 0:
        dataOut = Numeric.transpose(dataOut, inverseOrder) # transpose data to original order
        dataOut = Numeric.array(dataOut.astype(Numeric.Float32), Numeric.Float32) # make contiguous

    if missingValueOut != None: # set the missing value in data to missingValueOut

        # for 'greater' / 'less' the threshold is moved 1% towards zero
        if missingMatch == 'greater':
            if missingValueIn > 0.0:
                missing = 0.99*missingValueIn
            else:
                missing = 1.01*missingValueIn

            dataOut = Numeric.where(Numeric.greater(dataOut,missing), missingValueOut, dataOut)

        elif missingMatch == 'equal':
            missing = missingValueIn
            dataOut = Numeric.where(Numeric.equal(dataOut,missing), missingValueOut, dataOut)

        elif missingMatch == 'less':
            if missingValueIn < 0.0:
                missing = 0.99*missingValueIn
            else:
                missing = 1.01*missingValueIn

            dataOut = Numeric.where(Numeric.less(dataOut,missing), missingValueOut, dataOut)

    return dataOut
def __init__(self, u1, u2, v1, v2):
    """
    Build the surface spanned by four boundary curves.

    The control net is assembled as C{r1 + r2 - t}, where C{r1} is the
    ruled surface between u1 and u2, C{r2} the ruled surface between v1
    and v2 (with u and v swapped) and C{t} the bilinear surface through
    the first and last control points of u1 and u2.  All three surfaces
    are first raised to a common degree and refined to a common knot
    vector so that their control nets can be combined element-wise.
    (This looks like the classic Coons boolean-sum construction.)

    @param u1: first boundary curve in the u direction
    @type  u1: Crv.Crv
    @param u2: second boundary curve in the u direction
    @type  u2: Crv.Crv
    @param v1: first boundary curve in the v direction
    @type  v1: Crv.Crv
    @param v2: second boundary curve in the v direction
    @type  v2: Crv.Crv

    @raise NURBSError: if any argument is not a Crv.Crv instance
    """
    for name, curve in (('u1', u1), ('u2', u2), ('v1', v1), ('v2', v2)):
        if not isinstance(curve, Crv.Crv):
            raise NURBSError('Parameter %s not derived from Crv class!' % name)

    def knots_to_insert(ka, kb, kc):
        """Return, for each knot vector, the knots it lacks w.r.t. the merged one."""
        vectors = (ka, kb, kc)
        # The merged vector must contain EVERY distinct knot of the three
        # inputs (not only knots unique to a single vector); otherwise a
        # knot shared by two surfaces is never inserted into the third.
        distinct = set()
        for vec in vectors:
            distinct.update([float(value) for value in vec])
        additions = [numerix.array([], numerix.Float) for vec in vectors]
        for knot in sorted(distinct):
            counts = [numerix.compress(numerix.equal(vec, knot), vec).shape[0]
                      for vec in vectors]
            # each vector is topped up to the highest multiplicity found
            target = max(counts)
            for idx in range(len(vectors)):
                padding = knot * numerix.ones((target - counts[idx],),
                                              numerix.Float)
                additions[idx] = numerix.concatenate((additions[idx], padding))
        return additions

    r1 = Ruled(u1, u2)
    r2 = Ruled(v1, v2)
    r2.swapuv()
    t = Bilinear(u1.cntrl[:, 0], u1.cntrl[:, -1],
                 u2.cntrl[:, 0], u2.cntrl[:, -1])

    # Raise all surfaces to a common degree
    du = max(r1.degree[0], r2.degree[0], t.degree[0])
    dv = max(r1.degree[1], r2.degree[1], t.degree[1])
    r1.degelev(du - r1.degree[0], dv - r1.degree[1])
    r2.degelev(du - r2.degree[0], dv - r2.degree[1])
    t.degelev(du - t.degree[0], dv - t.degree[1])

    # Merge the knot vectors, to obtain a common knot vector
    kua, kub, kuc = knots_to_insert(r1.uknots, r2.uknots, t.uknots)
    kva, kvb, kvc = knots_to_insert(r1.vknots, r2.vknots, t.vknots)

    r1.kntins(kua, kva)
    r2.kntins(kub, kvb)
    t.kntins(kuc, kvc)

    # Combine the three (now shape-compatible) control nets row by row.
    coefs = numerix.zeros((4, t.cntrl.shape[1], t.cntrl.shape[2]),
                          numerix.Float)
    for row in range(4):
        coefs[row, :, :] = (r1.cntrl[row, :, :] + r2.cntrl[row, :, :]
                            - t.cntrl[row, :, :])
    Srf.__init__(self, coefs, r1.uknots, r1.vknots)
def kNNimputeMA(arr2d, K=20, callback=None):
    """Return a new 2D MA array with masked values imputed from nearest rows.

    For every row that has some, but not all, values known (counting only
    columns that hold at least one known value), the other rows are ranked
    by the root of the mean squared difference to that row.  Each masked
    entry is replaced by the weighted average of the first K non-masked
    values found in its column, taken in order of increasing distance;
    the weights follow a tricubic function of the distance.
    Rows without any known value are replaced by the column-wise average
    of the input array.

    If given, callback is called without arguments once per processed row.

    Version: 30.8.2005
    """
    arr2d = MA.asarray(arr2d)
    assert len(arr2d.shape) == 2, "2D array expected"
    # all imputed values are written into this copy; arr2d is only read
    imputed = MA.array(arr2d)
    allRows = Numeric.arange(arr2d.shape[0])
    allColumns = Numeric.arange(arr2d.shape[1])
    # columns that hold at least one known value
    columnHasData = Numeric.greater(MA.count(arr2d, axis=0), 0)
    numUsableColumns = Numeric.compress(columnHasData, allColumns).shape[0]
    # rows with 0 < #known values < #usable columns get neighbour imputation
    knownPerRow = MA.count(arr2d, axis=1)
    isPartialRow = Numeric.logical_and(
        Numeric.greater(knownPerRow, 0),
        Numeric.less(knownPerRow, numUsableColumns))
    for row in Numeric.compress(isPartialRow, allRows):
        delta = arr2d - MA.resize(arr2d[row], arr2d.shape)
        dist = MA.sqrt(MA.add.reduce(delta ** 2, 1) / MA.count(delta, axis=1))
        # neighbour ordering; the first entry is dropped on the assumption
        # that it is the current row itself
        order = MA.argsort(dist)[1:]
        orderedDist = dist.take(order)
        # how many of the ordered distances are not masked
        maskedCount = Numeric.add.reduce(
            Numeric.asarray(MA.getmaskarray(orderedDist), Numeric.Int))
        numValid = orderedDist.shape[0] - maskedCount
        if numValid > 1:
            # tricubic weights, scaled by the distance at index numValid - 1
            scaled = orderedDist / orderedDist[numValid - 1]
            weights = MA.power(1 - MA.power(scaled, 3), 3)
        else:
            weights = Numeric.ones(orderedDist.shape[0])
        # masked entries of this row that lie in a usable column
        toImpute = Numeric.compress(
            Numeric.logical_and(MA.getmaskarray(arr2d[row]), columnHasData),
            allColumns)
        for col in toImpute:
            # values of this column, ordered by distance to the current row
            neighbourVals = arr2d[:, col].take(order)
            # keep only the weights whose neighbour value is known
            usableWeights = MA.compress(
                1 - MA.getmaskarray(neighbourVals), weights)
            # average over the K (or fewer) closest known values
            imputed[row, col] = MA.average(
                neighbourVals.compressed()[:K], weights=usableWeights[:K])
        if callback:
            callback()
    # rows without a single known value receive the average profile
    averageRow = MA.average(arr2d, 0)
    for row in Numeric.compress(Numeric.equal(knownPerRow, 0), allRows):
        imputed[row] = averageRow
        if callback:
            callback()
    return imputed
def conservationScore( self, cons_type='cons_ent', ranNr=150,
                       log=StdLog(), verbose=1 ):
    """
    Score of conserved residue pairs in the interaction surface.
    Optionally, normalized by random surface contacts.

    If a conservation profile cannot be obtained for the receptor or the
    ligand, a profile of ones is used instead (and a notice is logged if
    verbose is set).

    @param cons_type: precalculated conservation profile name,
                      see L{Biskit.PDBDope}.
    @type  cons_type: str
    @param ranNr: number of random matrices to use (default: 150);
                  0 switches the random normalization off, the score is
                  then divided by the number of (non-missing) contacts
    @type  ranNr: int
    @param log: log file [STDOUT]
    @type  log: Biskit.LogFile
    @param verbose: give progress report [1]
    @type  verbose: bool | int

    @return: conservation score
    @rtype: float
    """
    def surface_mask( model ):
        """Return the 'surfMask' profile of model, computing it if absent."""
        mask = model.profile( 'surfMask' )
        if not mask:
            d = PDBDope( model )
            d.addSurfaceMask()
            # Previously the freshly computed mask was never read back,
            # leaving the result variable unbound on this branch.
            # Assumes addSurfaceMask() stores it as 'surfMask' -- TODO confirm
            mask = model.profile( 'surfMask' )
        return mask

    try:
        recCons = self.rec().profile( cons_type, updateMissing=1 )
    except Exception:
        # best effort: fall back to a neutral profile (all ones)
        if verbose:
            log.add('\n'+'*'*30+'\nNO HHM PROFILE FOR RECEPTOR\n'+\
                    '*'*30+'\n')
        recCons = N.ones( self.rec().lenResidues() )

    try:
        ligCons = self.lig().profile( cons_type, updateMissing=1 )
    except Exception:
        if verbose:
            log.add(\
                '\n'+'*'*30+'\nNO HHM PROFILE FOR LIGAND\n'+'*'*30+'\n')
        ligCons = N.ones( self.lig().lenResidues() )

    recSurf = surface_mask( self.rec() )
    ligSurf = surface_mask( self.lig() )
    surfMask = N.ravel( N.outerproduct( recSurf, ligSurf ) )

    # residue pairs where both partners have a conservation value of 0
    # are treated as missing and removed from the contacts
    missing = N.outerproduct( N.equal( recCons, 0 ), N.equal( ligCons, 0 ) )
    cont = self.resContacts() * N.logical_not( missing )

    consMat = N.outerproduct( recCons, ligCons )
    total = N.sum( N.sum( cont * consMat ) )

    if ranNr == 0:
        return total / N.sum( N.sum( cont ) )

    # normalize by the average score of ranNr random contact matrices
    # NOTE(review): uses self.verbose / self.log rather than the verbose
    # and log parameters -- left unchanged, confirm which one is intended
    if self.verbose:
        self.log.write('.')
    ranMat = mathUtils.random2DArray( cont, ranNr, mask=surfMask )
    random_score = N.sum( N.sum( ranMat * consMat ) ) / ( ranNr*1.0 )
    return total / random_score
def __cmp__(self, other):
    """
    Equality-only comparison of two objects carrying C{rank} and C{array}.

    @return: 1 if the ranks differ; otherwise False (i.e. 0, "equal") when
             all elements of both arrays are equal and True when at least
             one element differs.  A negative value is never returned, so
             the result does not define an ordering.
    """
    if self.rank == other.rank:
        same_elements = Numeric.equal(self.array, other.array)
        return not Numeric.logical_and.reduce(same_elements.ravel())
    return 1