Ejemplo n.º 1
0
def histogram(data, nbins, range = None):
    """
    Count how many values of data fall into each of nbins equally wide bins.
    Comes from Konrad Hinsen: Scientific Python

    data  -- sequence or array of numbers
    nbins -- number of bins
    range -- optional pair (lower, upper); values outside of it are dropped.
             If omitted, the smallest and largest value of data are used.

    Returns an array with one row per bin holding (bin center, count).
    """
    values = Numeric.array(data, Numeric.Float)

    if range is not None:
        lower, upper = range
        # discard everything outside of [lower, upper]
        inside = Numeric.logical_and(Numeric.less_equal(values, upper),
                                     Numeric.greater_equal(values, lower))
        values = Numeric.repeat(values, inside)
    else:
        lower = Numeric.minimum.reduce(values)
        upper = Numeric.maximum.reduce(values)

    width = (upper - lower)/nbins

    # bin number of every value; a value equal to upper gets number nbins
    slots = Numeric.floor((values - lower)/width).astype(Numeric.Int)
    matches = Numeric.equal(Numeric.arange(nbins)[:,Numeric.NewAxis], slots)
    counts = Numeric.add.reduce(matches, -1)
    # values sitting exactly on the upper edge are counted in the last bin
    overflow = Numeric.add.reduce(Numeric.equal(nbins, slots))
    counts[-1] = counts[-1] + overflow
    centers = lower + width*(Numeric.arange(nbins) + 0.5)
    return Numeric.transpose(Numeric.array([centers, counts]))
Ejemplo n.º 2
0
def histogram(data, nbins, range=None):
    """
    Create a histogram.
    Comes from Konrad Hinsen: Scientific Python

    @param data: data list or array
    @type  data: [any]
    @param nbins: number of bins
    @type  nbins: int
    @param range: data range to create histogram from (min val, max val);
                  values outside this range are ignored. If None, the
                  smallest and largest value of data are used.
    @type  range: (float, float) OR None

    @return: array (nbins x 2); each row holds the center of a bin and the
             number of values that fell into it
    @rtype: array
    """
    data = Numeric.array(data, Numeric.Float)
    if range is None:
        lower = Numeric.minimum.reduce(data)
        upper = Numeric.maximum.reduce(data)
    else:
        lower, upper = range
        # keep only the values within [lower, upper]
        data = Numeric.repeat(
            data,
            Numeric.logical_and(Numeric.less_equal(data, upper),
                                Numeric.greater_equal(data, lower)))
    bin_width = (upper - lower) / nbins
    # index of the bin each value belongs to
    data = Numeric.floor((data - lower) / bin_width).astype(Numeric.Int)
    histo = Numeric.add.reduce(
        Numeric.equal(Numeric.arange(nbins)[:, Numeric.NewAxis], data), -1)
    # values equal to the upper limit get index nbins; count them in the
    # last bin
    histo[-1] = histo[-1] + Numeric.add.reduce(Numeric.equal(nbins, data))
    bins = lower + bin_width * (Numeric.arange(nbins) + 0.5)
    return Numeric.transpose(Numeric.array([bins, histo]))
Ejemplo n.º 3
0
def histogram(data, nbins, range = None):
    """
    Create a histogram.
    Comes from Konrad Hinsen: Scientific Python

    @param data: data list or array
    @type  data: [any]
    @param nbins: number of bins
    @type  nbins: int
    @param range: data range to create histogram from (min val, max val);
                  values outside this range are ignored. If None, the
                  smallest and largest value of data are used.
    @type  range: (float, float) OR None

    @return: array (nbins x 2); each row holds the center of a bin and the
             number of values that fell into it
    @rtype: array
    """
    data = Numeric.array(data, Numeric.Float)
    if range is None:
        low = Numeric.minimum.reduce(data)
        high = Numeric.maximum.reduce(data)
    else:
        low, high = range
        # keep only the values within [low, high]
        data = Numeric.repeat(data,
                              Numeric.logical_and(Numeric.less_equal(data, high),
                                                  Numeric.greater_equal(data, low)))
    bin_width = (high-low)/nbins
    # index of the bin each value belongs to
    data = Numeric.floor((data - low)/bin_width).astype(Numeric.Int)
    histo = Numeric.add.reduce(Numeric.equal(
        Numeric.arange(nbins)[:,Numeric.NewAxis], data), -1)
    # values equal to the upper limit get index nbins; count them in the
    # last bin
    histo[-1] = histo[-1] + Numeric.add.reduce(Numeric.equal(nbins, data))
    bins = low + bin_width*(Numeric.arange(nbins)+0.5)
    return Numeric.transpose(Numeric.array([bins, histo]))
Ejemplo n.º 4
0
def histogram(data, nbins, range=None):
    """
    Sort the values of data into nbins bins of equal width and count them.
    Comes from Konrad Hinsen: Scientific Python

    data  -- sequence or array of numbers
    nbins -- number of bins
    range -- optional (first, last) limits; values outside are ignored.
             Defaults to the minimum and maximum of data.

    Returns an array of shape (nbins, 2): bin center and count per row.
    """
    samples = Numeric.array(data, Numeric.Float)

    if range is not None:
        first, last = range
        # throw away samples that lie outside the requested limits
        keep = Numeric.logical_and(Numeric.less_equal(samples, last),
                                   Numeric.greater_equal(samples, first))
        samples = Numeric.repeat(samples, keep)
    else:
        first = Numeric.minimum.reduce(samples)
        last = Numeric.maximum.reduce(samples)

    step = (last - first) / nbins

    # bin number per sample (a sample equal to `last` yields nbins)
    bin_no = Numeric.floor((samples - first) / step).astype(Numeric.Int)
    bin_ids = Numeric.arange(nbins)[:, Numeric.NewAxis]
    tally = Numeric.add.reduce(Numeric.equal(bin_ids, bin_no), -1)
    # samples on the upper limit belong to the last bin
    tally[-1] = tally[-1] + Numeric.add.reduce(Numeric.equal(nbins, bin_no))
    midpoints = first + step * (Numeric.arange(nbins) + 0.5)
    return Numeric.transpose(Numeric.array([midpoints, tally]))
Ejemplo n.º 5
0
 def onDataInput(self, structuredData):
     """Input handler: stores the incoming data and resets self.numExamples,
     self.numVariables and self.ps; refreshes the three info labels, checks that
     all example tables have the same size, updates the ANOVA type / selector
     widgets and, if commitOnChange is set, runs ANOVA and sends out new data.
     """
     self.dataStructure = structuredData
     self.numExamples = 0
     self.numVariables = 0
     self.ps = Numeric.ones((3,0), Numeric.Float)
     if not structuredData:
         self.infoa.setText('No data on input.')
         self.infob.setText('')
         self.infoc.setText('')
     else:
         # plural suffix: "" for exactly one item, "s" otherwise
         suffix = lambda count: ["","s"][count!=1]
         # number of entries equal to the first one
         numLikeFirst = lambda lst: Numeric.add.reduce(Numeric.equal(lst, lst[0]))
         numSets = len(structuredData)
         numFiles = sum([len(entry[1]) for entry in structuredData])
         self.infoa.setText("%d set%s, total of %d data file%s." % (numSets, suffix(numSets), numFiles, suffix(numFiles)))
         # sizes (examples / variables) of every ExampleTable on input
         tables = [et for (name, etList) in structuredData for et in etList]
         numExamplesList = [len(et) for et in tables]
         numVariablesList = [len(et.domain.variables) for et in tables]
         # all ExampleTables must agree in number of examples and variables
         if len(numExamplesList) == 0 or numLikeFirst(numExamplesList) != len(numExamplesList):
             self.dataStructure = None
             self.numExamples = -1
             self.infob.setText("Error: data files contain unequal number of examples, aborting ANOVA computation.")
             self.infoc.setText('')
         elif len(numVariablesList) == 0 or numLikeFirst(numVariablesList) != len(numVariablesList):
             self.dataStructure = None
             self.numVariables = -1
             self.infob.setText("Error: data files contain unequal number of variables, aborting ANOVA computation.")
             self.infoc.setText('')
         else:
             self.numExamples = numExamplesList[0]
             self.numVariables = numVariablesList[0]
             self.infob.setText("%d variable%s, %d example%s in each file." % (self.numVariables, suffix(self.numVariables), self.numExamples, suffix(self.numExamples)))
             if self.numExamples > 0:
                 self.infoc.setText('Press Commit button to start ANOVA computation.')
             else:
                 self.infoc.setText('')
             self.boxAnovaType.setEnabled(1)
             self.boxSelection.setEnabled(1)
             self.btnCommit.setEnabled(True)
     # adapt the ANOVA type and selector widgets to the new input
     self.updateAnovaTypeBox()
     self.updateSelectorBox()
     if self.autoUpdateSelName:
         self.updateSelectorName()
     # recompute and send results right away if requested
     if self.commitOnChange:
         self.runANOVA()
         self.senddata()
     self.updateSelectorInfos()
Ejemplo n.º 6
0
    def residusMaximus(self, atomValues, mask=None):
        """
        Take list of value per atom, return list where all atoms of any
        residue are set to the highest value of any atom in that residue.
        (after applying mask)

        Masking is done by multiplication, i.e. the values of all atoms
        with mask 0 are treated as 0 when the maximum is determined.

        @param atomValues: list 1 x N, values per atom
        @type  atomValues: [ float ]
        @param mask: list 1 x N, 0|1, 'master' atoms of each residue
                     (default: None, all atoms are considered)
        @type  mask: [1|0]

        @return: Numpy array 1 x N of float
        @rtype: array
        """
        if mask is None:
            mask = N.ones(len(self.frames[0]), N.int32)

        ## eliminate all values that do not belong to the selected atoms
        masked = atomValues * mask

        ## residue index of every atom; fetched once instead of per residue
        resMap = self.resMap()

        result = []

        ## set all atoms of each residue to uniform value
        for res in range(0, resMap[-1] + 1):

            ## get atom entries for this residue
            resAtoms = N.compress(N.equal(resMap, res), masked)

            ## get maximum value
            masterValue = max(resAtoms)

            ## extend() appends element by element; 'result += array' would
            ## be evaluated as an element-wise array addition instead
            result.extend(resAtoms * 0.0 + masterValue)

        return N.array(result)
Ejemplo n.º 7
0
    def __init__(self, elements, nocheck = None):
        """
        Store elements as an array and record its rank.

        elements -- nested sequence or array with the tensor elements
        nocheck  -- if left at None, the shape is validated; any other
                    value skips the validation

        Raises ValueError if the check is active and some axis of the
        array does not have length 3.
        """
        self.array = Numeric.array(elements)
        if nocheck is None:
            # every entry of the shape tuple must be 3
            if not Numeric.logical_and.reduce(
                Numeric.equal(Numeric.array(self.array.shape), 3)):
                raise ValueError('Tensor must have length 3 along any axis')
        # rank = number of axes of the element array
        self.rank = len(self.array.shape)
Ejemplo n.º 8
0
    def residusMaximus( self, atomValues, mask=None ):
        """
        Take list of value per atom, return list where all atoms of any
        residue are set to the highest value of any atom in that residue.
        (after applying mask)

        Masking is done by multiplication, i.e. the values of all atoms
        with mask 0 are treated as 0 when the maximum is determined.

        @param atomValues: list 1 x N, values per atom
        @type  atomValues: [ float ]
        @param mask: list 1 x N, 0|1, 'master' atoms of each residue
                     (default: None, all atoms are considered)
        @type  mask: [1|0]

        @return: Numpy array 1 x N of float
        @rtype: array
        """
        if mask is None:
            mask = N.ones( len( self.frames[0] ), N.int32 )

        ## eliminate all values that do not belong to the selected atoms
        masked = atomValues * mask

        ## residue index of every atom; fetched once instead of per residue
        resMap = self.resMap()

        result = []

        ## set all atoms of each residue to uniform value
        for res in range( 0, resMap[-1]+1 ):

            ## get atom entries for this residue
            resAtoms = N.compress( N.equal( resMap, res ), masked )

            ## get maximum value
            masterValue = max( resAtoms )

            ## extend() appends element by element; 'result += array' would
            ## be evaluated as an element-wise array addition instead
            result.extend( resAtoms * 0.0 + masterValue )

        return N.array( result )
Ejemplo n.º 9
0
def permutInverse(n):
    """Returns inverse permutation given integers in range(len(n)),
    such that permutInverse(permutInverse(range(4)))==range(4).

    The result is the argsort of n. An AssertionError is raised if sorting
    the result does not give back n, i.e. if n is not a permutation of
    range(len(n)); note that assert statements are skipped under python -O.
    """
    n = Numeric.asarray(n)
    pInv = Numeric.argsort(n)
    # for a true permutation, inverting the inverse reproduces the input
    assert Numeric.all(Numeric.equal(n, Numeric.argsort(pInv))), "Inverse not successful; input should be permutation of range(len(input))."
    return pInv
Ejemplo n.º 10
0
def dotMA(a, b):
    """Dot product of two MA arrays in which result elements that received
    no contribution from any pair of non-masked values are set to MA.masked.
    """
    left = MA.asarray(a)
    right = MA.asarray(b)
    product = MA.dot(left, right)
    # MA.dot yields 0 rather than MA.masked, so count for every result element
    # the number of (non-masked, non-masked) pairs that contributed to it
    validLeft = 1 - MA.getmaskarray(left).astype(Numeric.Int)
    validRight = 1 - MA.getmaskarray(right).astype(Numeric.Int)
    pairCount = Numeric.dot(validLeft, validRight)
    # no contributing pair -> the element is unknown
    return MA.where(Numeric.equal(pairCount, 0), MA.masked, product)
Ejemplo n.º 11
0
 def __init__(self, crv1, crv2):
     """Build a surface whose control net is made of the control points of
     crv1 (index 0 of the last axis) and crv2 (index 1).

     Both curves are first brought to a common degree and a common knot
     vector by calling degelev() and kntins() on them.
     NOTE(review): these calls appear to modify crv1 and crv2 in place.

     crv1, crv2 -- instances of Crv.Crv

     Raises NURBSError if an argument is not a Crv.Crv instance.
     """
     if not isinstance(crv1, Crv.Crv):
         raise NURBSError('Parameter crv1 not derived from Crv class!')
     if not isinstance(crv2, Crv.Crv):
         raise NURBSError('Parameter crv2 not derived from Crv class!')
     # ensure both curves have a common degree
     d = max(crv1.degree, crv2.degree)
     crv1.degelev(d - crv1.degree)
     crv2.degelev(d - crv2.degree)
     # merge the knot vectors, to obtain a common knot vector
     k1 = crv1.uknots
     k2 = crv2.uknots
     # Every distinct knot value of either curve has to be examined: a knot
     # present in both curves may still differ in multiplicity. (Looking only
     # at knots found in exactly one curve leaves such knots unequalised.)
     distinct = set([float(x) for x in k1] + [float(x) for x in k2])
     ku = numerix.sort(numerix.asarray(list(distinct), numerix.Float))
     ka = numerix.array([], numerix.Float)
     kb = numerix.array([], numerix.Float)
     for knot in ku:
         # multiplicity of this knot in each curve
         i1 = numerix.compress(numerix.equal(k1, knot), k1).shape[0]
         i2 = numerix.compress(numerix.equal(k2, knot), k2).shape[0]
         m = max(i1, i2)
         # insert the knot as often as needed to reach multiplicity m
         ka = numerix.concatenate((ka, knot * numerix.ones(
             (m - i1, ), numerix.Float)))
         kb = numerix.concatenate((kb, knot * numerix.ones(
             (m - i2, ), numerix.Float)))
     crv1.kntins(ka)
     crv2.kntins(kb)
     coefs = numerix.zeros((4, crv1.cntrl.shape[1], 2), numerix.Float)
     coefs[:, :, 0] = crv1.cntrl
     coefs[:, :, 1] = crv2.cntrl
     Srf.__init__(self, coefs, crv1.uknots, [0., 0., 1., 1.])
Ejemplo n.º 12
0
def kNNimputeMA(arr2d, K=20, callback=None):
    """Returns a new 2D MA.array with missing values imputed from K nearest neighbours.
    Find K rows (axis 0) with the most similar values where similarity measure corresponds to weighted Euclidean distance.
    Imputed value = weighted average of the corresponding values of K nearest neighbours,
    where weights equal to tricubic distribution of distances to all rows.
    Impute missing rows by average over all rows.
    Version: 30.8.2005

    arr2d    -- 2D array (converted with MA.asarray); masked elements are the missing values
    K        -- maximal number of neighbour values used for a single imputed element
    callback -- optional function without arguments, called once for every processed row

    Columns without any known value are not imputed.
    Raises AssertionError if arr2d is not two-dimensional.
    """
    arr2d = MA.asarray(arr2d)
    assert len(arr2d.shape) == 2, "2D array expected"
    # make a copy for imputation
    aImp2 = MA.array(arr2d)
    # leave out columns with 0 known values (columnInd: non-zero columns)
    columnCond = Numeric.greater(MA.count(arr2d, axis=0), 0)
    columnIndAll = Numeric.arange(arr2d.shape[1])
    columnInd = Numeric.compress(columnCond, columnIndAll)
    # impute the rows where 0 < #known_values < #non_zero_columns, i.e. exclude the rows with 0 and all (non-zero-column) values
    countByRows = MA.count(arr2d, axis=1)
    for rowIdx in Numeric.compress(Numeric.logical_and(Numeric.greater(countByRows, 0), Numeric.less(countByRows, columnInd.shape[0])), Numeric.arange(arr2d.shape[0])):
        # distance of the current row to every row: root of the mean squared
        # difference, averaged over the non-masked differences only
        rowResized = MA.resize(arr2d[rowIdx], arr2d.shape)
        diff = arr2d - rowResized
        distances = MA.sqrt(MA.add.reduce((diff)**2, 1) / MA.count(diff, axis=1))
        # nearest neighbours row indices (without the current row index)
        # NOTE(review): dropping element 0 assumes the current row sorts first (distance 0) -- confirm for ties
        indSorted = MA.argsort(distances)[1:]
        distSorted = distances.take(indSorted)
        # number of distances different from MA.masked
        numNonMasked = distSorted.shape[0] - Numeric.add.reduce(Numeric.asarray(MA.getmaskarray(distSorted), Numeric.Int))
        # number of distances to account for (K or less)
        if numNonMasked > 1:
            # distances are scaled by distSorted[numNonMasked-1]
            # NOTE(review): presumably the largest non-masked distance (masked values sorted last) -- confirm
            weightsSorted = MA.power(1-MA.power(distSorted/distSorted[numNonMasked-1],3),3) # tricubic distribution of all weights
        else:
            weightsSorted = Numeric.ones(distSorted.shape[0])
        # compute average for each column separately in order to account for K non-masked values
        # (only the columns that are masked in this row and have at least one known value)
        colInd4CurrRow = Numeric.compress(Numeric.logical_and(MA.getmaskarray(arr2d[rowIdx]), columnCond), columnIndAll)
        for colIdx in colInd4CurrRow:
            # column values sorted by distances
            columnVals = arr2d[:,colIdx].take(indSorted)
            # take only those weights where columnVals does not equal MA.masked
            weightsSortedCompressed = MA.compress(1-MA.getmaskarray(columnVals), weightsSorted)
            # impute from K (or possibly less) values
            aImp2[rowIdx,colIdx] = MA.average(columnVals.compressed()[:K], weights=weightsSortedCompressed[:K])
        if callback:
            callback()
    # impute the unknown rows with average profile
    # (the column averages are taken from the original data, not from the imputed copy)
    avrgRow = MA.average(arr2d, 0)
    for rowIdx in Numeric.compress(Numeric.equal(countByRows, 0), Numeric.arange(arr2d.shape[0])):
        aImp2[rowIdx] = avrgRow
        if callback:
            callback()
    return aImp2
Ejemplo n.º 13
0
 def __init__(self, crv1, crv2):
     """Build a surface whose control net is made of the control points of
     crv1 (index 0 of the last axis) and crv2 (index 1).

     Both curves are first brought to a common degree and a common knot
     vector by calling degelev() and kntins() on them.
     NOTE(review): these calls appear to modify crv1 and crv2 in place.

     crv1, crv2 -- instances of Crv.Crv

     Raises NURBSError if an argument is not a Crv.Crv instance.
     """
     if not isinstance(crv1, Crv.Crv):
         raise NURBSError('Parameter crv1 not derived from Crv class!')
     if not isinstance(crv2, Crv.Crv):
         raise NURBSError('Parameter crv2 not derived from Crv class!')
     # ensure both curves have a common degree
     d = max(crv1.degree, crv2.degree)
     crv1.degelev(d - crv1.degree)
     crv2.degelev(d - crv2.degree)
     # merge the knot vectors, to obtain a common knot vector
     k1 = crv1.uknots
     k2 = crv2.uknots
     # Collect every distinct knot value of both curves. Knots shared by
     # both curves are included on purpose, because their multiplicities
     # can differ and then have to be equalised as well.
     distinct = set([float(x) for x in k1] + [float(x) for x in k2])
     ku = numerix.sort(numerix.asarray(list(distinct), numerix.Float))
     ka = numerix.array([], numerix.Float)
     kb = numerix.array([], numerix.Float)
     for knot in ku:
         # multiplicity of this knot in each curve
         i1 = numerix.compress(numerix.equal(k1, knot), k1).shape[0]
         i2 = numerix.compress(numerix.equal(k2, knot), k2).shape[0]
         m = max(i1, i2)
         # knots still missing in each curve to reach multiplicity m
         ka = numerix.concatenate((ka , knot * numerix.ones((m - i1,), numerix.Float)))
         kb = numerix.concatenate((kb , knot * numerix.ones((m - i2,), numerix.Float)))
     crv1.kntins(ka)
     crv2.kntins(kb)
     coefs = numerix.zeros((4, crv1.cntrl.shape[1], 2), numerix.Float)
     coefs[:,:,0] = crv1.cntrl
     coefs[:,:,1] = crv2.cntrl
     Srf.__init__(self, coefs, crv1.uknots, [0., 0., 1., 1.])
Ejemplo n.º 14
0
 def anova2(self, ma3d, groupLens, addInteraction, repMeasuresOnA, callback):
     """Runs a two-way ANOVA separately for every example (first axis of ma3d).
     Returns a Numeric array of p-values of shape (3, numExamples) if
     addInteraction is true, otherwise of shape (2, numExamples).
     callback is called (without arguments) after each example.
     Note: levels of factors A and B that cause empty cells are removed prior to conducting ANOVA.
     """
     groupLens = Numeric.asarray(groupLens)
     # one row of p-values per tested effect
     numEffects = 2
     if addInteraction:
         numEffects = 3
     ps = Numeric.ones((numEffects, ma3d.shape[0]), Numeric.Float)
     # repeated measures or ordinary two-way ANOVA
     if repMeasuresOnA:
         fAnova = Anova.AnovaRM12LR
     else:
         fAnova = Anova.Anova2wayLR
     # (start, stop) positions of every group along the last axis
     ax2Ind = Numeric.concatenate(([0], Numeric.add.accumulate(groupLens)))
     groupBounds = list(zip(ax2Ind[:-1], ax2Ind[1:]))
     # indices along axis 1 having a cell that is empty for all genes
     tInd2rem = []
     for aIdx in range(ma3d.shape[1]):
         for (start, stop) in groupBounds:
             if Numeric.add.reduce(MA.count(ma3d[:,aIdx,start:stop],1)) == 0:
                 tInd2rem.append(aIdx)
                 break
     if tInd2rem:
         print("Warning: removing time indices %s for all genes" % (str(tInd2rem)))
         tInd2keep = [aIdx for aIdx in range(ma3d.shape[1]) if aIdx not in tInd2rem]
         ma3d = ma3d.take(tInd2keep, 1)
     # gene by gene
     for eIdx in range(ma3d.shape[0]):
         geneData = ma3d[eIdx]
         # number of known values in every (axis-0 index, group) cell of this gene
         cellCount = MA.zeros((geneData.shape[0], groupLens.shape[0]), Numeric.Int)
         for g, (start, stop) in enumerate(groupBounds):
             cellCount[:,g] = MA.count(geneData[:,start:stop], 1)
         # 1 for indices without any empty cell (keep), 0 otherwise (drop)
         keepFlags = Numeric.logical_not(Numeric.add.reduce(Numeric.equal(cellCount,0),1))
         if Numeric.add.reduce(keepFlags) != keepFlags.shape[0]:
             print("Warning: removing time indices %s for gene %i" % (str(Numeric.compress(keepFlags == 0, Numeric.arange(keepFlags.shape[0]))), eIdx))
             geneData = MA.compress(keepFlags, geneData, 0)
         an = fAnova(geneData, groupLens, addInteraction, allowReductA=True, allowReductB=True)
         ps[:,eIdx] = an.ps
         callback()
     return ps
Ejemplo n.º 15
0
    def __setAll_1D(self, a):
        """
        Replace content of this sparseArray with values from Numeric array
        or list of numbers -- only for 1-dimensional arrays.

        Only the positions whose value differs from the default value are
        stored (as python lists in self.indices and self.values).

        @param a: array OR list (tuples and list subclasses are accepted, too)
        @type  a: array OR [ number ]

        @raise SparseArrayError: if the shape of a differs from self.shape
        """
        ## convert plain sequences so that shape can be compared below
        if isinstance(a, (list, tuple)):
            a = N.array(a, self.__typecode)

        if self.shape != a.shape:
            raise SparseArrayError('dimensions not aligned')

        ## positions holding a non-default value
        nonDefault = N.logical_not(N.equal(a, self.__default))
        self.indices = N.nonzero(nonDefault).tolist()

        self.values = N.take(a, self.indices).tolist()
Ejemplo n.º 16
0
    def __setAll_1D( self, a ):
        """
        Replace content of this sparseArray with values from Numeric array
        or list of numbers -- only for 1-dimensional arrays.

        Only the positions whose value differs from the default value are
        stored (as python lists in self.indices and self.values).

        @param a: array OR list (tuples and list subclasses are accepted, too)
        @type  a: array OR [ number ]

        @raise SparseArrayError: if the shape of a differs from self.shape
        """
        ## convert plain sequences so that shape can be compared below
        if isinstance( a, (list, tuple) ):
            a = N.array( a, self.__typecode )

        if self.shape != a.shape:
            raise SparseArrayError( 'dimensions not aligned' )

        ## positions holding a non-default value
        nonDefault = N.logical_not( N.equal(a, self.__default) )
        self.indices = N.nonzero( nonDefault ).tolist()

        self.values = N.take( a, self.indices ).tolist()
Ejemplo n.º 17
0
    def test_Ramachandran(self):
        """Ramachandran test"""
        ## load the test trajectory and store atom masses as 'mass' profile
        trajFile = T.testRoot() + '/lig_pcr_00/traj.dat'
        self.traj = T.load(trajFile)

        masses = self.traj.ref.masses()
        self.traj.ref.atoms.set('mass', masses)

        ## two frames, reduced to the atoms selected by maskProtein()
        self.mdl = [self.traj[0], self.traj[11]]
        self.mdl = [m.compress(m.maskProtein()) for m in self.mdl]

        self.rama = Ramachandran(self.mdl, name='test', profileName='mass',
                                 verbose=self.local)

        self.psi = N.array(self.rama.psi)

        if self.local:
            self.rama.show()

        ## sum over all psi entries that are not None
        defined = N.logical_not(N.equal(self.psi, None))
        r = N.sum(N.compress(defined, self.psi))
        self.assertAlmostEqual(r, -11717.909796797909, 2)
Ejemplo n.º 18
0
    def memberFrames( self, threshold=0. ):
        """
        Get indices of all frames belonging to each cluster. Each frame
        is guaranteed to belong, at least, to the cluster for which it has
        its maximum membership. If threshold > 0, it can additionally pop
        up in other clusters.

        @param threshold: minimal cluster membership or 0 to consider
                          only max membership (default: 0)
        @type  threshold: float

        @return: n_cluster, lst of lst of int, frame indices
        @rtype: [[int]]
        """
        ## best cluster for each frame
        msm = self.memberships()
        maxMemb = N.argmax( msm, 0 )

        ## per cluster: list of the frames that have their maximum there
        r = [ N.nonzero( N.equal(maxMemb, i) ).tolist()
              for i in range(0, self.n_clusters) ]

        ## same thing but now taking all above threshold
        ## -> same frame can end up in several clusters
        if threshold > 0.:
            r2 = [ N.nonzero( N.greater( l, threshold) ) for l in msm ]

            ## add only additional frames (r[i] already is a plain list)
            for i in range(0, len( r ) ):
                r[i] = r[i] + [ fr for fr in r2[i] if fr not in r[i] ]

        ## sort frames within each cluster by their membership
        r = [ self.membershipSort( r[i], i) for i in range(0, len(r) )]

        return r
Ejemplo n.º 19
0
    def memberFrames(self, threshold=0.):
        """
        Get indices of all frames belonging to each cluster. Each frame
        is guaranteed to belong, at least, to the cluster for which it has
        its maximum membership. If threshold > 0, it can additionally pop
        up in other clusters.

        @param threshold: minimal cluster membership or 0 to consider
                          only max membership (default: 0)
        @type  threshold: float

        @return: n_cluster, lst of lst of int, frame indices
        @rtype: [[int]]
        """
        ## best cluster for each frame
        msm = self.memberships()
        maxMemb = N.argmax(msm, 0)

        ## per cluster: list of the frames that have their maximum there
        r = [N.nonzero(N.equal(maxMemb, i)).tolist()
             for i in range(0, self.n_clusters)]

        ## same thing but now taking all above threshold
        ## -> same frame can end up in several clusters
        if threshold > 0.:
            r2 = [N.nonzero(N.greater(l, threshold)) for l in msm]

            ## add only additional frames (r[i] already is a plain list)
            for i in range(0, len(r)):
                r[i] = r[i] + [fr for fr in r2[i] if fr not in r[i]]

        ## sort frames within each cluster by their membership
        r = [self.membershipSort(r[i], i) for i in range(0, len(r))]

        return r
Ejemplo n.º 20
0
    def __init__(self, u1, u2, v1, v2):
        """Build a surface from four boundary curves.

        Three intermediate surfaces are created: Ruled(u1, u2), Ruled(v1, v2)
        with u and v swapped, and a Bilinear surface through the end control
        points of u1 and u2. They are raised to a common degree, given
        common knot vectors, and combined as ruled + ruled - bilinear.

        u1, u2, v1, v2 -- instances of Crv.Crv

        Raises NURBSError if an argument is not a Crv.Crv instance.
        """
        for label, crv in (('u1', u1), ('u2', u2), ('v1', v1), ('v2', v2)):
            if not isinstance(crv, Crv.Crv):
                raise NURBSError(
                    'Parameter %s not derived from Crv class!' % label)

        def knots_to_insert(*knot_vectors):
            """Return, per knot vector, the knots that must be inserted so
            that every knot has the same multiplicity in all vectors."""
            # Every distinct knot value has to be examined, including those
            # shared by several vectors: they may be absent from another
            # vector or differ in multiplicity.
            distinct = set()
            for kv in knot_vectors:
                distinct.update([float(x) for x in kv])
            merged = numerix.sort(
                numerix.asarray(list(distinct), numerix.Float))
            additions = [numerix.array([], numerix.Float)
                         for kv in knot_vectors]
            for knot in merged:
                # multiplicity of this knot in each vector
                counts = [numerix.compress(numerix.equal(kv, knot),
                                           kv).shape[0]
                          for kv in knot_vectors]
                target = max(counts)
                for idx, count in enumerate(counts):
                    additions[idx] = numerix.concatenate(
                        (additions[idx],
                         knot * numerix.ones((target - count, ),
                                             numerix.Float)))
            return additions

        r1 = Ruled(u1, u2)
        r2 = Ruled(v1, v2)
        r2.swapuv()
        t = Bilinear(u1.cntrl[:, 0], u1.cntrl[:, -1], u2.cntrl[:, 0],
                     u2.cntrl[:, -1])
        # Raise all surfaces to a common degree
        du = max(r1.degree[0], r2.degree[0], t.degree[0])
        dv = max(r1.degree[1], r2.degree[1], t.degree[1])
        r1.degelev(du - r1.degree[0], dv - r1.degree[1])
        r2.degelev(du - r2.degree[0], dv - r2.degree[1])
        t.degelev(du - t.degree[0], dv - t.degree[1])
        # Merge the knot vectors, to obtain a common knot vector
        kua, kub, kuc = knots_to_insert(r1.uknots, r2.uknots, t.uknots)
        kva, kvb, kvc = knots_to_insert(r1.vknots, r2.vknots, t.vknots)

        r1.kntins(kua, kva)
        r2.kntins(kub, kvb)
        t.kntins(kuc, kvc)
        # ruled + ruled - bilinear, one coordinate row at a time
        coefs = numerix.zeros((4, t.cntrl.shape[1], t.cntrl.shape[2]),
                              numerix.Float)
        for row in range(4):
            coefs[row, :, :] = (r1.cntrl[row, :, :] + r2.cntrl[row, :, :]
                                - t.cntrl[row, :, :])
        Srf.__init__(self, coefs, r1.uknots, r1.vknots)
Ejemplo n.º 21
0
    def rgrd(self, dataIn, missingValueIn, missingMatch, logYes = 'yes', positionIn = None, missingValueOut = None):

        """        #---------------------------------------------------------------------------------
        #
        #    PURPOSE: To perform all the tasks required to regrid the input data, dataIn, into the ouput data,
        #             dataout along the level dimension only.
        #
        #    DEFINITION:
        #
        #             def rgrd(self, dataIn, missingValueIn, missingMatch, positionIn = None, missingValueOut = None):
        # 
        # 
        #    PASSED :  dataIn -- data to regrid
        #
        #             missingValueIn -- the missing data value to use in setting missing in the mask. It is required
        #                               and there are two choices:
        #                                     None -- there is no missing data
        #                                     A number -- the value to use in the search for possible missing data.
        #                               The presence of missing data at a grid point leads to recording 0.0 in the mask.
        #
        #             missingMatch -- the comparison scheme used in searching for missing data in dataIn using the value passed
        #                             in as missingValueIn. The choices are:
        #                                  None -- used if None is the entry for missingValueIn
        #                                  exact -- used if missingValue is the exact value from the file
        #                                  greater -- the missing data value is equal to or greater than missingValueIn
        #                                  less -- the missing data value is equal to or less than missingValueIn
        #
        #             logYes -- choose the level regrid as linear in log of level or linear in level. Set to 
        #                       'yes' for log. Anything else is linear in level.
        #         
        #
        #
        #             positionIn -- a tuple with the numerical position of the dimensions
        #                           in C or Python order specified in the sequence longitude,
        #                           latitude, level and time. Longitude, latitude and level are
        #                           required. If time is missing submit None in its slot in the 
        #                           tuple. Notice that the length of the tuple is always four.
        #
        #                           Explicitly, in terms of the shape of dataIn as returned by Python's shape function
        #
        #                                positionIn[0] contains the position of longitude in dataIn      
        #                                positionIn[1] contains the position of latitude in dataIn      
        #                                positionIn[2] contains the position of level in dataIn or None      
        #                                positionIn[3] contains the position of time in dataIn or None      
        #
        #                           As  examples:
        #                                If the C order shape of 4D data is
        #                                    (number of longitudes, number of times, number of levels, number of latitudes)
        #                                submit
        #                                     (0, 3, 2, 1) 
        #
        #                                If the C order shape of 3D data is 
        #                                    (number of longitudes, number of times, number oflatitudes)
        #                                submit
        #                                    (0, 2, 1, None) 
        #
        #                           Send in None if the shape is a subset of (time, level,
        #                           latitude, longitude) which is evaluated as follows:
        #                              3D -- code assumes (2,1,0,None)
        #                              4D -- code assumes (3,2,1,0)
        #
        #              missingValueOut -- the value for the missing data used in writing the output data. If left at the
        #                                 default entry, None, the code uses missingValueIn if present or as a last resort
        #                                 1.0e20
        #
        # 
        #    RETURNED : dataOut -- the regridded data
        #
        #                
        #    USAGE: 
        #                
        #          Example 1.  To regrid dataIn into dataOut using all the defaults where None, None signifies no
        #                      missing data.                   
        #              dataOut = x.rgrd(dataIn, None, None)    
        #
        #          Example 2.  To regrid dataIn into dataOut using 1.0e20 and greater as the missing data
        #                
        #                      dataOut = x.rgrd(dataIn, 1.e20, 'greater')    
        #
        #---------------------------------------------------------------------------------------------------------------------"""

        # check the required input -- dataIn, missingValueIn and  missingMatch

        # make sure that dataIn is an array

        try:
            z = len(dataIn)
        except TypeError:
            sendmsg('Error in calling the rgrd method -- dataIn must be an array')
            raise TypeError

        # check the missingValueIn pass

        if missingValueIn != None:
            try:
                z = abs(missingValueIn)
            except TypeError:
                sendmsg('Error in calling the rgrd method -- missingvalueIn must be None or a number. Now it is  ', missingValueIn)
                raise TypeError

        # check the missingMatch pass

        missingPossibilities = ['greater', 'equal', 'less', None]
        if missingMatch not in missingPossibilities:
            msg = 'Error in missingMatch -- it must be None or the string greater, equal, or less. Now it is '
            sendmsg(msg, missingMatch)
            raise ValueError

        # --- Check data type and change to float if necessary ----

        if dataIn.dtype.char != 'f':
            dataIn = dataIn.astype(Numeric.Float32)

        dataShape = dataIn.shape
        numberDim = len(dataShape)

        if numberDim < 2: 
            msg = 'Error in call to rgrd -- data must have at least 2 dimensions'
            sendmsg(msg)
            raise TypeError

        # --- evaluate positionIn ----
        
        # --- make standard positionIn as a check----
        positionList =[]
        for n in range(numberDim):              # insert a sequence of numbers
            positionList.append(n)
        positionList.reverse()

        for n in range(numberDim, 4):            # fill end of list with Nones
            positionList.append(None)

        positionCheck = tuple(positionList)      


        standardPosition = 0                            # transpose required

        if positionIn == None:                          # construct the default positionIn tuple
            positionIn = positionCheck
            standardPosition = 1                        # no need for a transpose with this data
        else:
            if positionIn == positionCheck:             # compare to the standard
                standardPosition = 1                    # no need for a transpose with this data

        if len(positionIn) != 4: 
            msg = 'Error in call to rgrd -- positionIn must be a tuple of length 4'
            sendmsg(msg)
            raise TypeError

        if standardPosition == 0:                        # transpose data to the standard order (t,z,y,x)

            newOrder, inverseOrder = checkorder(positionIn)

            dataIn = Numeric.transpose(dataIn, newOrder)                    # transpose data to standard order (t,z,y,x)
            dataIn = Numeric.array(dataIn.astype(Numeric.Float32), Numeric.Float32)       # make contiguous 


        # set dimension sizes and check for consistency 

        if positionIn[0] != None: 
            self.nlon = (dataShape[ positionIn[0] ]) 
        else:
            self.nlon = 0 
        if positionIn[1] != None: 
            self.nlat = (dataShape[ positionIn[1] ]) 
        else:
            self.nlat = 0 
        if positionIn[2] != None: 
            if self.nlevi != (dataShape[ positionIn[2] ]): 
                msg = 'Level size is inconsistent with input data'
                sendmsg(msg)
                raise ValueError
        if positionIn[3] != None: 
            self.ntime = (dataShape[ positionIn[3] ]) 
        else:
            self.ntime = 0 

        # allocate memory for dataOut -- the array with new number of levels

        outList = list(dataIn.shape)

        for i in range(len(outList)):
            if outList[i] == self.nlevi:
                outList[i] = self.nlevo
                break

        dataOut = Numeric.zeros(tuple(outList), Numeric.Float32)                      # memory for aout


        if missingMatch == None:                                                # if no missing do not pass None
            missingMatch = 'none'

        if missingValueIn == None:                                                # if no missing do not pass None
            missingValueIn = 1.333e33

        if logYes != 'yes':
            logYes = 'no'

        levIn = self.axisIn[:].astype(Numeric.Float64)
        levOut = self.axisOut[:].astype(Numeric.Float64)
        _regrid.rgdpressure(self.nlevi, self.nlevo, self.nlat, self.nlon, self.ntime, missingValueIn, missingMatch, logYes, levIn, levOut, dataIn, dataOut)  

        if missingMatch == 'none':                                                # if no missing do not pass None
            missingMatch = None
        if missingValueIn == 1.333e33:              
            missingValueIn = None

        if standardPosition == 0:
            dataOut = Numeric.transpose(dataOut, inverseOrder)                                   # transpose data to original order
            dataOut = Numeric.array(dataOut.astype(Numeric.Float32), Numeric.Float32)            # make contiguous 

        if missingValueOut != None:                # set the missing value in data to missingValueOut

            if missingMatch == 'greater': 
                if missingValueIn > 0.0: 
                    missing = 0.99*missingValueIn
                else: 
                    missing = 1.01*missingValueIn

                dataOut = Numeric.where(Numeric.greater(dataOut,missing), missingValueOut, dataOut)

            elif missingMatch == 'equal': 
                missing = missingValueIn
                dataOut = Numeric.where(Numeric.equal(dataOut,missing), missingValueOut, dataOut)

            elif missingMatch == 'less': 
                if missingValueIn < 0.0: 
                    missing = 0.99*missingValueIn
                else: 
                    missing = 1.01*missingValueIn

                dataOut = Numeric.where(Numeric.less(dataOut,missing), missingValueOut, dataOut)

        return dataOut 
Ejemplo n.º 22
0
    def __init__(self, u1, u2, v1, v2):
        """
        Build the surface r1 + r2 - t from four boundary curves, where
        r1 = Ruled(u1, u2), r2 = Ruled(v1, v2) with u/v swapped, and t is the
        Bilinear surface through the first/last control points of u1 and u2.
        (NOTE(review): this is presumably a Coons patch constructor -- the
        enclosing class is not visible here.)

        The three surfaces are raised to a common degree and refined to a
        common knot vector in both directions before their control points
        are combined.

        @param u1: first boundary curve of the u pair
        @type  u1: Crv.Crv
        @param u2: second boundary curve of the u pair
        @type  u2: Crv.Crv
        @param v1: first boundary curve of the v pair
        @type  v1: Crv.Crv
        @param v2: second boundary curve of the v pair
        @type  v2: Crv.Crv

        @raise NURBSError: if any argument is not a Crv.Crv instance
        """
        # Call-style raise: equivalent to the old "raise E, msg" form but
        # also valid syntax on Python 3.
        for name, curve in (('u1', u1), ('u2', u2), ('v1', v1), ('v2', v2)):
            if not isinstance(curve, Crv.Crv):
                raise NURBSError('Parameter %s not derived from Crv class!' % name)

        def knot_insertions(*vectors):
            """Return, for each knot vector, the knots to insert so that all
            vectors end up with the same knots and multiplicities."""
            # Union of the distinct knot values of all vectors.  Collecting
            # only the knots unique to a single vector (as the previous code
            # did) misses knots shared by some, but not all, of the vectors.
            distinct = []
            for knots in vectors:
                for item in knots:
                    if item not in distinct:
                        distinct.append(item)
            distinct = numerix.sort(numerix.asarray(distinct, numerix.Float))

            inserts = [numerix.array([], numerix.Float) for knots in vectors]
            for value in distinct:
                counts = [numerix.compress(numerix.equal(knots, value), knots).shape[0]
                          for knots in vectors]
                target = max(counts)
                # Pad each vector up to the highest multiplicity found; a
                # vector already at that multiplicity gets an empty padding.
                for idx in range(len(vectors)):
                    padding = value * numerix.ones((target - counts[idx],), numerix.Float)
                    inserts[idx] = numerix.concatenate((inserts[idx], padding))
            return inserts

        r1 = Ruled(u1, u2)
        r2 = Ruled(v1, v2)
        r2.swapuv()
        t = Bilinear(u1.cntrl[:,0], u1.cntrl[:,-1], u2.cntrl[:,0], u2.cntrl[:,-1])

        # Raise all surfaces to a common degree
        du = max(r1.degree[0], r2.degree[0], t.degree[0])
        dv = max(r1.degree[1], r2.degree[1], t.degree[1])
        r1.degelev(du - r1.degree[0], dv - r1.degree[1])
        r2.degelev(du - r2.degree[0], dv - r2.degree[1])
        t.degelev(du - t.degree[0], dv - t.degree[1])

        # Merge the knot vectors, to obtain a common knot vector
        kua, kub, kuc = knot_insertions(r1.uknots, r2.uknots, t.uknots)
        kva, kvb, kvc = knot_insertions(r1.vknots, r2.vknots, t.vknots)
        r1.kntins(kua, kva)
        r2.kntins(kub, kvb)
        t.kntins(kuc, kvc)

        # Combine the control points row by row: r1 + r2 - t
        coefs = numerix.zeros((4 , t.cntrl.shape[1], t.cntrl.shape[2]), numerix.Float)
        for row in range(4):
            coefs[row,:,:] = r1.cntrl[row,:,:] + r2.cntrl[row,:,:] - t.cntrl[row,:,:]
        Srf.__init__(self, coefs, r1.uknots, r1.vknots)
Ejemplo n.º 23
0
def kNNimputeMA(arr2d, K=20, callback=None):
    """Return a new 2D MA array in which missing values are imputed from the K nearest rows.

    Rows (axis 0) are compared by a distance computed as the square root of the
    mean squared difference over the values both rows have. An imputed value is
    the weighted average of the corresponding values of (at most) K nearest
    neighbours, with weights following a tricubic function of the distances.
    Rows without any known value are replaced by the average over all rows.
    Columns without any known value are left untouched.

    arr2d    -- 2D data (anything MA.asarray accepts); not modified
    K        -- maximum number of neighbours used per imputed value
    callback -- optional callable invoked without arguments after each
                processed row
    """
    arr2d = MA.asarray(arr2d)
    assert len(arr2d.shape) == 2, "2D array expected"
    numRows, numCols = arr2d.shape
    # work on a copy so that the distances are always based on the input data
    imputed = MA.array(arr2d)

    # columns holding at least one known value
    usableCol = Numeric.greater(MA.count(arr2d, axis=0), 0)
    allColIdx = Numeric.arange(numCols)
    numUsableCols = Numeric.compress(usableCol, allColIdx).shape[0]

    allRowIdx = Numeric.arange(numRows)
    knownPerRow = MA.count(arr2d, axis=1)

    # rows that are neither empty nor complete (w.r.t. the usable columns)
    partialRowCond = Numeric.logical_and(
        Numeric.greater(knownPerRow, 0),
        Numeric.less(knownPerRow, numUsableCols))
    for rowIdx in Numeric.compress(partialRowCond, allRowIdx):
        delta = arr2d - MA.resize(arr2d[rowIdx], arr2d.shape)
        sumSquares = MA.add.reduce(delta**2, 1)
        distances = MA.sqrt(sumSquares / MA.count(delta, axis=1))
        # neighbours ordered by distance; the first entry is dropped as the
        # current row itself
        neighbours = MA.argsort(distances)[1:]
        neighDist = distances.take(neighbours)
        # how many of the neighbour distances are not masked
        maskedCount = Numeric.add.reduce(
            Numeric.asarray(MA.getmaskarray(neighDist), Numeric.Int))
        numNonMasked = neighDist.shape[0] - maskedCount
        if numNonMasked > 1:
            # tricubic weights, scaled by the distance at position
            # numNonMasked - 1 (presumably the largest non-masked one,
            # assuming masked distances sort last -- TODO confirm)
            scaled = neighDist / neighDist[numNonMasked - 1]
            weights = MA.power(1 - MA.power(scaled, 3), 3)
        else:
            weights = Numeric.ones(neighDist.shape[0])
        # usable columns in which the current row is missing a value
        missingCols = Numeric.compress(
            Numeric.logical_and(MA.getmaskarray(arr2d[rowIdx]), usableCol),
            allColIdx)
        for colIdx in missingCols:
            # column values in neighbour order
            colVals = arr2d[:, colIdx].take(neighbours)
            # keep only the weights of neighbours that have a value here
            usableWeights = MA.compress(1 - MA.getmaskarray(colVals), weights)
            # average over the first K (or fewer) available values
            imputed[rowIdx, colIdx] = MA.average(
                colVals.compressed()[:K], weights=usableWeights[:K])
        if callback:
            callback()

    # rows without any known value get the average profile
    avrgRow = MA.average(arr2d, 0)
    emptyRowCond = Numeric.equal(knownPerRow, 0)
    for rowIdx in Numeric.compress(emptyRowCond, allRowIdx):
        imputed[rowIdx] = avrgRow
        if callback:
            callback()
    return imputed
Ejemplo n.º 24
0
    def conservationScore( self, cons_type='cons_ent', ranNr=150,
                           log=StdLog(), verbose=1 ):
        """
        Score of conserved residue pairs in the interaction surface.
        Optionally, normalized by random surface contacts.

        @param cons_type: precalculated conservation profile name,
                          see L{Biskit.PDBDope}.
        @type  cons_type: str
        @param ranNr: number of random matrices to use (default: 150);
                      0 switches to normalization by the number of contacts
        @type  ranNr: int
        @param log: log file [STDOUT]
        @type  log: Biskit.LogFile
        @param verbose: give progress report [1]
        @type  verbose: bool | int

        @return: conservation score
        @rtype: float
        """
        # Fall back to a neutral profile (all ones) if a conservation profile
        # cannot be obtained. The except stays deliberately broad because the
        # exception type raised by profile() is not visible from here.
        try:
            recCons = self.rec().profile( cons_type, updateMissing=1 )
        except:
            if verbose:
                log.add('\n'+'*'*30+'\nNO HHM PROFILE FOR RECEPTOR\n'+\
                        '*'*30+'\n')
            recCons = N.ones( self.rec().lenResidues() )
        try:
            ligCons = self.lig().profile( cons_type, updateMissing=1 )
        except:
            if verbose:
                log.add(\
                            '\n'+'*'*30+'\nNO HHM PROFILE FOR LIGAND\n'+'*'*30+'\n')
            ligCons = N.ones( self.lig().lenResidues() )

        # Make sure each partner carries a surface mask, then read it.
        # Previously recSurf / ligSurf stayed unassigned whenever the mask had
        # to be computed first, which ended in a NameError further down.
        # NOTE(review): assumes PDBDope.addSurfaceMask() stores the mask as
        # profile 'surfMask' on the model it was given -- confirm.
        if not self.rec().profile( 'surfMask' ):
            d = PDBDope(self.rec())
            d.addSurfaceMask()
        recSurf = self.rec().profile( 'surfMask' )

        if not self.lig().profile( 'surfMask' ):
            d = PDBDope(self.lig())
            d.addSurfaceMask()
        ligSurf = self.lig().profile( 'surfMask' )

        surfMask = N.ravel(N.outerproduct( recSurf, ligSurf ))

        # residue pairs where both partners have a conservation value of 0
        missing = N.outerproduct( N.equal( recCons, 0), N.equal(ligCons,0))

        # drop those pairs from the contact matrix
        cont = self.resContacts() * N.logical_not(missing)

        consMat = N.outerproduct( recCons, ligCons )

        score = cont* consMat

        # normalize by the average score of random contact matrices ...
        if ranNr != 0:
            # use the log / verbose arguments of this call rather than
            # attributes of self, consistent with the rest of the method
            # NOTE(review): assumes the log object offers write() -- confirm
            if verbose:
                log.write('.')
            ranMat =  mathUtils.random2DArray( cont, ranNr, mask=surfMask )
            random_score = N.sum(N.sum( ranMat * consMat ))/( ranNr*1.0 )
            return N.sum(N.sum(score))/random_score

        # ... or, without random matrices, by the number of contacts
        else:
            return N.sum(N.sum(score))/ N.sum(N.sum(cont))
Ejemplo n.º 25
0
    def __cmp__(self, other):
        """
        Compare two objects by their rank and the elements of their array.

        Only equality is reported: there is no ordering, so every kind of
        difference yields 1.

        @param other: object offering the same C{rank} and C{array} attributes
        @return: 0 if rank and all array elements are equal, 1 otherwise
        @rtype: int
        """
        if self.rank != other.rank:
            return 1
        # reduce the flattened element-wise comparison to a single truth value
        allEqual = Numeric.logical_and.reduce(
            Numeric.equal(self.array, other.array).ravel())
        # __cmp__ is expected to return an integer, not a bool
        return int(not allEqual)