Example #1
 def testMultipleDimensionCreation (self):       
     'Test creation of multidimensional arrays.'
     alist = [[2,3],[4,5]]
     x = MA.array(alist)
     assert x.shape == (2,2)
     y = MA.array([x, x + 1, x + 2])
     assert y.shape == (3,2,2)
Example #2
 def testDiagonal (self):
     "Test the diagonal function."
     b=MA.array([[1,2,3,4],[5,6,7,8]]*2)
     assert eq(MA.diagonal(b), [1,6,3,8])
     assert eq(MA.diagonal(b, -1), [5,2,7])
     c = MA.array([b,b])
     assert eq(MA.diagonal(c,1), [[2,7,4], [2,7,4]])
Example #3
def Init_diag(Ain,Aout,fignum):
	global Ptot,RatioM,Tmincrit,Cpa,Cpv,Lv,namincrit,Hmincrit,Tmaxcrit,Tminopt
	global Ratiofig,fig_hauteur,fig_longueur,RatioAxes,lineprops,lineprops_HR,x1,y1,x3,y3
	Ptot=101325.0
	MO=15.9994
	MN=14.0067
	MAr=39.948
	Mair=0.78084*2*MN+0.20946*2*MO+0.009340*MAr
	MH=1.00794
	MH2O=2*MH+MO
	RatioM=MH2O/Mair
	Tmincrit=-30.0
	Cpa=Cpas(C2K(Tmincrit))
	Cpv=Cpvap(C2K(0.1))
	Lv=ChLv(C2K(0.1))
	namincrit=RatioM*Pvap(Tmincrit)/(Ptot-Pvap(Tmincrit))
	Hmincrit=Cpa*Tmincrit+namincrit*(Lv+Cpv*Tmincrit)
	Tmaxcrit=260.0
	Tminopt=-30.0
	Ratiofig=21.0/29.7
	fig_hauteur=6 #inches
	fig_longueur=fig_hauteur/Ratiofig
	figure(fignum,figsize=(fig_longueur,fig_hauteur))
	lineprops = dict(linewidth=0.5, color='gray', linestyle='-',antialiased=True)
	lineprops_HR = dict(linewidth=1, color='blue', linestyle='-',antialiased=True)
	Tinit=20.0
	Cpa=Cpas(C2K(Tinit))
	Cpv=Cpvap(C2K(Tinit))
	Lv=ChLv(C2K(Tinit))
	maskin=Ma.getmask(Ain.car)
	maskout=Ma.getmask(Aout.car)
	
# Here we fix the mean values of Cpa, Cpv and Lv by computing the average
# air state (i.e. at the centre of the diagram); the iteration stops once
# Cpa, Cpv and Lv each change by less than 1e-6.
	crit=True
	while crit:
		Ain.car=Ma.array(Ain.car,copy=0,mask=maskin) 
		Aout.car=Ma.array(Aout.car,copy=0,mask=maskout) 
		Fill_Air(Ain)
		Fill_Air(Aout)
		Amoy=air()
		Amoy.definir(["H","na"],[mean([Ain.car[0],Aout.car[0]]),
						            mean([Ain.car[1],Aout.car[1]])])
		Fill_Air(Amoy)
		Tmoy=Amoy.car[2]
		Cpa_new=Cpas(C2K(Tmoy))
		Cpv_new=Cpvap(C2K(Tmoy))
		Lv_new=ChLv(C2K(Tmoy))
		crit=((Cpa-Cpa_new)**2+(Cpv-Cpv_new)**2+(Lv-Lv_new)**2)>1e-6
		Cpa=Cpa_new
		Cpv=Cpv_new
		Lv=Lv_new

	LH,Lna,LT,LHR,Lair=Return_levels_HnaTHR(Ain,Aout)
	x1,y1=Transf_xy(Lna[0],Lair[0].car[0])
	x3,y3=Transf_xy(Lna[-1],Lair[2].car[0])
	RatioAxes=(y3-y1)/(x3-x1)
	return LH,Lna,LT,LHR,Lair
Example #4
 def testLogical (self):
     "Test logical_and, logical_or, sometrue, alltrue"
     x = MA.array([1,1,0,0])
     y = MA.array([1,0,1,0])
     assert eq(MA.logical_and (x,y), [1,0,0,0])
     assert eq(MA.logical_or (x,y), [1,1,1,0])
     assert MA.sometrue(x)
     assert not MA.sometrue(MA.zeros((3,)))
     assert MA.alltrue(MA.ones((3,)))
     assert not MA.alltrue(x)
Example #5
 def testTypecodeSpecifying(self):
     'Test construction using the type codes.'
     from Precision import typecodes
     thetypes = typecodes['Integer']  \
                + typecodes['UnsignedInteger'] \
                + typecodes['Float'] \
                + typecodes['Complex'] 
     for t in thetypes:
         x = MA.array([1,2,3], t)
         assert x.typecode() == t
     x = MA.array(['hi', 'hoo'], 'c')
     assert x.typecode() == 'c'
Example #6
 def testSpacesaver (self):
     "Test the spacesaver property (Travis Oliphant)"
     # Test of savespace property:  Travis Oliphant
     a = MA.array([1,2,3,4],savespace=1)
     assert a.spacesaver()
     self.assertEqual(a.typecode(), 's')
     b = MA.array(a,'f')
     self.assertEqual(b.typecode(), 'f')
     assert b.spacesaver()
     a.savespace(0)
     assert not a.spacesaver()
     assert b.spacesaver()
     d = 4 * b
     assert b.typecode() == d.typecode()
     self.failUnlessRaises(TypeError, MA.arccos, b/10.0)
Example #7
 def from_summset(cls, ds, shaped_like=None):
     self = cls(ds.name)
     st = time.time()
     cols = ds.get_columns()
     if shaped_like is not None:
         for axis in xtab_axes(shaped_like):
             try:
                 col = ds[axis.name]
             except KeyError:
                 pass
             else:
                 self.axes.append(CrossTabAxis.from_col(col, axis.values))
                 cols.remove(col)
     for col in cols:
         if col.is_discrete() and not col.name.startswith('_'):
             self.axes.append(CrossTabAxis.from_col(col))
     if not self.axes:
         raise Error('dataset %r must have at least one discrete column' % 
                     (ds.name,))
     indices = [axis.indices.filled() for axis in self.axes]
     masks = [axis.indices.mask() for axis in self.axes]
     map = MA.transpose(MA.array(indices, mask=masks))
     shape = self.get_shape()
     for col in ds.get_columns():
         if col.is_scalar():
             self.add_table(col.name, 
                            data=self.from_vector(map, col.data, shape),
                            label=col.label)
     elapsed = time.time() - st
     soom.info('%r crosstab generation took %.3f, %.1f rows/s' % 
                 (self.name, elapsed, len(map) / elapsed))
     return self
Example #8
def accumulate24Hourly(data):
    """Returns 12-hourly data accumulated to 24-hours."""
    newTimeValues=[]
    taxis=data.getTime()
    tunits=taxis.units   # keep the time-axis units for the new axis
    tlen=len(taxis)
    newarray=[]

    for i in range(tlen/2):
        # sum consecutive, non-overlapping pairs of 12-hourly steps
        p1=data(time=slice(2*i,2*i+1))
        p2=data(time=slice(2*i+1,2*i+2))
        accum=p1+p2
        newarray.append(accum)
        newTimeValues.append(p2.getTime()[0])

    array=MA.concatenate(newarray)
    array=MA.array(array, 'f', fill_value=data.getMissing())
    axes=data.getAxisList()
    newTimeAxis=cdms.createAxis(newTimeValues)
    newTimeAxis.units=tunits
    newTimeAxis.designateTime()
    newTimeAxis.id=newTimeAxis.long_name=newTimeAxis.title="time"
    
    newaxes=[newTimeAxis]+axes[1:]
    var=cdms.createVariable(array, axes=newaxes, id=data.id)
    for att in ("units", "long_name"):
        setattr(var, att, getattr(data, att))
    return var 
Example #9
    def testIndexing (self):
        'Test indexing operations.'
        x = MA.array([0,1,2,3,4,5])
        for i in range(len(x)):
            assert i == x[i]
        x[2] = 20
        assert eq(x, x[...])
        w = MA.array([None])
        assert w.typecode() == MA.PyObject
        assert w[0] is None
        assert isinstance(x[2], types.IntType)
        assert x[2] == 20
        x = MA.array([0,1,2,3,4,5,6])
        assert eq (x[4:1:-1], [4,3,2])
        assert eq(x[4:1:-2], [4,2])
        assert eq(x[::-1], [6, 5,4,3,2,1,0])
        assert eq(x[2:-1], [2,3,4,5])
        m = MA.array([[1,2,3],[11,12,13]])
        assert m[0,2] == 3
        assert isinstance(m[0,2], types.IntType)
        assert eq(m[...,1], [2,12])
        assert eq(MA.arange(6)[..., MA.NewAxis], [[0],[1],[2],[3],[4],[5]])
        x = MA.array([1,2,3])
        y = MA.array(x)
        x[0] = 66    # modify the original; y was created as a copy, so it must not change
        assert y[0] != 66
        b=MA.array([[1,2,3,4],[5,6,7,8]]*2)
#        assert b[1:1].shape == (0,4)
#        assert b[1:1, :].shape == (0,4)
#        assert b[10:].shape == (0,4)
        assert eq(b[2:10], [[1,2,3,4],[5,6,7,8]])
        assert eq(b[2:10, ...], [[1,2,3,4],[5,6,7,8]])
Example #10
 def testSort (self):
     "Test sort, argsort, argmax, argmin"
     s = (3,2,5,1,4,0)
     sm = [s, MA.array(s)[::-1]]
     se = MA.array(s)[0:0]
     assert eq(MA.sort(s), self.a)
     assert len(MA.sort(se)) == 0
     assert eq(MA.argsort(s), [5,3,1,0,4,2])
     assert len(MA.argsort(se)) == 0
     assert eq(MA.sort(sm, axis = -1), [[0,1,2,3,4,5],[0,1,2,3,4,5]])
     assert eq(MA.sort(sm, axis = 0), [[0,2,1,1,2,0],[3,4,5,5,4,3]])
     assert MA.argmax(s) == 2
     assert MA.argmin(s) == 5
     assert eq(MA.argmax(sm, axis=-1), [2,3])
     assert eq(MA.argmax(sm, axis=1), [2,3])
     assert eq(MA.argmax(sm, axis=0), [0,1,0,1,0,1])
     assert eq(MA.argmin(sm, axis=-1), [5,0])
     assert eq(MA.argmin(sm, axis=1), [5,0])
Example #11
    def testOperators (self):
        "Test the operators +, -, *, /, %, ^, &, |"
        x = MA.array([1.,2.,3.,4.,5.,6.])
        y = MA.array([-1.,2.,0.,2.,-1, 3.])
        assert eq(x + y, [0., 4., 3., 6., 4., 9.])
        assert eq(x - y, [2., 0., 3., 2., 6., 3.])
        assert eq(x * y, [-1., 4., 0., 8., -5., 18.])
        assert eq(y / x, [-1, 1., 0., .5, -.2, .5])
        assert eq(x**2, [1., 4., 9., 16., 25., 36.])
        xc = MA.array([1.,2.,3.,4.,5.,6.])
        xc += y
        assert eq(xc, x + y)
        xc = MA.array([1.,2.,3.,4.,5.,6.])
        xc -= y
        assert eq(xc, x - y)
        yc = MA.array(y, copy=1)
        yc /= x
        assert eq ( yc, y / x)
        xc = MA.array([1.,2.,3.,4.,5.,6.])
        y1 = [-1.,2.,0.,2.,-1, 3.]
        xc *= y1
        assert eq(xc, x * y1)

        assert eq (x + y, MA.add(x, y))
        assert eq (x - y, MA.subtract(x, y))
        assert eq (x * y, MA.multiply(x, y))
        assert eq (y / x, MA.divide (y, x))
        d = x / y
        assert d[2] is MA.masked 
        assert (MA.array(1) / MA.array(0)) is MA.masked
        assert eq (x**2, MA.power(x,2))
        x = MA.array([1,2])
        y = MA.zeros((2,))
        assert eq (x%x, y)
        assert eq (MA.remainder(x,x), y)
        assert eq (x <<1, [2,4])
        assert eq (MA.left_shift(x,1), [2,4])
        assert eq (x >>1, [0,1])
        assert eq (MA.right_shift(x,1), [0,1])
        assert eq (x & 2, [0,2])
        assert eq (MA.bitwise_and (x, 2), [0,2])
        assert eq (x | 1, [1,3])
        assert eq (MA.bitwise_or (x, 1), [1,3])
        assert eq (x ^ 2, [3,0])
        assert eq (MA.bitwise_xor(x,2), [3,0])
#        x = divmod(MA.array([2,1]), MA.array([1,2]))
#        assert eq (x[0], [2,0])
#        assert eq (x[1], [0,1])
        assert (4L*MA.arange(3)).typecode() == MA.PyObject
Example #12
 def testOnes(self):
     "Test ones"
     y = MA.ones((2,3))
     assert y.shape == (2,3)
     assert y.typecode() == MA.Int
     assert eq(y.flat, 1)
     z = MA.ones((2,3), MA.Float)
     assert z.shape == (2,3)
     assert eq(y, z)
     w = MA.ones((2,3), MA.Int16)
     assert eq(w, MA.array([[1,1,1],[1,1,1]],'s'))
     self.failUnlessRaises(ValueError, MA.ones, (-5,))
Example #13
 def __getitem__(self, key):
     index = self.dict[key]
     blob = self.store[index]
     if blob.type == BLOB_ARRAY:
         return MmapArray(blob)
     elif blob.type == BLOB_FILLED:
         data = MmapArray(blob)
         blob = self.store[blob.other]
         mask = MmapArray(blob)
         return MA.array(data, mask=mask)
     elif blob.type == BLOB_STRING:
         return blob.as_str()
     else:
         raise Error('bad BLOB type %s in index' % blob.type)
Example #14
 def copyTest (self):
     "Test how MA works with the copy module."
     import copy
     x = MA.array([1,2,3])
     y = [1, x, 3]
     c1 = copy.copy(x)
     assert MA.allclose(x,c1)
     x[1] = 4
     assert not MA.allclose(x,c1)
     c2 = copy.copy(y)
     assert id(c2[1]) == id(x)
     c3 = copy.deepcopy(y)
     assert id(c3[1]) != id(x)
     assert MA.allclose(c3[1], x)
Example #16
 def testReductions (self):
     "Tests of reduce attribute."
     a = MA.arange(6)
     m = MA.array([[1,2,3],[11,12,13]])
     assert MA.add.reduce(a) == 15
     assert MA.multiply.reduce(m.shape) == len(m.flat)
     assert eq(MA.add.reduce (m, 0), [12,14,16])
     assert eq(MA.add.reduce (m, -1), [6,36])
     assert eq(MA.multiply.reduce (m, 0), [11,24,39])
     assert eq(MA.multiply.reduce (m, -1), [6,11*12*13])
     assert MA.add.reduce([1]) == 1
     assert MA.add.reduce([]) == 0
     assert MA.multiply.reduce([]) == 1
     assert MA.minimum.reduce(a) == 0
     assert MA.maximum.reduce(a) == 5
Example #17
 def store_data(self, data, mask, filename = None):
     if mask is None:
         data = Numeric.array(data, typecode=self.numeric_type)
     else:
         data = MA.array(data, typecode=self.numeric_type, mask=mask)
     if filename:
         try:
             os.unlink(filename)
         except OSError:
             pass
         data_blob = ArrayDict(filename, 'w+')
         data_blob['data'] = data
         del data_blob               # this writes the data to disc - 
                                     # we really need a .sync() method...
         return None                 # Flag for load on demand
     else:
         return data
Example #19
 def testBasicConstruction (self):
     "Test of basic construction."
     alist = [1,2,3]
     x = MA.array(alist)
     assert len(x) == 3
     assert x.typecode() == MA.Int
     assert x.spacesaver() == 0
     assert x.shape == (3,)
     assert eq (x,alist)
     y = MA.array(x, copy=0)
     assert x.raw_data() is y.raw_data()
     y[2] = 9
     assert x[2] == 9
     z = MA.array(x, savespace = 1, typecode = MA.Float)
     assert z.typecode() == MA.Float
     assert z.spacesaver() == 1
     x = MA.array([1,2,3.])
     assert x.typecode() == MA.Float
     x = MA.array([1,'who', 3.], MA.PyObject)
     assert x.typecode() == MA.PyObject
     w = MA.array([1,2], MA.Int32)
     assert w.itemsize() == 4
     assert w.iscontiguous()
     assert w.astype(MA.Float).typecode() == MA.Float
Example #20
def to_numpy_float_array(
        values_list,
        missing_value=ValueError(
            "Found value interpreted as missing value and no missing_value was specified"
        ),
        mapping=ValueError(
            "Could not convert value to float and no mapping was specified"),
        mapping_start=1.0,
        is_missing=default_is_missing):
    """
    Transforms a list of values into a numpy array of floats.
    Values that are not floats are handled automatically, according to the following policies:    

      * missing_value specifies what to do if we encounter a value that we consider a missing value:
        If missing_value is a float, then its value is used
        If missing_value is None, then we'll return an apporpriately masked array (MA.array)
        If missing_value is not specified as a float nor None, it will get raised as an exception.        
      Note that a value x is considered a missing value if it satisfies is_missing(x)
      (defaults to None or blank string or single dash '-').
      
      * If the value x is not missing, an attempt will be made to convert it to float using float(x).      
        This has the effect of converting strings representing float to that float,
        and of converting a bool to 1. or 0.

      * If the value is the string 'True' or 'False' it will similarly be changed to 1. or 0.      

      * If all the above fails, mapping can be used to specify how to automatically handle the case:
          If mapping is a float, then its value will be used.
          If mapping is a dictionary, then it will be looked up to find the corresponding value to use
          and if it is not found, then new corresponding mapping will automatically be added
          starting at mapping_start (which will get incremented ny 1).
          If you don't want automatical adding of mappings you can specify mapping_start = None
          (in this case an exception will be raised if not present in the mapping)
          If mapping is not specified as a float or a dict it will get raised as an exception.B 
      """
    if type(missing_value) is int:
        missing_value = float(missing_value)
    if type(mapping) is int:
        mapping = float(mapping)

    vec = []
    missing_pos = []
    for i in xrange(len(values_list)):
        val = values_list[i]
        if is_missing(val):
            if type(missing_value) is float:
                val = missing_value
            elif missing_value is None:
                val = 0.
                missing_pos.append(i)   # remember which position to mask later
            else:
                raise missing_value
        else:
            try:
                val = float(val)
            except ValueError:
                if val == 'True':
                    val = 1.
                elif val == 'False':
                    val = 0.
                elif type(mapping) is float:
                    val = mapping
                elif type(mapping) is dict:
                    if val in mapping:
                        val = mapping[val]
                    elif mapping_start is None:
                        raise ValueError("At position " + str(i) + " value " +
                                         str(val) +
                                         " not present in specified mapping.")
                    else:
                        mapping[val] = mapping_start
                        val = mapping_start
                        mapping_start += 1.
                else:  # mapping is neither float nor dict:
                    raise mapping
        # Now we have a val that should be a valid float
        vec.append(val)

    # now return a proper numpy.array or MA.array (if we wanted masking).
    if len(missing_pos) == 0:  # return numpy.array
        return numpy.array(vec)
    else:  # return masked array
        n = len(vec)
        mask = [0] * n
        for pos in missing_pos:
            mask[pos] = 1
        return MA.array(vec, mask=mask)
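
A brief usage sketch for the function above may help; the sample input list and the resulting values are illustrative assumptions based on the docstring, not taken from the original project:

# Hypothetical call to the to_numpy_float_array function defined above.
values = ['1.5', 'True', '-', 'red', 'red', 'blue']
mapping = {}
# '-' satisfies default_is_missing, and missing_value=None asks for a masked
# result, so position 2 ends up masked in the returned MA.array.
# 'True' is converted to 1.0; 'red' and 'blue' are not floats, so they are
# added to the mapping starting at mapping_start=1.0 ('red' -> 1.0, 'blue' -> 2.0).
arr = to_numpy_float_array(values, missing_value=None, mapping=mapping)
# arr has index 2 masked; mapping is now {'red': 1.0, 'blue': 2.0}.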
Example #22
def calc_stratified_rates(summset,
                          popset,
                          conflev=0.95,
                          basepop=100000,
                          timeinterval='years',
                          ci_method='dobson',
                          popset_popcol='_freq_',
                          debug=False):
    """
    Calculate stratified population rates

    summset     is a straified summary dataset of counts of events for
                the population-of-interest
    popset      is the stratified population counts for the
                population-of-interest
    """
    from rpy import r, get_default_mode, set_default_mode, BASIC_CONVERSION

    alpha = get_alpha(conflev)

    if ci_method not in ('dobson', 'ff'):
        raise Error('Only Dobson et al. (dobson) and Fay-Feuer (ff) '
                    'methods for confidence intervals currently '
                    'implemented')
    if not popset.has_column(popset_popcol):
        raise Error('Denominator population dataset %r does not have a '
                    '%r column' % (popset.label or popset.name, popset_popcol))

    st = time.time()
    r_mode = get_default_mode()
    try:
        set_default_mode(BASIC_CONVERSION)

        # We turn the summset into an Ncondcols-dimensional matrix
        summtab = CrossTab.from_summset(summset)

        # The population dataset must have at least as many dimensions as
        # summary dataset. Any additional axes are eliminated by summing.
        # any missing axes are created by replication.
        poptab = CrossTab.from_summset(popset, shaped_like=summtab)
        poptab.collapse_axes_not_in(summtab)
        poptab.replicate_axes(summtab)
        popfreq = poptab[popset_popcol].data.astype(Numeric.Float64)

        # Manufacture a CrossTab for the result
        result = summtab.empty_copy()

        basepop = float(basepop)

        for table, name, n_add, l_add in just_freq_tables(summtab):
            # avoid integer overflows...
            summfreq = table.data.astype(Numeric.Float64)

            strata_rate = summfreq / popfreq

            result.add_table('summfreq' + n_add,
                             data=summfreq,
                             label='Events' + l_add)
            result.add_table('popfreq' + n_add,
                             data=popfreq,
                             label='Person-' + timeinterval + ' at risk' +
                             l_add)
            result.add_table('sr' + n_add,
                             data=strata_rate * basepop,
                             label='Strata-specific Rate per ' +
                             '%d' % basepop + ' person-' + timeinterval +
                             l_add)

            if alpha is not None:
                # CIs for stratified rates
                summfreq_shape = summfreq.shape
                summfreq_flat = MA.ravel(summfreq)
                assert popfreq.shape == summfreq.shape
                popfreq_flat = MA.ravel(popfreq)

                sr_ll = Numeric.empty(len(summfreq_flat),
                                      typecode=Numeric.Float64)
                sr_ul = Numeric.empty(len(summfreq_flat),
                                      typecode=Numeric.Float64)
                sr_ll_mask = Numeric.zeros(len(summfreq_flat),
                                           typecode=Numeric.Int8)
                sr_ul_mask = Numeric.zeros(len(summfreq_flat),
                                           typecode=Numeric.Int8)

                for i, v in enumerate(summfreq_flat):
                    try:
                        if v == 0:
                            sr_ll[i] = 0.0
                        else:
                            sr_ll[i] = (
                                (r.qchisq(alpha / 2., df=2.0 * v) / 2.0) /
                                popfreq_flat[i]) * basepop
                        sr_ul[i] = (
                            (r.qchisq(1. - alpha / 2., df=2.0 *
                                      (v + 1)) / 2.0) /
                            popfreq_flat[i]) * basepop
                    except:
                        sr_ll[i] = 0.0
                        sr_ul[i] = 0.0
                        sr_ll_mask[i] = 1
                        sr_ul_mask[i] = 1

                sr_ll = MA.array(sr_ll, mask=sr_ll_mask, typecode=MA.Float64)
                sr_ul = MA.array(sr_ul, mask=sr_ul_mask, typecode=MA.Float64)
                sr_ll.shape = summfreq_shape
                sr_ul.shape = summfreq_shape

                sr_base = 'Stratified rate %s%%' % (100.0 * conflev)
                result.add_table('sr_ll' + n_add,
                                 data=sr_ll,
                                 label=sr_base + ' lower confidence limit ' +
                                 l_add)
                result.add_table('sr_ul' + n_add,
                                 data=sr_ul,
                                 label=sr_base + ' upper confidence limit ' +
                                 l_add)

    finally:
        set_default_mode(r_mode)
    soom.info('calc_stratified_rates took %.03f' % (time.time() - st))
    name = 'stratified_rates_' + summset.name
    label = 'Stratified Rates for ' + (summset.label or summset.name)
    if conflev:
        label += ' (%g%% conf. limits)' % (conflev * 100)
    if debug:
        global vars
        vars = Vars(locals())
    return result.to_summset(name, label=label)
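
For reference, the confidence limits computed element-wise in the loop above are the exact Poisson limits expressed through chi-square quantiles (assuming get_alpha returns $\alpha = 1 - \text{conflev}$, which is not shown here). For a stratum with $v$ events and $n$ person-time at risk:

$$
\mathrm{LL} = \frac{\tfrac{1}{2}\,\chi^{2}_{\alpha/2,\;2v}}{n}\times\text{basepop},
\qquad
\mathrm{UL} = \frac{\tfrac{1}{2}\,\chi^{2}_{1-\alpha/2,\;2(v+1)}}{n}\times\text{basepop},
$$

with $\mathrm{LL}=0$ when $v=0$, matching the r.qchisq calls in the code.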
Example #23
 def testeq (self):
     "Test the eq function"
     assert eq(3,3)
     assert not eq(3,4)
     assert eq([3,3,3],3)
     assert eq([2.,3.,4.], MA.array([2.,3.,4.]))
Example #24
def calc_indirectly_std_ratios(summset,
                               popset,
                               stdsummset,
                               stdpopset,
                               conflev=0.95,
                               baseratio=100,
                               timeinterval='years',
                               popset_popcol='_freq_',
                               stdpopset_popcol='_stdpop_',
                               ci_method='daly',
                               debug=False):
    """
    Calculate Indirectly Standardised Population Event Ratios

    - summset is a summary dataset of counts of events for the
      population-of-interest being compared to the standard population.
    - popset is the stratified population counts for the
      population-of-interest
    - stdsummset is a summary dataset of counts of events for the
      standard population
    - stdpopset is the stratified population counts for the standard
      population
    """
    from rpy import r, get_default_mode, set_default_mode, BASIC_CONVERSION

    alpha = get_alpha(conflev)

    if ci_method != 'daly':
        raise Error("Only Daly method for confidence intervals "
                    "currently implemented")
    if not popset.has_column(popset_popcol):
        raise Error('Denominator population dataset %r does not have a '
                    '%r column' % (popset.label or popset.name, popset_popcol))
    if not stdpopset.has_column(stdpopset_popcol):
        raise Error('Standard population dataset %r does not have a '
                    '%r column' %
                    (stdpopset.label or stdpopset.name, stdpopset_popcol))

    st = time.time()
    r_mode = get_default_mode()
    try:
        set_default_mode(BASIC_CONVERSION)

        shape = shape_union(stdsummset, summset)

        summtab = CrossTab.from_summset(summset, shaped_like=shape)

        stdsummtab = CrossTab.from_summset(stdsummset, shaped_like=shape)

        stdpoptab = CrossTab.from_summset(stdpopset, shaped_like=shape)
        stdpoptab.collapse_axes_not_in(stdsummtab)

        stdsummtab.replicate_axes(shape)
        stdpoptab.replicate_axes(shape)

        poptab = CrossTab.from_summset(popset, shaped_like=shape)
        poptab.collapse_axes_not_in(shape)
        if poptab.get_shape() != stdsummtab.get_shape():
            raise Error(
                'Observed population does not have all the required columns')
        popfreq = poptab[popset_popcol].data.astype(MA.Float64)

        result = stdsummtab.empty_copy()
        result.add_table('popfreq',
                         data=popfreq,
                         label='Total person-' + timeinterval + ' at risk')

        expected_cols = []
        for table, name, n_add, l_add in just_freq_tables(stdsummtab):
            stdsummfreq = stdsummtab[name].data.astype(MA.Float64)
            stdpopfreq = stdpoptab[stdpopset_popcol].data.astype(MA.Float64)
            std_strata_rates = stdsummfreq / stdpopfreq
            strata_expected_freq = std_strata_rates * popfreq
            #            print stdsummfreq[0,0,0], stdpopfreq[0,0,0], popfreq[0,0,0]
            result.add_table('expected' + n_add,
                             data=strata_expected_freq,
                             label='Expected events' + l_add)
            expected_cols.append('expected' + n_add)

        result.collapse_axes_not_in(summtab)

        axis = 0
        baseratio = float(baseratio)

        for table, name, n_add, l_add in just_freq_tables(summtab):
            observed = table.data.astype(Numeric.Float64)
            result.add_table('observed' + n_add,
                             data=observed,
                             label='Observed events' + l_add)

            expected = result['expected' + n_add].data

            isr = observed / expected
            result.add_table('isr' + n_add,
                             data=isr * baseratio,
                             label='Indirectly Standardised Event Ratio')

            # Confidence Intervals
            if alpha is None or name != '_freq_':
                # Can only calculate confidence intervals on freq cols
                continue

            conflev_l = (1 - conflev) / 2.0
            conflev_u = (1 + conflev) / 2.0

            # get shape of observed
            observed_shape = observed.shape
            # flattened version
            observed_flat = MA.ravel(observed)

            # sanity check on shapes - should be the same!
            assert expected.shape == observed.shape

            # flattened version of expecetd
            expected_flat = MA.ravel(expected)

            # lists to hold results
            isr_ll = Numeric.empty(len(observed_flat),
                                   typecode=Numeric.Float64)
            isr_ul = Numeric.empty(len(observed_flat),
                                   typecode=Numeric.Float64)
            isr_ll_mask = Numeric.zeros(len(observed_flat),
                                        typecode=Numeric.Int8)
            isr_ul_mask = Numeric.zeros(len(observed_flat),
                                        typecode=Numeric.Int8)

            obs_mask = MA.getmaskarray(observed_flat)
            exp_mask = MA.getmaskarray(expected_flat)

            for i, v in enumerate(observed_flat):
                if obs_mask[i] or exp_mask[i]:
                    isr_ll[i] = 0.0
                    isr_ul[i] = 0.0
                    isr_ll_mask[i] = 1
                    isr_ul_mask[i] = 1
                else:
                    if v == 0.:
                        obs_ll = 0.0
                        obs_ul = -math.log(1 - conflev)
                    else:
                        obs_ll = r.qgamma(conflev_l, v, scale=1.)
                        obs_ul = r.qgamma(conflev_u, v + 1., scale=1.)
                    isr_ll[i] = obs_ll / expected_flat[i]
                    isr_ul[i] = obs_ul / expected_flat[i]

            isr_ll = MA.array(isr_ll, typecode=MA.Float64, mask=isr_ll_mask)
            isr_ul = MA.array(isr_ul, typecode=MA.Float64, mask=isr_ul_mask)
            isr_ll.shape = observed_shape
            isr_ul.shape = observed_shape

            isr_base = 'ISR %d%%' % (100.0 * conflev)
            result.add_table('isr_ll' + n_add,
                             data=isr_ll * baseratio,
                             label=isr_base + ' lower confidence limit' +
                             l_add)
            result.add_table('isr_ul' + n_add,
                             data=isr_ul * baseratio,
                             label=isr_base + ' upper confidence limit' +
                             l_add)
    finally:
        set_default_mode(r_mode)
    soom.info('calc_indirectly_std_ratios took %.03f' % (time.time() - st))
    name = 'indir_std_ratios_' + summset.name
    label = 'Indirectly Standardised Ratios for ' + (summset.label
                                                     or summset.name)
    if conflev:
        label += ' (%g%% conf. limits)' % (conflev * 100)

    if debug:
        global vars
        vars = Vars(locals())
    return result.to_summset(name, label=label)
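
Summarising the computation above: with $O$ the observed events in the study population and $E$ the expected events obtained by applying the standard population's stratum rates to the study person-time, the code computes

$$
\mathrm{ISR} = \frac{O}{E}\times\text{baseratio}.
$$

The Daly-style limits use gamma quantiles of the observed count, $O_{L} = F^{-1}_{\Gamma(O,\,1)}\!\big(\tfrac{1-c}{2}\big)$ and $O_{U} = F^{-1}_{\Gamma(O+1,\,1)}\!\big(\tfrac{1+c}{2}\big)$ for confidence level $c$ (with $O_{L}=0$ and $O_{U}=-\ln(1-c)$ when $O=0$), which are then divided by $E$ and scaled by baseratio, as in the r.qgamma calls above.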
Example #25
def calc_directly_std_rates(summset,
                            popset,
                            stdpopset=None,
                            conflev=0.95,
                            basepop=100000,
                            timeinterval='years',
                            ci_method='dobson',
                            popset_popcol='_freq_',
                            stdpopset_popcol='_stdpop_',
                            axis=0,
                            debug=False):
    """
    Calculate Directly Standardised Population Rates

    summset     is a summary dataset of counts of events for the
                population-of-interest being compared to the standard
                population.  
    popset      is the stratified population counts for the
                population-of-interest
    stdpopset   is the stratified population counts for the standard
                population
    """
    from rpy import r, get_default_mode, set_default_mode, BASIC_CONVERSION

    alpha = get_alpha(conflev)

    if ci_method not in ('dobson', 'ff'):
        raise Error('Only Dobson et al. (dobson) and Fay-Feuer (ff) methods '
                    'for confidence intervals currently implemented')
    if not popset.has_column(popset_popcol):
        raise Error('Denominator population dataset %r does not have a '
                    '%r column' % (popset.label or popset.name, popset_popcol))
    if stdpopset is not None and not stdpopset.has_column(stdpopset_popcol):
        raise Error('Standard population dataset %r does not have a '
                    '%r column' %
                    (stdpopset.label or stdpopset.name, stdpopset_popcol))

    st = time.time()
    r_mode = get_default_mode()
    try:
        set_default_mode(BASIC_CONVERSION)

        # We turn the summset into an Ncondcols-dimensional matrix
        summtab = CrossTab.from_summset(summset)

        if stdpopset is not None:
            # Then attempt to do the same to the stdpop data, summing any
            # axes not required and replicate any missing until we have an
            # array the same shape as the summtab array.
            stdtab = CrossTab.from_summset(stdpopset, shaped_like=summtab)
            stdtab.collapse_axes_not_in(summtab)
            stdtab.replicate_axes(summtab)
            stdpop = stdtab[stdpopset_popcol].data.astype(Numeric.Float64)

        # The population dataset must have at least as many dimensions as
        # summary dataset. Any additional axes are eliminated by summing.
        # any missing axes are created by replication.
        poptab = CrossTab.from_summset(popset, shaped_like=summtab)
        poptab.collapse_axes_not_in(summtab)
        poptab.replicate_axes(summtab)
        popfreq = poptab[popset_popcol].data.astype(Numeric.Float64)

        # Manufacture a CrossTab for the result, with one less axis (the first)
        result = summtab.empty_copy()
        del result.axes[axis]

        if stdpopset is not None:
            sum_stdpop = sumaxis(stdpop)
            stdwgts = stdpop / sum_stdpop
            stdpop_sq = stdpop**2
            sum_stdpop_sq = sum_stdpop**2
            ffwi = stdwgts / popfreq
            ffwm = MA.maximum(MA.ravel(ffwi))

        basepop = float(basepop)

        for table, name, n_add, l_add in just_freq_tables(summtab):

            # avoid integer overflows...
            summfreq = table.data.astype(Numeric.Float64)
            strata_rate = summfreq / popfreq

            result.add_table('summfreq' + n_add,
                             data=sumaxis(summfreq, axis),
                             label='Total events' + l_add)
            result.add_table('popfreq' + n_add,
                             data=sumaxis(popfreq, axis),
                             label='Total person-' + timeinterval +
                             ' at risk' + l_add)

            if stdpopset is not None:
                std_strata_summfreq = summfreq * Numeric.where(
                    MA.getmask(stdwgts), 0., 1.)
                wgtrate = strata_rate * stdwgts
                result.add_table('std_strata_summfreq' + n_add,
                                 data=sumaxis(std_strata_summfreq, axis),
                                 label="Total events in standard strata" +
                                 l_add)

            # Crude rate
            cr = sumaxis(summfreq, axis) / sumaxis(popfreq, axis) * basepop
            result.add_table('cr' + n_add,
                             data=cr,
                             label='Crude Rate per ' + '%d' % basepop +
                             ' person-' + timeinterval + l_add)

            if alpha is not None:
                # CIs for crude rate
                count = sumaxis(summfreq, axis)
                count_shape = count.shape
                count_flat = MA.ravel(count)
                totpop = sumaxis(popfreq, axis)
                assert totpop.shape == count.shape
                totpop_flat = MA.ravel(totpop)

                cr_ll = Numeric.empty(len(count_flat),
                                      typecode=Numeric.Float64)
                cr_ul = Numeric.empty(len(count_flat),
                                      typecode=Numeric.Float64)
                cr_ll_mask = Numeric.zeros(len(count_flat),
                                           typecode=Numeric.Int8)
                cr_ul_mask = Numeric.zeros(len(count_flat),
                                           typecode=Numeric.Int8)

                for i, v in enumerate(count_flat):
                    try:
                        if v == 0:
                            cr_ll[i] = 0.0
                        else:
                            cr_ll[i] = (
                                (r.qchisq(alpha / 2., df=2.0 * v) / 2.0) /
                                totpop_flat[i]) * basepop
                        cr_ul[i] = (
                            (r.qchisq(1. - alpha / 2., df=2.0 *
                                      (v + 1)) / 2.0) /
                            totpop_flat[i]) * basepop
                    except:
                        cr_ll[i] = 0.0
                        cr_ul[i] = 0.0
                        cr_ll_mask[i] = 1
                        cr_ul_mask[i] = 1

                cr_ll = MA.array(cr_ll, mask=cr_ll_mask, typecode=MA.Float64)
                cr_ul = MA.array(cr_ul, mask=cr_ul_mask, typecode=MA.Float64)
                cr_ll.shape = count_shape
                cr_ul.shape = count_shape

                cr_base = 'Crude rate %d%%' % (100.0 * conflev)
                result.add_table('cr_ll' + n_add,
                                 data=cr_ll,
                                 label=cr_base + ' lower confidence limit ' +
                                 l_add)
                result.add_table('cr_ul' + n_add,
                                 data=cr_ul,
                                 label=cr_base + ' upper confidence limit ' +
                                 l_add)

            if stdpopset is not None:

                # Directly Standardised Rate
                dsr = sumaxis(wgtrate, axis)
                result.add_table('dsr' + n_add,
                                 data=dsr * basepop,
                                 label='Directly Standardised Rate per ' +
                                 '%d' % basepop + ' person-' + timeinterval +
                                 l_add)

                # Confidence Intervals
                if alpha is None or name != '_freq_':
                    # Can only calculate confidence intervals on freq cols
                    continue

                if ci_method == 'dobson':
                    # Dobson et al method
                    # see: Dobson A, Kuulasmaa K, Eberle E, Schere J. Confidence intervals for weighted sums
                    # of Poisson parameters, Statistics in Medicine, Vol. 10, 1991, pp. 457-62.
                    # se_wgtrate = summfreq*((stdwgts/(popfreq/basepop))**2)
                    se_wgtrate = summfreq * ((stdwgts / (popfreq))**2)
                    stderr = stdpop_sq * strata_rate * (1.0 - strata_rate)
                    se_rate = sumaxis(se_wgtrate, axis)
                    sumsei = sumaxis(stderr, axis)
                    total_freq = sumaxis(std_strata_summfreq, axis)
                    # get shape of total_freq
                    total_freq_shape = total_freq.shape

                    total_freq_flat = MA.ravel(total_freq)

                    # flat arrays to hold results and associated masks
                    l_lam = Numeric.empty(len(total_freq_flat),
                                          typecode=Numeric.Float64)
                    u_lam = Numeric.empty(len(total_freq_flat),
                                          typecode=Numeric.Float64)
                    l_lam_mask = Numeric.zeros(len(total_freq_flat),
                                               typecode=Numeric.Int8)
                    u_lam_mask = Numeric.zeros(len(total_freq_flat),
                                               typecode=Numeric.Int8)

                    conflev_l = (1 - conflev) / 2.0
                    conflev_u = (1 + conflev) / 2.0

                    for i, v in enumerate(total_freq_flat):
                        try:
                            if v == 0.:
                                u_lam[i] = -math.log(1 - conflev)
                                l_lam[i] = 0.0
                            else:
                                l_lam[i] = r.qgamma(conflev_l, v, scale=1.)
                                u_lam[i] = r.qgamma(conflev_u,
                                                    v + 1.,
                                                    scale=1.)
                        except:
                            l_lam[i] = 0.0
                            u_lam[i] = 0.0
                            l_lam_mask[i] = 1
                            u_lam_mask[i] = 1

                    l_lam = MA.array(l_lam,
                                     mask=l_lam_mask,
                                     typecode=MA.Float64)
                    u_lam = MA.array(u_lam,
                                     mask=u_lam_mask,
                                     typecode=MA.Float64)
                    l_lam.shape = total_freq_shape
                    u_lam.shape = total_freq_shape
                    dsr_ll = dsr + (((se_rate / total_freq)**0.5) *
                                    (l_lam - total_freq))
                    dsr_ul = dsr + (((se_rate / total_freq)**0.5) *
                                    (u_lam - total_freq))

                elif ci_method == 'ff':
                    # Fay and Feuer method
                    # see: Fay MP, Feuer EJ. Confidence intervals for directly standardized rates:
                    # a method based on the gamma distribution. Statistics in Medicine 1997 Apr 15;16(7):791-801.

                    ffvari = summfreq * ffwi**2.0
                    ffvar = sumaxis(ffvari, axis)

                    dsr_flat = Numeric.ravel(MA.filled(dsr, 0))
                    dsr_shape = dsr.shape

                    ffvar_flat = Numeric.ravel(MA.filled(ffvar, 0))

                    # flat arrays to hold results and associated masks
                    dsr_ll = Numeric.empty(len(dsr_flat),
                                           typecode=Numeric.Float64)
                    dsr_ul = Numeric.empty(len(dsr_flat),
                                           typecode=Numeric.Float64)
                    dsr_ll_mask = Numeric.zeros(len(dsr_flat),
                                                typecode=Numeric.Int8)
                    dsr_ul_mask = Numeric.zeros(len(dsr_flat),
                                                typecode=Numeric.Int8)

                    for i, y in enumerate(dsr_flat):
                        try:
                            dsr_ll[i] = (ffvar_flat[i] / (2.0 * y)) * r.qchisq(
                                alpha / 2., df=(2.0 * (y**2.) / ffvar_flat[i]))
                            dsr_ul[i] = ((ffvar_flat[i] + (ffwm**2.0)) /
                                         (2.0 * (y + ffwm))) * r.qchisq(
                                             1. - alpha / 2.,
                                             df=((2.0 * ((y + ffwm)**2.0)) /
                                                 (ffvar_flat[i] + ffwm**2.0)))
                        except:
                            dsr_ll[i] = 0.0
                            dsr_ul[i] = 0.0
                            dsr_ll_mask[i] = 1
                            dsr_ul_mask[i] = 1
                    dsr_ll = MA.array(dsr_ll,
                                      mask=dsr_ll_mask,
                                      typecode=MA.Float64)
                    dsr_ul = MA.array(dsr_ul,
                                      mask=dsr_ul_mask,
                                      typecode=MA.Float64)
                    dsr_ll.shape = dsr_shape
                    dsr_ul.shape = dsr_shape

                result.add_table('dsr_ll' + n_add,
                                 data=dsr_ll * basepop,
                                 label='DSR ' + '%d' % (100.0 * conflev) +
                                 '% lower confidence limit' + l_add)
                result.add_table('dsr_ul' + n_add,
                                 data=dsr_ul * basepop,
                                 label='DSR ' + '%d' % (100.0 * conflev) +
                                 '% upper confidence limit' + l_add)

    finally:
        set_default_mode(r_mode)
    soom.info('calc_directly_std_rates took %.03f' % (time.time() - st))
    if stdpopset is not None:
        name = 'dir_std_rates_' + summset.name
        label = 'Directly Standardised Rates for ' + (summset.label
                                                      or summset.name)
    else:
        name = 'crude_rates_' + summset.name
        label = 'Crude Rates for ' + (summset.label or summset.name)
    if conflev:
        label += ' (%g%% conf. limits)' % (conflev * 100)
    if debug:
        global vars
        vars = Vars(locals())
    return result.to_summset(name, label=label)
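
The core of the calculation above is the usual directly standardised rate: with $d_i$ events and $n_i$ person-time in stratum $i$ of the study population, and $N_i$ the standard population count (so $w_i = N_i / \sum_j N_j$ are the stdwgts in the code),

$$
\mathrm{DSR} = \sum_i w_i\,\frac{d_i}{n_i}\times\text{basepop},
\qquad
\mathrm{CR} = \frac{\sum_i d_i}{\sum_i n_i}\times\text{basepop}.
$$

The 'dobson' and 'ff' branches then only differ in how confidence limits are attached to this weighted sum.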
Example #26
def fully_masked(shape, typecode='i'):
    return MA.array(Numeric.empty(shape, typecode=typecode),
                    mask=Numeric.ones(shape, typecode='b', savespace=1))
Example #27
#   the State of New South Wales, Australia.
#
#   Copyright (C) 2004,2005 Health Administration Corporation.
#   All Rights Reserved.
#
# $Id: matest.py 2626 2007-03-09 04:35:54Z andrewm $
# $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/soomext/matest.py,v $

import MA
import Numeric
from soomarray import ArrayDict

ad = ArrayDict('blah.dat', 'r+')
a = Numeric.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], Numeric.Int)
m = Numeric.array([0, 0, 0, 0, 0, 1, 0, 0, 1, 0], Numeric.Int)
ad['matest1'] = MA.array(a, mask=m)
del ad

ad = ArrayDict('blah.dat')
matest = ad['matest1']

print "matest: ", matest
print "sum of matest: ", MA.sum(matest)
print "length of matest: ", len(matest)
print "count of matest: ", MA.count(matest)
print "average of matest: ", MA.average(matest)
print "minimum of matest: ", MA.minimum(matest)
print "maximum of matest: ", MA.maximum(matest)

del ad
Example #28
 def setUp (self):
     self.a = .01 + MA.arange(6) / 8.0 
     self.m = MA.array([[1,2,3],[11,12,13]]) / 16.0
Example #29
import cdms, time, MA

f = cdms.open('test0.nc', 'a')

t = f.variables['air_temperature']

print time.time()
x = t.getValue()
for i in range(t.shape[0]):
    for j in range(t.shape[1]):
        x[i, j, :] += 2.
t[:, :, :] = x
print time.time()

for i in range(t.shape[0]):
    for j in range(t.shape[1]):
        t[i, j, :] = MA.array(t[i, j, :] + 2., 'f')

print time.time()

f.close()
Example #30
	def __init__(self) :
		self.car=Ma.array([-1.,-1.,-1.,-1.,-1.,-1.],mask=[1,1,1,1,1,1],fill_value=-1e3)
		self.status="indefini"
		self.header=["H","na","T","HR","Tr","Th"]
Example #31
 def setUp (self):
     self.a = MA.arange(6)
     self.m = MA.array([[1,2,3],[11,12,13]])
Example #32
# How to use numpy with 'None' value in Python?
import MA
a = MA.array([1, 2, None], mask = [0, 0, 1])
print "average =", MA.average(a)