def testMultipleDimensionCreation(self):
    """Check the shapes of arrays built from nested lists and from lists of arrays."""
    # A 2x2 nested list must produce a rank-2 array.
    base = MA.array([[2, 3], [4, 5]])
    assert base.shape == (2, 2)

    # A list of three 2x2 arrays must produce a rank-3 array.
    layers = [base, base + 1, base + 2]
    stacked = MA.array(layers)
    assert stacked.shape == (3, 2, 2)
def testDiagonal(self):
    """Exercise MA.diagonal on a 4x4 array and on a stack of two such arrays."""
    rows = [[1, 2, 3, 4], [5, 6, 7, 8]] * 2
    square = MA.array(rows)

    main_diag = MA.diagonal(square)
    assert eq(main_diag, [1, 6, 3, 8])
    lower_diag = MA.diagonal(square, -1)
    assert eq(lower_diag, [5, 2, 7])

    # With a rank-3 input the offset-1 diagonal is taken for each pair of rows.
    stacked = MA.array([square, square])
    assert eq(MA.diagonal(stacked, 1), [[2, 7, 4], [2, 7, 4]])
def Init_diag(Ain,Aout,fignum):
    """Initialise the module-level diagram settings and open figure `fignum`.

    Sets the globals declared below (constants, figure geometry, line
    properties, corner coordinates), creates the figure, then iterates until
    the global coefficients Cpa, Cpv and Lv are consistent with the
    temperature of the mean of `Ain` and `Aout`.

    Parameters:
        Ain, Aout: objects with a `car` array attribute that `Fill_Air`
            accepts (presumably instances of the `air` class -- confirm).
            Their `car` attribute is rebound and refilled on every iteration.
        fignum: figure identifier passed to `figure()`.

    Returns:
        The tuple (LH, Lna, LT, LHR, Lair) from Return_levels_HnaTHR(Ain, Aout).
    """
    global Ptot,RatioM,Tmincrit,Cpa,Cpv,Lv,namincrit,Hmincrit,Tmaxcrit,Tminopt
    global Ratiofig,fig_hauteur,fig_longueur,RatioAxes,lineprops,lineprops_HR,x1,y1,x3,y3
    # NOTE(review): the constants below look like total pressure in Pa and
    # atomic/molar masses in g/mol -- confirm units against the helpers.
    Ptot=101325.0
    MO=15.9994
    MN=14.0067
    MAr=39.948
    Mair=0.78084*2*MN+0.20946*2*MO+0.009340*MAr
    MH=1.00794
    MH2O=2*MH+MO
    RatioM=MH2O/Mair
    Tmincrit=-30.0
    # Provisional coefficients, used only to derive namincrit and Hmincrit;
    # they are overwritten further down.
    Cpa=Cpas(C2K(Tmincrit))
    Cpv=Cpvap(C2K(0.1))
    Lv=ChLv(C2K(0.1))
    namincrit=RatioM*Pvap(Tmincrit)/(Ptot-Pvap(Tmincrit))
    Hmincrit=Cpa*Tmincrit+namincrit*(Lv+Cpv*Tmincrit)
    Tmaxcrit=260.0
    Tminopt=-30.0
    # Figure geometry: height/width ratio of 21.0/29.7 (presumably A4 landscape).
    Ratiofig=21.0/29.7
    fig_hauteur=6 # inches
    fig_longueur=fig_hauteur/Ratiofig
    figure(fignum,figsize=(fig_longueur,fig_hauteur))
    lineprops = dict(linewidth=0.5, color='gray', linestyle='-',antialiased=True)
    lineprops_HR = dict(linewidth=1, color='blue', linestyle='-',antialiased=True)
    # Starting guess for the iteration below.
    Tinit=20.0
    Cpa=Cpas(C2K(Tinit))
    Cpv=Cpvap(C2K(Tinit))
    Lv=ChLv(C2K(Tinit))
    # Remember the original masks so each iteration restarts from them.
    maskin=Ma.getmask(Ain.car)
    maskout=Ma.getmask(Aout.car)
    # The goal here is to fix the mean values of Cpa, Cpv and Lv by
    # computing the mean air (i.e. the centre of the diagram).
    # The iteration stops once the summed squared change of Cpa, Cpv and Lv
    # is no larger than 1e-6.
    crit=True
    while crit:
        # Re-apply the original masks, then refill with the current globals.
        Ain.car=Ma.array(Ain.car,copy=0,mask=maskin)
        Aout.car=Ma.array(Aout.car,copy=0,mask=maskout)
        Fill_Air(Ain)
        Fill_Air(Aout)
        # Mean air defined from the means of the first two `car` entries.
        Amoy=air()
        Amoy.definir(["H","na"],[mean([Ain.car[0],Aout.car[0]]), mean([Ain.car[1],Aout.car[1]])])
        Fill_Air(Amoy)
        Tmoy=Amoy.car[2]
        Cpa_new=Cpas(C2K(Tmoy))
        Cpv_new=Cpvap(C2K(Tmoy))
        Lv_new=ChLv(C2K(Tmoy))
        crit=((Cpa-Cpa_new)**2+(Cpv-Cpv_new)**2+(Lv-Lv_new)**2)>1e-6
        Cpa=Cpa_new
        Cpv=Cpv_new
        Lv=Lv_new
    LH,Lna,LT,LHR,Lair=Return_levels_HnaTHR(Ain,Aout)
    # Two transformed corner points give the axes aspect ratio.
    x1,y1=Transf_xy(Lna[0],Lair[0].car[0])
    x3,y3=Transf_xy(Lna[-1],Lair[2].car[0])
    RatioAxes=(y3-y1)/(x3-x1)
    return LH,Lna,LT,LHR,Lair
def testLogical(self):
    """Check logical_and, logical_or, sometrue and alltrue."""
    left = MA.array([1, 1, 0, 0])
    right = MA.array([1, 0, 1, 0])

    conjunction = MA.logical_and(left, right)
    assert eq(conjunction, [1, 0, 0, 0])
    disjunction = MA.logical_or(left, right)
    assert eq(disjunction, [1, 1, 1, 0])

    # Reductions over a mixed, an all-zero and an all-one array.
    assert MA.sometrue(left)
    assert not MA.sometrue(MA.zeros((3,)))
    assert MA.alltrue(MA.ones((3,)))
    assert not MA.alltrue(left)
def testTypecodeSpecifying(self):
    """Check that arrays built with an explicit typecode report that typecode."""
    from Precision import typecodes

    # Concatenate the typecodes of every numeric category, in this order.
    numeric_codes = typecodes['Integer']
    for category in ('UnsignedInteger', 'Float', 'Complex'):
        numeric_codes = numeric_codes + typecodes[category]

    for code in numeric_codes:
        arr = MA.array([1, 2, 3], code)
        assert arr.typecode() == code

    # Character arrays are checked separately.
    chars = MA.array(['hi', 'hoo'], 'c')
    assert chars.typecode() == 'c'
def testSpacesaver(self):
    """Check the savespace flag of MA arrays (test by Travis Oliphant)."""
    compact = MA.array([1, 2, 3, 4], savespace=1)
    assert compact.spacesaver()
    self.assertEqual(compact.typecode(), 's')

    # An array built from a space-saving array keeps the flag.
    as_float = MA.array(compact, 'f')
    self.assertEqual(as_float.typecode(), 'f')
    assert as_float.spacesaver()

    # Clearing the flag on the source must not affect the derived array.
    compact.savespace(0)
    assert not compact.spacesaver()
    assert as_float.spacesaver()

    # Multiplying by a Python int must not change the typecode.
    scaled = 4 * as_float
    assert as_float.typecode() == scaled.typecode()

    # NOTE(review): the next statement only builds a tuple; it never calls
    # failUnlessRaises, so nothing is asserted. It is kept as found because
    # it is unconfirmed that MA.arccos raises TypeError for this input.
    self.failUnlessRaises, TypeError, MA.arccos, (as_float / 10.0)
def from_summset(cls, ds, shaped_like=None):
    """Build a crosstab from the summary dataset `ds`.

    Parameters:
        ds: dataset providing `name`, `get_columns()` and item access by
            column name.
        shaped_like: optional object understood by `xtab_axes`. Axes whose
            names exist in `ds` are created first, in that order and with
            the template's axis values; the remaining discrete columns whose
            names do not start with '_' follow.

    Returns:
        A new `cls` instance with one axis per selected discrete column and
        one table per scalar column of `ds`.

    Raises:
        Error: if no axis could be derived from `ds`.
    """
    self = cls(ds.name)
    st = time.time()
    cols = ds.get_columns()
    if shaped_like is not None:
        for axis in xtab_axes(shaped_like):
            try:
                col = ds[axis.name]
            except KeyError:
                # The template axis has no counterpart in this dataset.
                pass
            else:
                self.axes.append(CrossTabAxis.from_col(col, axis.values))
                # Already used as an axis; skip it in the generic pass below.
                cols.remove(col)
    for col in cols:
        if col.is_discrete() and not col.name.startswith('_'):
            self.axes.append(CrossTabAxis.from_col(col))
    if not self.axes:
        raise Error('dataset %r must have at least one discrete column' %
                    (ds.name,))
    indices = [axis.indices.filled() for axis in self.axes]
    masks = [axis.indices.mask() for axis in self.axes]
    # One row of axis indices per source row (named to avoid shadowing the
    # builtin `map`).
    index_map = MA.transpose(MA.array(indices, mask=masks))
    shape = self.get_shape()
    for col in ds.get_columns():
        if col.is_scalar():
            self.add_table(col.name,
                           data=self.from_vector(index_map, col.data, shape),
                           label=col.label)
    elapsed = time.time() - st
    # A coarse timer can report zero elapsed time for small datasets; do not
    # let the log message raise ZeroDivisionError in that case.
    if elapsed > 0:
        rate = len(index_map) / elapsed
    else:
        rate = 0.0
    soom.info('%r crosstab generation took %.3f, %.1f rows/s' %
              (self.name, elapsed, rate))
    return self
def accumulate24Hourly(data):
    """Returns 12-hourly data accumulated to 24-hours.

    Consecutive, non-overlapping pairs of time steps (0+1, 2+3, ...) are
    summed. A trailing unpaired step is dropped. Each accumulated step takes
    the time value of the second step of its pair.

    Parameters:
        data: a cdms variable whose first axis is time.

    Returns:
        A new cdms variable with the accumulated values as 'f' typecode, a
        new time axis, the remaining axes of `data`, and the `units` and
        `long_name` attributes of `data`.
    """
    taxis = data.getTime()
    # The new time axis must carry the units of the time axis, not the
    # units of the variable itself.
    tunits = taxis.units
    # `tlen` was referenced but never defined in the original code.
    tlen = len(taxis)

    newTimeValues = []
    newarray = []
    for i in range(tlen // 2):
        # Pair steps (2i, 2i+1) so the pairs do not overlap and the whole
        # series is covered.
        first = 2 * i
        p1 = data(time=slice(first, first + 1))
        p2 = data(time=slice(first + 1, first + 2))
        newarray.append(p1 + p2)
        newTimeValues.append(p2.getTime()[0])

    array = MA.concatenate(newarray)
    array = MA.array(array, 'f', fill_value=data.getMissing())

    axes = data.getAxisList()
    newTimeAxis = cdms.createAxis(newTimeValues)
    newTimeAxis.units = tunits
    newTimeAxis.designateTime()
    newTimeAxis.id = newTimeAxis.long_name = newTimeAxis.title = "time"

    # Replace only the leading (time) axis.
    newaxes = [newTimeAxis] + axes[1:]
    var = cdms.createVariable(array, axes=newaxes, id=data.id)
    for att in ("units", "long_name"):
        setattr(var, att, getattr(data, att))
    return var
def testIndexing (self):
    """Test indexing operations: scalars, slices, ellipsis, NewAxis, copies."""
    x = MA.array([0,1,2,3,4,5])
    for i in range(len(x)):
        assert i == x[i]
    x[2] = 20
    assert eq(x, x[...])

    # Object arrays hand back the stored object itself.
    w = MA.array([None])
    assert w.typecode() == MA.PyObject
    assert w[0] is None
    assert isinstance(x[2], types.IntType)
    assert x[2] == 20

    # Slices with negative steps and negative bounds.
    x = MA.array([0,1,2,3,4,5,6])
    assert eq (x[4:1:-1], [4,3,2])
    assert eq(x[4:1:-2], [4,2])
    assert eq(x[::-1], [6, 5,4,3,2,1,0])
    assert eq(x[2:-1], [2,3,4,5])

    # Multi-dimensional indexing.
    m = MA.array([[1,2,3],[11,12,13]])
    assert m[0,2] == 3
    assert isinstance(m[0,2], types.IntType)
    assert eq(m[...,1], [2,12])
    assert eq(MA.arange(6)[..., MA.NewAxis], [[0],[1],[2],[3],[4],[5]])

    # MA.array(x) must copy: changing x afterwards must not change y.
    # The original wrote `x[0] == 66`, a comparison with no effect, so the
    # assertion below could never fail.
    x = MA.array([1,2,3])
    y = MA.array(x)
    x[0] = 66
    assert y[0] != 66

    b=MA.array([[1,2,3,4],[5,6,7,8]]*2)
    # Empty-slice shape checks, disabled in the original test:
    # assert b[1:1].shape == (0,4)
    # assert b[1:1, :].shape == (0,4)
    # assert b[10:].shape == (0,4)
    assert eq(b[2:10], [[1,2,3,4],[5,6,7,8]])
    assert eq(b[2:10, ...], [[1,2,3,4],[5,6,7,8]])
def testSort(self):
    """Check sort, argsort, argmax and argmin on 1-D, 2-D and empty input."""
    seq = (3, 2, 5, 1, 4, 0)
    matrix = [seq, MA.array(seq)[::-1]]
    empty = MA.array(seq)[0:0]

    # Sorting and argsorting, including the empty case.
    assert eq(MA.sort(seq), self.a)
    assert len(MA.sort(empty)) == 0
    assert eq(MA.argsort(seq), [5, 3, 1, 0, 4, 2])
    assert len(MA.argsort(empty)) == 0

    ascending = [0, 1, 2, 3, 4, 5]
    assert eq(MA.sort(matrix, axis=-1), [ascending, ascending])
    assert eq(MA.sort(matrix, axis=0), [[0, 2, 1, 1, 2, 0], [3, 4, 5, 5, 4, 3]])

    # Extremum positions; axis -1 and axis 1 name the same axis here.
    assert MA.argmax(seq) == 2
    assert MA.argmin(seq) == 5
    for ax in (-1, 1):
        assert eq(MA.argmax(matrix, axis=ax), [2, 3])
    assert eq(MA.argmax(matrix, axis=0), [0, 1, 0, 1, 0, 1])
    for ax in (-1, 1):
        assert eq(MA.argmin(matrix, axis=ax), [5, 0])
def testOperators (self): "Test the operators +, -, *, /, %, ^, &, |" x = MA.array([1.,2.,3.,4.,5.,6.]) y = MA.array([-1.,2.,0.,2.,-1, 3.]) assert eq(x + y, [0., 4., 3., 6., 4., 9.]) assert eq(x - y, [2., 0., 3., 2., 6., 3.]) assert eq(x * y, [-1., 4., 0., 8., -5., 18.]) assert eq(y / x, [-1, 1., 0., .5, -.2, .5]) assert eq(x**2, [1., 4., 9., 16., 25., 36.]) xc = MA.array([1.,2.,3.,4.,5.,6.]) xc += y assert eq(xc, x + y) xc = MA.array([1.,2.,3.,4.,5.,6.]) xc -= y assert eq(xc, x - y) yc = MA.array(y, copy=1) yc /= x assert eq ( yc, y / x) xc = MA.array([1.,2.,3.,4.,5.,6.]) y1 = [-1.,2.,0.,2.,-1, 3.] xc *= y1 assert eq(xc, x * y1) assert eq (x + y, MA.add(x, y)) assert eq (x - y, MA.subtract(x, y)) assert eq (x * y, MA.multiply(x, y)) assert eq (y / x, MA.divide (y, x)) d = x / y assert d[2] is MA.masked assert (MA.array(1) / MA.array(0)) is MA.masked assert eq (x**2, MA.power(x,2)) x = MA.array([1,2]) y = MA.zeros((2,)) assert eq (x%x, y) assert eq (MA.remainder(x,x), y) assert eq (x <<1, [2,4]) assert eq (MA.left_shift(x,1), [2,4]) assert eq (x >>1, [0,1]) assert eq (MA.right_shift(x,1), [0,1]) assert eq (x & 2, [0,2]) assert eq (MA.bitwise_and (x, 2), [0,2]) assert eq (x | 1, [1,3]) assert eq (MA.bitwise_or (x, 1), [1,3]) assert eq (x ^ 2, [3,0]) assert eq (MA.bitwise_xor(x,2), [3,0]) # x = divmod(MA.array([2,1]), MA.array([1,2])) # assert eq (x[0], [2,0]) # assert eq (x[1], [0,1]) assert (4L*MA.arange(3)).typecode() == MA.PyObject
def testOnes(self):
    """Check MA.ones for default, float and 16-bit typecodes and a bad shape."""
    dims = (2, 3)

    default_ones = MA.ones(dims)
    assert default_ones.shape == dims
    assert default_ones.typecode() == MA.Int
    assert eq(default_ones.flat, 1)

    float_ones = MA.ones(dims, MA.Float)
    assert float_ones.shape == dims
    assert eq(default_ones, float_ones)

    short_ones = MA.ones(dims, MA.Int16)
    expected = MA.array([[1, 1, 1], [1, 1, 1]], 's')
    assert eq(short_ones, expected)

    # A negative dimension must be rejected.
    self.failUnlessRaises(ValueError, MA.ones, (-5,))
def __getitem__(self, key):
    """Return the object stored under `key`.

    BLOB_ARRAY blobs come back as an MmapArray, BLOB_FILLED blobs as an MA
    array whose mask is read from the blob at `blob.other`, and BLOB_STRING
    blobs as a string. Any other blob type raises Error.
    """
    blob = self.store[self.dict[key]]

    if blob.type == BLOB_ARRAY:
        return MmapArray(blob)

    if blob.type == BLOB_FILLED:
        # The data blob points at a companion blob holding the mask.
        values = MmapArray(blob)
        mask_blob = self.store[blob.other]
        return MA.array(values, mask=MmapArray(mask_blob))

    if blob.type == BLOB_STRING:
        return blob.as_str()

    raise Error('bad BLOB type %s in index' % blob.type)
def copyTest(self):
    """Check how MA arrays behave under copy.copy and copy.deepcopy."""
    import copy

    arr = MA.array([1, 2, 3])
    container = [1, arr, 3]

    # A shallow copy of the array is independent of later changes.
    arr_copy = copy.copy(arr)
    assert MA.allclose(arr, arr_copy)
    arr[1] = 4
    assert not MA.allclose(arr, arr_copy)

    # A shallow copy of the list shares the array; a deep copy does not.
    shallow = copy.copy(container)
    assert shallow[1] is arr
    deep = copy.deepcopy(container)
    assert deep[1] is not arr
    assert MA.allclose(deep[1], arr)
def __getitem__(self, key):
    """Look up `key` and return the stored array, masked array or string.

    An Error is raised when the stored blob has an unrecognised type.
    """
    position = self.dict[key]
    blob = self.store[position]

    if blob.type == BLOB_ARRAY:
        return MmapArray(blob)

    if blob.type == BLOB_FILLED:
        # Masked data is stored as two blobs: the values and, at
        # `blob.other`, the mask.
        payload = MmapArray(blob)
        companion = self.store[blob.other]
        return MA.array(payload, mask=MmapArray(companion))

    if blob.type == BLOB_STRING:
        return blob.as_str()

    raise Error('bad BLOB type %s in index' % blob.type)
def testReductions(self):
    """Check the reduce attribute of add, multiply, minimum and maximum."""
    ramp = MA.arange(6)
    grid = MA.array([[1, 2, 3], [11, 12, 13]])

    assert MA.add.reduce(ramp) == 15
    # The product of the dimensions equals the number of elements.
    assert MA.multiply.reduce(grid.shape) == len(grid.flat)

    # Reductions along the first and last axes.
    assert eq(MA.add.reduce(grid, 0), [12, 14, 16])
    assert eq(MA.add.reduce(grid, -1), [6, 36])
    assert eq(MA.multiply.reduce(grid, 0), [11, 24, 39])
    assert eq(MA.multiply.reduce(grid, -1), [6, 11 * 12 * 13])

    # Single-element and empty inputs give the element / identity value.
    assert MA.add.reduce([1]) == 1
    assert MA.add.reduce([]) == 0
    assert MA.multiply.reduce([]) == 1

    assert MA.minimum.reduce(ramp) == 0
    assert MA.maximum.reduce(ramp) == 5
def store_data(self, data, mask, filename = None):
    """Convert `data` to an array and either return it or write it to disk.

    A Numeric array is built when `mask` is None, an MA array otherwise,
    both with typecode `self.numeric_type`. With a `filename`, any existing
    file is removed, the array is stored under the key 'data' in a new
    ArrayDict, and None is returned as the "load on demand" flag. Without a
    filename the array itself is returned.
    """
    if mask is None:
        data = Numeric.array(data, typecode=self.numeric_type)
    else:
        data = MA.array(data, typecode=self.numeric_type, mask=mask)

    if not filename:
        return data

    # Start from a clean file; a missing file is not an error.
    try:
        os.unlink(filename)
    except OSError:
        pass
    data_blob = ArrayDict(filename, 'w+')
    data_blob['data'] = data
    # Dropping the reference writes the data to disc -
    # we really need a .sync() method...
    del data_blob
    return None     # Flag for load on demand
def store_data(self, data, mask, filename=None):
    """Build the array for `data` and persist it when a filename is given.

    Returns the array when `filename` is empty. Otherwise the array is
    written under the key 'data' to a fresh ArrayDict file and None is
    returned, which flags the data for loading on demand.
    """
    typecode = self.numeric_type
    if mask is None:
        data = Numeric.array(data, typecode=typecode)
    else:
        data = MA.array(data, typecode=typecode, mask=mask)

    if not filename:
        return data

    # Remove any previous file; ignore the error if there is none.
    try:
        os.unlink(filename)
    except OSError:
        pass
    data_blob = ArrayDict(filename, 'w+')
    data_blob['data'] = data
    # Deleting the last reference writes the data to disc -
    # we really need a .sync() method...
    del data_blob
    return None     # Flag for load on demand
def testBasicConstruction(self):
    """Check basic array construction, typecodes, sharing and conversion."""
    source = [1, 2, 3]
    arr = MA.array(source)
    assert len(arr) == 3
    assert arr.typecode() == MA.Int
    assert arr.spacesaver() == 0
    assert arr.shape == (3,)
    assert eq(arr, source)

    # copy=0 shares the underlying data, so writes show through.
    shared = MA.array(arr, copy=0)
    assert arr.raw_data() is shared.raw_data()
    shared[2] = 9
    assert arr[2] == 9

    converted = MA.array(arr, savespace=1, typecode=MA.Float)
    assert converted.typecode() == MA.Float
    assert converted.spacesaver() == 1

    # The typecode is inferred from the contents unless given explicitly.
    arr = MA.array([1, 2, 3.])
    assert arr.typecode() == MA.Float
    arr = MA.array([1, 'who', 3.], MA.PyObject)
    assert arr.typecode() == MA.PyObject

    narrow = MA.array([1, 2], MA.Int32)
    assert narrow.itemsize() == 4
    assert narrow.iscontiguous()
    assert narrow.astype(MA.Float).typecode() == MA.Float
def to_numpy_float_array(
        values_list,
        missing_value=ValueError(
            "Found value interpreted as missing value and no missing_value was specified"
        ),
        mapping=ValueError(
            "Could not convert value to float and no mapping was specified"),
        mapping_start=1.0,
        is_missing=default_is_missing):
    """
    Transforms a list of values into a numpy array of floats.

    Values that are not floats are handled automatically, according to the
    following policies:

    * missing_value specifies what to do if we encounter a value that we
      consider a missing value:
        - If missing_value is a float (or an int), its value is used.
        - If missing_value is None, an appropriately masked array
          (MA.array) is returned, with the missing positions masked.
        - If missing_value is neither a float nor None, it is raised as an
          exception.
      A value x is considered missing if it satisfies is_missing(x)
      (defaults to None or blank string or single dash '-').

    * If the value x is not missing, an attempt is made to convert it with
      float(x). This converts strings representing a float to that float,
      and a bool to 1. or 0.

    * If the value is the string 'True' or 'False' it is similarly changed
      to 1. or 0.

    * If all the above fails, mapping specifies how to handle the value:
        - If mapping is a float (or an int), its value is used.
        - If mapping is a dictionary, the value is looked up in it. If it
          is not found, a new entry is added automatically, starting at
          mapping_start (which is incremented by 1 for each new entry).
          The dictionary passed in is modified in place. To disable
          automatic adding, pass mapping_start=None; an unknown value then
          raises ValueError.
        - If mapping is neither a float nor a dict, it is raised as an
          exception.

    Returns a numpy.array when no position had to be masked, otherwise an
    MA.array with the missing positions masked.
    """
    # Accept plain ints wherever a float is documented.
    if type(missing_value) is int:
        missing_value = float(missing_value)
    if type(mapping) is int:
        mapping = float(mapping)

    vec = []
    missing_pos = []
    for i, val in enumerate(values_list):
        if is_missing(val):
            if type(missing_value) is float:
                val = missing_value
            elif missing_value is None:
                val = 0.
                # Record the position of the missing value. The original
                # appended the constant 1 here, which masked the wrong
                # element (or failed on a one-element input).
                missing_pos.append(i)
            else:
                raise missing_value
        else:
            try:
                val = float(val)
            except ValueError:
                if val == 'True':
                    val = 1.
                elif val == 'False':
                    val = 0.
                elif type(mapping) is float:
                    val = mapping
                elif type(mapping) is dict:
                    if val in mapping:
                        val = mapping[val]
                    elif mapping_start is None:
                        raise ValueError("At position " + str(i) + " value " +
                                         str(val) +
                                         " not present in specified mapping.")
                    else:
                        mapping[val] = mapping_start
                        val = mapping_start
                        mapping_start += 1.
                else:
                    # mapping is neither float nor dict:
                    raise mapping
        # Now we have a val that should be a valid float
        vec.append(val)

    # Return a plain numpy.array, or an MA.array if masking was requested.
    if len(missing_pos) == 0:
        return numpy.array(vec)

    mask = [0] * len(vec)
    for pos in missing_pos:
        mask[pos] = 1
    return MA.array(vec, mask=mask)
def to_numpy_float_array(values_list,
                         missing_value = ValueError("Found value interpreted as missing value and no missing_value was specified"),
                         mapping = ValueError("Could not convert value to float and no mapping was specified"),
                         mapping_start = 1.0,
                         is_missing = default_is_missing
                         ):
    """
    Transforms a list of values into a numpy array of floats.

    Values that are not floats are handled automatically, according to the
    following policies:

    * missing_value specifies what to do if we encounter a value that we
      consider a missing value:
        - If missing_value is a float (or an int), its value is used.
        - If missing_value is None, an appropriately masked array
          (MA.array) is returned, with the missing positions masked.
        - If missing_value is neither a float nor None, it is raised as an
          exception.
      A value x is considered missing if it satisfies is_missing(x)
      (defaults to None or blank string or single dash '-').

    * If the value x is not missing, an attempt is made to convert it with
      float(x). This converts strings representing a float to that float,
      and a bool to 1. or 0.

    * If the value is the string 'True' or 'False' it is similarly changed
      to 1. or 0.

    * If all the above fails, mapping specifies how to handle the value:
        - If mapping is a float (or an int), its value is used.
        - If mapping is a dictionary, the value is looked up in it. If it
          is not found, a new entry is added automatically, starting at
          mapping_start (which is incremented by 1 for each new entry).
          The dictionary passed in is modified in place. To disable
          automatic adding, pass mapping_start=None; an unknown value then
          raises ValueError.
        - If mapping is neither a float nor a dict, it is raised as an
          exception.

    Returns a numpy.array when no position had to be masked, otherwise an
    MA.array with the missing positions masked.
    """
    # Accept plain ints wherever a float is documented.
    if type(missing_value) is int:
        missing_value = float(missing_value)
    if type(mapping) is int:
        mapping = float(mapping)

    vec = []
    missing_pos = []
    for i, val in enumerate(values_list):
        if is_missing(val):
            if type(missing_value) is float:
                val = missing_value
            elif missing_value is None:
                val = 0.
                # Record the index of the missing value. The original
                # appended the constant 1, so the mask hit the wrong element
                # (or raised IndexError on a one-element input).
                missing_pos.append(i)
            else:
                raise missing_value
        else:
            try:
                val = float(val)
            except ValueError:
                if val=='True':
                    val = 1.
                elif val=='False':
                    val = 0.
                elif type(mapping) is float:
                    val = mapping
                elif type(mapping) is dict:
                    if val in mapping:
                        val = mapping[val]
                    elif mapping_start is None:
                        raise ValueError("At position "+str(i)+" value "+str(val)+" not present in specified mapping.")
                    else:
                        mapping[val] = mapping_start
                        val = mapping_start
                        mapping_start += 1.
                else:
                    # mapping is neither float nor dict:
                    raise mapping
        # Now we have a val that should be a valid float
        vec.append(val)

    # Return a plain numpy.array, or an MA.array if masking was requested.
    if len(missing_pos)==0:
        return numpy.array(vec)

    mask = [0]*len(vec)
    for pos in missing_pos:
        mask[pos] = 1
    return MA.array(vec, mask=mask)
def calc_stratified_rates(summset, popset, conflev=0.95, basepop=100000,
                          timeinterval='years', ci_method='dobson',
                          popset_popcol='_freq_', debug=False):
    """
    Calculate stratified population rates

    summset         is a stratified summary dataset of counts of events
                    for the population-of-interest
    popset          is the stratified population counts for the
                    population-of-interest
    conflev         confidence level; passed to get_alpha(). When that
                    returns None no confidence limits are produced.
    basepop         rates and limits are multiplied by this value
    timeinterval    only used in the table labels
    ci_method       must be 'dobson' or 'ff'; it is validated but not
                    otherwise consulted in this function
    popset_popcol   name of the population column in popset
    debug           if true, the local variables are stored in the
                    module-level name `vars` as Vars(locals())

    Returns result.to_summset(name, label=label), where result holds the
    'summfreq', 'popfreq' and 'sr' tables and, when limits are computed,
    the 'sr_ll' and 'sr_ul' tables (each name suffixed per frequency table).

    Raises Error for an unsupported ci_method or a missing population
    column.
    """
    # rpy is imported locally, so it is only needed when this is called.
    from rpy import r, get_default_mode, set_default_mode, BASIC_CONVERSION

    alpha = get_alpha(conflev)

    if ci_method not in ('dobson', 'ff'):
        raise Error('Only Dobson et al. (dobson) and Fay-Feuer (ff) '
                    'methods for confidence intervals currently '
                    'implemented')
    if not popset.has_column(popset_popcol):
        raise Error('Denominator population dataset %r does not have a '
                    '%r column' % (popset.label or popset.name, popset_popcol))

    st = time.time()
    # Remember the current rpy conversion mode; it is restored in `finally`.
    r_mode = get_default_mode()
    try:
        set_default_mode(BASIC_CONVERSION)

        # We turn the summset into an Ncondcols-dimensional matrix
        summtab = CrossTab.from_summset(summset)

        # The population dataset must have at least as many dimensions as
        # summary dataset. Any additional axes are eliminated by summing.
        # any missing axes are created by replication.
        poptab = CrossTab.from_summset(popset, shaped_like=summtab)
        poptab.collapse_axes_not_in(summtab)
        poptab.replicate_axes(summtab)
        popfreq = poptab[popset_popcol].data.astype(Numeric.Float64)

        # Manufacture a CrossTab for the result
        result = summtab.empty_copy()

        basepop = float(basepop)

        for table, name, n_add, l_add in just_freq_tables(summtab):

            # avoid integer overflows...
            summfreq = table.data.astype(Numeric.Float64)

            strata_rate = summfreq / popfreq

            result.add_table('summfreq' + n_add,
                             data=summfreq,
                             label='Events' + l_add)
            result.add_table('popfreq' + n_add,
                             data=popfreq,
                             label='Person-' + timeinterval + ' at risk' + l_add)
            result.add_table('sr' + n_add,
                             data=strata_rate * basepop,
                             label='Strata-specific Rate per ' + '%d' % basepop +
                             ' person-' + timeinterval + l_add)

            if alpha is not None:
                # CIs for stratified rates
                summfreq_shape = summfreq.shape
                summfreq_flat = MA.ravel(summfreq)
                assert popfreq.shape == summfreq.shape
                popfreq_flat = MA.ravel(popfreq)

                # Flat result arrays plus masks; reshaped after the loop.
                sr_ll = Numeric.empty(len(summfreq_flat),
                                      typecode=Numeric.Float64)
                sr_ul = Numeric.empty(len(summfreq_flat),
                                      typecode=Numeric.Float64)
                sr_ll_mask = Numeric.zeros(len(summfreq_flat),
                                           typecode=Numeric.Int8)
                sr_ul_mask = Numeric.zeros(len(summfreq_flat),
                                           typecode=Numeric.Int8)

                for i, v in enumerate(summfreq_flat):
                    try:
                        # A zero count gets a lower limit of 0 without
                        # calling qchisq.
                        if v == 0:
                            sr_ll[i] = 0.0
                        else:
                            sr_ll[i] = (
                                (r.qchisq(alpha / 2., df=2.0 * v) / 2.0) /
                                popfreq_flat[i]) * basepop
                        sr_ul[i] = (
                            (r.qchisq(1. - alpha / 2., df=2.0 * (v + 1)) / 2.0) /
                            popfreq_flat[i]) * basepop
                    except:
                        # NOTE(review): the bare except masks the cell on
                        # any failure, including KeyboardInterrupt.
                        sr_ll[i] = 0.0
                        sr_ul[i] = 0.0
                        sr_ll_mask[i] = 1
                        sr_ul_mask[i] = 1

                sr_ll = MA.array(sr_ll, mask=sr_ll_mask, typecode=MA.Float64)
                sr_ul = MA.array(sr_ul, mask=sr_ul_mask, typecode=MA.Float64)
                sr_ll.shape = summfreq_shape
                sr_ul.shape = summfreq_shape

                sr_base = 'Stratified rate %s%%' % (100.0 * conflev)
                result.add_table('sr_ll' + n_add,
                                 data=sr_ll,
                                 label=sr_base + ' lower confidence limit ' + l_add)
                result.add_table('sr_ul' + n_add,
                                 data=sr_ul,
                                 label=sr_base + ' upper confidence limit ' + l_add)
    finally:
        set_default_mode(r_mode)
    soom.info('calc_stratified_rates took %.03f' % (time.time() - st))
    name = 'stratified_rates_' + summset.name
    label = 'Stratified Rates for ' + (summset.label or summset.name)
    if conflev:
        label += ' (%g%% conf. limits)' % (conflev * 100)
    if debug:
        # Expose the locals for interactive inspection (rebinds the
        # module-level name `vars`).
        global vars
        vars = Vars(locals())
    return result.to_summset(name, label=label)
def testeq(self):
    """Check the eq helper on scalars, list-vs-scalar and list-vs-array input."""
    # Scalars.
    assert eq(3, 3)
    assert not eq(3, 4)
    # A sequence compared with a scalar.
    threes = [3, 3, 3]
    assert eq(threes, 3)
    # A list compared with an MA array of the same values.
    floats = [2., 3., 4.]
    assert eq(floats, MA.array([2., 3., 4.]))
def calc_indirectly_std_ratios(summset, popset, stdsummset, stdpopset,
                               conflev=0.95, baseratio=100,
                               timeinterval='years',
                               popset_popcol='_freq_',
                               stdpopset_popcol='_stdpop_',
                               ci_method='daly', debug=False):
    """
    Calculate Indirectly Standardised Population Event Ratios

    - summset is a summary dataset of counts of events for the
      population-of-interest being compared to the standard population.
    - popset is the stratified population counts for the
      population-of-interest
    - stdsummset is a summary dataset of counts of events for the
      standard population
    - stdpopset is the stratified population counts for the standard
      population
    - conflev is the confidence level; it is passed to get_alpha() and no
      confidence limits are produced when that returns None
    - baseratio multiplies the ratios and their limits
    - timeinterval is only used in a table label
    - popset_popcol / stdpopset_popcol name the population columns of
      popset / stdpopset
    - ci_method must be 'daly'
    - debug, if true, stores the local variables in the module-level name
      `vars` as Vars(locals())

    Returns result.to_summset(name, label=label).

    Raises Error for an unsupported ci_method, a missing population column,
    or when the observed population does not match the required shape.
    """
    # rpy is imported locally, so it is only needed when this is called.
    from rpy import r, get_default_mode, set_default_mode, BASIC_CONVERSION

    alpha = get_alpha(conflev)

    if ci_method != 'daly':
        raise Error("Only Daly method for confidence intervals "
                    "currently implemented")
    if not popset.has_column(popset_popcol):
        raise Error('Denominator population dataset %r does not have a '
                    '%r column' % (popset.label or popset.name, popset_popcol))
    if not stdpopset.has_column(stdpopset_popcol):
        raise Error('Standard population dataset %r does not have a '
                    '%r column' % (stdpopset.label or stdpopset.name,
                                   stdpopset_popcol))

    st = time.time()
    # Remember the current rpy conversion mode; it is restored in `finally`.
    r_mode = get_default_mode()
    try:
        set_default_mode(BASIC_CONVERSION)

        # Bring all four datasets to the common shape.
        shape = shape_union(stdsummset, summset)

        summtab = CrossTab.from_summset(summset, shaped_like=shape)

        stdsummtab = CrossTab.from_summset(stdsummset, shaped_like=shape)

        stdpoptab = CrossTab.from_summset(stdpopset, shaped_like=shape)
        stdpoptab.collapse_axes_not_in(stdsummtab)

        stdsummtab.replicate_axes(shape)
        stdpoptab.replicate_axes(shape)

        poptab = CrossTab.from_summset(popset, shaped_like=shape)
        poptab.collapse_axes_not_in(shape)
        if poptab.get_shape() != stdsummtab.get_shape():
            raise Error(
                'Observed population does not have all the required columns')
        popfreq = poptab[popset_popcol].data.astype(MA.Float64)

        result = stdsummtab.empty_copy()
        result.add_table('popfreq',
                         data=popfreq,
                         label='Total person-' + timeinterval + ' at risk')

        # NOTE(review): expected_cols is filled but not read again in this
        # function.
        expected_cols = []
        for table, name, n_add, l_add in just_freq_tables(stdsummtab):
            stdsummfreq = stdsummtab[name].data.astype(MA.Float64)
            stdpopfreq = stdpoptab[stdpopset_popcol].data.astype(MA.Float64)
            std_strata_rates = stdsummfreq / stdpopfreq
            strata_expected_freq = std_strata_rates * popfreq
            # print stdsummfreq[0,0,0], stdpopfreq[0,0,0], popfreq[0,0,0]
            result.add_table('expected' + n_add,
                             data=strata_expected_freq,
                             label='Expected events' + l_add)
            expected_cols.append('expected' + n_add)

        result.collapse_axes_not_in(summtab)

        # NOTE(review): axis is assigned but not used afterwards here.
        axis = 0
        baseratio = float(baseratio)

        for table, name, n_add, l_add in just_freq_tables(summtab):
            observed = table.data.astype(Numeric.Float64)
            result.add_table('observed' + n_add,
                             data=observed,
                             label='Observed events' + l_add)

            expected = result['expected' + n_add].data

            isr = observed / expected
            result.add_table('isr' + n_add,
                             data=isr * baseratio,
                             label='Indirectly Standardised Event Ratio')

            # Confidence Intervals
            if alpha is None or name != '_freq_':
                # Can only calculate confidence intervals on freq cols
                continue

            conflev_l = (1 - conflev) / 2.0
            conflev_u = (1 + conflev) / 2.0

            # get shape of observed
            observed_shape = observed.shape
            # flattened version
            observed_flat = MA.ravel(observed)
            # sanity check on shapes - should be the same!
            assert expected.shape == observed.shape
            # flattened version of expected
            expected_flat = MA.ravel(expected)

            # flat arrays to hold results, plus their masks
            isr_ll = Numeric.empty(len(observed_flat),
                                   typecode=Numeric.Float64)
            isr_ul = Numeric.empty(len(observed_flat),
                                   typecode=Numeric.Float64)
            isr_ll_mask = Numeric.zeros(len(observed_flat),
                                        typecode=Numeric.Int8)
            isr_ul_mask = Numeric.zeros(len(observed_flat),
                                        typecode=Numeric.Int8)

            obs_mask = MA.getmaskarray(observed_flat)
            exp_mask = MA.getmaskarray(expected_flat)

            for i, v in enumerate(observed_flat):
                if obs_mask[i] or exp_mask[i]:
                    # Either input is masked: mask both limits.
                    isr_ll[i] = 0.0
                    isr_ul[i] = 0.0
                    isr_ll_mask[i] = 1
                    isr_ul_mask[i] = 1
                else:
                    # A zero count uses closed-form limits; otherwise the
                    # limits come from R's qgamma.
                    if v == 0.:
                        obs_ll = 0.0
                        obs_ul = -math.log(1 - conflev)
                    else:
                        obs_ll = r.qgamma(conflev_l, v, scale=1.)
                        obs_ul = r.qgamma(conflev_u, v + 1., scale=1.)
                    isr_ll[i] = obs_ll / expected_flat[i]
                    isr_ul[i] = obs_ul / expected_flat[i]

            isr_ll = MA.array(isr_ll, typecode=MA.Float64, mask=isr_ll_mask)
            isr_ul = MA.array(isr_ul, typecode=MA.Float64, mask=isr_ul_mask)
            isr_ll.shape = observed_shape
            isr_ul.shape = observed_shape

            isr_base = 'ISR %d%%' % (100.0 * conflev)
            result.add_table('isr_ll' + n_add,
                             data=isr_ll * baseratio,
                             label=isr_base + ' lower confidence limit' + l_add)
            result.add_table('isr_ul' + n_add,
                             data=isr_ul * baseratio,
                             label=isr_base + ' upper confidence limit' + l_add)
    finally:
        set_default_mode(r_mode)
    soom.info('calc_indirectly_std_ratios took %.03f' % (time.time() - st))
    name = 'indir_std_ratios_' + summset.name
    label = 'Indirectly Standardised Ratios for ' + (summset.label or
                                                     summset.name)
    if conflev:
        label += ' (%g%% conf. limits)' % (conflev * 100)

    if debug:
        # Expose the locals for interactive inspection (rebinds the
        # module-level name `vars`).
        global vars
        vars = Vars(locals())
    return result.to_summset(name, label=label)
def calc_directly_std_rates(summset, popset, stdpopset=None,
                            conflev=0.95, basepop=100000,
                            timeinterval='years',
                            ci_method='dobson',
                            popset_popcol='_freq_',
                            stdpopset_popcol='_stdpop_',
                            axis=0,
                            debug=False):
    """
    Calculate Directly Standardised Population Rates

    summset     is a summary dataset of counts of events for the
                population-of-interest being compared to the standard
                population.
    popset      is the stratified population counts for the
                population-of-interest
    stdpopset   is the stratified population counts for the standard
                population. When None, only crude rates are produced.
    conflev     confidence level; passed to get_alpha(). When that returns
                None no confidence limits are produced.
    basepop     rates and limits are multiplied by this value
    timeinterval    only used in the table labels
    ci_method   'dobson' or 'ff'; selects how the limits of the directly
                standardised rate are computed
    popset_popcol, stdpopset_popcol
                names of the population columns of popset and stdpopset
    axis        index of the axis that is summed over and removed from the
                result
    debug       if true, the local variables are stored in the module-level
                name `vars` as Vars(locals())

    Returns result.to_summset(name, label=label).

    Raises Error for an unsupported ci_method or a missing population
    column.
    """
    # rpy is imported locally, so it is only needed when this is called.
    from rpy import r, get_default_mode, set_default_mode, BASIC_CONVERSION

    alpha = get_alpha(conflev)

    if ci_method not in ('dobson', 'ff'):
        raise Error('Only Dobson et al. (dobson) and Fay-Feuer (ff) methods '
                    'for confidence intervals currently implemented')
    if not popset.has_column(popset_popcol):
        raise Error('Denominator population dataset %r does not have a '
                    '%r column' % (popset.label or popset.name, popset_popcol))
    if stdpopset is not None and not stdpopset.has_column(stdpopset_popcol):
        raise Error('Standard population dataset %r does not have a '
                    '%r column' % (stdpopset.label or stdpopset.name,
                                   stdpopset_popcol))

    st = time.time()
    # Remember the current rpy conversion mode; it is restored in `finally`.
    r_mode = get_default_mode()
    try:
        set_default_mode(BASIC_CONVERSION)

        # We turn the summset into an Ncondcols-dimensional matrix
        summtab = CrossTab.from_summset(summset)

        if stdpopset is not None:
            # Then attempt to do the same to the stdpop data, summing any
            # axes not required and replicate any missing until we have an
            # array the same shape as the summtab array.
            stdtab = CrossTab.from_summset(stdpopset, shaped_like=summtab)
            stdtab.collapse_axes_not_in(summtab)
            stdtab.replicate_axes(summtab)
            stdpop = stdtab[stdpopset_popcol].data.astype(Numeric.Float64)

        # The population dataset must have at least as many dimensions as
        # summary dataset. Any additional axes are eliminated by summing.
        # any missing axes are created by replication.
        poptab = CrossTab.from_summset(popset, shaped_like=summtab)
        poptab.collapse_axes_not_in(summtab)
        poptab.replicate_axes(summtab)
        popfreq = poptab[popset_popcol].data.astype(Numeric.Float64)

        # Manufacture a CrossTab for the result, with one less axis (the first)
        result = summtab.empty_copy()
        del result.axes[axis]

        if stdpopset is not None:
            # NOTE(review): sumaxis() is called without `axis` here, unlike
            # every other call below -- confirm this is intended when
            # axis != 0.
            sum_stdpop = sumaxis(stdpop)
            stdwgts = stdpop / sum_stdpop
            stdpop_sq = stdpop**2
            # NOTE(review): sum_stdpop_sq is not read again in this function.
            sum_stdpop_sq = sum_stdpop**2
            # Per-stratum weight and its maximum, used by the 'ff' method.
            ffwi = stdwgts / popfreq
            ffwm = MA.maximum(MA.ravel(ffwi))

        basepop = float(basepop)

        for table, name, n_add, l_add in just_freq_tables(summtab):

            # avoid integer overflows...
            summfreq = table.data.astype(Numeric.Float64)
            strata_rate = summfreq / popfreq

            result.add_table('summfreq' + n_add,
                             data=sumaxis(summfreq, axis),
                             label='Total events' + l_add)
            result.add_table('popfreq' + n_add,
                             data=sumaxis(popfreq, axis),
                             label='Total person-' + timeinterval + ' at risk' + l_add)

            if stdpopset is not None:
                # Zero the event counts wherever the standard weights are
                # masked.
                std_strata_summfreq = summfreq * Numeric.where(
                    MA.getmask(stdwgts), 0., 1.)
                wgtrate = strata_rate * stdwgts
                result.add_table('std_strata_summfreq' + n_add,
                                 data=sumaxis(std_strata_summfreq, axis),
                                 label="Total events in standard strata" + l_add)

            # Crude rate
            cr = sumaxis(summfreq, axis) / sumaxis(popfreq, axis) * basepop
            result.add_table('cr' + n_add,
                             data=cr,
                             label='Crude Rate per ' + '%d' % basepop +
                             ' person-' + timeinterval + l_add)

            if alpha is not None:
                # CIs for crude rate
                count = sumaxis(summfreq, axis)
                count_shape = count.shape
                count_flat = MA.ravel(count)
                totpop = sumaxis(popfreq, axis)
                assert totpop.shape == count.shape
                totpop_flat = MA.ravel(totpop)

                # Flat result arrays plus masks; reshaped after the loop.
                cr_ll = Numeric.empty(len(count_flat), typecode=Numeric.Float64)
                cr_ul = Numeric.empty(len(count_flat), typecode=Numeric.Float64)
                cr_ll_mask = Numeric.zeros(len(count_flat),
                                           typecode=Numeric.Int8)
                cr_ul_mask = Numeric.zeros(len(count_flat),
                                           typecode=Numeric.Int8)

                for i, v in enumerate(count_flat):
                    try:
                        # A zero count gets a lower limit of 0 without
                        # calling qchisq.
                        if v == 0:
                            cr_ll[i] = 0.0
                        else:
                            cr_ll[i] = (
                                (r.qchisq(alpha / 2., df=2.0 * v) / 2.0) /
                                totpop_flat[i]) * basepop
                        cr_ul[i] = (
                            (r.qchisq(1. - alpha / 2., df=2.0 * (v + 1)) / 2.0) /
                            totpop_flat[i]) * basepop
                    except:
                        # NOTE(review): the bare except masks the cell on
                        # any failure, including KeyboardInterrupt.
                        cr_ll[i] = 0.0
                        cr_ul[i] = 0.0
                        cr_ll_mask[i] = 1
                        cr_ul_mask[i] = 1

                cr_ll = MA.array(cr_ll, mask=cr_ll_mask, typecode=MA.Float64)
                cr_ul = MA.array(cr_ul, mask=cr_ul_mask, typecode=MA.Float64)
                cr_ll.shape = count_shape
                cr_ul.shape = count_shape

                cr_base = 'Crude rate %d%%' % (100.0 * conflev)
                result.add_table('cr_ll' + n_add,
                                 data=cr_ll,
                                 label=cr_base + ' lower confidence limit ' + l_add)
                result.add_table('cr_ul' + n_add,
                                 data=cr_ul,
                                 label=cr_base + ' upper confidence limit ' + l_add)

            if stdpopset is not None:

                # Directly Standardised Rate
                dsr = sumaxis(wgtrate, axis)
                result.add_table('dsr' + n_add,
                                 data=dsr * basepop,
                                 label='Directly Standardised Rate per ' +
                                 '%d' % basepop + ' person-' + timeinterval + l_add)

                # Confidence Intervals
                if alpha is None or name != '_freq_':
                    # Can only calculate confidence intervals on freq cols
                    continue

                if ci_method == 'dobson':
                    # Dobson et al method
                    # see: Dobson A, Kuulasmaa K, Eberle E, Schere J. Confidence intervals for weighted sums
                    # of Poisson parameters, Statistics in Medicine, Vol. 10, 1991, pp. 457-62.
                    # se_wgtrate = summfreq*((stdwgts/(popfreq/basepop))**2)
                    se_wgtrate = summfreq * ((stdwgts / (popfreq))**2)
                    stderr = stdpop_sq * strata_rate * (1.0 - strata_rate)
                    se_rate = sumaxis(se_wgtrate, axis)
                    # NOTE(review): sumsei is not read again in this function.
                    sumsei = sumaxis(stderr, axis)
                    total_freq = sumaxis(std_strata_summfreq, axis)
                    # get shape of total_freq
                    total_freq_shape = total_freq.shape

                    total_freq_flat = MA.ravel(total_freq)

                    # flat arrays to hold results and associated masks
                    l_lam = Numeric.empty(len(total_freq_flat),
                                          typecode=Numeric.Float64)
                    u_lam = Numeric.empty(len(total_freq_flat),
                                          typecode=Numeric.Float64)
                    l_lam_mask = Numeric.zeros(len(total_freq_flat),
                                               typecode=Numeric.Int8)
                    u_lam_mask = Numeric.zeros(len(total_freq_flat),
                                               typecode=Numeric.Int8)

                    conflev_l = (1 - conflev) / 2.0
                    conflev_u = (1 + conflev) / 2.0

                    for i, v in enumerate(total_freq_flat):
                        try:
                            # A zero count uses closed-form limits;
                            # otherwise they come from R's qgamma.
                            if v == 0.:
                                u_lam[i] = -math.log(1 - conflev)
                                l_lam[i] = 0.0
                            else:
                                l_lam[i] = r.qgamma(conflev_l, v, scale=1.)
                                u_lam[i] = r.qgamma(conflev_u, v + 1., scale=1.)
                        except:
                            # NOTE(review): bare except; see the crude-rate
                            # loop above.
                            l_lam[i] = 0.0
                            u_lam[i] = 0.0
                            l_lam_mask[i] = 1
                            u_lam_mask[i] = 1

                    l_lam = MA.array(l_lam, mask=l_lam_mask,
                                     typecode=MA.Float64)
                    u_lam = MA.array(u_lam, mask=u_lam_mask,
                                     typecode=MA.Float64)
                    l_lam.shape = total_freq_shape
                    u_lam.shape = total_freq_shape
                    dsr_ll = dsr + (((se_rate / total_freq)**0.5) *
                                    (l_lam - total_freq))
                    dsr_ul = dsr + (((se_rate / total_freq)**0.5) *
                                    (u_lam - total_freq))

                elif ci_method == 'ff':
                    # Fay and Feuer method
                    # see: Fay MP, Feuer EJ. Confidence intervals for directly standardized rates:
                    # a method based on the gamma distribution. Statistics in Medicine 1997 Apr 15;16(7):791-801.

                    ffvari = summfreq * ffwi**2.0
                    ffvar = sumaxis(ffvari, axis)

                    # Masked cells are filled with 0 before flattening.
                    dsr_flat = Numeric.ravel(MA.filled(dsr, 0))
                    dsr_shape = dsr.shape

                    ffvar_flat = Numeric.ravel(MA.filled(ffvar, 0))

                    # flat arrays to hold results and associated masks
                    dsr_ll = Numeric.empty(len(dsr_flat),
                                           typecode=Numeric.Float64)
                    dsr_ul = Numeric.empty(len(dsr_flat),
                                           typecode=Numeric.Float64)
                    dsr_ll_mask = Numeric.zeros(len(dsr_flat),
                                                typecode=Numeric.Int8)
                    dsr_ul_mask = Numeric.zeros(len(dsr_flat),
                                                typecode=Numeric.Int8)

                    for i, y in enumerate(dsr_flat):
                        try:
                            dsr_ll[i] = (ffvar_flat[i] / (2.0 * y)) * r.qchisq(
                                alpha / 2., df=(2.0 * (y**2.) / ffvar_flat[i]))
                            dsr_ul[i] = ((ffvar_flat[i] + (ffwm**2.0)) /
                                         (2.0 * (y + ffwm))) * r.qchisq(
                                1. - alpha / 2.,
                                df=((2.0 * ((y + ffwm)**2.0)) /
                                    (ffvar_flat[i] + ffwm**2.0)))
                        except:
                            # NOTE(review): bare except; any failure (for
                            # example a division by zero) masks the cell.
                            dsr_ll[i] = 0.0
                            dsr_ul[i] = 0.0
                            dsr_ll_mask[i] = 1
                            dsr_ul_mask[i] = 1
                    dsr_ll = MA.array(dsr_ll, mask=dsr_ll_mask,
                                      typecode=MA.Float64)
                    dsr_ul = MA.array(dsr_ul, mask=dsr_ul_mask,
                                      typecode=MA.Float64)
                    dsr_ll.shape = dsr_shape
                    dsr_ul.shape = dsr_shape

                result.add_table('dsr_ll' + n_add,
                                 data=dsr_ll * basepop,
                                 label='DSR ' + '%d' % (100.0 * conflev) +
                                 '% lower confidence limit' + l_add)
                result.add_table('dsr_ul' + n_add,
                                 data=dsr_ul * basepop,
                                 label='DSR ' + '%d' % (100.0 * conflev) +
                                 '% upper confidence limit' + l_add)

    finally:
        set_default_mode(r_mode)
    soom.info('calc_directly_std_rates took %.03f' % (time.time() - st))
    if stdpopset is not None:
        name = 'dir_std_rates_' + summset.name
        label = 'Directly Standardised Rates for ' + (summset.label or
                                                      summset.name)
    else:
        name = 'crude_rates_' + summset.name
        label = 'Crude Rates for ' + (summset.label or summset.name)
    if conflev:
        label += ' (%g%% conf. limits)' % (conflev * 100)
    if debug:
        # Expose the locals for interactive inspection (rebinds the
        # module-level name `vars`).
        global vars
        vars = Vars(locals())
    return result.to_summset(name, label=label)
def fully_masked(shape, typecode='i'):
    """Return an MA array of the given shape and typecode with every element masked."""
    # The data values are left uninitialised; the all-ones mask hides them.
    payload = Numeric.empty(shape, typecode=typecode)
    everything = Numeric.ones(shape, typecode='b', savespace=1)
    return MA.array(payload, mask=everything)
# the State of New South Wales, Australia. # # Copyright (C) 2004,2005 Health Administration Corporation. # All Rights Reserved. # # $Id: matest.py 2626 2007-03-09 04:35:54Z andrewm $ # $Source: /usr/local/cvsroot/NSWDoH/SOOMv0/soomext/matest.py,v $ import MA import Numeric from soomarray import ArrayDict ad = ArrayDict('blah.dat', 'r+') a = Numeric.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], Numeric.Int) m = Numeric.array([0, 0, 0, 0, 0, 1, 0, 0, 1, 0], Numeric.Int) ad['matest1'] = MA.array(a, mask=m) del ad ad = ArrayDict('blah.dat') matest = ad['matest1'] print "matest: ", matest print "sum of matest: ", MA.sum(matest) print "length of matest: ", len(matest) print "count of matest: ", MA.count(matest) print "average of matest: ", MA.average(matest) print "minimum of matest: ", MA.minimum(matest) print "maximum of matest: ", MA.maximum(matest) del ad
def setUp(self):
    """Create the floating-point fixtures `self.a` and `self.m`."""
    ramp = MA.arange(6)
    self.a = .01 + ramp / 8.0
    grid = MA.array([[1, 2, 3], [11, 12, 13]])
    self.m = grid / 16.0
# Timing script: adds 2 to the 'air_temperature' variable of test0.nc
# twice, printing time.time() before, between and after the two passes.
import cdms, time, MA
f = cdms.open('test0.nc', 'a')
t = f.variables['air_temperature']
print time.time()
# Pass 1: fetch the values with getValue(), update them row by row, then
# assign the whole array back in a single statement.
x = t.getValue()
for i in range(t.shape[0]):
    for j in range(t.shape[1]):
        x[i, j, :] += 2.
t[:, :, :] = x
print time.time()
# Pass 2: read, update and assign one row of the file variable at a time.
for i in range(t.shape[0]):
    for j in range(t.shape[1]):
        t[i, j, :] = MA.array(t[i, j, :] + 2., 'f')
print time.time()
f.close()
def __init__(self) :
    """Create an undefined state: six fully masked values, status "indefini"."""
    slots = 6
    # Every entry starts at -1 and is masked; the fill value is -1e3.
    self.car = Ma.array([-1.] * slots, mask=[1] * slots, fill_value=-1e3)
    self.status = "indefini"
    # Names of the six entries of `car`, in order.
    self.header = ["H", "na", "T", "HR", "Tr", "Th"]
def setUp(self):
    """Create the integer fixtures `self.a` (0..5) and `self.m` (2x3)."""
    self.a = MA.arange(6)
    rows = [[1, 2, 3], [11, 12, 13]]
    self.m = MA.array(rows)
# How to use numpy with 'None' value in Python? import MA a = MA.array([1, 2, None], mask = [0, 0, 1]) print "average =", MA.average(a)