def to_R(self, ds, dates_as_factor, suppress_all_value=True, **kwargs):
    """Convert this object's column of ``ds`` into a value to hand to R.

    Parameters:
        ds -- dataset, indexed by ``self.colname`` to obtain the column.
        dates_as_factor -- when false, a date/time column is returned
            straight after date conversion; when true it continues on to
            the discrete/continuous conversion like any other column.
        suppress_all_value -- passed through to ``discrete_col_to_R``.
        **kwargs -- accepted and ignored.

    Returns:
        None when ``self.colname`` is not set; otherwise the converted
        data (from ``rconv`` for plain date columns, or from
        ``discrete_col_to_R`` / ``continuous_col_to_R``).

    Raises:
        PlotError -- the column is a masked array, ``missing_as_none`` is
            false, and every data point is masked.
    """
    if not self.colname:
        return None

    col = ds[self.colname]

    if col.is_datetimetype():
        rdata = rconv.Missing_Date_to_R(col)
        if not dates_as_factor:
            return rdata
    elif MA.isMaskedArray(col.data):
        if self.missing_as_none:
            # Masked entries are replaced by sys.maxint as the
            # "missing" sentinel.
            rdata = MA.filled(col.data, sys.maxint)
        else:
            if Numeric.alltrue(col.data.mask()):
                raise PlotError('%r: all data-points masked' % col.name)
            rdata = rconv.MA_to_R(col.data)
    else:
        rdata = col.data

    if col.is_discrete():
        return self.discrete_col_to_R(col, rdata, suppress_all_value)
    return self.continuous_col_to_R(col, rdata)
def rgrd(self, dataIn=None):
    """
    --------------------------------------------------------------------------------------------------------
     ROUTINE:  rgrd

     PURPOSE:  To perform one of the following two possible computations in 3-space:

                   To interpolate random data in 3-space using a modified Shepard's algorithm

                   To find the nearest neighbor to a user specified point

     USAGE:    To interpolate use

                   dataOut = r.rgrd(dataIn) where

                       r       -- an instance of Shgrid
                       dataIn  -- input data
                       dataOut -- output data

               To locate the nearest point use

                   np = r.rgrd(numberPoints) where

                       r  -- an instance of Shgrid
                       np -- the array with numberPoints indices into the input grid
                             identifying the nearest points

     NOTES:    The mode is selected by self.callGridded ('yes' means interpolation).

               In interpolation mode the result of shgrid is transposed and, if any of
               self.xreverse / self.yreverse / self.zreverse is 'yes', passed through
               Shgrid.reverseData before being returned.

               In nearest-neighbor mode a request for 0 points returns an empty int32 array.

     RAISES:   ValueError when the underlying routine returns a non-zero error code; the
               message text is looked up in Shgrid.errorTable.
    --------------------------------------------------------------------------------------------------------"""
    if self.callGridded == 'yes':
        # Interpolation in 3-space
        if usefilled == 'yes':
            dataIn = MA.filled(dataIn)

        if debug == 1:
            print('calling shgrid')

        # Integer and real work arrays handed to shgrid, sized from self.ni.
        iwk = numpy.zeros((2 * self.ni,), 'i')
        rwk = numpy.zeros((11 * self.ni + 6,), 'f')

        dataOut, ier = shgridmodule.shgrid(self.xi, self.yi, self.zi, dataIn,
                                           self.xo, self.yo, self.zo,
                                           iwk, rwk)
        dataOut = numpy.transpose(dataOut)

        if debug == 1:
            print('***************** returning from shgrid with ier =  %s' % (ier,))

        if ier != 0:
            msg = 'Error in return from shgrid call with -- ' + Shgrid.errorTable(self)[ier]
            raise ValueError(msg)

        # Put the output back in the caller's axis order if any axis is flagged as reversed.
        if (self.xreverse == 'yes') or (self.yreverse == 'yes') or (self.zreverse == 'yes'):
            dataOut = Shgrid.reverseData(self, dataOut)

        return dataOut

    # Computation of nearest neighbors
    if debug == 1:
        print('calling shgetnp')

    numberPoints = dataIn
    nearest = numpy.zeros((numberPoints,), numpy.int32)

    n = self.ni
    iwk = numpy.zeros((2 * n,), numpy.int32)
    rwk = numpy.zeros((11 * n + 6,), numpy.float32)

    for i in range(numberPoints):
        # iflag is 0 on the first call and 1 on every later call; the same work
        # arrays are passed each time.
        # NOTE(review): presumably this is how shgetnp walks to the next-nearest
        # point on successive calls -- confirm against the shgetnp documentation.
        if i == 0:
            iflag = 0
        else:
            iflag = 1

        nearest[i], ier = shgridmodule.shgetnp(self.xo, self.yo, self.zo,
                                               self.xi, self.yi, self.zi,
                                               iflag, iwk, rwk)

        if debug == 1:
            print('***************** returning from shgetnp with ier =  %s' % (ier,))

        if ier != 0:
            msg = 'Error in return from shgetnp call with -- ' + Shgrid.errorTable(self)[ier]
            raise ValueError(msg)

    return nearest
nhlat = fice.shape[1] nhlon = fice.shape[2] nmo = 0 month = nmo+1 icemon = MA.zeros((nhlat,nhlon),MA.Float0) for i in xrange(fice_masked.shape[0]): for j in xrange(fice_masked.shape[1]): icemon[i,j] = MA.average(fice_masked[i,j,0:ntime:12]) # # Fill the places where icemon is zero with the fill value. # icemon = MA.masked_values(icemon,0.,rtol=0.,atol=1.e-15) icemon = MA.filled(icemon,value=fill_value) # Calculate the January (nmo=0) average. nsub = 16 # Subscript location of northernmost hlat to be plotted. cmap = Numeric.array([ \ [1.00,1.00,1.00], [0.00,0.00,0.00], [1.00,1.00,0.50], \ [0.00,0.00,0.50], [0.50,1.00,1.00], [0.50,0.00,0.00], \ [1.00,0.00,1.00], [0.00,1.00,1.00], [1.00,1.00,0.00], \ [0.00,0.00,1.00], [0.00,1.00,0.00], [1.00,0.00,0.00], \ [0.50,0.00,1.00], [1.00,0.50,0.00], [0.00,0.50,1.00], \ [0.50,1.00,0.00], [0.50,0.00,0.50], [0.50,1.00,0.50], \ [1.00,0.50,1.00], [0.00,0.50,0.00], [0.50,0.50,1.00], \ [1.00,0.00,0.50], [0.50,0.50,0.00], [0.00,0.50,0.50], \
def rgrd(self, dataIn = None):
    """
    --------------------------------------------------------------------------------------------------------
     ROUTINE:  rgrd

     PURPOSE:  To perform one of the following two possible computations in 3-space:

                   To interpolate random data in 3-space using a modified Shepard's algorithm

                   To find the nearest neighbor to a user specified point

     USAGE:    To interpolate use

                   dataOut = r.rgrd(dataIn) where

                       r       -- an instance of Shgrid
                       dataIn  -- input data
                       dataOut -- output data

               To locate the nearest point use

                   np = r.rgrd(numberPoints) where

                       r  -- an instance of Shgrid
                       np -- the array with numberPoints indices into the input grid
                             identifying the nearest points

     NOTES:    The mode is selected by self.callGridded ('yes' means interpolation).

               In interpolation mode the result of shgrid is transposed and, if any of
               self.xreverse / self.yreverse / self.zreverse is 'yes', passed through
               Shgrid.reverseData before being returned.

               In nearest-neighbor mode a request for 0 points returns an empty int32 array.

     RAISES:   ValueError when the underlying routine returns a non-zero error code; the
               message text is looked up in Shgrid.errorTable.
    --------------------------------------------------------------------------------------------------------"""
    if self.callGridded == 'yes':
        # Interpolation in 3-space
        if usefilled == 'yes':
            dataIn = MA.filled(dataIn)

        if debug == 1:
            print('calling shgrid')

        # Integer and real work arrays handed to shgrid, sized from self.ni.
        iwk = numpy.zeros((2*self.ni,), 'i')
        rwk = numpy.zeros((11*self.ni + 6,), 'f')

        dataOut, ier = shgridmodule.shgrid(self.xi, self.yi, self.zi, dataIn,
                                           self.xo, self.yo, self.zo,
                                           iwk, rwk)
        dataOut = numpy.transpose(dataOut)

        if debug == 1:
            print('***************** returning from shgrid with ier =  %s' % (ier,))

        if ier != 0:
            msg = 'Error in return from shgrid call with -- ' + Shgrid.errorTable(self)[ier]
            raise ValueError(msg)

        # Put the output back in the caller's axis order if any axis is flagged as reversed.
        if (self.xreverse == 'yes') or (self.yreverse == 'yes') or (self.zreverse == 'yes'):
            dataOut = Shgrid.reverseData(self, dataOut)

        return dataOut

    # Computation of nearest neighbors
    if debug == 1:
        print('calling shgetnp')

    numberPoints = dataIn
    nearest = numpy.zeros((numberPoints,), numpy.int32)

    n = self.ni
    iwk = numpy.zeros((2*n,), numpy.int32)
    rwk = numpy.zeros((11*n + 6,), numpy.float32)

    for i in range(numberPoints):
        # iflag is 0 on the first call and 1 on every later call; the same work
        # arrays are passed each time.
        # NOTE(review): presumably this is how shgetnp walks to the next-nearest
        # point on successive calls -- confirm against the shgetnp documentation.
        if i == 0:
            iflag = 0
        else:
            iflag = 1

        nearest[i], ier = shgridmodule.shgetnp(self.xo, self.yo, self.zo,
                                               self.xi, self.yi, self.zi,
                                               iflag, iwk, rwk)

        if debug == 1:
            print('***************** returning from shgetnp with ier =  %s' % (ier,))

        if ier != 0:
            msg = 'Error in return from shgetnp call with -- ' + Shgrid.errorTable(self)[ier]
            raise ValueError(msg)

    return nearest
def calc_directly_std_rates(summset, popset, stdpopset=None,
                            conflev=0.95, basepop=100000,
                            timeinterval='years',
                            ci_method='dobson', popset_popcol='_freq_',
                            stdpopset_popcol='_stdpop_',
                            axis=0,
                            debug=False):
    """
    Calculate Directly Standardised Population Rates

    summset     is a summary dataset of counts of events for the
                population-of-interest being compared to the standard
                population.
    popset      is the stratified population counts for the
                population-of-interest
    stdpopset   is the stratified population counts for the standard
                population.  When None, only crude rates (and their
                confidence limits) are produced.
    conflev     confidence level; converted to alpha via get_alpha().
                Confidence limits are only computed when that alpha is
                not None.
    basepop     rates are multiplied by this value ("per basepop").
    timeinterval
                text used in the person-time labels of the result.
    ci_method   'dobson' or 'ff' - method for the confidence limits of
                the directly standardised rate.
    popset_popcol, stdpopset_popcol
                names of the population count columns in popset and
                stdpopset respectively.
    axis        the axis that is summed over; it is removed from the
                result.
    debug       when true, the function's locals are stored in the
                module-level name ``vars`` (as Vars(locals())).

    Returns the result CrossTab converted with to_summset(); it is named
    'dir_std_rates_<summset.name>' or, without a standard population,
    'crude_rates_<summset.name>'.

    Raises Error for an unknown ci_method or when a population dataset
    lacks its population column.
    """
    from rpy import r, get_default_mode, set_default_mode, BASIC_CONVERSION

    alpha = get_alpha(conflev)

    if ci_method not in ('dobson', 'ff'):
        raise Error('Only Dobson et al. (dobson) and Fay-Feuer (ff) methods '
                    'for confidence intervals currently implemented')
    if not popset.has_column(popset_popcol):
        raise Error('Denominator population dataset %r does not have a '
                    '%r column' % (popset.label or popset.name,
                                   popset_popcol))
    if stdpopset is not None and not stdpopset.has_column(stdpopset_popcol):
        raise Error('Standard population dataset %r does not have a '
                    '%r column' % (stdpopset.label or stdpopset.name,
                                   stdpopset_popcol))

    st = time.time()
    # The rpy conversion mode is switched for the duration of the
    # calculation and restored in the finally clause below.
    r_mode = get_default_mode()
    try:
        set_default_mode(BASIC_CONVERSION)

        # We turn the summset into an Ncondcols-dimensional matrix
        summtab = CrossTab.from_summset(summset)

        if stdpopset is not None:
            # Then attempt to do the same to the stdpop data, summing any
            # axes not required and replicate any missing until we have an
            # array the same shape as the summtab array.
            stdtab = CrossTab.from_summset(stdpopset, shaped_like=summtab)
            stdtab.collapse_axes_not_in(summtab)
            stdtab.replicate_axes(summtab)
            stdpop = stdtab[stdpopset_popcol].data.astype(Numeric.Float64)

        # The population dataset must have at least as many dimensions as
        # summary dataset. Any additional axes are eliminated by summing.
        # any missing axes are created by replication.
        poptab = CrossTab.from_summset(popset, shaped_like=summtab)
        poptab.collapse_axes_not_in(summtab)
        poptab.replicate_axes(summtab)
        popfreq = poptab[popset_popcol].data.astype(Numeric.Float64)

        # Manufacture a CrossTab for the result, with the summed-over axis
        # removed (the first axis by default).
        result = summtab.empty_copy()
        del result.axes[axis]

        # NOTE: several intermediates below (stdpop_sq, sum_stdpop_sq,
        # stderr, sumsei) are not used by the calculation itself; they are
        # kept because debug mode exports locals() for inspection.
        if stdpopset is not None:
            # NOTE(review): sumaxis is called without `axis` here, unlike
            # every other call in this function - confirm this is intended
            # when axis != 0.
            sum_stdpop = sumaxis(stdpop)
            stdwgts = stdpop / sum_stdpop
            stdpop_sq = stdpop**2
            sum_stdpop_sq = sum_stdpop**2
            ffwi = stdwgts / popfreq
            ffwm = MA.maximum(MA.ravel(ffwi))

        basepop = float(basepop)

        for table, name, n_add, l_add in just_freq_tables(summtab):

            # avoid integer overflows...
            summfreq = table.data.astype(Numeric.Float64)
            strata_rate = summfreq / popfreq

            result.add_table('summfreq' + n_add,
                             data=sumaxis(summfreq, axis),
                             label='Total events' + l_add)
            result.add_table('popfreq' + n_add,
                             data=sumaxis(popfreq, axis),
                             label='Total person-' + timeinterval + ' at risk' + l_add)

            if stdpopset is not None:
                # Events are zeroed wherever the standard weight is masked.
                std_strata_summfreq = summfreq * Numeric.where(
                    MA.getmask(stdwgts), 0., 1.)
                wgtrate = strata_rate * stdwgts
                result.add_table('std_strata_summfreq' + n_add,
                                 data=sumaxis(std_strata_summfreq, axis),
                                 label="Total events in standard strata" + l_add)

            # Crude rate
            cr = sumaxis(summfreq, axis) / sumaxis(popfreq, axis) * basepop
            result.add_table('cr' + n_add, data=cr,
                             label='Crude Rate per ' + '%d' % basepop + ' person-' + timeinterval + l_add)

            if alpha is not None:
                # CIs for crude rate
                count = sumaxis(summfreq, axis)
                count_shape = count.shape
                count_flat = MA.ravel(count)
                totpop = sumaxis(popfreq, axis)
                assert totpop.shape == count.shape
                totpop_flat = MA.ravel(totpop)

                # flat arrays to hold results and associated masks
                cr_ll = Numeric.empty(len(count_flat), typecode=Numeric.Float64)
                cr_ul = Numeric.empty(len(count_flat), typecode=Numeric.Float64)
                cr_ll_mask = Numeric.zeros(len(count_flat), typecode=Numeric.Int8)
                cr_ul_mask = Numeric.zeros(len(count_flat), typecode=Numeric.Int8)

                for i, v in enumerate(count_flat):
                    try:
                        if v == 0:
                            cr_ll[i] = 0.0
                        else:
                            cr_ll[i] = ((r.qchisq(alpha / 2., df=2.0 * v) / 2.0) / totpop_flat[i]) * basepop
                        # The upper limit is computed for zero counts too.
                        cr_ul[i] = ((r.qchisq(1. - alpha / 2., df=2.0 * (v + 1)) / 2.0) / totpop_flat[i]) * basepop
                    except Exception:
                        # Best effort: a cell whose limits cannot be
                        # computed is masked rather than aborting the run.
                        cr_ll[i] = 0.0
                        cr_ul[i] = 0.0
                        cr_ll_mask[i] = 1
                        cr_ul_mask[i] = 1

                cr_ll = MA.array(cr_ll, mask=cr_ll_mask, typecode=MA.Float64)
                cr_ul = MA.array(cr_ul, mask=cr_ul_mask, typecode=MA.Float64)
                cr_ll.shape = count_shape
                cr_ul.shape = count_shape

                cr_base = 'Crude rate %d%%' % (100.0 * conflev)
                result.add_table('cr_ll' + n_add, data=cr_ll,
                                 label=cr_base + ' lower confidence limit ' + l_add)
                result.add_table('cr_ul' + n_add, data=cr_ul,
                                 label=cr_base + ' upper confidence limit ' + l_add)

            if stdpopset is not None:

                # Directly Standardised Rate
                dsr = sumaxis(wgtrate, axis)
                result.add_table('dsr' + n_add, data=dsr * basepop,
                                 label='Directly Standardised Rate per ' + '%d' % basepop + ' person-' + timeinterval + l_add)

                # Confidence Intervals
                if alpha is None or name != '_freq_':
                    # Can only calculate confidence intervals on freq cols
                    continue

                if ci_method == 'dobson':
                    # Dobson et al method
                    # see: Dobson A, Kuulasmaa K, Eberle E, Schere J. Confidence intervals for weighted sums
                    # of Poisson parameters, Statistics in Medicine, Vol. 10, 1991, pp. 457-62.
                    se_wgtrate = summfreq * ((stdwgts / (popfreq))**2)
                    stderr = stdpop_sq * strata_rate * (1.0 - strata_rate)
                    se_rate = sumaxis(se_wgtrate, axis)
                    sumsei = sumaxis(stderr, axis)
                    total_freq = sumaxis(std_strata_summfreq, axis)

                    # get shape of total_freq
                    total_freq_shape = total_freq.shape
                    total_freq_flat = MA.ravel(total_freq)

                    # flat arrays to hold results and associated masks
                    l_lam = Numeric.empty(len(total_freq_flat), typecode=Numeric.Float64)
                    u_lam = Numeric.empty(len(total_freq_flat), typecode=Numeric.Float64)
                    l_lam_mask = Numeric.zeros(len(total_freq_flat), typecode=Numeric.Int8)
                    u_lam_mask = Numeric.zeros(len(total_freq_flat), typecode=Numeric.Int8)

                    conflev_l = (1 - conflev) / 2.0
                    conflev_u = (1 + conflev) / 2.0

                    for i, v in enumerate(total_freq_flat):
                        try:
                            if v == 0.:
                                u_lam[i] = -math.log(1 - conflev)
                                l_lam[i] = 0.0
                            else:
                                l_lam[i] = r.qgamma(conflev_l, v, scale=1.)
                                u_lam[i] = r.qgamma(conflev_u, v + 1., scale=1.)
                        except Exception:
                            # Best effort: mask cells that cannot be computed.
                            l_lam[i] = 0.0
                            u_lam[i] = 0.0
                            l_lam_mask[i] = 1
                            u_lam_mask[i] = 1

                    l_lam = MA.array(l_lam, mask=l_lam_mask, typecode=MA.Float64)
                    u_lam = MA.array(u_lam, mask=u_lam_mask, typecode=MA.Float64)
                    l_lam.shape = total_freq_shape
                    u_lam.shape = total_freq_shape

                    dsr_ll = dsr + (((se_rate / total_freq)**0.5) * (l_lam - total_freq))
                    dsr_ul = dsr + (((se_rate / total_freq)**0.5) * (u_lam - total_freq))

                elif ci_method == 'ff':
                    # Fay and Feuer method
                    # see: Fay MP, Feuer EJ. Confidence intervals for directly standardized rates:
                    # a method based on the gamma distribution. Statistics in Medicine 1997 Apr 15;16(7):791-801.
                    ffvari = summfreq * ffwi**2.0
                    ffvar = sumaxis(ffvari, axis)

                    # Masked entries are filled with 0 before flattening.
                    dsr_flat = Numeric.ravel(MA.filled(dsr, 0))
                    dsr_shape = dsr.shape
                    ffvar_flat = Numeric.ravel(MA.filled(ffvar, 0))

                    # flat arrays to hold results and associated masks
                    dsr_ll = Numeric.empty(len(dsr_flat), typecode=Numeric.Float64)
                    dsr_ul = Numeric.empty(len(dsr_flat), typecode=Numeric.Float64)
                    dsr_ll_mask = Numeric.zeros(len(dsr_flat), typecode=Numeric.Int8)
                    dsr_ul_mask = Numeric.zeros(len(dsr_flat), typecode=Numeric.Int8)

                    for i, y in enumerate(dsr_flat):
                        try:
                            dsr_ll[i] = (ffvar_flat[i] / (2.0 * y)) * r.qchisq(
                                alpha / 2., df=(2.0 * (y**2.) / ffvar_flat[i]))
                            dsr_ul[i] = ((ffvar_flat[i] + (ffwm**2.0)) / (2.0 * (y + ffwm))) * r.qchisq(
                                1. - alpha / 2.,
                                df=((2.0 * ((y + ffwm)**2.0)) / (ffvar_flat[i] + ffwm**2.0)))
                        except Exception:
                            # Best effort: mask cells that cannot be computed.
                            dsr_ll[i] = 0.0
                            dsr_ul[i] = 0.0
                            dsr_ll_mask[i] = 1
                            dsr_ul_mask[i] = 1

                    dsr_ll = MA.array(dsr_ll, mask=dsr_ll_mask, typecode=MA.Float64)
                    dsr_ul = MA.array(dsr_ul, mask=dsr_ul_mask, typecode=MA.Float64)
                    dsr_ll.shape = dsr_shape
                    dsr_ul.shape = dsr_shape

                result.add_table('dsr_ll' + n_add, data=dsr_ll * basepop,
                                 label='DSR ' + '%d' % (100.0 * conflev) + '% lower confidence limit' + l_add)
                result.add_table('dsr_ul' + n_add, data=dsr_ul * basepop,
                                 label='DSR ' + '%d' % (100.0 * conflev) + '% upper confidence limit' + l_add)
    finally:
        set_default_mode(r_mode)

    soom.info('calc_directly_std_rates took %.03f' % (time.time() - st))

    if stdpopset is not None:
        name = 'dir_std_rates_' + summset.name
        label = 'Directly Standardised Rates for ' + (summset.label or summset.name)
    else:
        name = 'crude_rates_' + summset.name
        label = 'Crude Rates for ' + (summset.label or summset.name)
    if conflev:
        label += ' (%g%% conf. limits)' % (conflev * 100)

    if debug:
        # Expose every local of this call through the module-level `vars`.
        global vars
        vars = Vars(locals())

    return result.to_summset(name, label=label)