Example #1
0
 def to_R(self, ds, dates_as_factor, suppress_all_value=True, **kwargs):
     """Convert the column named by self.colname in `ds` for use from R.

     Parameters:
         ds                  -- dataset, indexed by column name.
         dates_as_factor     -- when false, a date/time column is returned
                                directly as produced by
                                rconv.Missing_Date_to_R(); when true it is
                                passed on to the discrete/continuous
                                conversion like any other column.
         suppress_all_value  -- forwarded unchanged to
                                self.discrete_col_to_R().
         **kwargs            -- accepted and ignored.

     Returns None when self.colname is empty, otherwise the converted
     data.

     Raises PlotError when the column is a masked array with every
     element masked and self.missing_as_none is false.
     """
     if not self.colname:
         return None
     col = ds[self.colname]
     if col.is_datetimetype():
         rdata = rconv.Missing_Date_to_R(col)
         if not dates_as_factor:
             return rdata
     elif MA.isMaskedArray(col.data):
         if self.missing_as_none:
             # Masked entries are replaced by sys.maxint as a sentinel.
             rdata = MA.filled(col.data, sys.maxint)
         else:
             if Numeric.alltrue(col.data.mask()):
                 raise PlotError('%r: all data-points masked' % col.name)
             rdata = rconv.MA_to_R(col.data)
     else:
         rdata = col.data
     if col.is_discrete():
         return self.discrete_col_to_R(col, rdata, suppress_all_value)
     return self.continuous_col_to_R(col, rdata)
Example #2
0
    def rgrd(self, dataIn=None):
        """    --------------------------------------------------------------------------------------------------------

             ROUTINE: rgrd

             PURPOSE:  To perform one of the following two possible computations in 3-space:
                           To interpolate random data in 3-space using a modified Shepard's algorithm
                           To find the nearest neighbor to a user specified point

             USAGE:    To interpolate (self.callGridded == 'yes') use

                           dataOut = r.rgrd(dataIn) where

                               r       -- an instance of Shgrid
                               dataIn  -- input data
                               dataOut -- output data

                       To locate the nearest point (any other self.callGridded) use

                           np = r.rgrd(numberPoints) where

                               r       -- an instance of Shgrid
                               np      -- the array with numberPoints indices into the input grid identifying the nearest points
                                          (an empty array when numberPoints is 0)

             RAISES:   ValueError when shgrid or shgetnp returns a non-zero error code; the message
                       text is looked up in Shgrid.errorTable.
    --------------------------------------------------------------------------------------------------------"""

        if self.callGridded == 'yes':  # Interpolation in 3-space
            if usefilled == 'yes':
                dataIn = MA.filled(dataIn)

            if debug == 1:
                print('calling shgrid')

            # Integer and real work arrays, sized from the number of input points.
            iwk = numpy.zeros((2 * self.ni, ), 'i')
            rwk = numpy.zeros((11 * self.ni + 6, ), 'f')
            dataOut, ier = shgridmodule.shgrid(self.xi, self.yi, self.zi,
                                               dataIn, self.xo, self.yo,
                                               self.zo, iwk, rwk)
            dataOut = numpy.transpose(dataOut)

            if debug == 1:
                print('*****************   returning from shgrid with  ier =  %s' % (ier, ))

            if ier != 0:
                raise ValueError('Error in return from shgrid call with -- ' +
                                 Shgrid.errorTable(self)[ier])

            # Reverse the order in the returned arrays if any axis was flagged as reversed.
            if (self.xreverse == 'yes') or (self.yreverse == 'yes') or (
                    self.zreverse == 'yes'):
                dataOut = Shgrid.reverseData(self, dataOut)

            return dataOut

    #---------------------------------------------------------------------------------

        # Computation of nearest neighbors
        if debug == 1:
            print('calling shgetnp')

        numberPoints = dataIn
        nearest = numpy.zeros((numberPoints, ), numpy.int32)

        n = self.ni
        iwk = numpy.zeros((2 * n, ), numpy.int32)
        rwk = numpy.zeros((11 * n + 6, ), numpy.float32)

        for i in range(numberPoints):
            # iflag is 0 on the first call and 1 on every later call; the same
            # work arrays are handed back to shgetnp each time.
            # NOTE(review): presumably iflag=1 asks for the "next nearest"
            # point -- confirm against the shgetnp documentation.
            iflag = 0 if i == 0 else 1
            nearest[i], ier = shgridmodule.shgetnp(self.xo, self.yo, self.zo,
                                                   self.xi, self.yi, self.zi,
                                                   iflag, iwk, rwk)

            if debug == 1:
                print('*****************   returning from shgetnp with  ier =  %s' % (ier, ))
            if ier != 0:
                raise ValueError('Error in return from shgetnp call with -- ' +
                                 Shgrid.errorTable(self)[ier])
        return nearest
Example #3
0
File: ngl09p.py Project: akrherz/me
# Grid dimensions, taken from axes 1 and 2 of fice.
nhlat    = fice.shape[1]
nhlon    = fice.shape[2]

# nmo is the zero-based month index (0 = January); month is the one-based
# equivalent.
nmo    = 0
month  = nmo+1

# Calculate the January (nmo=0) average: for every grid cell, average every
# 12th time step starting at index 0.
# NOTE(review): the slice start is the literal 0, not nmo, so changing nmo
# alone does not select a different month -- confirm whether that is intended.
# assumes fice_masked is indexed [lat, lon, time] with its first two axes of
# length nhlat and nhlon -- TODO confirm against the code that builds it.
icemon = MA.zeros((nhlat,nhlon),MA.Float0)
for i in xrange(fice_masked.shape[0]):
  for j in xrange(fice_masked.shape[1]):
    icemon[i,j] = MA.average(fice_masked[i,j,0:ntime:12])

#
#  Fill the places where icemon is zero with the fill value:
#  first mask every entry within 1.e-15 of zero, then replace the
#  masked entries with fill_value.
#
icemon = MA.masked_values(icemon,0.,rtol=0.,atol=1.e-15)
icemon = MA.filled(icemon,value=fill_value)



nsub = 16 # Subscript location of northernmost hlat to be plotted.

cmap = Numeric.array([                                         \
         [1.00,1.00,1.00], [0.00,0.00,0.00], [1.00,1.00,0.50], \
         [0.00,0.00,0.50], [0.50,1.00,1.00], [0.50,0.00,0.00], \
         [1.00,0.00,1.00], [0.00,1.00,1.00], [1.00,1.00,0.00], \
         [0.00,0.00,1.00], [0.00,1.00,0.00], [1.00,0.00,0.00], \
         [0.50,0.00,1.00], [1.00,0.50,0.00], [0.00,0.50,1.00], \
         [0.50,1.00,0.00], [0.50,0.00,0.50], [0.50,1.00,0.50], \
         [1.00,0.50,1.00], [0.00,0.50,0.00], [0.50,0.50,1.00], \
         [1.00,0.00,0.50], [0.50,0.50,0.00], [0.00,0.50,0.50], \
Example #4
0
    def rgrd(self, dataIn = None):

        """    --------------------------------------------------------------------------------------------------------

             ROUTINE: rgrd

             PURPOSE:  To perform one of the following two possible computations in 3-space:
                           To interpolate random data in 3-space using a modified Shepard's algorithm
                           To find the nearest neighbor to a user specified point

             USAGE:    To interpolate (self.callGridded == 'yes') use

                           dataOut = r.rgrd(dataIn) where

                               r       -- an instance of Shgrid
                               dataIn  -- input data
                               dataOut -- output data

                       To locate the nearest point (any other self.callGridded) use

                           np = r.rgrd(numberPoints) where

                               r       -- an instance of Shgrid
                               np      -- the array with numberPoints indices into the input grid identifying the nearest points
                                          (an empty array when numberPoints is 0)

             RAISES:   ValueError when shgrid or shgetnp returns a non-zero error code; the message
                       text is looked up in Shgrid.errorTable.
    --------------------------------------------------------------------------------------------------------"""

        if self.callGridded == 'yes':                           # Interpolation in 3-space
            if usefilled == 'yes':
                dataIn = MA.filled(dataIn)

            if debug == 1:
                print('calling shgrid')

            # Integer and real work arrays, sized from the number of input points.
            iwk = numpy.zeros((2*self.ni,),'i')
            rwk = numpy.zeros((11*self.ni+6,),'f')
            dataOut, ier = shgridmodule.shgrid(self.xi, self.yi, self.zi,
                                               dataIn,
                                               self.xo, self.yo, self.zo,
                                               iwk,rwk)
            dataOut = numpy.transpose(dataOut)

            if debug == 1:
                print('*****************   returning from shgrid with  ier =  %s' % (ier,))

            if ier != 0:
                raise ValueError('Error in return from shgrid call with -- ' + Shgrid.errorTable(self)[ier])

            # Reverse the order in the returned arrays if any axis was flagged as reversed.
            if (self.xreverse == 'yes') or (self.yreverse == 'yes') or (self.zreverse == 'yes'):
                dataOut = Shgrid.reverseData(self, dataOut)

            return dataOut

    #---------------------------------------------------------------------------------

        # Computation of nearest neighbors
        if debug == 1:
            print('calling shgetnp')

        numberPoints = dataIn
        nearest = numpy.zeros((numberPoints,),numpy.int32)

        n = self.ni
        iwk = numpy.zeros((2*n,),numpy.int32)
        rwk = numpy.zeros((11*n + 6,), numpy.float32)

        for i in range(numberPoints):
            # iflag is 0 on the first call and 1 on every later call; the same
            # work arrays are handed back to shgetnp each time.
            # NOTE(review): presumably iflag=1 asks for the "next nearest"
            # point -- confirm against the shgetnp documentation.
            iflag = 0 if i == 0 else 1
            nearest[i], ier = shgridmodule.shgetnp(self.xo, self.yo, self.zo,
                                                   self.xi, self.yi, self.zi,
                                                   iflag,
                                                   iwk, rwk)

            if debug == 1:
                print('*****************   returning from shgetnp with  ier =  %s' % (ier,))
            if ier != 0:
                raise ValueError('Error in return from shgetnp call with -- ' + Shgrid.errorTable(self)[ier])
        return nearest
Example #5
0
def calc_directly_std_rates(summset,
                            popset,
                            stdpopset=None,
                            conflev=0.95,
                            basepop=100000,
                            timeinterval='years',
                            ci_method='dobson',
                            popset_popcol='_freq_',
                            stdpopset_popcol='_stdpop_',
                            axis=0,
                            debug=False):
    """
    Calculate Directly Standardised Population Rates

    summset     is a summary dataset of counts of events for the
                population-of-interest being compared to the standard
                population.
    popset      is the stratified population counts for the
                population-of-interest
    stdpopset   is the stratified population counts for the standard
                population.  When None, only crude rates (and their
                confidence limits) are produced.
    conflev     is the confidence level; it is converted to alpha by
                get_alpha().  When alpha is None no confidence limits
                are calculated.
    basepop     is the multiplier applied to rates (rates are reported
                "per basepop person-<timeinterval>").
    timeinterval is only used in the labels of the result columns.
    ci_method   selects the method for the standardised-rate confidence
                limits: 'dobson' (Dobson et al.) or 'ff' (Fay-Feuer).
    popset_popcol, stdpopset_popcol
                are the names of the population-count columns in popset
                and stdpopset respectively.
    axis        is the axis that is summed over; the result has that
                axis removed.
    debug       when true, the function's locals are stored in the
                module-level global "vars" (wrapped in Vars).

    Returns the result of result.to_summset(): a summary dataset named
    'dir_std_rates_<summset.name>' (or 'crude_rates_<summset.name>'
    when stdpopset is None).

    Raises Error if ci_method is not recognised or a required population
    column is missing.  Individual confidence limits that cannot be
    computed are masked rather than raising.
    """
    from rpy import r, get_default_mode, set_default_mode, BASIC_CONVERSION

    alpha = get_alpha(conflev)

    if ci_method not in ('dobson', 'ff'):
        raise Error('Only Dobson et al. (dobson) and Fay-Feuer (ff) methods '
                    'for confidence intervals currently implemented')
    if not popset.has_column(popset_popcol):
        raise Error('Denominator population dataset %r does not have a '
                    '%r column' % (popset.label or popset.name, popset_popcol))
    if stdpopset is not None and not stdpopset.has_column(stdpopset_popcol):
        raise Error('Standard population dataset %r does not have a '
                    '%r column' %
                    (stdpopset.label or stdpopset.name, stdpopset_popcol))

    st = time.time()
    # The rpy conversion mode is changed for the duration of the calculation
    # and restored in the finally clause below.
    r_mode = get_default_mode()
    try:
        set_default_mode(BASIC_CONVERSION)

        # We turn the summset into an Ncondcols-dimensional matrix
        summtab = CrossTab.from_summset(summset)

        if stdpopset is not None:
            # Then attempt to do the same to the stdpop data, summing any
            # axes not required and replicate any missing until we have an
            # array the same shape as the summtab array.
            stdtab = CrossTab.from_summset(stdpopset, shaped_like=summtab)
            stdtab.collapse_axes_not_in(summtab)
            stdtab.replicate_axes(summtab)
            stdpop = stdtab[stdpopset_popcol].data.astype(Numeric.Float64)

        # The population dataset must have at least as many dimensions as
        # summary dataset. Any additional axes are eliminated by summing.
        # any missing axes are created by replication.
        poptab = CrossTab.from_summset(popset, shaped_like=summtab)
        poptab.collapse_axes_not_in(summtab)
        poptab.replicate_axes(summtab)
        popfreq = poptab[popset_popcol].data.astype(Numeric.Float64)

        # Manufacture a CrossTab for the result, with one less axis (the one
        # selected by `axis`)
        result = summtab.empty_copy()
        del result.axes[axis]

        if stdpopset is not None:
            # NOTE(review): sumaxis() is called without `axis` here, unlike
            # every other call in this function; presumably its default
            # matches axis=0 -- confirm before relying on another axis.
            sum_stdpop = sumaxis(stdpop)
            stdwgts = stdpop / sum_stdpop
            stdpop_sq = stdpop**2
            sum_stdpop_sq = sum_stdpop**2
            # Fay-Feuer per-stratum weights and their maximum.
            ffwi = stdwgts / popfreq
            ffwm = MA.maximum(MA.ravel(ffwi))

        basepop = float(basepop)

        for table, name, n_add, l_add in just_freq_tables(summtab):

            # avoid integer overflows...
            summfreq = table.data.astype(Numeric.Float64)
            strata_rate = summfreq / popfreq

            result.add_table('summfreq' + n_add,
                             data=sumaxis(summfreq, axis),
                             label='Total events' + l_add)
            result.add_table('popfreq' + n_add,
                             data=sumaxis(popfreq, axis),
                             label='Total person-' + timeinterval +
                             ' at risk' + l_add)

            if stdpopset is not None:
                # Zero out event counts in strata whose standard weight is
                # masked.
                std_strata_summfreq = summfreq * Numeric.where(
                    MA.getmask(stdwgts), 0., 1.)
                wgtrate = strata_rate * stdwgts
                result.add_table('std_strata_summfreq' + n_add,
                                 data=sumaxis(std_strata_summfreq, axis),
                                 label="Total events in standard strata" +
                                 l_add)

            # Crude rate
            cr = sumaxis(summfreq, axis) / sumaxis(popfreq, axis) * basepop
            result.add_table('cr' + n_add,
                             data=cr,
                             label='Crude Rate per ' + '%d' % basepop +
                             ' person-' + timeinterval + l_add)

            if alpha is not None:
                # CIs for crude rate
                count = sumaxis(summfreq, axis)
                count_shape = count.shape
                count_flat = MA.ravel(count)
                totpop = sumaxis(popfreq, axis)
                assert totpop.shape == count.shape
                totpop_flat = MA.ravel(totpop)

                # flat arrays to hold results and associated masks
                cr_ll = Numeric.empty(len(count_flat),
                                      typecode=Numeric.Float64)
                cr_ul = Numeric.empty(len(count_flat),
                                      typecode=Numeric.Float64)
                cr_ll_mask = Numeric.zeros(len(count_flat),
                                           typecode=Numeric.Int8)
                cr_ul_mask = Numeric.zeros(len(count_flat),
                                           typecode=Numeric.Int8)

                for i, v in enumerate(count_flat):
                    try:
                        if v == 0:
                            cr_ll[i] = 0.0
                        else:
                            cr_ll[i] = (
                                (r.qchisq(alpha / 2., df=2.0 * v) / 2.0) /
                                totpop_flat[i]) * basepop
                        cr_ul[i] = (
                            (r.qchisq(1. - alpha / 2., df=2.0 *
                                      (v + 1)) / 2.0) /
                            totpop_flat[i]) * basepop
                    except Exception:
                        # Best effort: a limit that cannot be computed is
                        # masked instead of aborting the whole calculation.
                        cr_ll[i] = 0.0
                        cr_ul[i] = 0.0
                        cr_ll_mask[i] = 1
                        cr_ul_mask[i] = 1

                cr_ll = MA.array(cr_ll, mask=cr_ll_mask, typecode=MA.Float64)
                cr_ul = MA.array(cr_ul, mask=cr_ul_mask, typecode=MA.Float64)
                cr_ll.shape = count_shape
                cr_ul.shape = count_shape

                # %g (as used for the summset label below) avoids the
                # truncation %d applies to e.g. 0.57 * 100.
                cr_base = 'Crude rate %g%%' % (100.0 * conflev)
                result.add_table('cr_ll' + n_add,
                                 data=cr_ll,
                                 label=cr_base + ' lower confidence limit ' +
                                 l_add)
                result.add_table('cr_ul' + n_add,
                                 data=cr_ul,
                                 label=cr_base + ' upper confidence limit ' +
                                 l_add)

            if stdpopset is not None:

                # Directly Standardised Rate
                dsr = sumaxis(wgtrate, axis)
                result.add_table('dsr' + n_add,
                                 data=dsr * basepop,
                                 label='Directly Standardised Rate per ' +
                                 '%d' % basepop + ' person-' + timeinterval +
                                 l_add)

                # Confidence Intervals
                if alpha is None or name != '_freq_':
                    # Can only calculate confidence intervals on freq cols
                    continue

                if ci_method == 'dobson':
                    # Dobson et al method
                    # see: Dobson A, Kuulasmaa K, Eberle E, Schere J. Confidence intervals for weighted sums
                    # of Poisson parameters, Statistics in Medicine, Vol. 10, 1991, pp. 457-62.
                    # se_wgtrate = summfreq*((stdwgts/(popfreq/basepop))**2)
                    se_wgtrate = summfreq * ((stdwgts / (popfreq))**2)
                    # stderr and sumsei are not used by the limits below;
                    # they are kept because debug=True exposes this
                    # function's locals.
                    stderr = stdpop_sq * strata_rate * (1.0 - strata_rate)
                    se_rate = sumaxis(se_wgtrate, axis)
                    sumsei = sumaxis(stderr, axis)
                    total_freq = sumaxis(std_strata_summfreq, axis)
                    # get shape of total_freq
                    total_freq_shape = total_freq.shape

                    total_freq_flat = MA.ravel(total_freq)

                    # flat arrays to hold results and associated masks
                    l_lam = Numeric.empty(len(total_freq_flat),
                                          typecode=Numeric.Float64)
                    u_lam = Numeric.empty(len(total_freq_flat),
                                          typecode=Numeric.Float64)
                    l_lam_mask = Numeric.zeros(len(total_freq_flat),
                                               typecode=Numeric.Int8)
                    u_lam_mask = Numeric.zeros(len(total_freq_flat),
                                               typecode=Numeric.Int8)

                    conflev_l = (1 - conflev) / 2.0
                    conflev_u = (1 + conflev) / 2.0

                    for i, v in enumerate(total_freq_flat):
                        try:
                            if v == 0.:
                                u_lam[i] = -math.log(1 - conflev)
                                l_lam[i] = 0.0
                            else:
                                l_lam[i] = r.qgamma(conflev_l, v, scale=1.)
                                u_lam[i] = r.qgamma(conflev_u,
                                                    v + 1.,
                                                    scale=1.)
                        except Exception:
                            # Best effort: mask limits that cannot be
                            # computed.
                            l_lam[i] = 0.0
                            u_lam[i] = 0.0
                            l_lam_mask[i] = 1
                            u_lam_mask[i] = 1

                    l_lam = MA.array(l_lam,
                                     mask=l_lam_mask,
                                     typecode=MA.Float64)
                    u_lam = MA.array(u_lam,
                                     mask=u_lam_mask,
                                     typecode=MA.Float64)
                    l_lam.shape = total_freq_shape
                    u_lam.shape = total_freq_shape
                    dsr_ll = dsr + (((se_rate / total_freq)**0.5) *
                                    (l_lam - total_freq))
                    dsr_ul = dsr + (((se_rate / total_freq)**0.5) *
                                    (u_lam - total_freq))

                elif ci_method == 'ff':
                    # Fay and Feuer method
                    # see: Fay MP, Feuer EJ. Confidence intervals for directly standardized rates:
                    # a method based on the gamma distribution. Statistics in Medicine 1997 Apr 15;16(7):791-801.

                    ffvari = summfreq * ffwi**2.0
                    ffvar = sumaxis(ffvari, axis)

                    dsr_flat = Numeric.ravel(MA.filled(dsr, 0))
                    dsr_shape = dsr.shape

                    ffvar_flat = Numeric.ravel(MA.filled(ffvar, 0))

                    # flat arrays to hold results and associated masks
                    dsr_ll = Numeric.empty(len(dsr_flat),
                                           typecode=Numeric.Float64)
                    dsr_ul = Numeric.empty(len(dsr_flat),
                                           typecode=Numeric.Float64)
                    dsr_ll_mask = Numeric.zeros(len(dsr_flat),
                                                typecode=Numeric.Int8)
                    dsr_ul_mask = Numeric.zeros(len(dsr_flat),
                                                typecode=Numeric.Int8)

                    for i, y in enumerate(dsr_flat):
                        try:
                            dsr_ll[i] = (ffvar_flat[i] / (2.0 * y)) * r.qchisq(
                                alpha / 2., df=(2.0 * (y**2.) / ffvar_flat[i]))
                            dsr_ul[i] = ((ffvar_flat[i] + (ffwm**2.0)) /
                                         (2.0 * (y + ffwm))) * r.qchisq(
                                             1. - alpha / 2.,
                                             df=((2.0 * ((y + ffwm)**2.0)) /
                                                 (ffvar_flat[i] + ffwm**2.0)))
                        except Exception:
                            # Best effort: mask limits that cannot be
                            # computed (e.g. a zero rate or variance).
                            dsr_ll[i] = 0.0
                            dsr_ul[i] = 0.0
                            dsr_ll_mask[i] = 1
                            dsr_ul_mask[i] = 1
                    dsr_ll = MA.array(dsr_ll,
                                      mask=dsr_ll_mask,
                                      typecode=MA.Float64)
                    dsr_ul = MA.array(dsr_ul,
                                      mask=dsr_ul_mask,
                                      typecode=MA.Float64)
                    dsr_ll.shape = dsr_shape
                    dsr_ul.shape = dsr_shape

                result.add_table('dsr_ll' + n_add,
                                 data=dsr_ll * basepop,
                                 label='DSR ' + '%g' % (100.0 * conflev) +
                                 '% lower confidence limit' + l_add)
                result.add_table('dsr_ul' + n_add,
                                 data=dsr_ul * basepop,
                                 label='DSR ' + '%g' % (100.0 * conflev) +
                                 '% upper confidence limit' + l_add)

    finally:
        set_default_mode(r_mode)
    soom.info('calc_directly_std_rates took %.03f' % (time.time() - st))
    if stdpopset is not None:
        name = 'dir_std_rates_' + summset.name
        label = 'Directly Standardised Rates for ' + (summset.label
                                                      or summset.name)
    else:
        name = 'crude_rates_' + summset.name
        label = 'Crude Rates for ' + (summset.label or summset.name)
    if conflev:
        label += ' (%g%% conf. limits)' % (conflev * 100)
    if debug:
        # Debugging hook: expose this call's locals via a module-level global.
        global vars
        vars = Vars(locals())
    return result.to_summset(name, label=label)