Python histogram Examples, esutil.stat.histogram Python Examples

Example #1

0

Show file

File: fit-prior-gmix.py Project: esheldon/espy

    def do_histograms(self, minmag, maxmag):
        """
        Histogram the two ellipticity components and their magnitude for
        the objects in a magnitude range.

        parameters
        ----------
        minmag, maxmag:
            Rows are used when minmag < data['mag'] < maxmag; both bounds
            are exclusive.

        returns
        -------
        h1, h2, h:
            The results of histogram(..., more=True) for column 0 of
            data[self.ellip_name], for column 1, and for
            sqrt(g1**2 + g2**2).

        notes
        -----
        The bin size is 0.1 times the standard deviation of the total
        ellipticity of the selected rows.  It is also stored in
        self.binsize.
        """
        data = self.data
        w, = where((data['mag'] > minmag) & (data['mag'] < maxmag))

        ellip = data[self.ellip_name]
        g1 = ellip[w, 0]
        g2 = ellip[w, 1]
        gtot = sqrt(g1**2 + g2**2)

        binsize = 0.1*gtot.std()
        self.binsize = binsize

        # both components are binned over the same 2d limits
        h1 = histogram(g1, binsize=binsize,
                       min=self.mine_2d, max=self.maxe_2d, more=True)
        h2 = histogram(g2, binsize=binsize,
                       min=self.mine_2d, max=self.maxe_2d, more=True)

        # the magnitude cannot be negative, so its range starts at zero
        h = histogram(gtot, binsize=binsize, min=0., max=self.maxe, more=True)

        return h1, h2, h

Example #2

0

Show file

File: fit-prior-gmix-eta.py Project: esheldon/espy

    def do_histograms(self, minmag, maxmag, ellip_name):
        """
        Histogram both components and the magnitude of a two-column
        ellipticity field for the objects in a magnitude range.

        parameters
        ----------
        minmag, maxmag:
            Rows are used when minmag < data['mag'] < maxmag (exclusive).
        ellip_name:
            Name of the two-column field to histogram.  "eta" selects the
            eta limits and stores the bin size in self.binsize_eta; any
            other name selects the g limits and stores it in self.binsize_g.

        returns
        -------
        h1, h2, h:
            The results of histogram(..., more=True) for column 0, column 1
            and sqrt(col0**2 + col1**2).
        """
        data = self.data
        mag = data["mag"]
        selected, = where((mag > minmag) & (mag < maxmag))

        comp1 = data[ellip_name][selected, 0]
        comp2 = data[ellip_name][selected, 1]
        total = sqrt(comp1 ** 2 + comp2 ** 2)

        # bin size is a tenth of the scatter in the total ellipticity
        binsize = 0.1 * total.std()

        if ellip_name != "eta":
            lo_2d, hi_2d, hi_total = self.min_g_2d, self.max_g_2d, self.max_g
            self.binsize_g = binsize
        else:
            lo_2d, hi_2d, hi_total = self.min_eta_2d, self.max_eta_2d, self.max_eta
            self.binsize_eta = binsize

        common = dict(binsize=binsize, more=True)
        h1 = histogram(comp1, min=lo_2d, max=hi_2d, **common)
        h2 = histogram(comp2, min=lo_2d, max=hi_2d, **common)
        h = histogram(total, min=0.0, max=hi_total, **common)

        return h1, h2, h

Example #3

0

Show file

File: fit-prior.py Project: esheldon/espy

    def doplot(self):
        """
        Plot the data histograms against the fitted prior in a two-row
        table.

        The top panel overlays the h1 and h2 histograms with a histogram of
        the first component of random 2d draws from the prior.  The bottom
        panel overlays the h histogram, the model curve returned by
        get_prior_vals() and a histogram of random 1d draws.

        returns
        -------
        tab:
            The Table holding both panels.  It is displayed before
            returning when self.show is true.
        """
        nrand = 100000

        tab = Table(2, 1)
        tab.title = self.title

        xfit, yfit, gprior = self.get_prior_vals()

        binsize = self.binsize
        hdata = self.h
        hdata1 = self.h1
        hdata2 = self.h2

        def as_plot(hdict, **style):
            # every histogram here is drawn from its first bin's lower edge
            return Histogram(hdict["hist"], x0=hdict["low"][0],
                             binsize=binsize, **style)

        # random draws from the prior; the second 2d component is not used
        g1rand, _ = gprior.sample2d(nrand)
        grand = gprior.sample1d(nrand)

        hrand = histogram(grand, binsize=binsize, min=0.0, max=1.0, more=True)
        h1rand = histogram(g1rand, binsize=binsize, min=-1.0, max=1.0, more=True)

        # rescale the random histograms to what they are compared with: the
        # model curve (allowing for its different spacing) and the h1 counts
        fbinsize = xfit[1] - xfit[0]
        hrand["hist"] = hrand["hist"] * float(yfit.sum()) / hrand["hist"].sum() * fbinsize / binsize
        h1rand["hist"] = h1rand["hist"] * float(hdata1["hist"].sum()) / h1rand["hist"].sum()

        # top panel: the two components and the random g_1 draws
        hplt1 = as_plot(hdata1, color="red")
        hplt1.label = r"$g_1$"
        hplt2 = as_plot(hdata2, color="blue")
        hplt2.label = r"$g_2$"
        hplt1rand = as_plot(h1rand, color="magenta")
        hplt1rand.label = "rand"

        pltboth = FramedPlot()
        pltboth.xlabel = r"$g$"
        keyboth = PlotKey(0.9, 0.9, [hplt1, hplt2, hplt1rand], halign="right")
        pltboth.add(hplt1, hplt2, hplt1rand, keyboth)
        tab[0, 0] = pltboth

        # bottom panel: |g| data, the model curve and the random |g| draws
        hplt = as_plot(hdata)
        hplt.label = "|g|"
        hpltrand = as_plot(hrand, color="magenta")
        hpltrand.label = "rand"
        line = Curve(xfit, yfit, color="blue")
        line.label = "model"

        plt = FramedPlot()
        plt.xlabel = r"$|g|$"
        key = PlotKey(0.9, 0.9, [hplt, line, hpltrand], halign="right")
        plt.add(line, hplt, hpltrand, key)
        tab[1, 0] = plt

        if self.show:
            tab.show()

        return tab

Example #4

0

Show file

File: weightingold.py Project: esheldon/espy

def hist_match(data1, data2, binsize, extra_weights1=None):
    """
    Generate a set of weights for data set 1 such that the distribution of
    observables are matched to dataset 2.

    This is the simplest method for histogram matching and just works
    in rectangular bins

    parameters
    ----------
    data1:
        This data set is to be matched by weighting to data2
    data2:
        The data to be matched against
    binsize:
        The binsize to use for the histogram
    extra_weights1:
        An extra set of weights to apply to data1.  The returned weights
        will include this weight

    returns
    -------
    weights1:
        An array with one weight per element of data1.  Elements that fall
        in no bin over the range of data2 keep weight zero.  Before any
        extra weights are applied the largest per-bin weight is 1.  If no
        occupied bin of data1 overlaps data2, all weights are zero.
    """

    weights1 = zeros(data1.size)

    # both data sets are binned over the range of data2
    min2 = data2.min()
    max2 = data2.max()

    if extra_weights1 is not None:
        # with weights the result is indexed by name; use the weighted counts
        bs1 = histogram(data1, binsize=binsize, min=min2, max=max2, rev=True,
                        weights=extra_weights1)
        h1 = bs1['whist']
        rev1 = bs1['rev']
    else:
        h1, rev1 = histogram(data1, binsize=binsize, min=min2, max=max2,
                             rev=True)

    h2 = histogram(data2, min=min2, max=max2, binsize=binsize)

    if h1.size != h2.size:
        raise ValueError("histogram sizes don't match: %d/%d" % (h1.size,h2.size))

    # per-bin ratio of data2 to data1 counts; bins empty in data1 stay zero
    ratio = zeros(h1.size)
    w, = where(h1 > 0)
    ratio[w] = (h2[w]*1.0)/h1[w]

    # this is the weight for each object in the bin, normalized so the
    # largest is 1.  When every ratio is zero the division is skipped so
    # the weights come back as zeros instead of NaN.
    max_ratio = ratio.max()
    if max_ratio > 0:
        ratio /= max_ratio

    for i in xrange(h1.size):
        if rev1[i] != rev1[i+1]:
            # indices into data1 of the objects in bin i
            w1 = rev1[ rev1[i]:rev1[i+1] ]

            weights1[w1] = ratio[i]

    if extra_weights1 is not None:
        weights1 *= extra_weights1
    return weights1

Example #5

0

Show file

File: fit-sprior-true.py Project: esheldon/espy

    def do_histogram(self, minmag, maxmag):
        """
        Histogram the clipped self.sigma values of the objects in a
        magnitude range.

        parameters
        ----------
        minmag, maxmag:
            Rows are used when minmag < data['mag'] < maxmag; both bounds
            are exclusive.

        returns
        -------
        h, mean, sigma:
            The result of histogram(..., more=True) for the clipped values,
            and the mean and standard deviation of those values.

        notes
        -----
        The bin size is sigma*self.binfac, using the standard deviation
        after clipping.  It is also stored in self.binsize.
        """
        data = self.data
        w, = where((data['mag'] > minmag) & (data['mag'] < maxmag))

        sigma_vals = self.sigma[w]

        mean = sigma_vals.mean()
        sigma = sigma_vals.std()

        # three passes of one-sided clipping: only values at or above
        # mean + 4 sigma are dropped, then the statistics are recomputed
        for _ in xrange(3):
            keep, = where(sigma_vals < (mean + 4.*sigma))

            sigma_vals = sigma_vals[keep]

            mean = sigma_vals.mean()
            sigma = sigma_vals.std()

        binsize = sigma*self.binfac
        self.binsize = binsize

        h = histogram(sigma_vals, binsize=binsize, more=True)

        return h, mean, sigma

Example #6

0

Show file

File: fit-prior.py Project: esheldon/espy

    def do_histograms(self):
        """
        Histogram both columns of data["g"] and their magnitude, storing
        the results on self.

        The components are binned over [-1, 1] and the magnitude over
        [0, 1], all with self.binsize and more=True.  The results are saved
        as self.h1 (column 0), self.h2 (column 1) and self.h (magnitude).
        """
        g = self.data["g"]
        binsize = self.binsize

        comp1 = g[:, 0]
        comp2 = g[:, 1]

        component_range = dict(binsize=binsize, min=-1.0, max=1.0, more=True)
        hist1 = histogram(comp1, **component_range)
        hist2 = histogram(comp2, **component_range)

        magnitude = sqrt(comp1 ** 2 + comp2 ** 2)
        hist_tot = histogram(magnitude, binsize=binsize, min=0.0, max=1.0, more=True)

        # store only after all three histograms have been computed
        self.h = hist_tot
        self.h1 = hist1
        self.h2 = hist2

Example #7

0

Show file

File: averaging.py Project: esheldon/espy

def get_jackknife_sums_weighted(data, weights, jackreg_col=None):
    """
    Weighted sums for jackknifing.  If regions are sent the sums are
    accumulated per region, otherwise one entry per object is returned.

    parameters
    ----------
    data: array
        An array with fields 'dsum' and 'wsum'. If shear style
        is lensfit, dsensum is needed rather than wsum.
    weights: array
        Additional weights, one per row of data
    jackreg_col: string, optional
        column name holding the jackknife region ids

    returns
    -------
    jdsum, jwsum:
        The weighted 'dsum' values and the weighted 'wsum' (or 'dsensum')
        values.  With regions, there is one row per region that contains
        at least one object.
    """
    from esutil.stat import histogram

    dcol = "dsum"
    wcol = "dsensum" if get_shear_style(data) == "lensfit" else "wsum"

    if jackreg_col is None:
        # trailing axis added so the weights broadcast across each row
        wcolumn = weights[:, newaxis]
        return data[dcol] * wcolumn, data[wcol] * wcolumn

    print("using jackreg_col:", jackreg_col)

    counts, rev = histogram(data[jackreg_col], rev=True)

    nreg = counts.size
    nrad = data[dcol].shape[1]
    dsums = zeros((nreg, nrad))
    wsums = zeros((nreg, nrad))

    for ireg in xrange(nreg):
        start, stop = rev[ireg], rev[ireg + 1]
        if start == stop:
            # nothing fell in this bin
            continue

        members = rev[start:stop]
        wcolumn = weights[members][:, newaxis]

        # the row index alone selects the full row for this region
        dsums[ireg] = (data[dcol][members] * wcolumn).sum(axis=0)
        wsums[ireg] = (data[wcol][members] * wcolumn).sum(axis=0)

    # drop the rows for bins that held no objects
    occupied, = where(counts > 0)
    return dsums[occupied, :], wsums[occupied, :]

Example #8

0

Show file

File: validation.py Project: esheldon/espy

    def compare_all_other(self, type, show=True):
        """
        Compare the redshift histogram of the recoverable validation
        objects with the weighted redshift histogram of the training set,
        and write the plot to file.

        parameters
        ----------
        type:
            Passed to self.all_other_fdict() to get the file names; also
            drawn on the plot as a label.
        show:
            If True (the default) the plot is displayed after it has been
            written and converted.
        """
        binsize = 0.0314

        fdict = self.all_other_fdict(type)

        # the original file, which holds the redshifts
        orig = zphot.weighting.read_training(fdict['origfile'])
        # the outputs
        num = zphot.weighting.read_num(fdict['numfile1'])
        # the weights file
        weights = zphot.weighting.read_training(fdict['wfile2'])

        # the recoverable set is the one with num > 0; its photoid values
        # are used as indexes back into the "orig" file
        recoverable = where1(num['num'] > 0)
        orig_index = num['photoid'][recoverable]
        zrec = orig['z'][orig_index]

        # truth: unit-sum histogram of the validation redshifts
        valid_dict = histogram(zrec, min=0, max=1.1, binsize=binsize, more=True)
        plt = FramedPlot()
        truth_counts = valid_dict['hist']
        vhist = truth_counts/(float(truth_counts.sum()))
        pvhist = biggles.Histogram(vhist, x0=valid_dict['low'][0],
                                   binsize=binsize)
        pvhist.label = 'truth'

        # weighted training set, also scaled to unit sum
        weights_dict = histogram(weights['z'], min=0, max=1.1,
                                 binsize=binsize,
                                 weights=weights['weight'], more=True)
        weighted_counts = weights_dict['whist']
        whist = weighted_counts/weighted_counts.sum()
        pwhist = biggles.Histogram(whist, x0=weights_dict['low'][0],
                                   binsize=binsize, color='red')
        pwhist.label = 'weighted train'

        key = PlotKey(0.6, 0.6, [pvhist, pwhist])
        plt.add(pvhist, pwhist, key)
        plt.add(biggles.PlotLabel(.8, .9, type))

        zhistfile = fdict['zhistfile']
        plt.write_eps(zhistfile)
        converter.convert(zhistfile, dpi=90, verbose=True)

        if show:
            plt.show()
Example #9

0

Show file

File: weightingold.py Project: esheldon/espy

def hist_match_remove(data1, data2, binsize, extra_weights1=None):
    """
    Similar to hist_match but instead of returning the weights, actually
    remove a random subset from data set 1

    parameters
    ----------
    data1:
        The data set to draw the subset from
    data2:
        The data to be matched against; its min and max set the bin range
    binsize:
        The binsize to use for the histograms
    extra_weights1:
        Optional weights for data1.  When sent, the weighted counts of
        data1 are used to form the ratios.

    returns
    -------
    The kept indices into data1, combined across bins with
    eu.numpy_util.combine_arrlist
    """
    import esutil as eu

    lo = data2.min()
    hi = data2.max()
    binning = dict(binsize=binsize, min=lo, max=hi)

    if extra_weights1 is None:
        h1, rev1 = histogram(data1, rev=True, **binning)
    else:
        res1 = histogram(data1, rev=True, weights=extra_weights1, **binning)
        h1 = res1['whist']
        rev1 = res1['rev']

    h2 = histogram(data2, **binning)

    if h1.size != h2.size:
        raise ValueError("histogram sizes don't match: %d/%d" % (h1.size,h2.size))

    # ratio of data2 to data1 counts where data1 has any, scaled so the
    # largest is 1; it is used below as the fraction of each bin to keep
    ratio = zeros(h1.size)
    occupied, = where(h1 > 0)
    ratio[occupied] = (h2[occupied]*1.0)/h1[occupied]
    ratio /= ratio.max()

    keep = []
    for ibin in xrange(h1.size):
        start, stop = rev1[ibin], rev1[ibin+1]
        if start == stop:
            continue

        # indices into data1 of the objects in this bin
        members = rev1[start:stop]

        nkeep = int(members.size*ratio[ibin])
        if nkeep <= 0:
            continue

        # random subsample of this bin
        chosen = eu.random.random_indices(members.size, nkeep)
        keep.append(members[chosen])

    return eu.numpy_util.combine_arrlist(keep)

Example #10

0

Show file

File: averaging.py Project: esheldon/espy

def get_jackknife_sums(data, jackreg_col=None, weights=None):
    """
    Sums for jackknifing.  If regions are sent the sums are accumulated
    per region, otherwise the per-object values are returned as they are.

    parameters
    ----------
    data: array
        An array with fields 'dsum' and 'wsum'. If shear style
        is lensfit, dsensum is needed rather than wsum.
    jackreg_col: string, optional
        column name holding the jackknife region ids
    weights: array, optional
        Additional weights.  When sent, the work is handed to
        get_jackknife_sums_weighted.

    returns
    -------
    jdsum, jwsum:
        The 'dsum' values and the 'wsum' (or 'dsensum') values.  With
        regions, there is one row per region that contains at least one
        object.
    """
    from esutil.stat import histogram

    if weights is not None:
        return get_jackknife_sums_weighted(data, weights, jackreg_col=jackreg_col)

    dcol = "dsum"
    wcol = "dsensum" if get_shear_style(data) == "lensfit" else "wsum"

    if jackreg_col is None:
        return data[dcol], data[wcol]

    print("using jackreg_col:", jackreg_col)

    counts, rev = histogram(data[jackreg_col], rev=True)

    nreg = counts.size
    nrad = data[dcol].shape[1]
    dsums = zeros((nreg, nrad))
    wsums = zeros((nreg, nrad))

    for ireg in xrange(nreg):
        start, stop = rev[ireg], rev[ireg + 1]
        if start == stop:
            # nothing fell in this bin
            continue

        members = rev[start:stop]
        dsums[ireg] = data[dcol][members].sum(axis=0)
        wsums[ireg] = data[wcol][members].sum(axis=0)

    # drop the rows for bins that held no objects
    occupied, = where(counts > 0)
    return dsums[occupied, :], wsums[occupied, :]

Example #11

0

Show file

File: stats.py Project: esheldon/espy

def bin_shear_data(data, bin_field, **keys):
    """
    Bin the data by a field and average the shear fields in each bin.

    median or wmedian are not an improvement

    parameters
    ----------
    data:
        Array with the fields 'g', 'gsens' and 'gcov', the field named by
        bin_field and any other fields listed by get_binned_dtype
    bin_field:
        Name of the field to bin by
    use_median: bool, optional
        Store the median of each field rather than the weighted mean
    use_wmedian: bool, optional
        Store the weighted median of each field rather than the weighted
        mean.  Ignored when use_median is set.
    **keys:
        Remaining keywords are passed on to histogram(), e.g. the binsize

    returns
    -------
    bindata:
        One row per bin that holds at least one object.  The g1 and g2
        values and errors are divided by the binned g1sens and g2sens.
    """
    # pop rather than get, so these options are not forwarded to histogram
    use_median = keys.pop('use_median', False)
    use_wmedian = keys.pop('use_wmedian', False)

    # reverse indices are always needed below
    keys['rev'] = True
    h, rev = histogram(data[bin_field], **keys)

    nbin = len(h)
    dt, fields = get_binned_dtype(bin_field)
    bindata = zeros(nbin, dtype=dt)

    # fields held as one column of a two-column array: name -> (array, column)
    components = {
        'g1': ('g', 0),
        'g2': ('g', 1),
        'g1sens': ('gsens', 0),
        'g2sens': ('gsens', 1),
    }

    for i in xrange(nbin):
        if rev[i] != rev[i+1]:
            # indices of the objects in bin i
            w = rev[ rev[i]:rev[i+1] ]

            bindata['n'][i] = w.size

            wts = get_weights(data['gcov'], ind=w)

            for field in fields:
                if field in components:
                    name, col = components[field]
                    fdata = data[name][w, col]
                else:
                    fdata = data[field][w]

                err_field = field+'_err'

                # the error always comes from the weighted moments, even
                # when a median is stored as the value
                wmean, werr = wmom(fdata, wts, calcerr=True)
                if use_median:
                    bindata[field][i] = median(fdata)
                elif use_wmedian:
                    bindata[field][i] = wmedian(fdata, wts)
                else:
                    bindata[field][i] = wmean
                bindata[err_field][i] = werr

    # for when we use a binsize instead of nperbin
    w, = where(h > 0)
    bindata = bindata[w]

    bindata['g1'] /= bindata['g1sens']
    bindata['g1_err'] /= bindata['g1sens']
    bindata['g2'] /= bindata['g2sens']
    bindata['g2_err'] /= bindata['g2sens']

    return bindata

Example #12

0

Show file

File: fit-prior-true.py Project: esheldon/espy

    def do_histograms(self, minmag, maxmag):
        """
        Histogram the two shear components and their magnitude for the
        objects in a magnitude range.

        parameters
        ----------
        minmag, maxmag:
            Rows are used when minmag < data['mag'] < maxmag; both bounds
            are exclusive.

        returns
        -------
        h1, h2, h:
            The results of histogram(..., more=True) for g1, g2 and
            sqrt(g1**2 + g2**2).  The components are binned over [-1, 1];
            no range is given for the magnitude.

        notes
        -----
        When self.evals is true the columns of data['g'] are taken to be
        e1,e2 and converted with lensing.util.e1e2_to_g1g2.

        The bin size is 0.2 times the standard deviation of the magnitude.
        It is also stored in self.binsize.
        """
        data = self.data
        w, = where((data['mag'] > minmag) & (data['mag'] < maxmag))

        if self.evals:
            import lensing
            # assume g is actually e; convert one object at a time
            e1 = data['g'][w, 0]
            e2 = data['g'][w, 1]
            g1 = zeros(e1.size, dtype='f8')
            g2 = zeros(e1.size, dtype='f8')
            for i in xrange(g1.size):
                g1[i], g2[i] = lensing.util.e1e2_to_g1g2(e1[i], e2[i])
        else:
            g1 = data['g'][w, 0]
            g2 = data['g'][w, 1]

        gtot = sqrt(g1**2 + g2**2)

        binsize = 0.2*gtot.std()
        self.binsize = binsize

        h1 = histogram(g1, binsize=binsize, min=-1., max=1., more=True)
        h2 = histogram(g2, binsize=binsize, min=-1., max=1., more=True)

        h = histogram(gtot, binsize=binsize, more=True)

        return h1, h2, h

Example #13

0

Show file

File: htm.py Project: d80b2t/python

    def match_prepare(self, ra, dec, verbose=False):
        """
        Look up the ids for the input positions and histogram them to get
        the reverse indices.

        parameters
        ----------
        ra, dec:
            Positions passed to self.lookup_id
        verbose: bool, optional
            Write progress messages to stdout

        returns
        -------
        htmrev, minid, maxid:
            The reverse indices from histogramming the ids after the
            minimum id is subtracted, and the minimum and maximum id.
        """
        if verbose:
            stdout.write("looking up ids\n")

        ids = self.lookup_id(ra, dec)
        id_lo = ids.min()
        id_hi = ids.max()

        if verbose:
            stdout.write("Getting reverse indices\n")
            stdout.flush()

        # only the reverse indices are kept; the counts are discarded
        _, reverse_indices = stat.histogram(ids - id_lo, rev=True)

        return reverse_indices, id_lo, id_hi

Example #14

0

Show file

File: mcmc.py Project: esheldon/espy

    def plot_multiple(self, hardcopy=False):
        """
        Plot the results from run_multiple, saved in meanvals.

        A histogram of self.meanvals is drawn together with the curve
        from self.gaussfunc, scaled by the number of values times the bin
        size.

        parameters
        ----------
        hardcopy: bool, optional
            Also write the plot to mcmc-constant-multi.eps

        Raises ValueError unless self.type is "constant".
        """
        import biggles
        from esutil import stat

        if self.type != "constant":
            raise ValueError("only support type='constant'")

        meanvals = self.meanvals

        lo = meanvals.min()
        hi = meanvals.max()
        scatter = meanvals.std()

        # bins a fifth of the scatter wide
        binsize = scatter * 0.2
        bindata = stat.histogram(meanvals, binsize=binsize, more=True)

        plt = biggles.FramedPlot()

        x_range = [lo, hi]

        hist_plot = biggles.Histogram(bindata["hist"], x0=lo, binsize=binsize)
        hist_plot.label = "Trials Means"

        # get the expected gaussian
        expected_error = self.true_error / numpy.sqrt(self.npoints)

        xvals = numpy.arange(x_range[0], x_range[1], 0.02, dtype="f8")
        gpoints = self.gaussfunc(self.true_pars, expected_error, xvals)

        # scale the curve to counts per bin
        gpoints *= meanvals.size * binsize

        curve = biggles.Curve(xvals, gpoints, color="blue")
        curve.label = "Expected Distribution"

        key = biggles.PlotKey(0.1, 0.9, [hist_plot, curve])

        plt.add(hist_plot, key, curve)

        plt.xlabel = "Trial Means"
        plt.ylabel = "count"

        if hardcopy:
            fname = "mcmc-constant-multi.eps"
            stdout.write("Writing test file hardcopy: %s\n" % fname)
            plt.write_eps(fname)
        plt.show()
Example #15

0

Show file

File: stats.py Project: esheldon/espy

def logbin_shear_data(data, bin_field, **keys):
    """
    Bin the data by log10 of a field and take the weighted mean of the
    shear fields in each bin.

    Send nbin

    parameters
    ----------
    data:
        Array with the fields 'g', 'gsens' and 'gcov', the field named by
        bin_field and any other fields listed by get_binned_dtype
    bin_field:
        Name of the field to bin by
    min, max: optional
        Range in the units of bin_field; the default is the data range.
        The log10 of these is sent to histogram().
    **keys:
        Remaining keywords are passed on to histogram()

    returns
    -------
    bindata:
        One row per bin that holds at least one object.  The g1 and g2
        values and errors are divided by the binned g1sens and g2sens.
    """
    values = data[bin_field]
    lo = keys.get('min', values.min())
    hi = keys.get('max', values.max())

    # the histogram is made in log space and always with reverse indices
    keys.update(min=log10(lo), max=log10(hi), rev=True)

    h, rev = histogram(log10(data[bin_field]), **keys)

    nbin = len(h)
    dt, fields = get_binned_dtype(bin_field)
    bindata = zeros(nbin, dtype=dt)

    # fields held as one column of a two-column array: name -> (array, column)
    components = {
        'g1': ('g', 0),
        'g2': ('g', 1),
        'g1sens': ('gsens', 0),
        'g2sens': ('gsens', 1),
    }

    for ibin in xrange(nbin):
        start, stop = rev[ibin], rev[ibin+1]
        if start == stop:
            continue

        # indices of the objects in this bin
        members = rev[start:stop]
        bindata['n'][ibin] = members.size

        wts = get_weights(data['gcov'], ind=members)

        for field in fields:
            if field in components:
                name, col = components[field]
                fdata = data[name][members, col]
            else:
                fdata = data[field][members]

            wmean, werr = wmom(fdata, wts, calcerr=True)
            bindata[field][ibin] = wmean
            bindata[field+'_err'][ibin] = werr

    # keep only the bins that hold objects
    occupied, = where(h > 0)
    bindata = bindata[occupied]

    for comp in ('g1', 'g2'):
        sens = bindata[comp+'sens']
        bindata[comp] /= sens
        bindata[comp+'_err'] /= sens

    return bindata

Example #16

0

Show file

File: afterburn.py Project: esheldon/gmix_meds

    def set_rev(self):
        """
        Histogram the epoch data 'number' field and keep the counts and
        reverse indices.

        The histogram runs from 1 to the largest 'number' in the first
        entry of self.meds_list.  The counts are stored in self.h_number
        and the reverse indices in self.rev_number.
        """
        from esutil.stat import histogram

        print("histogramming epoch 'number'")

        first_meds = self.meds_list[0]
        number_max = first_meds['number'].max()

        result = histogram(
            self.epoch_data['number'],
            min=1,
            max=number_max,
            rev=True,
        )

        self.h_number, self.rev_number = result

Example #17

0

Show file

File: fit-sprior-true.py Project: esheldon/espy

    def doplot(self, fitres, h, minmag, maxmag):
        """
        Plot the data histogram against random draws from the fitted model,
        above the trials plot of the fit, and write the result to an eps
        file.

        parameters
        ----------
        fitres:
            The fit result; its get_model() and plot_trials() are used
        h:
            The data histogram, with entries 'hist', 'low' and 'high'
        minmag, maxmag:
            Used in the plot title and the output file name

        returns
        -------
        tab:
            The two-row table holding both plots.  It is displayed when
            self.show is true.
        """
        nrand = 100000
        binsize = self.binsize

        tab = biggles.Table(2, 1)

        plt = FramedPlot()
        plt.title = '%s %.2f %.2f ' % (self.objtype, minmag, maxmag)
        plt.xlabel = r'$\sigma$'

        sprior = fitres.get_model()

        data_counts = h['hist']
        first_edge = h['low'][0]

        hplt = Histogram(data_counts, x0=first_edge, binsize=binsize)
        hplt.label = 'data'

        # histogram random draws over the same range as the data, scaled
        # by the ratio of the data count to the number of draws
        srand = sprior.sample(nrand)
        hrand = histogram(srand, binsize=binsize, min=first_edge,
                          max=h['high'][-1], more=True)
        hrand['hist'] = hrand['hist']*float(data_counts.sum())/nrand

        hpltrand = Histogram(hrand['hist'], x0=hrand['low'][0],
                             binsize=binsize, color='blue')
        hpltrand.label = 'rand'

        key = PlotKey(0.9, 0.9, [hplt, hpltrand], halign='right')
        plt.add(hplt, hpltrand, key)

        tplt = fitres.plot_trials(show=False, fontsize_min=0.88888888)

        tab[0, 0] = plt
        tab[1, 0] = tplt

        if self.show:
            tab.show()

        # the file goes in the "plots" directory under the prior directory
        plot_dir = os.path.join(files.get_prior_dir(), 'plots')
        epsname = 'pofs-%.2f-%.2f-%s.eps' % (minmag, maxmag, self.objtype)
        epsfile = os.path.join(plot_dir, epsname)
        eu.ostools.makedirs_fromfile(epsfile)
        print(epsfile)
        tab.write_eps(epsfile)
        os.system('converter -d 100 %s' % epsfile)

        return tab

Example #18

0

Show file

File: mcmc.py Project: esheldon/espy

    def plot_results(self, burnin=100, hardcopy=False):
        """
        Plot a histogram of the first parameter of the trials, with the
        curve from self.gaussfunc scaled by the number of values times the
        bin size.

        parameters
        ----------
        burnin: int, optional
            Number of leading rows of self.trials to skip.  Default 100.
        hardcopy: bool, optional
            Also write the plot to mcmc-constant.eps

        Raises ValueError unless self.type is "constant".
        """
        import biggles
        from esutil import stat

        if self.type != "constant":
            raise ValueError("only support type='constant'")

        binsize = 0.05
        x_range = [8.5, 11.5]

        # first parameter of every trial after the burn-in rows
        par0 = self.trials[burnin:, 0]
        lo = par0.min()

        bindata = stat.histogram(par0, binsize=binsize, more=True)

        plt = biggles.FramedPlot()
        plt.x1.range = x_range

        hist_plot = biggles.Histogram(bindata["hist"], x0=lo, binsize=binsize)
        hist_plot.label = "trials"

        # get the expected gaussian
        expected_error = self.true_error / numpy.sqrt(self.npoints)

        xvals = numpy.arange(x_range[0], x_range[1], 0.02, dtype="f8")
        gpoints = self.gaussfunc(self.true_pars, expected_error, xvals)

        # scale the curve to counts per bin
        gpoints *= par0.size * binsize

        curve = biggles.Curve(xvals, gpoints, color="blue")
        curve.label = "Expected Distribution"

        key = biggles.PlotKey(0.1, 0.9, [hist_plot, curve])

        plt.add(hist_plot, key, curve)

        plt.xlabel = "trial values"
        plt.ylabel = "count"

        if hardcopy:
            fname = "mcmc-constant.eps"
            stdout.write("Writing test file hardcopy: %s\n" % fname)
            plt.write_eps(fname)
        plt.show()

Example #19

0

Show file

File: htm.py Project: lqleeqee/esutil

    def match_prepare(self, ra, dec, verbose=False):
        """
        deprecated.  Use an htm.Matcher instead

        Look up the ids for the input positions and histogram them to get
        the reverse indices.

        parameters
        ----------
        ra, dec:
            Positions passed to self.lookup_id
        verbose: bool, optional
            Write progress messages to stdout

        returns
        -------
        htmrev, minid, maxid:
            The reverse indices from histogramming the ids after the
            minimum id is subtracted, and the minimum and maximum id.
        """
        print('deprecated: use a htm.Matcher instead')

        if verbose:
            stdout.write("looking up ids\n")

        ids = self.lookup_id(ra, dec)
        id_lo = ids.min()
        id_hi = ids.max()

        if verbose:
            stdout.write("Getting reverse indices\n")
            stdout.flush()

        # only the reverse indices are kept; the counts are discarded
        _, reverse_indices = stat.histogram(ids - id_lo, rev=True)

        return reverse_indices, id_lo, id_hi

Example #20

0

Show file

File: mcmc.py Project: esheldon/mcmc

def test_lognormal():
    """
    Fit a log-normal to a histogram of log-normal random draws and plot
    the histogram with the fitted model over it.
    """
    import biggles
    import esutil as eu
    from esutil.random import LogNormal, srandu
    from esutil.stat import histogram

    nsample = 1000
    nwalkers = 100
    burnin = 100
    nstep = 100

    mean = 8
    sigma = 3
    binsize = 0.5

    vals = LogNormal(mean, sigma).sample(nsample)

    plt = eu.plotting.bhist(vals, binsize=binsize, show=False)

    hdict = histogram(vals, binsize=binsize, more=True)
    centers = hdict['center']
    counts = hdict['hist']

    # errors are sqrt(counts), but never below 1
    herr = sqrt(counts)
    herr = herr.clip(1.0, herr.max())

    # this perturbed guess is replaced on the next statement; the srandu()
    # calls are retained so any random state they consume is unchanged
    guess = [nsample*(1. + .1*srandu()),
             mean*(1. + .1*srandu()),
             sigma*(1. + .1*srandu())]
    guess = [nsample*binsize, mean, sigma]

    print('guess: %s' % (guess,))
    fitter = LogNormalFitter(centers, counts, guess, nwalkers, burnin,
                             nstep, yerr=herr)

    print(fitter)

    # the result is fetched but not used further here
    fitter.get_result()

    model = fitter.get_model()

    yvals = model.scaled(centers)
    plt.add(biggles.Curve(centers, yvals, color='blue'))
    plt.show()

Example #21

0

Show file

    def match_prepare(self, ra, dec, verbose=False):
        """
        deprecated.  Use an htm.Matcher instead

        Look up the ids for the input positions and histogram them to get
        the reverse indices.

        parameters
        ----------
        ra, dec:
            Positions passed to self.lookup_id
        verbose: bool, optional
            Write progress messages to stdout

        returns
        -------
        htmrev, minid, maxid:
            The reverse indices from histogramming the ids after the
            minimum id is subtracted, and the minimum and maximum id.
        """
        print('deprecated: use a htm.Matcher instead')

        def report(message, flush=False):
            # progress output is written only in verbose mode
            if verbose:
                stdout.write(message)
                if flush:
                    stdout.flush()

        report("looking up ids\n")
        ids = self.lookup_id(ra, dec)
        id_lo = ids.min()
        id_hi = ids.max()

        report("Getting reverse indices\n", flush=True)

        # only the reverse indices are kept; the counts are discarded
        _, reverse_indices = stat.histogram(ids - id_lo, rev=True)

        return reverse_indices, id_lo, id_hi

Example #22

0

Show file

File: mcmc.py Project: esheldon/espy

def test_line(burnin=1000, nstep=10000, doplot=False):
    """
    Fit a noisy line with the MH sampler, running all steps at once so the
    burn-in phase can be plotted.

    parameters
    ----------
    burnin: int, optional
        Number of leading steps treated as burn-in.  The chain is run for
        burnin + nstep steps in total.  The plots and the statistics drawn
        from the chain use this same value (previously the plotting branch
        silently reset it to 1000).
    nstep: int, optional
        Number of steps run in addition to the burn-in.
    doplot: bool, optional
        If True, show the chain, the parameter histograms and the fitted
        line with biggles.

    returns
    -------
    The trials from fitter.get_trials(), with the burn-in steps included.
    """
    pars = [1.0, 1.0]
    xmin = -1.0
    xmax = 1.0
    nx = 10
    yerr = 0.1
    x, y, yerr = noisy_line(pars, xmin, xmax, nx, yerr)

    LF = LinFitter(x, y, yerr)

    fitter = MH(LF.get_loglike, LF.step)

    # bad guess
    parguess = [pars[0] + 0.2, pars[1] - 0.2]

    ntot = nstep + burnin
    # the value returned by run() is not needed; the chain is read back below
    fitter.run(parguess, ntot)

    data = fitter.get_trials()

    if doplot:
        import biggles
        from esutil import stat

        # plot the burnin
        tab = biggles.Table(2, 1)

        steps = numpy.arange(ntot, dtype="i4")

        offset_steps_plot = biggles.FramedPlot()
        offset_steps_plot.ylabel = "offset"

        slope_steps_plot = biggles.FramedPlot()
        slope_steps_plot.ylabel = "slope"
        slope_steps_plot.xlabel = "step number"

        offset_curves = []
        slope_curves = []
        if burnin > 0:
            # burn-in steps are drawn in red; skipped when there are none so
            # that no empty curve is handed to biggles
            offset_curves.append(
                biggles.Curve(steps[0:burnin], data[0:burnin, 0], color="red")
            )
            slope_curves.append(
                biggles.Curve(steps[0:burnin], data[0:burnin, 1], color="red")
            )
        offset_curves.append(biggles.Curve(steps[burnin:], data[burnin:, 0]))
        slope_curves.append(biggles.Curve(steps[burnin:], data[burnin:, 1]))

        offset_steps_plot.add(*offset_curves)
        slope_steps_plot.add(*slope_curves)

        tab[0, 0] = offset_steps_plot
        tab[1, 0] = slope_steps_plot

        tab.show()

        # get stats for the chain, burn-in excluded
        parfit, cov = extract_stats(data[burnin:, :])
        errfit = sqrt(diag(cov))

        # plot the histograms and comparison plot

        tab = biggles.Table(2, 2)
        offsets = data[burnin:, 0]
        slopes = data[burnin:, 1]

        # bin width is one fifth of the scatter of each parameter
        offset_binsize = offsets.std() * 0.2
        slope_binsize = slopes.std() * 0.2

        offset_hist = stat.histogram(offsets, binsize=offset_binsize, more=True)
        slope_hist = stat.histogram(slopes, binsize=slope_binsize, more=True)

        offset_phist = biggles.FramedPlot()
        offset_phist.add(biggles.Histogram(offset_hist["hist"], x0=offset_hist["low"][0], binsize=offset_binsize))
        offset_phist.xlabel = "Offsets"
        offset_phist.add(biggles.PlotLabel(0.1, 0.9, "offset=%0.2f +/- %0.2f" % (parfit[0], errfit[0]), halign="left"))
        # leave headroom above the tallest bin for the label
        offset_phist.yrange = [0, offset_hist["hist"].max() * 1.2]

        slope_phist = biggles.FramedPlot()
        slope_phist.add(biggles.Histogram(slope_hist["hist"], x0=slope_hist["low"][0], binsize=slope_binsize))
        slope_phist.xlabel = "slopes"
        slope_phist.add(biggles.PlotLabel(0.1, 0.9, "slope=%0.2f +/- %0.2f" % (parfit[1], errfit[1]), halign="left"))
        slope_phist.yrange = [0, slope_hist["hist"].max() * 1.2]

        tab[0, 0] = offset_phist
        tab[0, 1] = slope_phist

        # now plot original data and best fit par

        # NOTE(review): parfit[0] multiplies x here although column 0 is
        # labelled "offset" in the plots above -- confirm the parameter
        # order against noisy_line/LinFitter (both values are 1.0 in this
        # test, so the mismatch, if any, is not visible in the fit).
        yfit = parfit[0] * x + parfit[1]

        fitplt = biggles.FramedPlot()
        data_errbar = biggles.SymmetricErrorBarsY(x, y, yerr)
        data_points = biggles.Points(x, y, type="filled circle")
        data_points.label = "Data"

        yfit_curve = biggles.Curve(x, yfit, color="blue")

        key = biggles.PlotKey(0.1, 0.9, [data_points, yfit_curve])

        fitplt.add(data_errbar, data_points, yfit_curve, key)

        tab[1, 0] = fitplt

        tab.show()

    return data

Example #23

0

Show file

File: htm.py Project: d80b2t/python

    def match(self, ra1, dec1, ra2, dec2, radius,
              maxmatch=1,
              htmid2=None,
              htmrev2=None,
              minid=None,
              maxid=None,
              file=None,
              verbose=False):
        """
        Class:
            HTM

        Method Name:
            match

        Purpose:
            Match two sets of ra/dec points using the Hierarchical
            Triangular Mesh code.  Very efficient for large search angles
            and large lists; otherwise the overhead of creating the htm
            indices may make it seem slow.

        Calling Sequence:
            import esutil
            depth = 10
            h = esutil.htm.HTM(depth)
            m1, m2, d12 = h.match(ra1, dec1, ra2, dec2, radius,
                                  maxmatch=1,
                                  htmid2=None,
                                  htmrev2=None,
                                  minid=None,
                                  maxid=None,
                                  file=None)

            Successive calls with the same ra2,dec2 can be sped up by
            preparing the reverse indices once:

                htmrev2, minid, maxid = h.match_prepare(ra2, dec2)

            and then passing them in:

                m1, m2, d12 = h.match(ra1, dec1, ra2, dec2, radius,
                                      htmrev2=htmrev2,
                                      minid=minid, maxid=maxid)

        Inputs:
            ra1, dec1, ra2, dec2:
                ra,dec lists in degrees.  Scalars or arrays, but
                size(ra) == size(dec) is required within each set;
                a ValueError is raised otherwise.

            radius:
                The search radius in degrees.  A scalar, or an array of
                the same length as ra1,dec1.

        Keyword Parameters:
            maxmatch=1:
                The maximum number of allowed matches per point.  The
                default, maxmatch=1, returns the closest match.  Use
                maxmatch<=0 to return all matches.

            htmid2=None:
                The htm indexes for the second list.  Generated
                internally when not sent.  They can be made with

                    htmid = h.lookup_id(ra, dec)

            htmrev2=None:
                The reverse indices, i.e. the result of

                    import esutil
                    htmid2 = h.lookup_id(ra, dec)
                    minid = htmid2.min()
                    hist2, htmrev2 = esutil.stat.histogram(htmid2 - minid,
                                                           rev=True)

                Calculated internally when not sent.  Generating these
                yourself saves time on successive calls.

            minid=None, maxid=None:
                When htmrev2 is sent along with both of these there is no
                need to calculate htmid2.

            file=None:
                A file into which the indices and distances are written;
                useful when the match data will not fit into memory.

                The file is an unformatted binary file that can be read
                with the read() method.  It holds a 64-bit signed integer
                giving the number of rows, followed by rows of

                    i1 i2 d12

                where i1,i2 are the match indices as 64-bit signed
                integers and d12 is the distance between them in degrees
                as a 64-bit float.

                NOTE(review): earlier revisions of this text stated both
                that None,None,None is returned when file is sent and
                that the match count is returned; this method simply
                returns whatever cmatch returns -- confirm against cmatch.

            verbose=False:
                If True, progress messages are written to stdout.

        Outputs:
            m1, m2, d12:
                A tuple of m1,m2,d12.  m1 and m2 are the match indices
                for list1 and list2; d12 is the distance between them in
                degrees.

                Subscripting ra1,dec1 with m1 and ra2,dec2 with m2 makes
                the data "line up", so that points at the same index in
                the two subscripted lists are matches.

        Restrictions:
            The C++ wrapper must be compiled.  This happens
            automatically during installation of esutil.

        Example:
            # Match two lists of ra/dec points; expect 10 matches
            # ordered by distance.

            # match within two arcseconds
            two = 2.0/3600.

            # offset second list by fraction of 2 arcsec in dec
            # but last one won't match anything
            ra1 = numpy.array(  [200.0, 200.0, 200.0, 175.23, 21.36])
            dec1 = numpy.array( [24.3,          24.3,            24.3,  -28.25, -15.32])
            ra2 = numpy.array(  [200.0, 200.0, 200.0, 175.23, 55.25])
            dec2 = numpy.array( [24.3+0.75*two, 24.3 + 0.25*two, 24.3 - 0.33*two, -28.25 + 0.58*two, 75.22])

            m1,m2,d12 = h.match(ra1,dec1,ra2,dec2,two,maxmatch=0)

            for i in range(m1.size):
                print("%s %s %s" % (m1[i], m2[i], d12[i]))

            # this produces
            0 1 0.00013888984367
            0 2 0.00018333285694
            0 0 0.000416666032158
            1 1 0.00013888984367
            1 2 0.00018333285694
            1 0 0.000416666032158
            2 1 0.00013888984367
            2 2 0.00018333285694
            2 0 0.000416666032158
            3 3 0.000322221232243

        Modification History:
            SWIG Wrapper and matching code working 2010-03-03,
                Erin Sheldon, BNL.
            2010-03-19: Default to maxmatch=1, return the closest match.
            2010-06-16: Fixed bug that disallowed scalar inputs.  -BFG
        """

        if not (numpy.size(ra1) == numpy.size(dec1)
                and numpy.size(ra2) == numpy.size(dec2)):
            raise ValueError("require size(ra)==size(dec) for "
                             "both sets of inputs")

        fully_prepared = (htmrev2 is not None
                          and minid is not None
                          and maxid is not None)

        if not fully_prepared:
            if htmid2 is not None:
                # ids were supplied: only fill in the limits not sent
                minid = htmid2.min() if minid is None else minid
                maxid = htmid2.max() if maxid is None else maxid
            else:
                if verbose:
                    stdout.write("looking up ids\n")
                    stdout.flush()
                htmid2 = self.lookup_id(ra2, dec2)
                # freshly computed ids define both limits
                minid, maxid = htmid2.min(), htmid2.max()

            if htmrev2 is None:
                if verbose:
                    stdout.write("Getting reverse indices\n")
                    stdout.flush()
                # only the reverse indices are kept; the counts are dropped
                _hist2, htmrev2 = stat.histogram(htmid2 - minid, rev=True)

        if verbose:
            stdout.write("calling cmatch\n")
            stdout.flush()

        return self.cmatch(radius, ra1, dec1, ra2, dec2,
                           htmrev2, minid, maxid, maxmatch, file)

Example #24

0

Show file

File: fit-prior-gmix.py Project: esheldon/espy

    def doplot(self, gprior, h1, h2, h, minmag, maxmag):
        """
        Plot the data histograms against histograms of random draws from
        gprior, optionally show the plot, and write it to an eps file.

        h1, h2 and h are histogram dicts (with 'hist' and 'low' entries)
        for the two components and the total; minmag and maxmag only
        enter the title and the output file name.  The random draws are
        binned five times finer than the data and rescaled to compare
        with the data counts.  Returns the biggles Table.
        """
        nrand = 1000000
        width = self.binsize
        fine_width = width*0.2

        def labeled(hdict, bin_width, label, **style):
            # wrap a histogram dict as a Histogram carrying a key label
            hp = Histogram(hdict['hist'], x0=hdict['low'][0],
                           binsize=bin_width, **style)
            hp.label = label
            return hp

        tab = Table(2, 1)
        tab.title = '%s %.2f %.2f ' % (self.otype, minmag, maxmag)

        draws = gprior.sample(nrand)
        # the second component is extracted but not plotted
        g1rand, g2rand = draws[:, 0], draws[:, 1]
        grand = numpy.sqrt(g1rand**2 + g2rand**2)

        hrand = histogram(grand, binsize=fine_width,
                          min=0, max=self.maxe, more=True)
        h1rand = histogram(g1rand, binsize=fine_width,
                           min=self.mine_2d, max=self.maxe_2d, more=True)

        # scale the finely binned random counts to the data
        bratio = self.binsize/fine_width
        hrand['hist'] = hrand['hist']*bratio*float(h['hist'].sum())/nrand
        h1rand['hist'] = \
            h1rand['hist']*bratio*float(h1['hist'].sum())/h1rand['hist'].sum()

        # top panel: the two components plus the random first component
        comp1 = labeled(h1, width, r'$g_1$', color='darkgreen')
        comp2 = labeled(h2, width, r'$g_2$', color='blue')
        rand_total = labeled(hrand, fine_width, 'rand', color='red')
        rand_comp1 = labeled(h1rand, fine_width, 'rand', color='red')

        both_panel = FramedPlot()
        both_panel.xlabel = r'$%s$' % self.ellip_name
        both_panel.add(comp1, comp2, rand_comp1,
                       PlotKey(0.9, 0.9, [comp1, comp2, rand_comp1],
                               halign='right'))
        tab[0, 0] = both_panel

        # bottom panel: the total against the random total
        total = labeled(h, width, '|%s|' % self.ellip_name)

        total_panel = FramedPlot()
        total_panel.xlabel = r'$|%s|$' % self.ellip_name
        total_panel.add(total, rand_total,
                        PlotKey(0.9, 0.9, [total, rand_total],
                                halign='right'))
        tab[1, 0] = total_panel

        if self.show:
            tab.show()

        plot_dir = os.path.join(files.get_prior_dir(), 'plots')
        epsfile = os.path.join(
            plot_dir,
            'pofe-%.2f-%.2f-%s.eps' % (minmag, maxmag, self.otype))
        eu.ostools.makedirs_fromfile(epsfile)
        print(epsfile)
        tab.write_eps(epsfile)
        os.system('converter -d 100 %s' % epsfile)

        return tab

Example #25

0

Show file

File: fit-prior-gmix-eta.py Project: esheldon/espy

    def doplot(self, gprior, minmag, maxmag):
        """
        Make a 2x2 table comparing data histograms with random draws from
        gprior: the left column is for eta, the right column for g.  In
        each column the top panel shows the two components and the bottom
        panel the total.

        The data histograms come from self.do_histograms; the draws are
        converted to g with eta1eta2_to_g1g2, binned five times finer
        than the data, and rescaled to compare with the data counts.  The
        table is optionally shown, written to an eps file, and returned.
        """
        from lensing.util import eta1eta2_to_g1g2

        nrand = 1000000

        def labeled(hdict, bin_width, label, **style):
            # wrap a histogram dict as a Histogram carrying a key label
            hp = Histogram(hdict["hist"], x0=hdict["low"][0], binsize=bin_width, **style)
            hp.label = label
            return hp

        def rescale(hrand, h1rand, h, h1, ratio):
            # scale the finely binned random counts to the data, in place
            hrand["hist"] = hrand["hist"] * ratio * float(h["hist"].sum()) / nrand
            h1rand["hist"] = h1rand["hist"] * ratio * float(h1["hist"].sum()) / h1rand["hist"].sum()

        def panels(h1, h2, h, h1rand, hrand, bin_width, fine_width, text):
            # build the (components, total) pair of plots for one quantity
            comp1 = labeled(h1, bin_width, text["comp1"], color="darkgreen")
            comp2 = labeled(h2, bin_width, text["comp2"], color="blue")
            rand_total = labeled(hrand, fine_width, "rand", color="red")
            rand_comp1 = labeled(h1rand, fine_width, "rand", color="red")

            both_panel = FramedPlot()
            both_panel.xlabel = text["xboth"]
            both_key = PlotKey(0.9, 0.9, [comp1, comp2, rand_comp1], halign="right")
            both_panel.add(comp1, comp2, rand_comp1, both_key)

            total = labeled(h, bin_width, text["total"])
            total_panel = FramedPlot()
            total_panel.xlabel = text["xtotal"]
            total_key = PlotKey(0.9, 0.9, [total, rand_total], halign="right")
            total_panel.add(total, rand_total, total_key)

            return both_panel, total_panel

        tab = Table(2, 2)
        tab.title = "%s %.2f %.2f " % (self.otype, minmag, maxmag)

        h1_g, h2_g, h_g = self.do_histograms(minmag, maxmag, "g")
        h1_eta, h2_eta, h_eta = self.do_histograms(minmag, maxmag, "eta")

        # bin sizes are read after the do_histograms calls above
        width_eta = self.binsize_eta
        width_g = self.binsize_g
        fine_eta = width_eta * 0.2
        fine_g = width_g * 0.2

        draws = gprior.sample(nrand)
        eta1_rand, eta2_rand = draws[:, 0], draws[:, 1]
        eta_rand = numpy.sqrt(eta1_rand ** 2 + eta2_rand ** 2)

        g1_rand, g2_rand = eta1eta2_to_g1g2(eta1_rand, eta2_rand)
        g_rand = numpy.sqrt(g1_rand ** 2 + g2_rand ** 2)

        hrand_eta = histogram(eta_rand, binsize=fine_eta, min=0, max=self.max_eta, more=True)
        h1rand_eta = histogram(eta1_rand, binsize=fine_eta, min=self.min_eta_2d, max=self.max_eta_2d, more=True)

        hrand_g = histogram(g_rand, binsize=fine_g, min=0, max=self.max_g, more=True)
        h1rand_g = histogram(g1_rand, binsize=fine_g, min=self.min_g_2d, max=self.max_g_2d, more=True)

        # eta column
        rescale(hrand_eta, h1rand_eta, h_eta, h1_eta, self.binsize_eta / fine_eta)
        eta_text = {
            "xboth": r"$\eta$",
            "comp1": r"$\eta_1$",
            "comp2": r"$\eta_2$",
            "xtotal": r"$|\eta|$",
            "total": r"$|\eta|$",
        }
        both_eta, total_eta = panels(h1_eta, h2_eta, h_eta, h1rand_eta, hrand_eta, width_eta, fine_eta, eta_text)
        tab[0, 0] = both_eta
        tab[1, 0] = total_eta

        # g column
        rescale(hrand_g, h1rand_g, h_g, h1_g, self.binsize_g / fine_g)
        g_text = {
            "xboth": r"$g$",
            "comp1": r"$g_1$",
            "comp2": r"$g_2$",
            "xtotal": r"$|g|$",
            "total": "|g|",
        }
        both_g, total_g = panels(h1_g, h2_g, h_g, h1rand_g, hrand_g, width_g, fine_g, g_text)
        tab[0, 1] = both_g
        tab[1, 1] = total_g

        if self.show:
            tab.show()

        plot_dir = os.path.join(files.get_prior_dir(), "plots")
        epsfile = os.path.join(plot_dir, "pofe-pofeta-%.2f-%.2f-%s.eps" % (minmag, maxmag, self.otype))
        eu.ostools.makedirs_fromfile(epsfile)
        print(epsfile)
        tab.write_eps(epsfile)
        os.system("converter -d 100 %s" % epsfile)

        return tab

Example #26

0

Show file

File: fit-prior-true.py Project: esheldon/espy

    def doplot(self, fitres, h1, h2, h, minmag, maxmag):
        """
        Plot the data histograms against random draws from the prior
        built from fitres, optionally show the plot, and write it to an
        eps file.

        h1, h2 and h are histogram dicts (with 'hist', 'low' and 'high'
        entries) for the two components and the total; minmag and maxmag
        only enter the title and the output file name.  Returns the
        biggles Table.
        """
        nrand = 100000
        width = self.binsize

        def labeled(hdict, label, **style):
            # wrap a histogram dict as a Histogram carrying a key label
            hp = Histogram(hdict['hist'], x0=hdict['low'][0],
                           binsize=width, **style)
            hp.label = label
            return hp

        tab = Table(2, 1)
        tab.title = '%s %.2f %.2f ' % (self.objtype, minmag, maxmag)

        gprior = self.get_prior(fitres)

        # the second 2d component is drawn but not plotted
        g1rand, g2rand = gprior.sample2d(nrand)
        grand = gprior.sample1d(nrand)

        # the random total is binned over the range of the data histogram
        hrand = histogram(grand, binsize=width,
                          min=h['low'][0], max=h['high'][-1], more=True)
        h1rand = histogram(g1rand, binsize=width,
                           min=-1., max=1., more=True)

        # rescale the random counts to compare with the data
        hrand['hist'] = hrand['hist']*float(h['hist'].sum())/nrand
        h1rand['hist'] = \
            h1rand['hist']*float(h1['hist'].sum())/h1rand['hist'].sum()

        # top panel: the two components plus the random first component
        comp1 = labeled(h1, r'$g_1$', color='red')
        comp2 = labeled(h2, r'$g_2$', color='blue')
        rand_total = labeled(hrand, 'rand', color='magenta')
        rand_comp1 = labeled(h1rand, 'rand', color='magenta')

        both_panel = FramedPlot()
        both_panel.xlabel = r'$g$'
        both_panel.add(comp1, comp2, rand_comp1,
                       PlotKey(0.9, 0.9, [comp1, comp2, rand_comp1],
                               halign='right'))
        tab[0, 0] = both_panel

        # bottom panel: the total against the random total
        total = labeled(h, '|g|')

        total_panel = FramedPlot()
        total_panel.xlabel = r'$|g|$'
        total_panel.add(total, rand_total,
                        PlotKey(0.9, 0.9, [total, rand_total],
                                halign='right'))
        tab[1, 0] = total_panel

        if self.show:
            tab.show()

        plot_dir = os.path.join(files.get_prior_dir(), 'plots')
        epsfile = os.path.join(
            plot_dir,
            'pofe-%.2f-%.2f-%s.eps' % (minmag, maxmag, self.objtype))
        eu.ostools.makedirs_fromfile(epsfile)
        print(epsfile)
        tab.write_eps(epsfile)
        os.system('converter -d 100 %s' % epsfile)

        return tab

Example #27

0

Show file

File: fit-prior-true-old.py Project: esheldon/espy

    def doplot(self, fitres, h1, h2, h, minmag, maxmag):
        """
        Plot the data histograms against random draws from the prior
        built from fitres and optionally show the plot.

        h1, h2 and h are histogram dicts (with 'hist' and 'low' entries)
        for the two components and the total; minmag and maxmag only
        enter the title.  Returns the biggles Table.
        """
        nrand = 100000
        width = self.binsize

        def labeled(hdict, label, **style):
            # wrap a histogram dict as a Histogram carrying a key label
            hp = Histogram(hdict['hist'], x0=hdict['low'][0],
                           binsize=width, **style)
            hp.label = label
            return hp

        tab = Table(2, 1)
        tab.title = '%s %.2f %.2f ' % (self.objtype, minmag, maxmag)

        gprior = self.get_prior(fitres)

        # the second 2d component is drawn but not plotted
        g1rand, g2rand = gprior.sample2d(nrand)
        grand = gprior.sample1d(nrand)

        hrand = histogram(grand, binsize=width, min=0., max=1., more=True)
        h1rand = histogram(g1rand, binsize=width,
                           min=-1., max=1., more=True)

        # rescale the random counts to compare with the data
        hrand['hist'] = hrand['hist']*float(h['hist'].sum())/nrand
        h1rand['hist'] = \
            h1rand['hist']*float(h1['hist'].sum())/h1rand['hist'].sum()

        # top panel: the two components plus the random first component
        comp1 = labeled(h1, r'$g_1$', color='red')
        comp2 = labeled(h2, r'$g_2$', color='blue')
        rand_total = labeled(hrand, 'rand', color='magenta')
        rand_comp1 = labeled(h1rand, 'rand', color='magenta')

        both_panel = FramedPlot()
        both_panel.xlabel = r'$g$'
        both_panel.add(comp1, comp2, rand_comp1,
                       PlotKey(0.9, 0.9, [comp1, comp2, rand_comp1],
                               halign='right'))
        tab[0, 0] = both_panel

        # bottom panel: the total against the random total
        total = labeled(h, '|g|')

        total_panel = FramedPlot()
        total_panel.xlabel = r'$|g|$'
        total_panel.add(total, rand_total,
                        PlotKey(0.9, 0.9, [total, rand_total],
                                halign='right'))
        tab[1, 0] = total_panel

        if self.show:
            tab.show()

        return tab

Example #28

0

Show file

File: validation.py Project: esheldon/espy

    def compare_same_same(self, type, show=True):
        """
        Compare the redshift histogram of the recoverable validation set
        with the weighted redshift histogram of the training set.

        Steps:
            read in the "all" file
            read in the validation set and the num file
                take the recoverable subset, i.e. num > 0
            get the z values for those objects from the all file, using
                the photoid as an index back into it

            plot the normalized histogram of the validation redshifts
            overplot the normalized weighted histogram of the weights file

        The plot is written to the 'zhistfile' eps, converted, and shown
        when show is True.

        The original plan also called for binning by true validation
        redshift and plotting
            ztrue - <z>
        where <z> is the expectation value of z based on the p(z),
            <z> = integral( z*p(z) )/integral( p(z) )
        (which will be noisy); that step is not done in this method.
        """

        paths = self.same_same_fdict(type)

        # the original file
        training_all = zphot.weighting.read_training(paths['origfile'])

        # The validation set; its "photoid" field is an id pointing back
        # into the original file.  It is read here but not used below.
        valid = zphot.weighting.read_photo(paths['photofile'])
        # version 1 of the num file; num > 0 is demanded below
        num = zphot.weighting.read_num(paths['numfile1'])

        # the weights file
        weights = zphot.weighting.read_training(paths['wfile2'])

        # recoverable objects, as indexes back into the original file
        recoverable = where1(num['num'] > 0)
        keep_ids = num['photoid'][recoverable]
        zvalid = training_all['z'][keep_ids]

        binsize = 0.0314
        zmin, zmax = 0, 1.1

        plt = FramedPlot()

        # validation redshifts, normalized to unit sum
        valid_dict = histogram(zvalid, min=zmin, max=zmax,
                               binsize=binsize, more=True)
        valid_counts = valid_dict['hist']
        pvhist = biggles.Histogram(valid_counts/(float(valid_counts.sum())),
                                   x0=valid_dict['low'][0],
                                   binsize=binsize)
        pvhist.label = 'validation'

        # weighted training redshifts, normalized to unit sum
        weights_dict = histogram(weights['z'], min=zmin, max=zmax,
                                 binsize=binsize,
                                 weights=weights['weight'], more=True)
        weighted_counts = weights_dict['whist']
        pwhist = biggles.Histogram(weighted_counts/weighted_counts.sum(),
                                   x0=weights_dict['low'][0],
                                   binsize=binsize, color='red')
        pwhist.label = 'weighted train'

        plt.add(pvhist, pwhist, PlotKey(0.6, 0.6, [pvhist, pwhist]))
        plt.add(biggles.PlotLabel(.8, .9, type))

        zhistfile = paths['zhistfile']
        plt.write_eps(zhistfile)
        converter.convert(zhistfile, dpi=90, verbose=True)
        if show:
            plt.show()

Example #29

0

Show file

File: weightingold.py Project: esheldon/espy

def plot_results1d(data1, data2, weights1, binsize=None,
                   xmin=None, xmax=None, xlabel=None, title=None,
                   epsfile=None, pngfile=None, show=True,
                   label1='dataset 1',
                   label2='dataset 2'):
    """
    Compare the histogram of data2 with the raw and weighted histograms of
    data1, all at the same binsize, and return the resulting biggles Table.

    The top panel overlays the three histograms, each normalized to unit
    sum.  The bottom panel shows, bin by bin, (data2 - weighted data1),
    a zero line, and a label with the effective number of data1 objects,
    sum(weights1/weights1.max()).

    Unless the domains are exactly the same, you should restrict xmin,xmax
    so that the normalizations will match correctly.

    parameters
    ----------
    data1, data2:
        The two data sets.  They must provide .min(), .max(), .std() and
        .size and support boolean-mask style indexing (presumably numpy
        arrays -- confirm against callers).
    weights1:
        Weights for data1, used for the weighted histogram.
    binsize:
        Histogram bin size.  Default is 0.2 times the standard deviation
        of the data2 values lying strictly between xmin and xmax.
    xmin, xmax:
        Histogram range.  Default to the smallest minimum / largest
        maximum of the two data sets.
    xlabel, title:
        Applied to both panels.
    epsfile, pngfile:
        If not None, the table is written to these files (the png is
        written at 800x800).
    show:
        If True, display the table.
    label1, label2:
        Key labels for the two data sets.

    returns
    -------
    The biggles.Table holding both panels.
    """
    import biggles
    from esutil.stat import histogram

    # by default the range spans both data sets
    if xmin is None:
        xmin = min([data1.min(), data2.min()])
    if xmax is None:
        xmax = max([data1.max(), data2.max()])

    if binsize is None:
        inside, = where((data2 < xmax) & (data2 > xmin))
        binsize = 0.2*data2[inside].std()

    # effective number of objects: weights relative to the largest one
    effnum = (weights1/weights1.max()).sum()
    effperc = effnum/data1.size*100
    plabtext = 'effnum: %d/%d = %0.1f%%' % (effnum, data1.size, effperc)

    # every histogram uses the same binning
    hist_kw = {'binsize': binsize, 'min': xmin, 'max': xmax, 'more': True}

    def _unit_sum(counts):
        # normalize so the bins sum to one
        return counts/float(counts.sum())

    print("    plotting hist match results")
    print("    histogramming data set 1")
    res1 = histogram(data1, **hist_kw)
    print("    histogramming data set 1 with weights")
    res1w = histogram(data1, weights=weights1, **hist_kw)

    print("    histogramming data set 2")
    res2 = histogram(data2, **hist_kw)

    h1 = _unit_sum(res1['hist'])
    h1w = _unit_sum(res1w['whist'])
    h2 = _unit_sum(res2['hist'])

    # the weighted histogram is drawn from the same starting edge as the
    # unweighted data1 histogram
    start1 = res1['low'][0]

    ph1 = biggles.Histogram(h1, binsize=binsize, x0=start1, color='blue')
    ph1.label = label1

    ph1w = biggles.Histogram(h1w, binsize=binsize, x0=start1,
                             color='red', width=2)
    ph1w.label = label1+' weighted'

    ph2 = biggles.Histogram(h2, binsize=binsize, x0=res2['low'][0], width=2)
    ph2.label = label2

    # top panel: the three overlaid histograms plus a key
    top = biggles.FramedPlot()
    top.title = title
    for curve in (ph1, ph2, ph1w):
        top.add(curve)
    top.xlabel = xlabel
    top.add(biggles.PlotKey(0.1, 0.90, [ph1, ph2, ph1w], halign='left'))

    # bottom panel: per-bin difference, zero line and effective number
    bottom = biggles.FramedPlot()
    bottom.add(
        biggles.Points(res1['center'], h2 - h1w),
        biggles.Curve([xmin, xmax], [0, 0]),
        biggles.PlotLabel(0.05, 0.9, plabtext, halign='left')
    )
    bottom.xlabel = xlabel
    bottom.ylabel = '%s-%s weighted' % (label2, label1)
    bottom.title = title

    tab = biggles.Table(2, 1)
    tab[0, 0] = top
    tab[1, 0] = bottom

    if epsfile is not None:
        tab.write_eps(epsfile)
    if pngfile is not None:
        tab.write_img(800, 800, pngfile)

    if show:
        tab.show()

    return tab

Example #30

0

Show file

File: htm.py Project: lqleeqee/esutil

    def bincount(self,
                 rmin, rmax, nbin, ra1, dec1, ra2, dec2, scale=None,
                 htmid2=None, 
                 htmrev2=None,
                 minid=None,
                 maxid=None,
                 getbins=True):
        """
        Class:
            HTM

        Method Name:
            bincount 
        
        Purpose:

            Count number of pairs between two ra/dec lists as a function of
            their separation.  The binning is equally spaced in the log10 of
            the separation.  By default the bin sizes are in degrees, unless
            the scale= keyword is sent, in which case the units are
            angle*scale with angle in radians.

            This code can be used to calculate correlation functions by
            calling it on the data as well as random points.
        
        
        Calling Sequence:
            import esutil
            depth = 10
            h=esutil.htm.HTM(depth)
            rlower, rupper, counts = h.bincount(
                 rmin, rmax, nbin, ra1, dec1, ra2, dec2, 
                 scale=None,
                 htmid2=None, 
                 htmrev2=None,
                 minid=None,
                 maxid=None,
                 getbins=True)

        Inputs:
            rmin,rmax: Smallest and largest separations to consider.  This
                is in degrees unless the scale= keyword is sent, in which
                case the units are angle*scale with angle in radians.
            nbin:  The number of bins to use.  Bins will be equally spaced
                in the log10 of the separation.
            ra1,dec1,ra2,dec2: 
                ra,dec lists in degrees.  Can be scalars or arrays but require
                len(ra) == len(dec) in each set.
        
        Keyword Parameters:

            scale:  
                A scale to apply to the angular separations.  Must be the same
                length as ra1/dec1 or a scalar.  This is useful for converting
                angle to physical distance.  For example, scale could be the
                angular diameter distance to cosmological objects in list 1.

                If scale is sent, rmin,rmax must be in units of angle*scale
                where angle is in *radians*, as opposed to degrees when scale
                is not sent.

            htmid2=None: 
                the htm indexes for the second list.  They are generated
                internally if not sent and needed, i.e. when any of
                htmrev2, minid or maxid is missing.  You can generate these
                with

                    htmid = h.lookup_id(ra, dec)

            htmrev2=None: 
                The reverse indices from histogramming the offset ids:

                    import esutil
                    htmid2 = h.lookup_id(ra, dec)
                    minid=htmid2.min()
                    hist2,htmrev2 = esutil.stat.histogram(htmid2-minid,
                                                          rev=True)

                If not sent it is calculated internally for fast lookups.  You
                can save time on successive calls by generating these
                yourself.

            minid=None, maxid=None:
                The minimum and maximum of htmid2.  Each is taken from
                htmid2 when not sent.  If htmid2 itself has to be generated
                internally, both are recomputed from it.

                When htmrev2, minid and maxid are all sent, nothing is
                generated internally and the values are passed through
                as given, so htmid2 is not needed.

            getbins: 
                If True, return a tuple 
                    rlower,rupper,counts 

                instead of just counts.  rlower,rupper are the lower and upper
                limits of each bin.  getbins=True is the default.
        
        Outputs:

            if getbins=True:
                rlower,rupper,counts:  the lower and upper limits of each
                bin, followed by the pair counts.
            if getbins=False:
                counts:  The pair counts in equally spaced logarithmic bins
                    in separation.

        
        Restrictions:
            The C++ wrapper must be compiled.  This will happen automatically
            during installation of esutil.
        

         EXAMPLE:
            import esutil

            # simple angular counts, no scaling
            # cross correlate with second catalog
            h=esutil.htm.HTM()
            rmin=10/3600. # degrees
            rmax=1000/3600. # degrees
            nbin=25
            rlower,rupper,counts = h.bincount(rmin,rmax,nbin,
                                              cat1['ra'],cat1['dec'],
                                              cat2['ra'],cat2['dec'])



            # counts using scaling of the angular separations with
            # the angular diameter distance to get projected
            # physical separations.
            c=esutil.cosmology.Cosmo()

            # get angular diameter distance to catalog 1 objects
            DA=c.Da(0.0, cat1['z'])

            # cross correlate with second catalog
            h=esutil.htm.HTM()
            rmin=0.025 # Mpc
            rmax=30.0 # Mpc
            nbin=25
            rlower,rupper,counts = h.bincount(rmin,rmax,nbin,
                                              cat1['ra'],cat1['dec'],
                                              cat2['ra'],cat2['dec'],
                                              scale=DA)
        
         MODIFICATION HISTORY:
             Created:  2010-03-31, Erin Sheldon, BNL


        """

        # The ids of list 2 are only a means to obtain htmrev2/minid/maxid.
        # When the caller supplies all three there is nothing to derive, so
        # skip the (potentially expensive) id lookup altogether.
        if htmrev2 is None or minid is None or maxid is None:
            if htmid2 is None:
                stdout.write("Generating HTM ids\n")
                htmid2 = self.lookup_id(ra2, dec2)
                minid = htmid2.min()
                maxid = htmid2.max()
            else:
                if minid is None:
                    minid = htmid2.min()
                if maxid is None:
                    maxid = htmid2.max()

            if htmrev2 is None:
                stdout.write("Generating reverse indices\n")
                # only the reverse indices are needed, not the histogram
                _, htmrev2 = stat.histogram(htmid2-minid, rev=True)

        counts = self.cbincount(rmin,rmax,nbin,ra1,dec1,ra2,dec2,
                                htmrev2,minid,maxid,scale)
        if not getbins:
            return counts

        lower,upper = log_bins(rmin, rmax, nbin)
        return lower,upper,counts

Example #31

0

Show file

    def bincount(self,
                 rmin,
                 rmax,
                 nbin,
                 ra1,
                 dec1,
                 ra2,
                 dec2,
                 scale=None,
                 htmid2=None,
                 htmrev2=None,
                 minid=None,
                 maxid=None,
                 getbins=True):
        """
        Class:
            HTM

        Method Name:
            bincount 
        
        Purpose:

            Count number of pairs between two ra/dec lists as a function of
            their separation.  The binning is equally spaced in the log10 of
            the separation.  By default the bin sizes are in degrees, unless
            the scale= keyword is sent, in which case the units are
            angle*scale with angle in radians.

            This code can be used to calculate correlation functions by
            calling it on the data as well as random points.
        
        
        Calling Sequence:
            import esutil
            depth = 10
            h=esutil.htm.HTM(depth)
            rlower, rupper, counts = h.bincount(
                 rmin, rmax, nbin, ra1, dec1, ra2, dec2, 
                 scale=None,
                 htmid2=None, 
                 htmrev2=None,
                 minid=None,
                 maxid=None,
                 getbins=True)

        Inputs:
            rmin,rmax: Smallest and largest separations to consider.  This
                is in degrees unless the scale= keyword is sent, in which
                case the units are angle*scale with angle in radians.
            nbin:  The number of bins to use.  Bins will be equally spaced
                in the log10 of the separation.
            ra1,dec1,ra2,dec2: 
                ra,dec lists in degrees.  Can be scalars or arrays but require
                len(ra) == len(dec) in each set.
        
        Keyword Parameters:

            scale:  
                A scale to apply to the angular separations.  Must be the same
                length as ra1/dec1 or a scalar.  This is useful for converting
                angle to physical distance.  For example, scale could be the
                angular diameter distance to cosmological objects in list 1.

                If scale is sent, rmin,rmax must be in units of angle*scale
                where angle is in *radians*, as opposed to degrees when scale
                is not sent.

            htmid2=None: 
                the htm indexes for the second list.  They are generated
                internally if not sent and needed, i.e. when any of
                htmrev2, minid or maxid is missing.  You can generate these
                with

                    htmid = h.lookup_id(ra, dec)

            htmrev2=None: 
                The reverse indices from histogramming the offset ids:

                    import esutil
                    htmid2 = h.lookup_id(ra, dec)
                    minid=htmid2.min()
                    hist2,htmrev2 = esutil.stat.histogram(htmid2-minid,
                                                          rev=True)

                If not sent it is calculated internally for fast lookups.  You
                can save time on successive calls by generating these
                yourself.

            minid=None, maxid=None:
                The minimum and maximum of htmid2.  Each is taken from
                htmid2 when not sent.  If htmid2 itself has to be generated
                internally, both are recomputed from it.

                When htmrev2, minid and maxid are all sent, nothing is
                generated internally and the values are passed through
                as given, so htmid2 is not needed.

            getbins: 
                If True, return a tuple 
                    rlower,rupper,counts 

                instead of just counts.  rlower,rupper are the lower and upper
                limits of each bin.  getbins=True is the default.
        
        Outputs:

            if getbins=True:
                rlower,rupper,counts:  the lower and upper limits of each
                bin, followed by the pair counts.
            if getbins=False:
                counts:  The pair counts in equally spaced logarithmic bins
                    in separation.

        
        Restrictions:
            The C++ wrapper must be compiled.  This will happen automatically
            during installation of esutil.
        

         EXAMPLE:
            import esutil

            # simple angular counts, no scaling
            # cross correlate with second catalog
            h=esutil.htm.HTM()
            rmin=10/3600. # degrees
            rmax=1000/3600. # degrees
            nbin=25
            rlower,rupper,counts = h.bincount(rmin,rmax,nbin,
                                              cat1['ra'],cat1['dec'],
                                              cat2['ra'],cat2['dec'])



            # counts using scaling of the angular separations with
            # the angular diameter distance to get projected
            # physical separations.
            c=esutil.cosmology.Cosmo()

            # get angular diameter distance to catalog 1 objects
            DA=c.Da(0.0, cat1['z'])

            # cross correlate with second catalog
            h=esutil.htm.HTM()
            rmin=0.025 # Mpc
            rmax=30.0 # Mpc
            nbin=25
            rlower,rupper,counts = h.bincount(rmin,rmax,nbin,
                                              cat1['ra'],cat1['dec'],
                                              cat2['ra'],cat2['dec'],
                                              scale=DA)
        
         MODIFICATION HISTORY:
             Created:  2010-03-31, Erin Sheldon, BNL


        """

        # The ids of list 2 are only a means to obtain htmrev2/minid/maxid.
        # When the caller supplies all three there is nothing to derive, so
        # skip the (potentially expensive) id lookup altogether.
        if htmrev2 is None or minid is None or maxid is None:
            if htmid2 is None:
                stdout.write("Generating HTM ids\n")
                htmid2 = self.lookup_id(ra2, dec2)
                minid = htmid2.min()
                maxid = htmid2.max()
            else:
                if minid is None:
                    minid = htmid2.min()
                if maxid is None:
                    maxid = htmid2.max()

            if htmrev2 is None:
                stdout.write("Generating reverse indices\n")
                # only the reverse indices are needed, not the histogram
                _, htmrev2 = stat.histogram(htmid2 - minid, rev=True)

        counts = self.cbincount(rmin, rmax, nbin, ra1, dec1, ra2, dec2,
                                htmrev2, minid, maxid, scale)
        if not getbins:
            return counts

        lower, upper = log_bins(rmin, rmax, nbin)
        return lower, upper, counts