コード例 #1
0
def main(argv):
    """Write the first five moments of the true and stacked N(z) per code.

    For each entry of ``codes`` the file ``NZPLOT_vectors.out`` is read from
    the directory ``basepathpart + code`` (first row skipped).  Column 0 is
    used as the redshift grid and column 1 as the true N(z); both are taken
    from the first code's file only.  Column 2 is that code's stacked N(z).
    Moments of order 1-5 are computed with ``qp.utils.calculate_moment`` and
    written to ``MOMENTS_NZ.out``.

    Parameters
    ----------
    argv : sequence
        Accepted for the usual script entry-point signature; not used.
    """
    test_moments()
    basepathpart = "/sandbox/sschmidt/DESC/TESTDC1"
    codes = ("ANNZ2", "BPZ", "DELIGHT", "EAZY/NEWSCATMAG", "FLEXZ/MAR5RESULTS",
             "GPZ", "LEPHARE", "METAPHOR", "NN", "SKYNET", "TPZ", "NULL")
    nzvectorfile = "NZPLOT_vectors.out"

    def write_moments(outfp, pdfobj):
        # One output line holding the moments N=1..5, each as "%3.3f ".
        for order in range(1, 6):
            moment = qp.utils.calculate_moment(pdfobj,
                                               order,
                                               using="gridded",
                                               dx=0.001)
            outfp.write("%3.3f " % (moment))
        outfp.write("\n")

    z_array = None  # set from the first code's file and reused afterwards
    # The context manager closes the output file even if a load or a moment
    # calculation raises part-way through the loop.
    with open("MOMENTS_NZ.out", "w") as outfp:
        for i, xfile in enumerate(codes):
            # The code name is appended directly to the base path (no
            # separator), e.g. ".../TESTDC1BPZ".
            direcpath = "%s%s" % (basepathpart, xfile)
            fullpath = os.path.join(direcpath, nzvectorfile)
            data = np.loadtxt(fullpath, skiprows=1)
            if i == 0:
                z_array = data[:, 0]
                trueobj = qp.PDF(gridded=(z_array, data[:, 1]))
                outfp.write("####TRUE N(z) moments N=1-5\n")
                write_moments(outfp, trueobj)

            print("read in data for %s" % xfile)

            stackobj = qp.PDF(gridded=(z_array, data[:, 2]))
            outfp.write("####%s N(z) moments for N=1-5\n" % (xfile))
            write_moments(outfp, stackobj)
    print("finished")
コード例 #2
0
def main(argv):
    """
    Quick script to calculate the RMSE for the stacked N(z) using empiricalCDF
    rather than the values output in NZPLOTvectors.out.  This is necessary
    because qp uses KDE and Scott's rule to determine smoothing in the specz
    sample in that output, and Scott's rule is not ideal for some codes.  Using
    the eCDF instead eliminates that bandwidth choice.

    For each code, column 0 of its ``NZPLOT_vectors.out`` is the redshift
    grid and column 2 the stacked N(z).  The "true" curve is
    ``speczCDF(szvec, z_array)`` and the stacked curve is the cumulative sum
    of column 2; their RMSE over (0.0, 2.0) is written to
    ``RMSE_NZ_eCDF.out``, one line per code.

    Parameters
    ----------
    argv : sequence
        Accepted for the usual script entry-point signature; not used.
    """
    basepathpart = "./TESTDC1"
    codes = ("ANNZ2", "BPZ", "DELIGHT", "EAZY", "FLEXZ", "GPZ", "LEPHARE",
             "METAPHOR", "NN", "SKYNET", "TPZ", "NULL2")
    nzvectorfile = "NZPLOT_vectors.out"
    szfile = "TESTDC1BPZ/BPZgold_idszmag.txt"

    # The context manager closes the output file even if a later load or
    # RMSE calculation raises.
    with open("RMSE_NZ_eCDF.out", "w") as outfp:
        # Column 1 of the spec-z file is used as the vector of true redshifts.
        szvec = np.loadtxt(szfile)[:, 1]

        for xfile in codes:
            print("working on code %s\n" % (xfile))
            # The code name is appended directly to the base path (no
            # separator), e.g. "./TESTDC1BPZ".
            direcpath = "%s%s" % (basepathpart, xfile)
            fullpath = os.path.join(direcpath, nzvectorfile)
            data = np.loadtxt(fullpath, skiprows=1)
            z_array = data[:, 0]
            truezcdf = speczCDF(szvec, z_array)
            trueobj = qp.PDF(gridded=(z_array, truezcdf))

            stacknzvec = data[:, 2]
            print("read in data for %s" % xfile)
            cumstack = np.cumsum(stacknzvec)

            stackobj = qp.PDF(gridded=(z_array, cumstack))
            xrmse = qp.utils.calculate_rmse(trueobj,
                                            stackobj,
                                            limits=(0.0, 2.0),
                                            dx=0.001)
            outfp.write("%sN(z)RMSE: %6.6f\n" % (xfile, xrmse))
    print("finished")
コード例 #3
0
    def __init__(self,ensemble_obj,truth_vals,eval_grid=None,using='gridded',dx=0.001):
        """Store the inputs and build the stacked p(z) from the ensemble.

        Parameters
        ----------
        ensemble_obj : qp Ensemble
            Ensemble whose ``stack`` method is called to produce the
            stacked distribution.
        truth_vals : array
            The "truth" spec-z values; stored as ``self.truth``.
        eval_grid : array or None
            Grid on which the ensemble is stacked.  When ``None`` the grid
            ``np.arange(0.005,2.12,0.01)`` is used and a notice is printed.
        using : str
            Stored as ``self.using``.  The stack itself is always built
            with the ``'gridded'`` parametrization.
        dx : float
            Stored as ``self.dx``.
        """
        self.ensemble_obj = ensemble_obj
        self.truth = truth_vals
        if eval_grid is not None:
            self.eval_grid = eval_grid
        else:
            self.eval_grid = np.arange(0.005,2.12,0.01)
            print("using default evaluation grid of numpy.arange(0.005,2.12,0.01)\n")
        self.using = using
        self.dx = dx

        # Stack the ensemble on the evaluation grid and wrap the resulting
        # pair (first and second element of the 'gridded' entry) in a
        # single gridded qp.PDF.
        stack_result = self.ensemble_obj.stack(loc=self.eval_grid, using='gridded')
        grid_points = stack_result['gridded'][0]
        stacked_values = stack_result['gridded'][1]
        self.stackpz = qp.PDF(gridded=(grid_points, stacked_values))
コード例 #4
0
 def NZAD(self, vmin = 0.005, vmax = 1.995, delv = 0.05):
   """
   Compute the Anderson-Darling statistic and p-value comparing the
   stacked p(z) (``self.stackpz``) with the vector of true spec-z's.

   The stacked p(z) is re-evaluated on ``np.arange(vmin, vmax+delv, delv)``
   and wrapped in a new qp object restricted to that grid, because the
   test needs a distribution defined over the [vmin, vmax] range.

   Parameters:
   -----------
   vmin, vmax: float
     spec-z values not strictly inside (vmin, vmax) are discarded
   delv: float
     grid spacing used to build the new qp object on [vmin, vmax]

   Returns:
   --------
   Anderson-Darling statistic and pvalue
   """
   print("using %f and %f for vmin and vmax\n"%(vmin,vmax))
   true_z = self.truth
   inside = (true_z > vmin) & (true_z < vmax)

   # Re-grid the stacked p(z).  The two trailing positional flags are passed
   # through unchanged; NOTE(review): confirm their meaning against the
   # signature of qp.PDF.evaluate.
   grid = np.arange(vmin,vmax+delv,delv)
   regridded = self.stackpz.evaluate(grid,'gridded',True,False)
   restricted_pdf = qp.PDF(gridded = (regridded[0],regridded[1]))

   # skgof expects an object exposing a .cdf method, hence the QPPDFCDF
   # wrapper (same approach as Rongpu's use of the skgof functions).
   cdf_wrapper = QPPDFCDF(restricted_pdf,self.dx)
   result = skgof.ad_test(true_z[inside],cdf_wrapper)
   return result.statistic, result.pvalue
コード例 #5
0
ファイル: ensemble.py プロジェクト: johannct/qp
 def make_pdfs_helper(i):
     """Build the ``i``-th ``qp.PDF`` from the enclosing object's parameters.

     Each per-PDF parametrization is taken from index ``i`` of the matching
     attribute on ``self``; the gridded values come from row ``i`` of the
     last element of ``self.gridded``.  ``limits`` and ``scheme`` are passed
     through as they are.
     """
     pdf_kwargs = dict(funcform=self.mix_mod[i],
                       quantiles=self.quantiles[i],
                       histogram=self.histogram[i],
                       gridded=self.gridded[-1][i],
                       samples=self.samples[i],
                       limits=self.limits,
                       scheme=self.scheme,
                       vb=False)
     return qp.PDF(**pdf_kwargs)
コード例 #6
0
def main(argv):
    """
    Quick script to calculate the RMSE for the QQ plots.

    For each code, ``TESTQQvectors.out`` is read (first row skipped).
    Column 0 is used both as the grid and as the reference curve (so the
    reference is the identity line), and column 1 is the curve being
    compared against it.  The RMSE between the two over (0.0, 1.0) is
    written to ``RMSE_QQ.out``, one line per code.

    Parameters
    ----------
    argv : sequence
        Accepted for the usual script entry-point signature; not used.
    """
    basepathpart = "./TESTDC1"
    codes = ("ANNZ2", "BPZ", "DELIGHT", "EAZY", "FLEXZ", "GPZ", "LEPHARE",
             "METAPHOR", "NN", "SKYNET", "TPZ", "NULL2")
    nzvectorfile = "TESTQQvectors.out"

    # The context manager closes the output file even if a load or the RMSE
    # calculation raises part-way through the loop.
    with open("RMSE_QQ.out", "w") as outfp:
        for xfile in codes:
            # The code name is appended directly to the base path (no
            # separator), e.g. "./TESTDC1BPZ".
            direcpath = "%s%s" % (basepathpart, xfile)
            fullpath = os.path.join(direcpath, nzvectorfile)
            data = np.loadtxt(fullpath, skiprows=1)
            trueqqvec = data[:, 0]
            trueobj = qp.PDF(gridded=(trueqqvec, trueqqvec))

            stackqqvec = data[:, 1]
            print("read in data for %s" % xfile)

            stackobj = qp.PDF(gridded=(trueqqvec, stackqqvec))
            xrmse = qp.utils.calculate_rmse(trueobj,
                                            stackobj,
                                            limits=(0.0, 1.0),
                                            dx=0.001)
            outfp.write("%sQQRMSE: %5.5f\n" % (xfile, xrmse))
    print("finished")
コード例 #7
0
ファイル: test_PDF.py プロジェクト: meshch/qp
 def test_wide_separation_quantiles(self):
     """
     A composite PDF with two widely separated, very narrow modes can
     have its quantiles misestimated.  That is tolerated, but the KLD
     between the true PDF and its quantile approximation must not be NaN.
     """
     limits = (0., 5.)
     # Pathological mixture: two narrow Gaussians far apart, weights 0.1/0.9.
     mixture = [
         {'function': sps.norm(loc=0.4, scale=0.001), 'coefficient': 0.1},
         {'function': sps.norm(loc=3.5, scale=0.001), 'coefficient': 0.9},
     ]
     true_pdf = qp.PDF(truth=qp.composite(mixture), limits=limits)
     # Approximate the truth with 10 quantiles over the same limits.
     quantile_params = true_pdf.quantize(N=10, limits=limits)
     approx_pdf = qp.PDF(quantiles=quantile_params, limits=limits)
     divergence = qp.utils.calculate_kl_divergence(true_pdf, approx_pdf,
                                                   limits=limits)
     self.assertFalse(np.isnan(divergence))
コード例 #8
0
def test_moments():
    """Print the first five moments of a gridded Normal(mu=3, sigma=1).

    The normal pdf is sampled on a grid from -2 to 8 in steps of 0.02, turned
    into a gridded qp object, and for each order 1-5 the value returned by
    ``qp.utils.calculate_moment`` is printed next to the analytic value.
    Nothing is asserted; the comparison is by eye.
    """
    gaussian = sps.norm(loc=3, scale=1)
    step = 0.02
    xs = np.arange(-2., 8.00002, step)
    pdfobj = qp.PDF(gridded=(xs, gaussian.pdf(xs)))
    # Analytic raw moments E[x^n], n=1..5, for mu=3, sigma=1.
    expected = np.array([3.0, 10.0, 36.0, 138.0, 558.0])
    for idx, exact in enumerate(expected):
        computed = qp.utils.calculate_moment(pdfobj,
                                             idx + 1,
                                             using="gridded",
                                             dx=0.0001)
        print("%d %5.5f %.1f" % (idx, computed, exact))
コード例 #9
0
def load_gridded(catalog_file_name, pz_file_name, z_spec_col,
                 z_min, z_max, z_step):
    """Load PDFs that are sampled on a regular redshift grid.

    Intended for the output of codes such as LePHARE and BPZ, which sample
    their PDFs at regular grid points.

    Parameters
    ----------
    catalog_file_name : str
        Name of the catalog file to load; the spectroscopic redshift column
        is read from it.
    pz_file_name : str
        Name of the file containing the gridded PDF values.
    z_spec_col : int
        Column number of the spectroscopic redshift in the catalog file.
    z_min : float
        Minimum redshift of the grid.
    z_max : float
        Maximum redshift of the grid; inclusive.
    z_step : float
        Step size in redshift of the grid.

    Returns
    -------
    tuple
        ``(estimated_pdfs, true_pdf)``: a ``qp.Ensemble`` built from the
        gridded PDFs (its size is the first dimension of the loaded array),
        and a ``qp.PDF`` of the true N(z) built from the spectroscopic
        redshifts used as samples.
    """
    # Half a step is added to the upper bound so that z_max itself is
    # included in the grid.
    grid = np.arange(z_min, z_max + z_step / 2., z_step)
    spec_redshifts = np.loadtxt(catalog_file_name, usecols=z_spec_col)
    pdf_values = np.loadtxt(pz_file_name)

    # "True" distribution: the spectroscopic redshifts treated as samples.
    truth = qp.PDF(samples=spec_redshifts)

    # All estimated PDFs share the same grid.
    n_pdfs = pdf_values.shape[0]
    ensemble = qp.Ensemble(n_pdfs, gridded=(grid, pdf_values))

    return (ensemble, truth)
コード例 #10
0
ファイル: ensemble.py プロジェクト: johannct/qp
 def Q_func(pdfs):
     """Return a quantile-parametrized ``qp.PDF`` built from ``pdfs``.

     ``pdfs`` is used as one object exposing a ``quantiles`` attribute
     (presumably a single qp.PDF despite the plural name -- verify against
     the caller).  The parameter name is kept for compatibility.
     """
     # The body previously read an undefined name ``pdf``; use the argument.
     return qp.PDF(quantiles=pdfs.quantiles, vb=False)
コード例 #11
0
ファイル: ensemble.py プロジェクト: johannct/qp
 def Q_func(pdf):
     """Rebuild ``pdf`` as a quantile-only ``qp.PDF`` on the enclosing
     scope's ``limits``; fails the assertion when ``pdf`` has no quantiles.
     """
     quantile_params = pdf.quantiles
     assert quantile_params is not None
     return qp.PDF(quantiles=quantile_params, limits=limits, vb=False)
コード例 #12
0
def main(argv):
    """Compute N(z) vectors and KS / CvM / AD statistics for the FlexZ data.

    Steps, each followed by a printed elapsed time:

    1. Ingest the data with ``ingdata.ingestflexzdata()`` and build a gridded
       ``qp.Ensemble`` from the returned grid and p(z) array.
    2. Build an ``inmet.NzSumEvaluateMetric`` from the ensemble and the
       spec-z values.
    3. Evaluate the spec-z sample PDF and the stacked p(z) on
       ``np.arange(0.0, 2.0001, 0.001)`` and write both to
       ``NZPLOT_vectors.out``.
    4. Compute the KS, CvM and AD statistics (AD twice: over the spec-z
       min/max range and over 0.0-2.0) and write them to
       ``NZ_STATS_KSCVMAD.out``.

    Parameters
    ----------
    argv : sequence
        Accepted for the usual script entry-point signature; not used.
    """

    def report_elapsed(since):
        # Print the time taken since ``since`` and return the new reference.
        now = time.time()
        print("took %g seconds" % (now - since))
        return now

    tick = time.time()
    # Only the grid, the spec-z's and the p(z) array are needed here.
    z_array, _, szs, _, pzs = ingdata.ingestflexzdata()

    print("making Ensemble...")
    approx_pdf = qp.Ensemble(pzs.shape[0], gridded=(z_array, pzs), procs=3)
    tick = report_elapsed(tick)
    print("making NzSumEvaluateMetric Object, with stacking...")

    nzobj = inmet.NzSumEvaluateMetric(approx_pdf,
                                      szs,
                                      eval_grid=z_array,
                                      using='gridded',
                                      dx=0.0001)
    tick = report_elapsed(tick)
    print("calculating Nz sum vectors...")
    newgrid = np.arange(0.0, 2.0001, 0.001)
    # qp object built from the spec-z sample
    szsamplepdf = qp.PDF(samples=szs)
    # evaluate() returns a tuple; only its second element is kept.
    specznz = szsamplepdf.evaluate(
        newgrid, using='samples', vb=True, norm=False)[1]
    photznz = nzobj.stackpz.evaluate(
        newgrid, using='gridded', vb=True, norm=False)[1]
    with open("NZPLOT_vectors.out", "w") as outfp:
        outfp.write("#z_array speczNz photzNz\n")
        for z_val, n_spec, n_phot in zip(newgrid, specznz, photznz):
            outfp.write("%f %g %g\n" % (z_val, n_spec, n_phot))
    tick = report_elapsed(tick)
    print("calculating KS stat...")

    ks_stat, ks_pval = nzobj.NZKS()
    print("ks_stat: %g\nks_pval: %g\n" % (ks_stat, ks_pval))
    tick = report_elapsed(tick)

    # The three messages below used "\c" / "\a" where a newline was meant;
    # "\a" is the BEL control character and swallowed the leading "a".
    cvm_stat, cvm_pval = nzobj.NZCVM()
    print("cvm_stat: %g\ncvm_pval: %g\n" % (cvm_stat, cvm_pval))
    tick = report_elapsed(tick)

    zmin = min(szs)
    zmax = max(szs)
    delv = (zmax - zmin) / 200.

    ad_stat, ad_pval = nzobj.NZAD(vmin=zmin, vmax=zmax, delv=delv)
    print("ad_stat: %g\nad_pval: %g\n" % (ad_stat, ad_pval))
    tick = report_elapsed(tick)

    ad_statx, ad_pvalx = nzobj.NZAD(vmin=0.0, vmax=2.0, delv=0.01)
    print("ad_stat full range: %g\nad_pval: %g\n" % (ad_statx, ad_pvalx))
    tick = report_elapsed(tick)

    # All statistics in one file; the context manager guarantees the close.
    with open("NZ_STATS_KSCVMAD.out", "w") as outfp:
        outfp.write("KSval: %.6g\n" % (ks_stat))
        outfp.write("KSpval: %.6g\n" % (ks_pval))

        outfp.write("CvMval: %.6g\n" % (cvm_stat))
        outfp.write("Cvmpval: %.6g\n" % (cvm_pval))

        outfp.write("ADval for vmin/vmax=%.3f %.3f: %.6g\n" %
                    (zmin, zmax, ad_stat))
        outfp.write("ADpval: %.6g\n" % (ad_pval))

        outfp.write("ADval for vmin/vmax=0.0/2.0: %.6g\n" % (ad_statx))
        outfp.write("ADpval: %.6g\n" % (ad_pvalx))

    print("finished\n")
コード例 #13
0
ファイル: ensemble.py プロジェクト: johannct/qp
 def Q_func(pdf):
     """Rebuild ``pdf`` as a samples-only ``qp.PDF`` on the enclosing
     scope's ``limits``; fails the assertion when ``pdf`` has no samples.
     """
     sample_params = pdf.samples
     assert sample_params is not None
     return qp.PDF(samples=sample_params, limits=limits, vb=False)
コード例 #14
0
ファイル: ensemble.py プロジェクト: johannct/qp
 def Q_func(pdf):
     """Rebuild ``pdf`` as a histogram-only ``qp.PDF`` on the enclosing
     scope's ``limits``; fails the assertion when ``pdf`` has no histogram.
     """
     histogram_params = pdf.histogram
     assert histogram_params is not None
     return qp.PDF(histogram=histogram_params, limits=limits, vb=False)
コード例 #15
0
ファイル: ensemble.py プロジェクト: johannct/qp
 def Q_func(pdf):
     """Rebuild ``pdf`` as a gridded-only ``qp.PDF`` on the enclosing
     scope's ``limits``; fails the assertion when ``pdf`` has no gridded
     parametrization.
     """
     gridded_params = pdf.gridded
     assert gridded_params is not None
     return qp.PDF(gridded=gridded_params, limits=limits, vb=False)
コード例 #16
0
ファイル: ensemble.py プロジェクト: johannct/qp
 def Q_func(pdfs):
     """Return a gridded-parametrized ``qp.PDF`` built from ``pdfs``.

     ``pdfs`` is used as one object exposing a ``gridded`` attribute
     (presumably a single qp.PDF despite the plural name -- verify against
     the caller).  The parameter name is kept for compatibility.
     """
     # Two fixes: the body read an undefined name ``pdf``, and the gridded
     # data was passed under the ``quantiles`` keyword.
     return qp.PDF(gridded=pdfs.gridded, vb=False)
コード例 #17
0
ファイル: ensemble.py プロジェクト: johannct/qp
 def Q_func(pdfs):
     """Return a samples-parametrized ``qp.PDF`` built from ``pdfs``.

     ``pdfs`` is used as one object exposing a ``samples`` attribute
     (presumably a single qp.PDF despite the plural name -- verify against
     the caller).  The parameter name is kept for compatibility.
     """
     # The body previously read an undefined name ``pdf``; use the argument.
     return qp.PDF(samples=pdfs.samples, vb=False)
コード例 #18
0
ファイル: ensemble.py プロジェクト: johannct/qp
 def Q_func(pdfs):
     """Return a histogram-parametrized ``qp.PDF`` built from ``pdfs``.

     ``pdfs`` is used as one object exposing a ``histogram`` attribute
     (presumably a single qp.PDF despite the plural name -- verify against
     the caller).  The parameter name is kept for compatibility.
     """
     # The body previously read an undefined name ``pdf``; use the argument.
     return qp.PDF(histogram=pdfs.histogram, vb=False)
コード例 #19
0
ファイル: ensemble.py プロジェクト: johannct/qp
 def P_func(pdf):
     """Rebuild ``pdf`` as a ``qp.PDF`` carrying only its ``truth``."""
     true_dist = pdf.truth
     return qp.PDF(truth=true_dist, vb=False)