Exemple #1
0
    def test1DNormalDist(self):
        """Fit a Gaussian KDE to samples of a 1D normal and print diagnostics.

        Draws 500 training and 1000 test samples from
        ``dists.Normal(1.85, .3, 0, 3)`` (the last two arguments are
        presumably the lower/upper bounds -- confirm against ``dists.Normal``),
        builds a ``KDEDist`` on the training samples with maximum-likelihood
        bandwidth optimization, and prints the integral of the estimated pdf
        over [0, 3] together with mean, variance, KL divergence, cross
        entropy and L2 error.  Nothing is asserted; the output is meant for
        manual inspection.
        """
        # reference distribution and the two sample sets drawn from it
        reference = dists.Normal(1.85, .3, 0, 3)
        train = np.array([reference.rvs(500)]).T
        test = np.array([reference.rvs(1000)]).T

        # kernel density estimate restricted to the bounds of the reference
        kde = KDEDist(
            train,
            kernelType=KernelType_GAUSSIAN,
            bandwidthOptimizationType=BandwidthOptimizationType_MAXIMUMLIKELIHOOD,
            bounds=reference.getBounds())

        # optional visual check (disabled):
        #         fig = plt.figure()
        #         plotDensity1d(reference)
        #         plotDensity1d(kde)

        # quad returns a tuple, hence the %s format and the 1-tuple wrapper
        integral = quad(lambda x: kde.pdf([x]), 0, 3)
        print("quad = %s" % (integral, ))

        means = (reference.mean(), kde.mean())
        print("mean = %g ~ %g" % means)

        variances = (reference.var(), kde.var())
        print("var = %g ~ %g" % variances)

        klDivergence = reference.klDivergence(kde, test, test)
        print("KL = %g" % klDivergence)

        crossEntropy = kde.crossEntropy(test)
        print("CE = %g" % crossEntropy)

        l2error = kde.l2error(reference, test, test)
        print("MSE = %g" % l2error)

        plt.show()
Exemple #2
0
def estimateKDEDensity(functionName,
                       trainSamples,
                       testSamples=None,
                       iteration=0,
                       plot=False,
                       out=True,
                       label="kde_gaussian",
                       bandwidthOptimizationTypeStr="rot"):
    """Fit a kernel density estimate to ``trainSamples`` and report on it.

    The kernel is selected from ``label`` (it must contain ``"gaussian"`` or
    ``"epanechnikov"``), the bandwidth optimization from
    ``bandwidthOptimizationTypeStr`` via ``strTobandwidthOptimizationType``.

    Args:
        functionName: Name used in output file names and in the statistics.
        trainSamples: 2D numpy array (samples x dimensions) used to build
            the KDE.
        testSamples: 2D numpy array used for the test cross entropy.  It is
            passed to ``KDEDist.crossEntropy`` unchanged; whether ``None``
            is accepted there is not visible from this function.
        iteration: Integer index used in output file names and statistics.
        plot: If true, 2D densities are plotted (saved if ``out`` is true,
            shown interactively otherwise).
        out: If true, cross entropy, samples, plot and JSON configuration
            are written to ``data/<label>`` (created if missing).
        label: Kernel label; also the name of the output sub-directory.
        bandwidthOptimizationTypeStr: String identifier of the bandwidth
            optimization strategy.

    Returns:
        Tuple ``(kdeDist, stats)`` with the fitted ``KDEDist`` and a dict
        holding the configuration, the samples, train/test cross entropies
        and the JSON serialization of the density.

    Raises:
        AttributeError: If ``label`` names no known kernel type.
    """
    print("train: %i x %i (mean=%g, var=%g)" %
          (trainSamples.shape[0], trainSamples.shape[1], np.mean(trainSamples),
           np.var(trainSamples)))
    if testSamples is not None:
        print("test : %i x %i (mean=%g, var=%g)" %
              (testSamples.shape[0], testSamples.shape[1],
               np.mean(testSamples), np.var(testSamples)))

    if "gaussian" in label:
        kernelType = KernelType_GAUSSIAN
    elif "epanechnikov" in label:
        kernelType = KernelType_EPANECHNIKOV
    else:
        raise AttributeError("label is unknown")

    bandwidthOptimizationType = strTobandwidthOptimizationType(
        bandwidthOptimizationTypeStr)
    kdeDist = KDEDist(trainSamples,
                      kernelType=kernelType,
                      bandwidthOptimizationType=bandwidthOptimizationType)
    # -----------------------------------------------------------
    cvKDE = kdeDist.crossEntropy(testSamples)

    numDims = kdeDist.getDim()
    # plotDensity2d is only meaningful for two-dimensional densities
    canPlot = plot and numDims == 2

    # The output directory must be known before the first plot is saved;
    # previously it was assigned only after its first use.
    pathResults = os.path.join("data", label)
    if out:
        os.makedirs(pathResults, exist_ok=True)

    def resultPath(pattern):
        # fill functionName/iteration into a file name pattern
        return os.path.join(pathResults, pattern % (functionName, iteration))

    if canPlot:
        fig = plt.figure()
        plotDensity2d(kdeDist)
        plt.title("log=%g" % cvKDE)
        if out:
            plt.tight_layout()
            plt.savefig(resultPath("kde_dist.%s.i%i.jpg"))
            plt.savefig(resultPath("kde_dist.%s.i%i.pdf"))
            plt.close(fig)
        else:
            plt.show()

    print("CV test = %g" % cvKDE)

    # serialize once; reused for the JSON file and the statistics
    kdeJson = kdeDist.toJson()

    # -----------------------------------------------------------
    if out:
        # serialize cross entropies; csv.writer needs a text-mode file
        # opened with newline="" (binary mode fails under Python 3)
        with open(resultPath("kde_cross_entropies.%s.i%i.csv"), "w",
                  newline="") as fd:
            file_writer = csv.writer(fd)
            file_writer.writerow(["crossEntropy"])
            file_writer.writerow([cvKDE])

        # serialize samples
        np.savetxt(resultPath("kde_train_samples.%s.i%i.csv"), trainSamples)
        if testSamples is not None:
            np.savetxt(resultPath("kde_test_samples.%s.i%i.csv"), testSamples)

        if canPlot:
            # plot density
            fig = plt.figure()
            plotDensity2d(kdeDist)
            plt.title("%s -> CV = %g" % (kdeDist.getBandwidths(), cvKDE))
            plt.savefig(resultPath("kde_pdf.%s.i%i.jpg"))
            plt.close(fig)

        # serialize best configuration to json
        with open(resultPath("kde_best_config.%s.i%i.json"), "w") as fd:
            fd.write(kdeJson)

    # stats: report the settings that were actually used for the estimate
    stats = {
        'config': {
            'functionName': functionName,
            'numDims': numDims,
            'label': label,
            'bandwidth_optimization': bandwidthOptimizationType,
            'kernelType': kernelType,
            'iteration': iteration
        },
        'trainSamples': trainSamples,
        'testSamples': testSamples,
        'crossEntropyTrainKDE': kdeDist.crossEntropy(trainSamples),
        'crossEntropyTestKDE': cvKDE,
        'KDEDist_json': kdeJson
    }

    return kdeDist, stats