Beispiel #1
0
    def ppf(self, x):
        """
        Evaluate self.dist.ppf at x
        @param x: input accepted by isList, isNumerical or isMatrix, or an
        already converted DataVector / DataMatrix
        @return: entry (0, 0) of the result matrix for list and numerical
        input, the result of DataMatrix.array() for matrix input
        """
        # wrap plain python input into the pysgpp containers
        if isList(x):
            x = DataVector(x)
        elif isNumerical(x):
            x = DataVector([x])
        elif isMatrix(x):
            x = DataMatrix(x)

        # build the input matrix and a zeroed output matrix of equal shape
        if isinstance(x, DataMatrix):
            points = x
            result = DataMatrix(points.getNrows(), points.getNcols())
            result.setAll(0.0)
        elif isinstance(x, DataVector):
            # a vector becomes the single row of the input matrix
            ncols = len(x)
            points = DataMatrix(1, ncols)
            points.setRow(0, x)
            result = DataMatrix(1, ncols)
            result.setAll(0)

        # result is handed over as the output argument
        self.dist.ppf(points, result)

        # unpack the outcome depending on the kind of input
        if isNumerical(x) or isinstance(x, DataVector):
            return result.get(0, 0)
        if isinstance(x, DataMatrix):
            return result.array()
    def ppf(self, x):
        """
        Apply the inverse Rosenblatt transformation operation created from
        self.dist to x
        @param x: input accepted by isList, isNumerical or isMatrix, or an
        already converted DataVector / DataMatrix
        @return: entry (0, 0) of the result matrix for list and numerical
        input, the result of DataMatrix.array() for matrix input
        """
        # wrap plain python input into the pysgpp containers
        if isList(x):
            x = DataVector(x)
        elif isNumerical(x):
            x = DataVector([x])
        elif isMatrix(x):
            x = DataMatrix(x)

        # build the input matrix and a zeroed output matrix of equal shape
        if isinstance(x, DataMatrix):
            points = x
            result = DataMatrix(points.getNrows(), points.getNcols())
            result.setAll(0.0)
        elif isinstance(x, DataVector):
            # a vector becomes the single row of the input matrix
            ncols = len(x)
            points = DataMatrix(1, ncols)
            points.setRow(0, x)
            result = DataMatrix(1, ncols)
            result.setAll(0)

        # run the operation, result is handed over as the output argument
        operation = createOperationInverseRosenblattTransformationKDE(self.dist)
        operation.doTransformation(points, result)

        # unpack the outcome depending on the kind of input
        if isNumerical(x) or isinstance(x, DataVector):
            return result.get(0, 0)
        if isinstance(x, DataMatrix):
            return result.array()
Beispiel #3
0
    def cdf(self, x):
        """
        Evaluate self.dist.cdf at x
        @param x: input accepted by isList, isNumerical or isMatrix, or an
        already converted DataVector / DataMatrix
        @return: entry (0, 0) of the result matrix for list and numerical
        input, the result of DataMatrix.array() for matrix input
        """
        # wrap plain python input into the pysgpp containers
        if isList(x):
            x = DataVector(x)
        elif isNumerical(x):
            x = DataVector([x])
        elif isMatrix(x):
            x = DataMatrix(x)

        # build the input matrix and a zeroed output matrix of equal shape
        if isinstance(x, DataMatrix):
            points = x
            result = DataMatrix(points.getNrows(), points.getNcols())
            result.setAll(0.0)
        elif isinstance(x, DataVector):
            # a vector becomes the single row of the input matrix
            ncols = len(x)
            points = DataMatrix(1, ncols)
            points.setRow(0, x)
            result = DataMatrix(1, ncols)
            result.setAll(0)

        # result is handed over as the output argument
        self.dist.cdf(points, result)

        # unpack the outcome depending on the kind of input
        if isNumerical(x) or isinstance(x, DataVector):
            return result.get(0, 0)
        if isinstance(x, DataMatrix):
            return result.array()
Beispiel #4
0
    def ppf(self, x):
        """
        Apply the inverse Rosenblatt transformation operation created from
        self.trainData to x
        @param x: input accepted by isList or isNumerical, or an already
        converted DataVector / DataMatrix
        @return: entry (0, 0) of the result matrix for list and numerical
        input, the result of DataMatrix.array() for matrix input
        """
        # wrap plain python input into a DataVector
        if isList(x):
            x = DataVector(x)
        elif isNumerical(x):
            x = DataVector([x])

        # build the input matrix and a zeroed output matrix of equal shape
        if isinstance(x, DataMatrix):
            points = x
            result = DataMatrix(points.getNrows(), points.getNcols())
            result.setAll(0.0)
        elif isinstance(x, DataVector):
            # a vector becomes the single row of the input matrix
            ncols = len(x)
            points = DataMatrix(1, ncols)
            points.setRow(0, x)
            result = DataMatrix(1, ncols)
            result.setAll(0)

        # the column count has to agree with the training data
        assert points.getNcols() == result.getNcols() == self.trainData.getNcols()
        operation = createOperationInverseRosenblattTransformationKDE(self.trainData)
        operation.doTransformation(points, result)

        # unpack the outcome depending on the kind of input
        if isNumerical(x) or isinstance(x, DataVector):
            return result.get(0, 0)
        if isinstance(x, DataMatrix):
            return result.array()
Beispiel #5
0
def estimateDiscreteL2Error(grid, alpha, f, n=1000):
    """
    Compare the sparse grid function (grid, alpha) with f at n random
    points drawn with np.random.rand
    @param grid: Grid
    @param alpha: coefficients handed to evalSGFunctionMulti
    @param f: function, called with one sample (row of the sample matrix)
    @param n: int, number of random points
    @return: l2Norm() of the vector of differences
    """
    numDims = grid.getStorage().dim()
    points = DataMatrix(np.random.rand(n, numDims))

    # values of the sparse grid function at the random points
    difference = evalSGFunctionMulti(grid, alpha, points)

    # values of f at the very same points
    reference = DataVector(points.getNrows())
    for ix, point in enumerate(points.array()):
        reference[ix] = f(point)

    # subtract in place and measure the remainder
    difference.sub(reference)
    return difference.l2Norm()
def estimateDiscreteL2Error(grid, alpha, f, n=1000):
    """
    Compare the sparse grid function (grid, alpha) with f at n random
    points drawn with np.random.rand
    @param grid: Grid
    @param alpha: coefficients handed to evalSGFunctionMulti
    @param f: function, called with one sample (row of the sample matrix)
    @param n: int, number of random points
    @return: l2Norm() of the vector of differences
    """
    numDims = grid.getStorage().dim()
    points = DataMatrix(np.random.rand(n, numDims))

    # values of the sparse grid function at the random points
    difference = evalSGFunctionMulti(grid, alpha, points)

    # values of f at the very same points
    reference = DataVector(points.getNrows())
    for ix, point in enumerate(points.array()):
        reference[ix] = f(point)

    # subtract in place and measure the remainder
    difference.sub(reference)
    return difference.l2Norm()
Beispiel #7
0
    def ppf(self, x, shuffle=True):
        """
        Apply the inverse Rosenblatt transformation of the sparse grid
        function (self.grid, self.unnormalized_alpha_vec) to x and map the
        outcome with self.trans, if one is set
        @param x: evaluation point(s), converted by self._convertEvalPoint
        @param shuffle: bool, multi-dimensional case only: if False, 0 is
        passed as additional argument to doTransformation
        @return: flat numpy array in the one-dimensional case; in the
        multi-dimensional case the entry (0, 0) for a 1x1 input and the
        full numpy matrix otherwise
        """
        # convert the parameter to the right format
        x = self._convertEvalPoint(x)

        if self.dim == 1:
            op = createOperationInverseRosenblattTransformation1D(self.grid)
            # every entry of the first column is written in the loop below
            x_unit = np.empty((x.shape[0], x.shape[1]))
            for i, xi in enumerate(x[:, 0]):
                x_unit[i, 0] = op.doTransformation1D(
                    self.unnormalized_alpha_vec, xi)

            # map the transformed samples from the unit hypercube to the
            # probabilistic space
            if self.trans is not None:
                x_prob = self.trans.unitToProbabilisticMatrix(x_unit)
            else:
                # without a transformation the transformed samples are the
                # result; returning the input x here would skip the ppf
                x_prob = x_unit

            # extract the outcome
            if x_prob.shape[0] == 1 and x_prob.shape[1] == 1:
                return x_prob[:, 0]
            return x_prob.flatten()

        A_vec = DataMatrix(x)
        B_vec = DataMatrix(x.shape[0], x.shape[1])
        B_vec.setAll(0.0)

        # do the transformation, B_vec is the output argument
        op = createOperationInverseRosenblattTransformation(self.grid)
        if shuffle:
            op.doTransformation(self.unnormalized_alpha_vec, A_vec, B_vec)
        else:
            op.doTransformation(self.unnormalized_alpha_vec, A_vec, B_vec, 0)

        # map the transformed samples from the unit hypercube to the
        # probabilistic space
        B = B_vec.array()
        if self.trans is not None:
            B_prob = self.trans.unitToProbabilisticMatrix(B)
        else:
            B_prob = B

        # extract the outcome; B_prob is a numpy array, so it is indexed
        # instead of calling the DataMatrix accessor get
        if x.shape == (1, 1):
            return B_prob[0, 0]
        return B_prob
Beispiel #8
0
    def ppf(self, x, shuffle=False):
        """
        Apply the inverse Rosenblatt transformation operation created from
        self.dist to x
        @param x: evaluation point(s), converted by self._convertEvalPoint
        @param shuffle: bool, selects doShuffledTransformation instead of
        doTransformation
        @return: single value if the result is a 1x1 matrix, otherwise the
        result of DataMatrix.array()
        """
        points = DataMatrix(self._convertEvalPoint(x))
        output = DataMatrix(points.getNrows(), points.getNcols())
        output.setAll(0.0)

        # pick the requested variant of the operation and run it
        operation = createOperationInverseRosenblattTransformationKDE(self.dist)
        if not shuffle:
            operation.doTransformation(points, output)
        else:
            operation.doShuffledTransformation(points, output)

        # a 1x1 result is returned as a plain value
        values = output.array()
        isSingleValue = values.shape[0] == 1 and values.shape[1] == 1
        return values[0, 0] if isSingleValue else values
Beispiel #9
0
class EstimateDensityAlgorithm(InterpolationAlgorithm):
    """
    Interpolation algorithm that replaces the coefficients by the ones of
    a density estimation on the training samples as soon as the current
    sparse grid function has a negative nodal value
    """

    def __init__(self, trainSamples, lmbd=None, regularizationType="Laplace"):
        """
        @param trainSamples: training samples, converted to a DataMatrix
        @param lmbd: stored on the instance; defaulted because the name
        was not defined in the original constructor
        @param regularizationType: stored on the instance; note that the
        config in computeHierarchicalCoefficients uses the fixed value
        "Laplace"
        """
        self.trainSamples = DataMatrix(trainSamples)
        self.lmbd = lmbd
        self.regularizationType = regularizationType

    def computeHierarchicalCoefficients(self,
                                        grid,
                                        alpha,
                                        addedGridPoints=None):
        """
        @param grid: Grid
        @param alpha: coefficients belonging to grid
        @param addedGridPoints: optional iterable of grid points whose new
        coefficients are asserted to be larger than -1e-13
        @return: alpha unchanged if no nodal value is negative, otherwise
        the coefficients obtained from SGDEdist.byLearnerSGDEConfig
        """
        nodalValues = dehierarchize(grid, alpha)
        foundNegative = False
        for i, yi in enumerate(nodalValues):
            if yi < 0:
                foundNegative = True
                # kept from the original; the clipped values are not
                # used any further in this method
                nodalValues[i] = 0.0

        if foundNegative:
            # compute the coefficients for each grid point by estimating
            # the local density
            config = {
                'grid_filename': "positive.grid",
                "regularization_type": "Laplace",
                "crossValidation_enable": True,
                "crossValidation_kfold": 5,
                "crossValidation_silent": True
            }

            # the grid is written to the file the config refers to
            writeGrid(config['grid_filename'], grid)
            alpha = SGDEdist.byLearnerSGDEConfig(self.trainSamples.array(),
                                                 bounds=None,
                                                 config=config).alpha.array()

            # check if the coefficients of the new grid points are positive
            if addedGridPoints is not None:
                gs = grid.getStorage()
                assert all(alpha[gs.getSequenceNumber(gp)] > -1e-13
                           for gp in addedGridPoints)
        return alpha
Beispiel #10
0
    def computeBilinearForm(self, grid):
        """
        Assemble the matrix of the bilinear form for the given grid with
        createOperationLTwoDotExplicit and store every entry in self._map
        @param grid: Grid
        @return: the assembled matrix as returned by DataMatrix.array()
        """
        storage = grid.getStorage()
        size = storage.getSize()

        matrix = DataMatrix(size, size)
        matrix.setAll(0.)
        createOperationLTwoDotExplicit(matrix, grid)
        values = matrix.array()

        # remember each entry under the key of its pair of grid points
        points = [storage.getPoint(k) for k in range(size)]
        for row, gpi in enumerate(points):
            for col, gpj in enumerate(points):
                self._map[self.getKey([gpi, gpj])] = values[row, col]

        return values
Beispiel #11
0
def computeCoefficients(jgrid, grid, alpha, f):
    """
    Interpolate function f, which depends on some sparse grid function
    (grid, alpha), on jgrid
    @param jgrid: Grid, new discretization
    @param grid: Grid, old discretization
    @param alpha: DataVector, surpluses for grid
    @param f: function, called with the coordinates of a grid point of
    jgrid and the value of (grid, alpha) at that point
    @return: result of hierarchize, the surpluses for jgrid
    """
    jgs = jgrid.getStorage()
    numPoints = jgs.getSize()
    numDims = jgs.getDimension()

    # collect the coordinates of all grid points of jgrid, one per row
    coords = DataVector(numDims)
    points = DataMatrix(numPoints, numDims)
    for k in range(numPoints):
        jgs.getCoordinates(jgs.getPoint(k), coords)
        points.setRow(k, coords)

    # evaluate the old sparse grid function at these points
    values = evalSGFunctionMulti(grid, alpha, points.array())

    # apply f to all grid points
    jnodalValues = DataVector(numPoints)
    for k in range(len(values)):
        points.getRow(k, coords)
        jnodalValues[k] = f(coords.array(), values[k])

    return hierarchize(jgrid, jnodalValues)
Beispiel #12
0
    def cdf(self, x, shuffle=True):
        """
        Apply the Rosenblatt transformation of the sparse grid function on
        self.grid to x
        @param x: evaluation point(s), converted by self._convertEvalPoint
        @param shuffle: bool, multi-dimensional case only: if False, 0 is
        passed as additional argument to doTransformation
        @return: one-dimensional case: single value for one point, numpy
        array otherwise; multi-dimensional case: entry (0, 0) for a 1x1
        input, the result of DataMatrix.array() otherwise
        """
        x = self._convertEvalPoint(x)

        # map the points to the unit hypercube if a transformation is set
        if self.trans is None:
            x_unit = x
        else:
            x_unit = self.trans.probabilisticToUnitMatrix(x)

        if self.dim != 1:
            points = DataMatrix(x_unit)
            output = DataMatrix(x_unit.shape[0], x_unit.shape[1])
            output.setAll(0.0)

            # NOTE(review): this branch uses self.alpha_vec while the 1d
            # branch uses self.unnormalized_alpha_vec -- kept as is
            operation = createOperationRosenblattTransformation(self.grid)
            if shuffle:
                operation.doTransformation(self.alpha_vec, points, output)
            else:
                operation.doTransformation(self.alpha_vec, points, output, 0)

            if x_unit.shape == (1, 1):
                return output.get(0, 0)
            return output.array()

        # one-dimensional case: transform the first column entry by entry
        operation = createOperationRosenblattTransformation1D(self.grid)
        values = np.ndarray(x.shape[0])
        for k, xk in enumerate(x_unit[:, 0]):
            values[k] = operation.doTransformation1D(
                self.unnormalized_alpha_vec, xk)
        return values[0] if len(values) == 1 else values
Beispiel #13
0
    def ppf(self, x):
        """
        Apply the inverse Rosenblatt transformation of the sparse grid
        function (self.grid, self.alpha) to x
        @param x: input accepted by isList or isNumerical, or an already
        converted DataVector / DataMatrix
        @return: one-dimensional grid: single value for one entry, numpy
        array otherwise; else entry (0, 0) of the result matrix for list
        and numerical input, the result of DataMatrix.array() for a matrix
        """
        # wrap plain python input into a DataVector
        if isList(x):
            x = DataVector(x)
        elif isNumerical(x):
            x = DataVector([x])

        if self.grid.getStorage().dim() == 1:
            # one-dimensional grid: transform entry by entry
            operation = createOperationInverseRosenblattTransformation1D(self.grid)
            values = np.ndarray(len(x))
            for k, xk in enumerate(x.array()):
                values[k] = operation.doTransformation1D(self.alpha, xk)
            return values[0] if len(values) == 1 else values

        # build the input matrix and a zeroed output matrix of equal shape
        if isinstance(x, DataMatrix):
            points = x
            result = DataMatrix(points.getNrows(), points.getNcols())
            result.setAll(0.0)
        elif isinstance(x, DataVector):
            # a vector becomes the single row of the input matrix
            ncols = len(x)
            points = DataMatrix(1, ncols)
            points.setRow(0, x)
            result = DataMatrix(1, ncols)
            result.setAll(0)

        # run the operation, result is handed over as the output argument
        operation = createOperationInverseRosenblattTransformation(self.grid)
        operation.doTransformation(self.alpha, points, result)

        # unpack the outcome depending on the kind of input
        if isNumerical(x) or isinstance(x, DataVector):
            return result.get(0, 0)
        if isinstance(x, DataMatrix):
            return result.array()
Beispiel #14
0
    def ppf(self, x):
        """
        Apply the inverse Rosenblatt transformation of the sparse grid
        function (self.grid, self.alpha) to x
        @param x: input accepted by isList or isNumerical, or an already
        converted DataVector / DataMatrix
        @return: one-dimensional grid: single value for one entry, numpy
        array otherwise; else entry (0, 0) of the result matrix for list
        and numerical input, the result of DataMatrix.array() for a matrix
        """
        # wrap plain python input into a DataVector
        if isList(x):
            x = DataVector(x)
        elif isNumerical(x):
            x = DataVector([x])

        if self.grid.getStorage().dim() == 1:
            # one-dimensional grid: transform entry by entry
            operation = createOperationInverseRosenblattTransformation1D(self.grid)
            values = np.ndarray(len(x))
            for k, xk in enumerate(x.array()):
                values[k] = operation.doTransformation1D(self.alpha, xk)
            return values[0] if len(values) == 1 else values

        # build the input matrix and a zeroed output matrix of equal shape
        if isinstance(x, DataMatrix):
            points = x
            result = DataMatrix(points.getNrows(), points.getNcols())
            result.setAll(0.0)
        elif isinstance(x, DataVector):
            # a vector becomes the single row of the input matrix
            ncols = len(x)
            points = DataMatrix(1, ncols)
            points.setRow(0, x)
            result = DataMatrix(1, ncols)
            result.setAll(0)

        # run the operation, result is handed over as the output argument
        operation = createOperationInverseRosenblattTransformation(self.grid)
        operation.doTransformation(self.alpha, points, result)

        # unpack the outcome depending on the kind of input
        if isNumerical(x) or isinstance(x, DataVector):
            return result.get(0, 0)
        if isinstance(x, DataMatrix):
            return result.array()
Beispiel #15
0
class LibAGFDist(Dist):
    """
    Density estimated from training samples; the input files can be
    produced by an external density estimation binary (see computeDensity).
    The pdf is a kernel density estimate using a product of Gaussian
    kernels with one bandwidth per dimension; cdf and ppf use the KDE
    Rosenblatt transformation operations on the training data.
    """

    def __init__(self,
                 trainData,
                 samples=None,
                 testData=None,
                 bandwidths=None,
                 transformation=None,
                 surfaceFile=None):
        """
        @param trainData: numpy array, one training sample per row
        @param samples: numpy array, samples rvs draws from (optional)
        @param testData: dict mapping sample tuples to pdf values, used by
        pdf_libagf, mean and var (optional)
        @param bandwidths: kernel bandwidths; computed from the training
        data with getOptKDEbdwth if omitted
        @param transformation: object providing getTransformations(), its
        transformations define the bounds (optional)
        @param surfaceFile: file plotted by gnuplot (optional)
        """
        super(LibAGFDist, self).__init__()

        self.trainData = DataMatrix(trainData)
        self.testData = testData
        self.dim = trainData.shape[1]

        # range exists on Python 2 and 3, xrange only on Python 2
        self.bounds = [[0, 1] for _ in range(self.dim)]
        if len(self.bounds) == 1:
            self.bounds = self.bounds[0]

        if transformation is not None:
            self.bounds = [
                trans.getBounds()
                for trans in transformation.getTransformations()
            ]
        self.samples = samples
        self.transformation = transformation
        if bandwidths is not None:
            self.bandwidths = bandwidths
        else:
            op = createOperationInverseRosenblattTransformationKDE(
                self.trainData)
            self.bandwidths = DataVector(self.dim)
            op.getOptKDEbdwth(self.bandwidths)
        self.surfaceFile = surfaceFile

    @classmethod
    def byConfig(cls, config):
        """
        Run the density estimation for config and load its files
        @param config: path of the config file
        @return: LibAGFDist, or None if config is None or does not exist
        """
        if config is not None and os.path.exists(config):
            # init density function
            traindatafile, samplefile, testFile, testOutFile, bandwidthFile, surfaceFile = \
                cls.computeDensity(config)
            return cls.byFiles(traindatafile, samplefile, testFile,
                               testOutFile, bandwidthFile, surfaceFile)

    @staticmethod
    def _loadColumns(filename):
        """
        Load a text file with numpy; one dimensional data is turned into a
        matrix with one column
        """
        data = np.loadtxt(filename)
        if len(data.shape) == 1:
            data = np.array([data]).transpose()
        return data

    @classmethod
    def byFiles(cls,
                trainDataFile,
                samplesFile=None,
                testFile=None,
                testOutFile=None,
                bandwidthFile=None,
                surfaceFile=None):
        """
        Build the distribution from files; optional files that are not
        given or do not exist are ignored
        @param trainDataFile: file with the training data
        @param samplesFile: file with samples for quadrature
        @param testFile: file with test samples
        @param testOutFile: file with the pdf values of the test samples
        @param bandwidthFile: file with the bandwidths
        @param surfaceFile: file used by gnuplot
        @return: LibAGFDist
        @raise Exception: if the training data file does not exist
        """
        # load training file
        if not os.path.exists(trainDataFile):
            raise Exception('The training data file "%s" does not exist' %
                            trainDataFile)
        trainData = cls._loadColumns(trainDataFile)

        # load samples for quadrature
        samples = None
        if samplesFile is not None and os.path.exists(samplesFile):
            samples = cls._loadColumns(samplesFile)

        # load test file for evaluating pdf values
        testData = None
        if testFile is not None and os.path.exists(testFile):
            testData = cls._loadColumns(testFile)

        # load bandwidths file for evaluating pdf values
        bandwidths = None
        if bandwidthFile is not None and os.path.exists(bandwidthFile):
            bandwidths = np.loadtxt(bandwidthFile)

        # load pdf values for the test samples if available; the map stays
        # None otherwise, so the constructor call below never sees an
        # unbound name
        testDataEval = None
        if testData is not None and testOutFile is not None and \
                os.path.exists(testOutFile):
            # atleast_1d keeps a file with one value indexable
            testLikelihood = np.atleast_1d(np.loadtxt(testOutFile))
            # store the results in a hash map
            testDataEval = {}
            for i, sample in enumerate(testData):
                testDataEval[tuple(sample)] = testLikelihood[i]

        if surfaceFile is not None and not os.path.exists(surfaceFile):
            surfaceFile = None

        return cls(trainData,
                   samples=samples,
                   testData=testDataEval,
                   bandwidths=bandwidths,
                   surfaceFile=surfaceFile)

    @classmethod
    def computeDensity(
            cls,
            config,
            pathsgpp='/home/franzefn/workspace/SGppUQ/lib/sgpp',
            cluster='/home/franzefn/Promotion/UQ/benjamin/clustc/cluster'):
        """
        Run the external binary on config and read the names of the files
        it works with from the config
        @param config: path of the config file
        @param pathsgpp: value assigned to LD_LIBRARY_PATH of this process
        @param cluster: path of the binary to run
        @return: tuple (train data file, samples file, test file, test
        output file, bandwidths file, surface file); entries which are not
        configured are None
        @raise Exception: if config does not exist or the binary returns
        a non-zero status
        """
        if not os.path.exists(config):
            raise Exception('the config file "%s" does not exist' % config)

        os.environ['LD_LIBRARY_PATH'] = pathsgpp
        # NOTE(review): the command line is built by string formatting and
        # run through the shell -- only use trusted paths here
        ret = os.system("%s -c %s > out_libagf.log" % (cluster, config))
        if ret != 0:
            raise Exception('The density estimation exited unexpectedly')

        # extract the file names from the config
        s = cp.ConfigParser()
        s.optionxform = str
        s.read(config)

        traindatafile = s.get('files', 'inFileTrain')
        denestOptions = s.options('denest')
        fileOptions = s.options('files')

        # getint: the option is read as text, comparing the raw string
        # with 0 fails on Python 3 and is always true on Python 2
        samplesfile = None
        if 'samplesNumberSamples' in denestOptions and \
                s.getint('denest', 'samplesNumberSamples') > 0 and \
                'samplesOutput' in denestOptions:
            samplesfile = s.get('denest', 'samplesOutput')

        testFile = None
        if 'inFileTest' in fileOptions:
            testFile = s.get('files', 'inFileTest')

        testOutFile = None
        if 'outFileTest' in fileOptions and 'inFileTest' in fileOptions:
            testOutFile = s.get('files', 'outFileTest')

        bandwidthsfile = None
        if 'printBandwidthsFile' in denestOptions:
            bandwidthsfile = s.get('denest', 'printBandwidthsFile')

        surfacefile = None
        if 'printSurfaceFile' in denestOptions:
            surfacefile = s.get('denest', 'printSurfaceFile')

        return traindatafile, samplesfile, testFile, testOutFile, bandwidthsfile, surfacefile

    def pdf_libagf(self, x):
        """
        Look up the stored pdf value of x
        @param x: number or sequence of numbers
        @return: pdf value stored in self.testData
        @raise AttributeError: if no value is stored for x
        """
        if isNumerical(x):
            x = [x]
        x = tuple(x)

        if self.testData is not None and x in self.testData:
            return self.testData[x]
        raise AttributeError("No pdf value for '%s' available" % (x, ))

    def pdf(self, x):
        """
        Evaluate the kernel density estimate at x
        @param x: evaluation point, has to broadcast against the rows of
        the training data
        @return: mean over the training samples of the product of the
        Gaussian kernels of all dimensions
        """
        n = self.trainData.getNrows()
        # bandwidths are a DataVector if computed in the constructor but a
        # numpy array if loaded by byFiles, which offers no array()
        if hasattr(self.bandwidths, 'array'):
            sigma = self.bandwidths.array()
        else:
            sigma = np.asarray(self.bandwidths, dtype=float)
        # normalization coefficient
        norm = 1. / (sigma * np.sqrt(2. * np.pi))

        # scaled distances to the training samples and kernel values
        kernels = (x - self.trainData.array()) / sigma
        kernels = norm * np.exp(-kernels**2 / 2.)

        # scale the result by the number of samples
        return np.sum(np.prod(kernels, axis=1)) / n

    @staticmethod
    def _toMatrices(x):
        """
        Convert x to a pysgpp container and build the input matrix and a
        zeroed output matrix of equal shape
        @return: tuple (converted x, input DataMatrix, output DataMatrix)
        @raise TypeError: if x can not be converted
        """
        if isList(x):
            x = DataVector(x)
        elif isNumerical(x):
            x = DataVector([x])

        if isinstance(x, DataMatrix):
            A = x
        elif isinstance(x, DataVector):
            # a vector becomes the single row of the input matrix
            A = DataMatrix(1, len(x))
            A.setRow(0, x)
        else:
            raise TypeError("unsupported type '%s' of evaluation point" %
                            type(x).__name__)

        B = DataMatrix(A.getNrows(), A.getNcols())
        B.setAll(0.0)
        return x, A, B

    def cdf(self, x):
        """
        Apply the KDE Rosenblatt transformation of the training data to x
        @param x: number, list, DataVector or DataMatrix
        @return: entry (0, 0) of the result for vector-like input, the
        result of DataMatrix.array() for a DataMatrix
        """
        x, A, B = self._toMatrices(x)

        # do the transformation, B is the output argument
        op = createOperationRosenblattTransformationKDE(self.trainData)
        op.doTransformation(A, B)

        # transform the outcome
        if isinstance(x, DataMatrix):
            return B.array()
        return B.get(0, 0)

    def ppf(self, x):
        """
        Apply the inverse KDE Rosenblatt transformation of the training
        data to x
        @param x: number, list, DataVector or DataMatrix
        @return: entry (0, 0) of the result for vector-like input, the
        result of DataMatrix.array() for a DataMatrix
        """
        x, A, B = self._toMatrices(x)

        # do the transformation, B is the output argument
        assert A.getNcols() == B.getNcols() == self.trainData.getNcols()
        op = createOperationInverseRosenblattTransformationKDE(self.trainData)
        op.doTransformation(A, B)

        # transform the outcome
        if isinstance(x, DataMatrix):
            return B.array()
        return B.get(0, 0)

    def rvs(self, n=1):
        """
        Draw n rows from self.samples uniformly at random with replacement
        @param n: int, number of samples
        @return: numpy array with n rows
        """
        ixs = np.random.randint(0, len(self.samples), n)
        return self.samples[ixs, :]

    def mean(self, n=1e4):
        """
        @param n: not used
        @return: average over the samples in self.testData of the product
        of the sample's entries
        """
        moment = 0.
        for sample in self.testData:
            moment += np.prod(sample)
        return moment / len(self.testData)

    def var(self):
        """
        @return: sum of squared deviations of the products of the sample
        entries from mean(), divided by the number of samples minus one
        """
        mean = self.mean()
        moment = 0.
        for sample in self.testData:
            moment += (np.prod(sample) - mean)**2

        return moment / (len(self.testData) - 1)

    def getBounds(self):
        return self.bounds

    def getDim(self):
        return self.dim

    def getDistributions(self):
        return [self]

    def gnuplot(self, jpegFile, gnuplotConfig=None):
        """
        Write a gnuplot script that plots the surface file into jpegFile
        and run gnuplot on it
        @param jpegFile: output file of the plot
        @param gnuplotConfig: file the script is written to, defaults to
        'gnuplot.config'
        @raise Exception: if no existing surface file is available
        """
        if self.surfaceFile is None or not os.path.exists(self.surfaceFile):
            raise Exception(
                'surface file not found. specify "printSurfaceFile" in [denest] section of config'
            )

        gnuplot = """
            set terminal jpeg
            set output "%s"

            set view map
            set size ratio .9

            set object 1 rect from graph 0, graph 0 to graph 1, graph 1 back
            set object 1 rect fc rgb "black" fillstyle solid 1.0

            splot '%s' using 1:2:3 with points pointtype 5 pointsize 1 palette linewidth 0
            """
        if gnuplotConfig is None:
            gnuplotConfig = 'gnuplot.config'

        # the context manager closes the script even if writing fails
        with open(gnuplotConfig, "w") as fd:
            fd.write(gnuplot % (jpegFile, self.surfaceFile))
        os.system("gnuplot %s" % gnuplotConfig)

    def __str__(self):
        return "libAGF"
Beispiel #16
0
class LibAGFDist(Dist):
    """
    Density estimated from training samples; the input files can be
    produced by an external density estimation binary (see computeDensity).
    The pdf is a kernel density estimate using a product of Gaussian
    kernels with one bandwidth per dimension; cdf and ppf use the KDE
    Rosenblatt transformation operations on the training data.
    """

    def __init__(self,
                 trainData,
                 samples=None,
                 testData=None,
                 bandwidths=None,
                 transformation=None,
                 surfaceFile=None):
        """
        @param trainData: numpy array, one training sample per row
        @param samples: numpy array, samples rvs draws from (optional)
        @param testData: dict mapping sample tuples to pdf values, used by
        pdf_libagf, mean and var (optional)
        @param bandwidths: kernel bandwidths; computed from the training
        data with getOptKDEbdwth if omitted
        @param transformation: object providing getTransformations(), its
        transformations define the bounds (optional)
        @param surfaceFile: file plotted by gnuplot (optional)
        """
        super(LibAGFDist, self).__init__()

        self.trainData = DataMatrix(trainData)
        self.testData = testData
        self.dim = trainData.shape[1]

        # range exists on Python 2 and 3, xrange only on Python 2
        self.bounds = [[0, 1] for _ in range(self.dim)]
        if len(self.bounds) == 1:
            self.bounds = self.bounds[0]

        if transformation is not None:
            self.bounds = [trans.getBounds()
                           for trans in transformation.getTransformations()]
        self.samples = samples
        self.transformation = transformation
        if bandwidths is not None:
            self.bandwidths = bandwidths
        else:
            op = createOperationInverseRosenblattTransformationKDE(self.trainData)
            self.bandwidths = DataVector(self.dim)
            op.getOptKDEbdwth(self.bandwidths)
        self.surfaceFile = surfaceFile

    @classmethod
    def byConfig(cls, config):
        """
        Run the density estimation for config and load its files
        @param config: path of the config file
        @return: LibAGFDist, or None if config is None or does not exist
        """
        if config is not None and os.path.exists(config):
            # init density function
            traindatafile, samplefile, testFile, testOutFile, bandwidthFile, surfaceFile = \
                cls.computeDensity(config)
            return cls.byFiles(traindatafile, samplefile,
                               testFile, testOutFile,
                               bandwidthFile, surfaceFile)

    @staticmethod
    def _loadColumns(filename):
        """
        Load a text file with numpy; one dimensional data is turned into a
        matrix with one column
        """
        data = np.loadtxt(filename)
        if len(data.shape) == 1:
            data = np.array([data]).transpose()
        return data

    @classmethod
    def byFiles(cls, trainDataFile,
                samplesFile=None,
                testFile=None,
                testOutFile=None,
                bandwidthFile=None,
                surfaceFile=None):
        """
        Build the distribution from files; optional files that are not
        given or do not exist are ignored
        @param trainDataFile: file with the training data
        @param samplesFile: file with samples for quadrature
        @param testFile: file with test samples
        @param testOutFile: file with the pdf values of the test samples
        @param bandwidthFile: file with the bandwidths
        @param surfaceFile: file used by gnuplot
        @return: LibAGFDist
        @raise Exception: if the training data file does not exist
        """
        # load training file
        if not os.path.exists(trainDataFile):
            raise Exception('The training data file "%s" does not exist' % trainDataFile)
        trainData = cls._loadColumns(trainDataFile)

        # load samples for quadrature
        samples = None
        if samplesFile is not None and os.path.exists(samplesFile):
            samples = cls._loadColumns(samplesFile)

        # load test file for evaluating pdf values
        testData = None
        if testFile is not None and os.path.exists(testFile):
            testData = cls._loadColumns(testFile)

        # load bandwidths file for evaluating pdf values
        bandwidths = None
        if bandwidthFile is not None and os.path.exists(bandwidthFile):
            bandwidths = np.loadtxt(bandwidthFile)

        # load pdf values for the test samples if available; the map stays
        # None otherwise, so the constructor call below never sees an
        # unbound name
        testDataEval = None
        if testData is not None and testOutFile is not None and \
                os.path.exists(testOutFile):
            # atleast_1d keeps a file with one value indexable
            testLikelihood = np.atleast_1d(np.loadtxt(testOutFile))
            # store the results in a hash map
            testDataEval = {}
            for i, sample in enumerate(testData):
                testDataEval[tuple(sample)] = testLikelihood[i]

        if surfaceFile is not None and not os.path.exists(surfaceFile):
            surfaceFile = None

        return cls(trainData,
                   samples=samples,
                   testData=testDataEval,
                   bandwidths=bandwidths,
                   surfaceFile=surfaceFile)

    @classmethod
    def computeDensity(cls, config,
                       pathsgpp='/home/franzefn/workspace/SGppUQ/lib/sgpp',
                       cluster='/home/franzefn/Promotion/UQ/benjamin/clustc/cluster'):
        """
        Run the external binary on config and read the names of the files
        it works with from the config
        @param config: path of the config file
        @param pathsgpp: value assigned to LD_LIBRARY_PATH of this process
        @param cluster: path of the binary to run
        @return: tuple (train data file, samples file, test file, test
        output file, bandwidths file, surface file); entries which are not
        configured are None
        @raise Exception: if config does not exist or the binary returns
        a non-zero status
        """
        if not os.path.exists(config):
            raise Exception('the config file "%s" does not exist' % config)

        os.environ['LD_LIBRARY_PATH'] = pathsgpp
        # NOTE(review): the command line is built by string formatting and
        # run through the shell -- only use trusted paths here
        ret = os.system("%s -c %s > out_libagf.log" % (cluster, config))
        if ret != 0:
            raise Exception('The density estimation exited unexpectedly')

        # extract the file names from the config
        s = cp.ConfigParser()
        s.optionxform = str
        s.read(config)

        traindatafile = s.get('files', 'inFileTrain')
        denestOptions = s.options('denest')
        fileOptions = s.options('files')

        # getint: the option is read as text, comparing the raw string
        # with 0 fails on Python 3 and is always true on Python 2
        samplesfile = None
        if 'samplesNumberSamples' in denestOptions and \
                s.getint('denest', 'samplesNumberSamples') > 0 and \
                'samplesOutput' in denestOptions:
            samplesfile = s.get('denest', 'samplesOutput')

        testFile = None
        if 'inFileTest' in fileOptions:
            testFile = s.get('files', 'inFileTest')

        testOutFile = None
        if 'outFileTest' in fileOptions and 'inFileTest' in fileOptions:
            testOutFile = s.get('files', 'outFileTest')

        bandwidthsfile = None
        if 'printBandwidthsFile' in denestOptions:
            bandwidthsfile = s.get('denest', 'printBandwidthsFile')

        surfacefile = None
        if 'printSurfaceFile' in denestOptions:
            surfacefile = s.get('denest', 'printSurfaceFile')

        return traindatafile, samplesfile, testFile, testOutFile, bandwidthsfile, surfacefile

    def pdf_libagf(self, x):
        """
        Look up the stored pdf value of x
        @param x: number or sequence of numbers
        @return: pdf value stored in self.testData
        @raise AttributeError: if no value is stored for x
        """
        if isNumerical(x):
            x = [x]
        x = tuple(x)

        if self.testData is not None and x in self.testData:
            return self.testData[x]
        raise AttributeError("No pdf value for '%s' available" % (x,))

    def pdf(self, x):
        """
        Evaluate the kernel density estimate at x
        @param x: evaluation point, has to broadcast against the rows of
        the training data
        @return: mean over the training samples of the product of the
        Gaussian kernels of all dimensions
        """
        n = self.trainData.getNrows()
        # bandwidths are a DataVector if computed in the constructor but a
        # numpy array if loaded by byFiles, which offers no array()
        if hasattr(self.bandwidths, 'array'):
            sigma = self.bandwidths.array()
        else:
            sigma = np.asarray(self.bandwidths, dtype=float)
        # normalization coefficient
        norm = 1. / (sigma * np.sqrt(2. * np.pi))

        # scaled distances to the training samples and kernel values
        kernels = (x - self.trainData.array()) / sigma
        kernels = norm * np.exp(-kernels ** 2 / 2.)

        # scale the result by the number of samples
        return np.sum(np.prod(kernels, axis=1)) / n

    @staticmethod
    def _toMatrices(x):
        """
        Convert x to a pysgpp container and build the input matrix and a
        zeroed output matrix of equal shape
        @return: tuple (converted x, input DataMatrix, output DataMatrix)
        @raise TypeError: if x can not be converted
        """
        if isList(x):
            x = DataVector(x)
        elif isNumerical(x):
            x = DataVector([x])

        if isinstance(x, DataMatrix):
            A = x
        elif isinstance(x, DataVector):
            # a vector becomes the single row of the input matrix
            A = DataMatrix(1, len(x))
            A.setRow(0, x)
        else:
            raise TypeError("unsupported type '%s' of evaluation point" %
                            type(x).__name__)

        B = DataMatrix(A.getNrows(), A.getNcols())
        B.setAll(0.0)
        return x, A, B

    def cdf(self, x):
        """
        Apply the KDE Rosenblatt transformation of the training data to x
        @param x: number, list, DataVector or DataMatrix
        @return: entry (0, 0) of the result for vector-like input, the
        result of DataMatrix.array() for a DataMatrix
        """
        x, A, B = self._toMatrices(x)

        # do the transformation, B is the output argument
        op = createOperationRosenblattTransformationKDE(self.trainData)
        op.doTransformation(A, B)

        # transform the outcome
        if isinstance(x, DataMatrix):
            return B.array()
        return B.get(0, 0)

    def ppf(self, x):
        """
        Apply the inverse KDE Rosenblatt transformation of the training
        data to x
        @param x: number, list, DataVector or DataMatrix
        @return: entry (0, 0) of the result for vector-like input, the
        result of DataMatrix.array() for a DataMatrix
        """
        x, A, B = self._toMatrices(x)

        # do the transformation, B is the output argument
        assert A.getNcols() == B.getNcols() == self.trainData.getNcols()
        op = createOperationInverseRosenblattTransformationKDE(self.trainData)
        op.doTransformation(A, B)

        # transform the outcome
        if isinstance(x, DataMatrix):
            return B.array()
        return B.get(0, 0)

    def rvs(self, n=1):
        """
        Draw n rows from self.samples uniformly at random with replacement
        @param n: int, number of samples
        @return: numpy array with n rows
        """
        ixs = np.random.randint(0, len(self.samples), n)
        return self.samples[ixs, :]

    def mean(self, n=1e4):
        """
        @param n: not used
        @return: average over the samples in self.testData of the product
        of the sample's entries
        """
        moment = 0.
        for sample in self.testData:
            moment += np.prod(sample)
        return moment / len(self.testData)

    def var(self):
        """
        @return: sum of squared deviations of the products of the sample
        entries from mean(), divided by the number of samples minus one
        """
        mean = self.mean()
        moment = 0.
        for sample in self.testData:
            moment += (np.prod(sample) - mean) ** 2

        return moment / (len(self.testData) - 1)

    def getBounds(self):
        return self.bounds

    def getDim(self):
        return self.dim

    def getDistributions(self):
        return [self]

    def gnuplot(self, jpegFile, gnuplotConfig=None):
        """
        Write a gnuplot script that plots the surface file into jpegFile
        and run gnuplot on it
        @param jpegFile: output file of the plot
        @param gnuplotConfig: file the script is written to, defaults to
        'gnuplot.config'
        @raise Exception: if no existing surface file is available
        """
        if self.surfaceFile is None or not os.path.exists(self.surfaceFile):
            raise Exception('surface file not found. specify "printSurfaceFile" in [denest] section of config')

        gnuplot = """
            set terminal jpeg
            set output "%s"

            set view map
            set size ratio .9

            set object 1 rect from graph 0, graph 0 to graph 1, graph 1 back
            set object 1 rect fc rgb "black" fillstyle solid 1.0

            splot '%s' using 1:2:3 with points pointtype 5 pointsize 1 palette linewidth 0
            """
        if gnuplotConfig is None:
            gnuplotConfig = 'gnuplot.config'

        # the context manager closes the script even if writing fails
        with open(gnuplotConfig, "w") as fd:
            fd.write(gnuplot % (jpegFile, self.surfaceFile))
        os.system("gnuplot %s" % gnuplotConfig)

    def __str__(self):
        return "libAGF"
 def corrcoef(self):
     """
     Obtain the correlation matrix from self.dist.corrcoef
     @return: result of DataMatrix.array() for a dim x dim matrix
     """
     dim = self.dim
     # zero-initialised matrix handed to self.dist as output argument
     result = DataMatrix(np.zeros((dim, dim)))
     self.dist.corrcoef(result)
     return result.array()
Beispiel #18
0
 def corrcoef(self):
     """
     Obtain the correlation matrix from self.dist.corrcoef
     @return: result of DataMatrix.array() for a dim x dim matrix
     """
     dim = self.dim
     # zero-initialised matrix handed to self.dist as output argument
     result = DataMatrix(np.zeros((dim, dim)))
     self.dist.corrcoef(result)
     return result.array()
Beispiel #19
0
 def cov(self):
     """
     Obtain the covariance matrix from self.learner.cov
     @return: result of DataMatrix.array() for a dim x dim matrix
     """
     dim = self.dim
     # zero-initialised matrix handed to the learner as output argument
     result = DataMatrix(np.zeros((dim, dim)))
     bounds = DataMatrix(self.bounds)
     self.learner.cov(result, bounds)
     return result.array()