Exemple #1
    def integrate(self, func, *args, **kwargs):
        """
        Integrate func by Monte Carlo, drawing sample points in chunks of n.

        Each chunk is drawn with self.draw(), func is evaluated on it, and the
        running estimate of the integral is the mean of
        func * joint_prior / joint_sampling_pdf over every sample counted so
        far. After a chunk, the sampling PDF of each parameter listed in
        self.adaptive (and not pinned) is refit from the cached samples; the
        refit is skipped for a chunk once self.ntotal >= n_adapt * n and the
        running maximum weight did not change during that chunk.

        Limitations:
            func's signature must contain all parameters currently defined by
            the sampler, and with the same names. This is required so that the
            sample values can be passed consistently. Positional *args are
            ignored: the argument names are read from func itself.

        kwargs:
        nmax -- total allowed number of sample points; a warning is printed if this number is reached (with a finite neff and no convergence tests). If omitted it falls back to n_adapt * n, which is 0 by default -- in that case no samples are drawn unless nmin is positive.
        nmin -- minimum number of samples to allow, by default will be set to the end of the 'burnin' at n_adapt * n
        neff -- Effective samples to collect before terminating. If not given, assume infinity
        n -- Number of samples to integrate in a 'chunk' -- default is 1000 (or nmax, if nmax is given and smaller)
        save_intg -- Save the evaluated value of the integrand (plus priors, weights and sample index) with the sample points in self._rvs. True keeps every point; an integer N keeps only the N highest-weight points whenever adaptation is skipped. Default is False, but it is forced on when n_adapt > 0 and tempering_exp > 0.
        history_mult -- Number of chunks (of size n) to use in the adaptive histogramming: only useful if there are parameters with adaptation enabled. If omitted, the whole cached history is used.
        tempering_exp -- Exponent to raise the weights of the 1-D marginalized histograms for adaptive sampling prior generation. Default is 1.0.
        n_adapt -- number of chunks over which to allow the pdf to adapt. Default is zero.
        convergence_tests -- dictionary of callables, each accepting self._rvs and self.params as arguments. CURRENTLY ONLY USED FOR REPORTING
        maxval -- Guess at the maximum value of the integrand -- used as a seed for the maxval counter
        igrand_threshold_deltalnL -- if given, after integration drop cached samples whose log-integrand is not greater than (max lnL - this value). Default is to keep all.
        igrand_threshold_p -- if > 0, after integration drop the lowest-weight cached samples that together make up this fraction of the total weight. Default is 0 (keep all).
        verbose -- print a progress line after every chunk. Default is False.

        Pinning a value: By specifying a kwarg with the same name as an existing parameter, it is possible to "pin" it. The sample draws will always be that value, and the sampling prior will use a delta function at that value. The original PDFs are put back when this method exits, including when it exits with an exception.

        Returns a tuple (integral, variance, eff_samp, dict_return):
        integral -- sum of the weights divided by self.ntotal
        variance -- last running variance reported by statutils.cumvar, divided by self.ntotal
        eff_samp -- sum of the weights divided by the largest weight seen
        dict_return -- holds "convergence_test_results" when convergence_tests was given

        Raises ValueError if func's arguments do not match the sampler
        parameters, and NanOrInf if the effective sample count becomes nan or
        the maximum log-integrand becomes infinite.
        """
        # This is a semi-hack to ensure that the integrand is called with
        # the arguments in the right order. __code__ is available on both
        # Python 2.6+ and Python 3 (func_code is Python 2 only). The check is
        # done before anything on the sampler is modified.
        # FIXME: How dangerous is this?
        code = func.__code__
        args = code.co_varnames[:code.co_argcount]
        if not MCSampler.match_params_from_args(args, self.params):
            raise ValueError("All integrand variables must be represented by integral parameters.")

        # Determine stopping conditions
        neff = kwargs.get("neff", numpy.float128("inf"))
        n = kwargs["n"] if "n" in kwargs else min(1000, kwargs.get("nmax", float("inf")))
        convergence_tests = kwargs.get("convergence_tests")

        # Adaptive sampling parameters
        n_history = int(kwargs["history_mult"]*n) if "history_mult" in kwargs else None
        tempering_exp = kwargs.get("tempering_exp", 1.0)
        n_adapt = int(kwargs["n_adapt"]*n) if "n_adapt" in kwargs else 0
        # NOTE: without an explicit nmax the cap is the end of the adaptation
        # phase, not infinity.
        nmax = kwargs.get("nmax", n_adapt)
        nmin = kwargs.get("nmin", n_adapt)

        # Window of cached samples used for the adaptive histograms. A missing
        # (or zero) history length means "use everything"; negating None
        # would raise a TypeError.
        history = slice(-n_history, None) if n_history else slice(None)

        save_intg = kwargs.get("save_intg", False)
        nkeep = kwargs.get("save_intg")
        # Corner case: we want all the samples, and we don't want them messed
        # with, so everything is saved, but no sort is done
        if nkeep is True:
            nkeep = None

        # FIXME: The adaptive step relies on the _rvs cache, so this has to be
        # on in order to work
        if n_adapt > 0 and tempering_exp > 0.0:
            save_intg = True

        deltalnL = kwargs.get('igrand_threshold_deltalnL')  # default is to return all
        deltaP = kwargs.get('igrand_threshold_p', 0)  # default is to omit nothing

        show_evaluation_log = kwargs.get('verbose', False)
        if show_evaluation_log:
            print(" .... mcsampler : providing verbose output ..... ")

        int_val1 = numpy.float128(0)
        self.ntotal = 0
        maxval = kwargs["maxval"] if "maxval" in kwargs else -float("Inf")
        old_maxval = maxval
        maxlnL = -float("Inf")
        eff_samp = 0
        mean, var = None, None
        last_convergence_test = defaultdict(lambda: False)   # initialize record of tests

        if show_evaluation_log:
            print("walltime : iteration Neff  ln(maxweight) lnLmarg ln(Z/Lmax) int_var")

        # Flat list of parameter names: tuple entries of self.params describe
        # jointly sampled parameters and are expanded in place. This does not
        # change between chunks, so it is built once.
        params = []
        for item in self.params:
            if isinstance(item, tuple):
                params.extend(item)
            else:
                params.append(item)

        # Previous pdf/cdf/prior of every pinned parameter, restored on exit
        tempcdfdict, temppdfdict, temppriordict, temppdfnormdict = {}, {}, {}, {}
        try:
            # Pin values
            for p, val in kwargs.items():
                if p in self.params:
                    # Store the previous pdf/cdf in case it's already defined
                    tempcdfdict[p] = self.cdf_inv[p]
                    temppdfdict[p] = self.pdf[p]
                    temppdfnormdict[p] = self._pdf_norm[p]
                    temppriordict[p] = self.prior_pdf[p]
                    # Set a new one to always return the same value
                    self.pdf[p] = functools.partial(delta_func_pdf_vector, val)
                    self._pdf_norm[p] = 1.0
                    self.prior_pdf[p] = functools.partial(delta_func_pdf_vector, val)
                    self.cdf_inv[p] = functools.partial(delta_func_samp_vector, val)

            while self.ntotal < nmin or (eff_samp < neff and self.ntotal < nmax):
                # Draw our sample points
                p_s, p_prior, rv = self.draw(n, *self.params)
                # Calculate the overall p_s assuming each pdf is independent
                joint_p_s = numpy.prod(p_s, axis=0)
                joint_p_prior = numpy.prod(p_prior, axis=0)

                # Prevent zeroes in the sampling prior (numpy.any copes with
                # both the array and the scalar case)
                # FIXME: If we get too many of these, we should bail
                if numpy.any(joint_p_s <= 0):
                    # Drop the chunk that draw() just appended to the cache
                    for p in self.params:
                        self._rvs[p] = numpy.resize(self._rvs[p], len(self._rvs[p])-n)
                    print("Zero prior value detected, skipping.", file=sys.stderr)
                    continue

                # Unpack rvs and evaluate integrand
                if len(rv[0].shape) != 1:
                    rv = rv[0]

                unpacked = numpy.hstack([r.flatten() for r in rv]).reshape(len(args), -1)
                unpacked = dict(zip(params, unpacked))
                fval = func(**unpacked)

                # Check if there is any practical contribution to the integral
                # FIXME: While not technically a fatal error, this will kill the
                # adaptive sampling
                if fval.sum() == 0:
                    for p in self.params:
                        self._rvs[p] = numpy.resize(self._rvs[p], len(self._rvs[p])-n)
                    print("No contribution to integral, skipping.", file=sys.stderr)
                    continue

                sample_n = numpy.arange(self.ntotal, self.ntotal + len(fval))

                if save_intg:
                    # FIXME: The joint_prior, if not specified is set to one and
                    # will come out as a scalar here, hence the hack
                    if not isinstance(joint_p_prior, numpy.ndarray):
                        joint_p_prior = numpy.ones(fval.shape)*joint_p_prior

                    # FIXME: The prior values need to be moved out of this, as
                    # they are not part of MC integration
                    if "integrand" in self._rvs:
                        # Append this chunk to what is already cached
                        self._rvs["integrand"] = numpy.hstack( (self._rvs["integrand"], fval) )
                        self._rvs["joint_prior"] = numpy.hstack( (self._rvs["joint_prior"], joint_p_prior) )
                        self._rvs["joint_s_prior"] = numpy.hstack( (self._rvs["joint_s_prior"], joint_p_s) )
                        self._rvs["weights"] = numpy.hstack( (self._rvs["weights"], fval*joint_p_prior/joint_p_s) )
                        self._rvs["sample_n"] = numpy.hstack( (self._rvs["sample_n"], sample_n) )
                    else:
                        # First cached chunk
                        self._rvs["integrand"] = fval
                        self._rvs["joint_prior"] = joint_p_prior
                        self._rvs["joint_s_prior"] = joint_p_s
                        self._rvs["weights"] = fval*joint_p_prior/joint_p_s
                        self._rvs["sample_n"] = sample_n

                # Calculate the integral over this chunk
                int_val = fval * joint_p_prior / joint_p_s

                # Calculate max L (a useful convergence feature) for debug
                # reporting.  Not used for integration
                # Try to avoid nan's
                maxlnL = numpy.log(numpy.max([numpy.exp(maxlnL), numpy.max(fval),numpy.exp(-100)]))   # note if f<0, this will return nearly 0

                # Track the largest non-zero weight seen so far; it is the
                # denominator of the effective sample count below
                for v in int_val:
                    if v != 0 and v > maxval:
                        maxval = v

                # running variance
                var = statutils.cumvar(int_val, mean, var, self.ntotal)[-1]
                # running integral
                int_val1 += int_val.sum()
                # running number of evaluations
                self.ntotal += n
                # FIXME: Likely redundant with int_val1
                mean = int_val1/self.ntotal

                eff_samp = int_val1/maxval

                # Throw exception if we get infinity or nan
                if math.isnan(eff_samp):
                    raise NanOrInf("Effective samples = nan")
                # Compare by value: an identity test against a freshly built
                # float("Inf") can never succeed
                if maxlnL == float("Inf"):
                    raise NanOrInf("maxlnL = inf")

                if show_evaluation_log:
                    print("{0:.3f} : {1:d} {2:.5f} {3:.2f} {4:.2f} {5:.2f} {6:.3f}".format(time.time(), self.ntotal, eff_samp, math.log(maxval), numpy.log(int_val1 / self.ntotal), numpy.log(int_val1 / self.ntotal) - maxlnL, numpy.sqrt(var * self.ntotal) / int_val1))

                if (not convergence_tests) and self.ntotal >= nmin and self.ntotal >= nmax and neff != float("inf"):
                    print("WARNING: User requested maximum number of samples reached... bailing.", file=sys.stderr)

                # Convergence tests
                if convergence_tests:
                    # Aggregate result; recorded only, not (yet) used as a
                    # stopping condition
                    converged = True
                    for key in convergence_tests:
                        last_convergence_test[key] = convergence_tests[key](self._rvs, self.params)
                        converged &= last_convergence_test[key]

                if convergence_tests and show_evaluation_log:  # Print status of each test
                    for key in convergence_tests:
                        print("   -- Convergence test status : ", key, last_convergence_test[key])

                self._address, self._port = "pcdev2.nemo.phys.uwm.edu", 1890
                # Streaming the samples to a remote listener is disabled
                #if self._address is not None:
                if False:
                    dims = ("distance", "inclination", "right_ascension",
                            "declination", "integrand", "joint_prior", "joint_s_prior")
                    send_data = synchlib.prepare_data(self._rvs, dims, self.ntotal - n)
                    self.socket = synchlib.send_samples(send_data, self._address, self._port, verbose=True, socket=self.socket)

                # The total number of adaptive steps is reached
                # FIXME: We need a better stopping condition here
                if self.ntotal >= n_adapt and maxval == old_maxval:
                    # Downsample points
                    if save_intg and nkeep is not None:
                        pt_sort = self._rvs["weights"].argsort()[-nkeep:]
                        for key in self._rvs:
                            # 2-D entries hold one row per dimension, so the
                            # samples run along the second axis
                            if len(self._rvs[key].shape) > 1:
                                self._rvs[key] = self._rvs[key][:,pt_sort]
                            else:
                                self._rvs[key] = self._rvs[key][pt_sort]
                    # No adaptation for this chunk
                    continue
                old_maxval = maxval

                # Iterate through each of the parameters, updating the sampling
                # prior PDF according to the 1-D marginalization
                for p in self.params:
                    # FIXME: The second part of this condition should be made more
                    # specific to pinned parameters
                    if p not in self.adaptive or p in kwargs:
                        continue
                    points = self._rvs[p][history]
                    weights = (self._rvs["integrand"][history]*self._rvs["joint_prior"][history])**tempering_exp

                    self._hist[p] = statutils.get_adaptive_binning(points, (self.llim[p], self.rlim[p]))
                    for pt, w in zip(points, weights):
                        self._hist[p][pt,] += w
                    # Add the mean to every bin so that none of them is empty
                    self._hist[p].array += self._hist[p].array.mean()
                    rate.filter_array(self._hist[p].array, rate.tophat_window(3))
                    norm = numpy.sum(self._hist[p].array * self._hist[p].bins.volumes())
                    self._hist[p].array /= norm
                    # FIXME: Stupid pet trick while numpy version is lacking
                    self.pdf[p] = numpy.frompyfunc(rate.InterpBinnedArray(self._hist[p]), 1, 1)

                    self.cdf[p] = self.cdf_function(p)
                    self.cdf_inv[p] = self.cdf_inverse(p)
        finally:
            # If we were pinning any values, undo the changes we did before.
            # Done in a finally block so that a failed integration does not
            # leave the sampler with delta-function PDFs.
            self.cdf_inv.update(tempcdfdict)
            self.pdf.update(temppdfdict)
            self._pdf_norm.update(temppdfnormdict)
            self.prior_pdf.update(temppriordict)

        # Clean out the _rvs arrays for 'irrelevant' points
        #   - create the cumulative weights
        if "weights" in self._rvs and deltaP > 0:
            # Indices that sort the weights in increasing order
            keep_idx = self._rvs["weights"].argsort()
            # Make the (unnormalized) CDF
            total_weight = self._rvs["weights"][keep_idx].cumsum()
            # Find the deltaP cutoff index
            idx = numpy.searchsorted(total_weight, deltaP*total_weight[-1], 'left')
            keep_idx = keep_idx[idx:]
            # Remove all samples which contribute to the smallest deltaP of
            # the cumulative weight
            for key in list(self._rvs):
                # Tuple keys hold jointly sampled parameters, one row each
                if isinstance(key, tuple):
                    self._rvs[key] = self._rvs[key][:,keep_idx]
                else:
                    self._rvs[key] = self._rvs[key][keep_idx]

        if "integrand" in self._rvs and deltalnL is not None:
            deltal_mask = numpy.log(self._rvs["integrand"]) > (maxlnL - deltalnL)
            # Remove all samples which do not have L > maxlnL - deltalnL
            for key in list(self._rvs):
                if isinstance(key, tuple):
                    self._rvs[key] = self._rvs[key][:,deltal_mask]
                else:
                    self._rvs[key] = self._rvs[key][deltal_mask]

        # Create extra dictionary to return things
        dict_return = {}
        if convergence_tests is not None:
            dict_return["convergence_test_results"] = last_convergence_test

        return int_val1/self.ntotal, var/self.ntotal, eff_samp, dict_return
