Beispiel #1
0
 def add_observed_node(self, node, out_message, K):
     filename=str(out_message)+ "->" + str(node) + "_obs_kernel" + self.filename_suffix + ".txt"
     self.lines[-1]+=str(out_message) + " " + filename + "\t"
     
     if self.pathname is not "":
         filename=self.pathname + os.sep + filename
     savetxt(filename, K)
Beispiel #2
0
 def add_edge(self, node, out_message, matrix_name, K):
     filename=str(out_message)+ "->" + str(node) + "_" + matrix_name + self.filename_suffix + ".txt"
     self.lines[-1]+=matrix_name + " " + filename + " "
     
     if self.pathname is not "":
         filename=self.pathname + os.sep + filename
     savetxt(filename, K)
Beispiel #3
0
    def __process_results__(self):
        lines = []
        if len(self.experiments) == 0:
            lines.append("no experiments to process")
            return

        # burnin and dimension are the same for all chains
        burnin = self.experiments[0].mcmc_chain.mcmc_params.burnin
        dim = self.experiments[
            0].mcmc_chain.mcmc_sampler.distribution.dimension

        # collect all thinned samples of all chains in here
        merged_samples = zeros((0, dim))

        for i in range(len(self.experiments)):
            lines.append("Processing chain %d" % i)

            # discard samples before burn in
            lines.append("Discarding burnin of %d" % burnin)
            burned_in = self.experiments[i].mcmc_chain.samples[burnin:, :]

            # thin out by factor and store thinned samples
            indices = arange(0, len(burned_in), self.thinning_factor)
            lines.append("Thinning by factor of %d, giving %d samples" \
                         % (self.thinning_factor, len(indices)))
            thinned = burned_in[indices, :]
            merged_samples = vstack((merged_samples, thinned))

        # dump merged samples to disc
        fname = self.experiments[0].name + "_merged_samples.txt"
        lines.append("Storing %d samples in file %s" %
                     (len(merged_samples), fname))
        savetxt(fname, merged_samples)

        return lines
    def __process_results__(self):
        lines = []
        if len(self.experiments) == 0:
            lines.append("no experiments to process")
            return
        
        # burnin and dimension are the same for all chains
        burnin = self.experiments[0].mcmc_chain.mcmc_params.burnin
        dim = self.experiments[0].mcmc_chain.mcmc_sampler.distribution.dimension
        
        # collect all thinned samples of all chains in here
        merged_samples = zeros((0, dim))
        
        for i in range(len(self.experiments)):
            lines.append("Processing chain %d" % i)
            
            # discard samples before burn in
            lines.append("Discarding burnin of %d" % burnin)
            burned_in = self.experiments[i].mcmc_chain.samples[burnin:, :]
            
            # thin out by factor and store thinned samples
            indices = arange(0, len(burned_in), self.thinning_factor)
            lines.append("Thinning by factor of %d, giving %d samples" \
                         % (self.thinning_factor, len(indices)))
            thinned = burned_in[indices, :]
            merged_samples = vstack((merged_samples, thinned))

        # dump merged samples to disc
        fname = self.experiments[0].name + "_merged_samples.txt"
        lines.append("Storing %d samples in file %s" % (len(merged_samples), fname))
        savetxt(fname, merged_samples)

        return lines
print "precomputing systems for messages from observed nodes"
graphlab_lines.lines.append(os.linesep + "# edges with observed targets")
for node, observation in observations.items():
    for out_message in graph[node]:
        edge=(out_message, node)
        graphlab_lines.new_edge_observed_target(node, out_message)
        
        data_source=data[edge][0]
        data_target=data[edge][1]
        Ks_chol, Is, Rs, Ws=incomplete_cholesky(data_source, kernel, eta)
        Kt_chol, It, Rt, Wt=incomplete_cholesky(data_target, kernel, eta)
        
        Qs,Rs,Ps=qr(Ws.dot(Ws.T)+Ks_chol+eye(shape(Ks_chol)[0])*reg_lambda, pivoting=True)
        Qt,Rt,Pt=qr(Wt.dot(Wt.T)+Kt_chol+eye(shape(Kt_chol)[0])*reg_lambda, pivoting=True)
        
        savetxt(graphlab_lines.add_edge(node, out_message,"Q_s"), Qs)
        savetxt(graphlab_lines.add_edge(node, out_message,"R_s"), Rs)
        savetxt(graphlab_lines.add_edge(node, out_message,"P_s"), Ps)
        
        savetxt(graphlab_lines.add_edge(node, out_message,"Q_t"), Qt)
        savetxt(graphlab_lines.add_edge(node, out_message,"R_t"), Rt)
        savetxt(graphlab_lines.add_edge(node, out_message,"P_t"), Pt)
        
        savetxt(graphlab_lines.add_edge(node, out_message,"W"), Ws.dot(Wt.T))

print "precomputing systems for messages from non-observed nodes"
graphlab_lines.lines.append(os.linesep + "# edges with non-observed targets")
for edge in edges:
    # exclude edges which involve observed nodes
    is_edge_target_observed=len(Set(observations.keys()).intersection(Set(edge)))>0
    if not is_edge_target_observed:
Beispiel #6
0
def write_input_data_to_disk(full_path_id, data, times):
    with open(full_path_id, 'wb') as output_data:
        for i, frame in enumerate(data):
            separator = '@' + str(times[i] - times[0])
            savetxt(output_data, frame, delimiter=',', fmt='%u')
            savetxt(output_data, [separator], fmt='%s')
Beispiel #7
0
    def __process_results__(self):
        lines = []
        if len(self.experiments) == 0:
            lines.append("no experiments to process")
            return

        # burnin is the same for all chains
        burnin = self.experiments[0].mcmc_chain.mcmc_params.burnin

        quantiles = zeros((len(self.experiments), len(self.ref_quantiles)))
        norm_of_means = zeros(len(self.experiments))
        acceptance_rates = zeros(len(self.experiments))
        #         ess_0 = zeros(len(self.experiments))
        #         ess_1 = zeros(len(self.experiments))
        #         ess_minima = zeros(len(self.experiments))
        #         ess_medians = zeros(len(self.experiments))
        #         ess_maxima = zeros(len(self.experiments))
        times = zeros(len(self.experiments))

        for i in range(len(self.experiments)):
            burned_in = self.experiments[i].mcmc_chain.samples[burnin:, :]

            # use precomputed quantiles if they match with the provided ones
            if hasattr(self.experiments[i], "ref_quantiles") and \
               hasattr(self.experiments[i], "quantiles") and \
               allclose(self.ref_quantiles, self.experiments[i].ref_quantiles):
                quantiles[i, :] = self.experiments[i].quantiles
            else:
                try:
                    quantiles[i, :] = self.experiments[i].mcmc_chain.mcmc_sampler.distribution.emp_quantiles(\
                                      burned_in, self.ref_quantiles)
                except NotImplementedError:
                    print "skipping quantile computations, distribution does", \
                          "not support it."

            # quantiles should be about average error rather than average quantile
            quantiles[i, :] = abs(quantiles[i, :] - self.ref_quantiles)

            dim = self.experiments[
                i].mcmc_chain.mcmc_sampler.distribution.dimension
            norm_of_means[i] = norm(mean(burned_in, 0))
            acceptance_rates[i] = mean(
                self.experiments[i].mcmc_chain.accepteds[burnin:])

            # dump burned in samples to disc
            # sample_filename=self.experiments[0].experiment_dir + self.experiments[0].name + "_burned_in.txt"
            # savetxt(sample_filename, burned_in)

            # store minimum ess for every experiment
            #ess_per_covariate = asarray([RCodaTools.ess_coda(burned_in[:, cov_idx]) for cov_idx in range(dim)])
            #             ess_per_covariate = asarray([0 for _ in range(dim)])
            #             ess_0=ess_per_covariate[0]
            #             ess_1=ess_per_covariate[1]
            #             ess_minima[i] = min(ess_per_covariate)
            #             ess_medians[i] = median(ess_per_covariate)
            #             ess_maxima[i] = max(ess_per_covariate)

            # save chain time needed
            ellapsed = self.experiments[i].mcmc_chain.mcmc_outputs[0].times
            times[i] = int(round(sum(ellapsed)))

        mean_quantiles = mean(quantiles, 0)
        std_quantiles = std(quantiles, 0)

        sqrt_num_trials = sqrt(len(self.experiments))

        # print median kernel width sigma
        #sigma=GaussianKernel.get_sigma_median_heuristic(burned_in.T)
        #lines.append("median kernel sigma: "+str(sigma))

        lines.append("quantiles:")
        for i in range(len(self.ref_quantiles)):
            lines.append(
                str(mean_quantiles[i]) + " +- " +
                str(std_quantiles[i] / sqrt_num_trials))

        lines.append("norm of means:")
        lines.append(
            str(mean(norm_of_means)) + " +- " +
            str(std(norm_of_means) / sqrt_num_trials))

        lines.append("acceptance rate:")
        lines.append(
            str(mean(acceptance_rates)) + " +- " +
            str(std(acceptance_rates) / sqrt_num_trials))

        #         lines.append("ess dimension 0:")
        #         lines.append(str(mean(ess_0)) + " +- " + str(std(ess_0)/sqrt_num_trials))
        #
        #         lines.append("ess dimension 1:")
        #         lines.append(str(mean(ess_1)) + " +- " + str(std(ess_1)/sqrt_num_trials))
        #
        #         lines.append("minimum ess:")
        #         lines.append(str(mean(ess_minima)) + " +- " + str(std(ess_minima)/sqrt_num_trials))
        #
        #         lines.append("median ess:")
        #         lines.append(str(mean(ess_medians)) + " +- " + str(std(ess_medians)/sqrt_num_trials))
        #
        #         lines.append("maximum ess:")
        #         lines.append(str(mean(ess_maxima)) + " +- " + str(std(ess_maxima)/sqrt_num_trials))

        lines.append("times:")
        lines.append(
            str(mean(times)) + " +- " + str(std(times) / sqrt_num_trials))

        # mean as a function of iterations, normalised by time
        step = round(
            (self.experiments[0].mcmc_chain.mcmc_params.num_iterations -
             burnin) / 5)
        iterations = arange(
            self.experiments[0].mcmc_chain.mcmc_params.num_iterations - burnin,
            step=step)

        running_means = zeros(len(iterations))
        running_errors = zeros(len(iterations))
        for i in arange(len(iterations)):
            # norm of mean of chain up
            norm_of_means_yet = zeros(len(self.experiments))
            for j in range(len(self.experiments)):
                samples_yet = self.experiments[j].mcmc_chain.samples[burnin:(
                    burnin + iterations[i] + 1 + step), :]
                norm_of_means_yet[j] = norm(mean(samples_yet, 0))

            running_means[i] = mean(norm_of_means_yet)
            error_level = 1.96
            running_errors[i] = error_level * std(norm_of_means_yet) / sqrt(
                len(norm_of_means_yet))

        ioff()
        figure()
        plot(iterations, running_means * mean(times))
        fill_between(iterations, (running_means - running_errors)*mean(times), \
                     (running_means + running_errors)*mean(times), hold=True, color="gray")

        # make sure path to save exists
        try:
            os.makedirs(self.experiments[0].experiment_dir)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                raise

        savefig(self.experiments[0].experiment_dir + self.experiments[0].name +
                "_running_mean.png")
        close()

        # also store plot X and Y
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean_X.txt", \
                iterations)
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean_Y.txt", \
                running_means*mean(times))
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean_errors.txt", \
                running_errors*mean(times))

        # dont produce quantile convergence plots here for now
        """# quantile convergence of a single one
        desired_quantile=0.5
        running_quantiles=zeros(len(iterations))
        running_quantile_errors=zeros(len(iterations))
        for i in arange(len(iterations)):
            quantiles_yet = zeros(len(self.experiments))
            for j in range(len(self.experiments)):
                samples_yet = self.experiments[j].mcmc_chain.samples[burnin:(burnin + iterations[i] + 1 + step), :]
                
                # just compute one quantile for now
                quantiles_yet[j]=self.experiments[j].mcmc_chain.mcmc_sampler.distribution.emp_quantiles(samples_yet, \
                                                                                          array([desired_quantile]))
                quantiles_yet[j]=abs(quantiles_yet[j]-desired_quantile)
            running_quantiles[i] = mean(quantiles_yet)
            error_level = 1.96
            running_quantile_errors[i] = error_level * std(quantiles_yet) / sqrt(len(quantiles_yet))
        
        
        ioff()
        figure()
        plot(iterations, running_quantiles*mean(times))
        fill_between(iterations, (running_quantiles - running_quantile_errors)*mean(times), \
                     (running_quantiles + running_quantile_errors)*mean(times), hold=True, color="gray")
        
        plot([iterations.min(),iterations.max()], [desired_quantile*mean(times) for _ in range(2)])
        
        title(str(desired_quantile)+"-quantile convergence")
        savefig(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile.png")
        close()
        
        # also store plot X and Y
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_X.txt", \
                iterations)
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_Y.txt", \
                running_quantiles*mean(times))
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_errors.txt", \
                running_quantile_errors*mean(times))
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_reference.txt", \
                [desired_quantile*mean(times)])
        """
        # add latex table line
        #         latex_lines = []
        #         latex_lines.append("Sampler & Acceptance & ESS2 & Norm(mean) & ")
        #         for i in range(len(self.ref_quantiles)):
        #             latex_lines.append('%.1f' % self.ref_quantiles[i] + "-quantile")
        #             if i < len(self.ref_quantiles) - 1:
        #                 latex_lines.append(" & ")
        #         latex_lines.append("\\\\")
        #         lines.append("".join(latex_lines))
        #
        #         latex_lines = []
        #         latex_lines.append(self.experiments[0].mcmc_chain.mcmc_sampler.__class__.__name__)
        #         latex_lines.append('$%.3f' % mean(acceptance_rates) + " \pm " + '%.3f$' % (std(acceptance_rates)/sqrt_num_trials))
        #         latex_lines.append('$%.3f' % mean(norm_of_means) + " \pm " + '%.3f$' % (std(norm_of_means)/sqrt_num_trials))
        #         for i in range(len(self.ref_quantiles)):
        #             latex_lines.append('$%.3f' % mean_quantiles[i] + " \pm " + '%.3f$' % (std_quantiles[i]/sqrt_num_trials))
        #
        #
        #         lines.append(" & ".join(latex_lines) + "\\\\")

        return lines
    def __process_results__(self):
        lines = []
        if len(self.experiments) == 0:
            lines.append("no experiments to process")
            return
        
        # burnin is the same for all chains
        burnin = self.experiments[0].mcmc_chain.mcmc_params.burnin
        
        quantiles = zeros((len(self.experiments), len(self.ref_quantiles)))
        norm_of_means = zeros(len(self.experiments))
        acceptance_rates = zeros(len(self.experiments))
#         ess_0 = zeros(len(self.experiments))
#         ess_1 = zeros(len(self.experiments))
#         ess_minima = zeros(len(self.experiments))
#         ess_medians = zeros(len(self.experiments))
#         ess_maxima = zeros(len(self.experiments))
        times = zeros(len(self.experiments))
        
        for i in range(len(self.experiments)):
            burned_in = self.experiments[i].mcmc_chain.samples[burnin:, :]
            
            # use precomputed quantiles if they match with the provided ones
            if hasattr(self.experiments[i], "ref_quantiles") and \
               hasattr(self.experiments[i], "quantiles") and \
               allclose(self.ref_quantiles, self.experiments[i].ref_quantiles):
                quantiles[i, :] = self.experiments[i].quantiles
            else:
                try:
                    quantiles[i, :] = self.experiments[i].mcmc_chain.mcmc_sampler.distribution.emp_quantiles(\
                                      burned_in, self.ref_quantiles)
                except NotImplementedError:
                    print "skipping quantile computations, distribution does", \
                          "not support it."
            
            # quantiles should be about average error rather than average quantile
            quantiles[i,:]=abs(quantiles[i,:]-self.ref_quantiles)
            
            dim = self.experiments[i].mcmc_chain.mcmc_sampler.distribution.dimension
            norm_of_means[i] = norm(mean(burned_in, 0))
            acceptance_rates[i] = mean(self.experiments[i].mcmc_chain.accepteds[burnin:])
            
            # dump burned in samples to disc
            # sample_filename=self.experiments[0].experiment_dir + self.experiments[0].name + "_burned_in.txt"
            # savetxt(sample_filename, burned_in)
            
            # store minimum ess for every experiment
            #ess_per_covariate = asarray([RCodaTools.ess_coda(burned_in[:, cov_idx]) for cov_idx in range(dim)])
#             ess_per_covariate = asarray([0 for _ in range(dim)])
#             ess_0=ess_per_covariate[0]
#             ess_1=ess_per_covariate[1]
#             ess_minima[i] = min(ess_per_covariate)
#             ess_medians[i] = median(ess_per_covariate)
#             ess_maxima[i] = max(ess_per_covariate)
            
            # save chain time needed
            ellapsed = self.experiments[i].mcmc_chain.mcmc_outputs[0].times
            times[i] = int(round(sum(ellapsed)))

        mean_quantiles = mean(quantiles, 0)
        std_quantiles = std(quantiles, 0)
        
        sqrt_num_trials=sqrt(len(self.experiments))
        
        # print median kernel width sigma
        #sigma=GaussianKernel.get_sigma_median_heuristic(burned_in.T)
        #lines.append("median kernel sigma: "+str(sigma))
        
        lines.append("quantiles:")
        for i in range(len(self.ref_quantiles)):
            lines.append(str(mean_quantiles[i]) + " +- " + str(std_quantiles[i]/sqrt_num_trials))
        
        lines.append("norm of means:")
        lines.append(str(mean(norm_of_means)) + " +- " + str(std(norm_of_means)/sqrt_num_trials))
        
        lines.append("acceptance rate:")
        lines.append(str(mean(acceptance_rates)) + " +- " + str(std(acceptance_rates)/sqrt_num_trials))
        
#         lines.append("ess dimension 0:")
#         lines.append(str(mean(ess_0)) + " +- " + str(std(ess_0)/sqrt_num_trials))
#         
#         lines.append("ess dimension 1:")
#         lines.append(str(mean(ess_1)) + " +- " + str(std(ess_1)/sqrt_num_trials))
#         
#         lines.append("minimum ess:")
#         lines.append(str(mean(ess_minima)) + " +- " + str(std(ess_minima)/sqrt_num_trials))
#         
#         lines.append("median ess:")
#         lines.append(str(mean(ess_medians)) + " +- " + str(std(ess_medians)/sqrt_num_trials))
#         
#         lines.append("maximum ess:")
#         lines.append(str(mean(ess_maxima)) + " +- " + str(std(ess_maxima)/sqrt_num_trials))
        
        lines.append("times:")
        lines.append(str(mean(times)) + " +- " + str(std(times)/sqrt_num_trials))
        
        # mean as a function of iterations, normalised by time
        step = round((self.experiments[0].mcmc_chain.mcmc_params.num_iterations - burnin)/5)
        iterations = arange(self.experiments[0].mcmc_chain.mcmc_params.num_iterations - burnin, step=step)
        
        running_means = zeros(len(iterations))
        running_errors = zeros(len(iterations))
        for i in arange(len(iterations)):
            # norm of mean of chain up 
            norm_of_means_yet = zeros(len(self.experiments))
            for j in range(len(self.experiments)):
                samples_yet = self.experiments[j].mcmc_chain.samples[burnin:(burnin + iterations[i] + 1 + step), :]
                norm_of_means_yet[j] = norm(mean(samples_yet, 0))
            
            running_means[i] = mean(norm_of_means_yet)
            error_level = 1.96
            running_errors[i] = error_level * std(norm_of_means_yet) / sqrt(len(norm_of_means_yet))
        
        ioff()
        figure()
        plot(iterations, running_means*mean(times))
        fill_between(iterations, (running_means - running_errors)*mean(times), \
                     (running_means + running_errors)*mean(times), hold=True, color="gray")
        
        # make sure path to save exists
        try:
            os.makedirs(self.experiments[0].experiment_dir)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                raise
        
        savefig(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean.png")
        close()
        
        # also store plot X and Y
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean_X.txt", \
                iterations)
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean_Y.txt", \
                running_means*mean(times))
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_mean_errors.txt", \
                running_errors*mean(times))
        
        # dont produce quantile convergence plots here for now
        """# quantile convergence of a single one
        desired_quantile=0.5
        running_quantiles=zeros(len(iterations))
        running_quantile_errors=zeros(len(iterations))
        for i in arange(len(iterations)):
            quantiles_yet = zeros(len(self.experiments))
            for j in range(len(self.experiments)):
                samples_yet = self.experiments[j].mcmc_chain.samples[burnin:(burnin + iterations[i] + 1 + step), :]
                
                # just compute one quantile for now
                quantiles_yet[j]=self.experiments[j].mcmc_chain.mcmc_sampler.distribution.emp_quantiles(samples_yet, \
                                                                                          array([desired_quantile]))
                quantiles_yet[j]=abs(quantiles_yet[j]-desired_quantile)
            running_quantiles[i] = mean(quantiles_yet)
            error_level = 1.96
            running_quantile_errors[i] = error_level * std(quantiles_yet) / sqrt(len(quantiles_yet))
        
        
        ioff()
        figure()
        plot(iterations, running_quantiles*mean(times))
        fill_between(iterations, (running_quantiles - running_quantile_errors)*mean(times), \
                     (running_quantiles + running_quantile_errors)*mean(times), hold=True, color="gray")
        
        plot([iterations.min(),iterations.max()], [desired_quantile*mean(times) for _ in range(2)])
        
        title(str(desired_quantile)+"-quantile convergence")
        savefig(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile.png")
        close()
        
        # also store plot X and Y
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_X.txt", \
                iterations)
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_Y.txt", \
                running_quantiles*mean(times))
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_errors.txt", \
                running_quantile_errors*mean(times))
        savetxt(self.experiments[0].experiment_dir + self.experiments[0].name + "_running_quantile_reference.txt", \
                [desired_quantile*mean(times)])
        """
        # add latex table line
#         latex_lines = []
#         latex_lines.append("Sampler & Acceptance & ESS2 & Norm(mean) & ")
#         for i in range(len(self.ref_quantiles)):
#             latex_lines.append('%.1f' % self.ref_quantiles[i] + "-quantile")
#             if i < len(self.ref_quantiles) - 1:
#                 latex_lines.append(" & ")
#         latex_lines.append("\\\\")
#         lines.append("".join(latex_lines))
#         
#         latex_lines = []
#         latex_lines.append(self.experiments[0].mcmc_chain.mcmc_sampler.__class__.__name__)
#         latex_lines.append('$%.3f' % mean(acceptance_rates) + " \pm " + '%.3f$' % (std(acceptance_rates)/sqrt_num_trials))
#         latex_lines.append('$%.3f' % mean(norm_of_means) + " \pm " + '%.3f$' % (std(norm_of_means)/sqrt_num_trials))
#         for i in range(len(self.ref_quantiles)):
#             latex_lines.append('$%.3f' % mean_quantiles[i] + " \pm " + '%.3f$' % (std_quantiles[i]/sqrt_num_trials))
#         
#         
#         lines.append(" & ".join(latex_lines) + "\\\\")
        
        return lines