def test_unbiased_checkerboard(self):
    """ Test that the checkerboard problem returns both valid solutions. """
    sampler = GibbsSampler()
    checkerboard = IsingModel(J={(0, 1): 1, (1, 2): 1, (2, 3): 1, (3, 0): 1},
                              h={})
    result = sampler.sample(checkerboard, 10000)
    solutions = [s.as_tuple for s in result]
    assert (-1, 1, -1, 1) in solutions
    assert (1, -1, 1, -1) in solutions
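
# Why the test expects exactly these two tuples: with antiferromagnetic
# couplings (J > 0) on a 4-cycle and no field, the Ising energy
#     E(s) = sum_{(i, j)} J_ij * s_i * s_j + sum_i h_i * s_i
# is minimized by the two alternating ("checkerboard") configurations.
# A minimal brute-force check of that claim (these helpers are illustrative,
# not part of the tested API):
import itertools

def ising_energy(spins, J, h):
    """Energy of one spin configuration under the standard Ising convention."""
    energy = sum(Jij * spins[i] * spins[j] for (i, j), Jij in J.items())
    energy += sum(hi * spins[i] for i, hi in h.items())
    return energy

def brute_force_ground_states(n, J, h):
    """Enumerate all 2**n configurations and return those of minimal energy."""
    configs = list(itertools.product((-1, 1), repeat=n))
    energies = {c: ising_energy(c, J, h) for c in configs}
    e_min = min(energies.values())
    return [c for c, e in energies.items() if e == e_min]

# brute_force_ground_states(4, {(0, 1): 1, (1, 2): 1, (2, 3): 1, (3, 0): 1}, {})
# -> [(-1, 1, -1, 1), (1, -1, 1, -1)]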
def main():
    """ Main program. """
    data = initialize()
    g = GibbsSampler(sequences=data['sequences'], motif_width=data['width'])
    g.find_motif(iterations=data['iterations'])
    print_sequences(data['sequences'], data['width'])
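
# For context, find_motif above presumably follows the classic site-sampling
# scheme of Lawrence et al. (1993): hold one sequence out, build a position
# weight matrix (PWM) from the current motif positions in the remaining
# sequences, then resample the held-out sequence's motif start proportionally
# to its PWM score. A minimal sketch of one such update (all names here are
# illustrative, not this module's actual API):
import random

def sample_motif_position(held_out, others, positions, width, alphabet="ACGT"):
    """Resample the motif start in `held_out` given the other sequences'
    current motif positions, using add-one pseudocounts."""
    # Build PWM column counts from the other sequences' current motifs.
    counts = [{a: 1 for a in alphabet} for _ in range(width)]
    for seq, pos in zip(others, positions):
        for k in range(width):
            counts[k][seq[pos + k]] += 1
    totals = [sum(col.values()) for col in counts]
    # Score every candidate start in the held-out sequence.
    weights = []
    for start in range(len(held_out) - width + 1):
        w = 1.0
        for k in range(width):
            w *= counts[k][held_out[start + k]] / totals[k]
        weights.append(w)
    # Sample a new start proportionally to its PWM likelihood.
    return random.choices(range(len(weights)), weights=weights)[0]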
class Solver:
    def __init__(self, train_file_name, test_file_name):
        self.train_file = train_file_name
        # Scratch code kept for manual testing:
        # self.trainer_builder = Trainer_builder(self.train_file).get_trainer()
        # a = GibbsSampler(self.trainer_builder)
        # label = a.do_gibbs('The discovery struck Nick like a blow .'.split())
        # a.calculate_posterior(('at', 'the', 'same', 'instant', ',', 'nick',
        #                        'hit', 'the', 'barrel', 'and', 'threw',
        #                        'himself', 'upon', 'the', 'smaller', 'man', '.'),
        #                       map(lambda item: item.upper(), label))

    # Calculate the log of the posterior probability of a given sentence
    # with a given part-of-speech labeling.
    def posterior(self, model, sentence, label):
        if model == "Simple":
            simple_instance = Simplified(sentence)
            return simple_instance.calc_posterior(self.trainer_builder, label)
        elif model == "Complex":
            return self.gibbs_instance.calculate_posterior(
                sentence, map(lambda item: item.upper(), label))
        elif model == "HMM":
            viterbi_instance = Viterbi(sentence)
            return viterbi_instance.calc_posterior(self.trainer_builder, label)
        else:
            print("Unknown algo!")

    # Do the training!
    def train(self, data):
        self.trainer_builder = Trainer_builder(self.train_file).get_trainer()

    # Functions for each algorithm.
    def simplified(self, sentence):
        simple_instance = Simplified(sentence)
        return simple_instance.get_most_probable_tags(self.trainer_builder)

    def complex_mcmc(self, sentence):
        self.gibbs_instance = GibbsSampler(self.trainer_builder)
        return self.gibbs_instance.do_gibbs(sentence)

    def hmm_viterbi(self, sentence):
        viterbi_instance = Viterbi(sentence)
        self.hmm_tags = viterbi_instance.get_most_probable_tags(
            self.trainer_builder)[0]
        return self.hmm_tags

    # This solve() method is called by label.py, so you should keep the
    # interface the same, but you can change the code itself. It should
    # return a list of part-of-speech labelings of the sentence, one part
    # of speech per word.
    def solve(self, model, sentence):
        if model == "Simple":
            return self.simplified(sentence)
        elif model == "Complex":
            return self.complex_mcmc(sentence)
        elif model == "HMM":
            return self.hmm_viterbi(sentence)
        else:
            print("Unknown algo!")
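
# What do_gibbs presumably does under the hood: sweep over the sentence and
# resample each word's tag from its full conditional, which for a first-order
# tag chain is proportional to
#     emission(word | tag) * P(tag | prev_tag) * P(next_tag | tag).
# A minimal, self-contained sketch of one sweep (the dict-based tables `emit`
# and `trans` are illustrative stand-ins for whatever Trainer_builder
# actually provides):
import random

def gibbs_sweep(words, tags, tagset, emit, trans):
    """One in-place Gibbs sweep; emit[t][w] and trans[t1][t2] are probabilities."""
    for i in range(len(words)):
        weights = []
        for t in tagset:
            w = emit[t].get(words[i], 1e-8)
            if i > 0:
                w *= trans[tags[i - 1]][t]
            if i < len(words) - 1:
                w *= trans[t][tags[i + 1]]
            weights.append(w)
        tags[i] = random.choices(tagset, weights=weights)[0]
    return tags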
def main():
    model_width = 4
    model_height = 4
    temperature = 1
    decimals = 2

    builder = RandomBuilder(model_width, model_height)
    model = builder.generate(decimals=decimals)

    # First produce a histogram with the D-Wave sampler
    sampler = DWaveSampler()
    results_dwave = sample_and_histogram(sampler, model, temperature)

    # Then with the Gibbs sampler
    sampler = GibbsSampler(n_variables=model_height * model_width)
    results_gibbs = sample_and_histogram(sampler, model, temperature)

    save_experiment("histogram", {
        'width': model_width,
        'height': model_height,
        'model_decimals': decimals,
        'temperature': temperature,
        'dwave_samples': results_dwave.raw_data,
        'gibbs_samples': results_gibbs.raw_data
    })
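
# sample_and_histogram is not defined in this excerpt; judging from the call
# sites, it draws samples from the given sampler and counts how often each
# state occurs. A plausible minimal sketch under that assumption (the function
# name and the use of s.as_tuple mirror the tests above; the real version also
# wraps the result in an object exposing raw_data):
from collections import Counter

def sample_and_histogram_sketch(sampler, model, temperature, n_samples=10000):
    """Draw n_samples and count occurrences of each sampled state."""
    samples = sampler.sample(model, n_samples, temperature)
    return Counter(s.as_tuple for s in samples)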
def __init__(self, f, X, y, x_range, eval_only, extra, options):
    self.f = f
    self.x_range = x_range
    self.options = options
    self.well_defined = X.shape[0] > 0
    self.solver = GibbsSampler(X, y, options)
    self.eval_only = eval_only
    self.opt_n = options['opt_n']
    self.dx = options['dx']
    if eval_only:
        self.newX = extra
    else:
        self.n_bo = extra
        self.opt_n = np.maximum(self.opt_n, self.n_bo * 2)
    self.max_value = self.options['max_value']
    self.n_bo_top_percent = self.options['n_bo_top_percent']
def test_biased_checkerboard(self):
    """ Test that a checkerboard biased at one qubit returns only one solution. """
    sampler = GibbsSampler()
    biased_checkerboard = IsingModel(J={(0, 1): 1, (1, 2): 1, (2, 3): 1, (3, 0): 1},
                                     h={0: 2})
    result = sampler.sample(biased_checkerboard, 10000)
    solutions = [s.as_tuple for s in result]
    assert (-1, 1, -1, 1) in solutions
    assert len(solutions) == 1

    # Enforce the opposite checkerboard
    biased_checkerboard = IsingModel(J={(0, 1): 1, (1, 2): 1, (2, 3): 1, (3, 0): 1},
                                     h={0: -2})
    result = sampler.sample(biased_checkerboard, 10000)
    solutions = [s.as_tuple for s in result]
    assert (1, -1, 1, -1) in solutions
    assert len(solutions) == 1
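
# Why the bias selects a unique ground state: the field term h[0] * s[0] adds
# +2 to the energy when s[0] = +1 and -2 when s[0] = -1, breaking the tie
# between the two checkerboards. Reusing the illustrative helper from above:
# brute_force_ground_states(4, {(0, 1): 1, (1, 2): 1, (2, 3): 1, (3, 0): 1},
#                           {0: 2})
# -> [(-1, 1, -1, 1)]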
class bo(object):
    def __init__(self, f, X, y, x_range, eval_only, extra, options):
        self.f = f
        self.t = options['t']
        self.x_range = x_range
        self.options = options
        self.well_defined = X.shape[0] > 0
        self.solver = GibbsSampler(X, y, options)
        self.eval_only = eval_only
        self.opt_n = options['opt_n']
        self.dx = options['dx']
        if eval_only:
            self.newX = extra
        else:
            self.n_bo = extra
            self.opt_n = np.maximum(self.opt_n, self.n_bo * 2)
        if 'ziw' in options:
            self.max_value = self.options['max_value'] + np.random.normal() * 0.05
        else:
            self.max_value = self.options['max_value']
        self.n_bo_top_percent = self.options['n_bo_top_percent']

    def learn(self):
        self.gp, self.z, self.k = self.solver.run(self.options['gibbs_iter'])

    def run(self):
        if self.eval_only:
            ynew = [self.f(x) for x in self.newX]
            return ynew

        # Return random inputs if X is empty
        if not self.well_defined:
            xnew = np.random.uniform(self.x_range[0], self.x_range[1],
                                     (self.n_bo, self.dx))
            acfnew = [self.max_value] * self.n_bo
            return xnew, acfnew, self.solver.z, self.solver.k, self.max_value

        # Learn and optimize
        self.learn()

        # Initialization
        xnew = np.empty((self.n_bo, self.dx))
        xnew[0] = np.random.uniform(self.x_range[0], self.x_range[1])

        # Optimize group by group
        all_cat = np.unique(self.z)
        for a in np.random.permutation(all_cat):
            active = self.z == a
            # Lower confidence bound acquisition on the active group
            af = lambda x: MAX_VALUE(x, xnew[0], active, self.max_value, self.gp)
            xnew[:, active], acfnew = global_minimize(
                af, self.x_range[:, active], self.opt_n, self.n_bo,
                self.n_bo_top_percent)

        return xnew, acfnew, self.z, self.k, self.max_value
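
# global_minimize is not shown in this excerpt; judging from how it is called,
# a plausible minimal version is coordinate-restricted random search: draw
# opt_n candidates in the active box, evaluate the acquisition, and keep the
# n_bo best (the real version also uses n_bo_top_percent to mix exploitation
# and exploration). A simplified sketch under those assumptions:
import numpy as np

def global_minimize_sketch(af, x_range, opt_n, n_bo):
    """Random-search minimizer: return the n_bo best points under af."""
    dx = x_range.shape[1]
    # Candidates sampled uniformly inside the box [x_range[0], x_range[1]].
    cand = np.random.uniform(x_range[0], x_range[1], (opt_n, dx))
    scores = np.array([af(x) for x in cand])
    best = np.argsort(scores)[:n_bo]
    return cand[best], scores[best]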
parser.add_argument("--resume", help = "Specify if you want to resume sampling", action = "store_true") parser.add_argument("-y", "--yaml", help = "Specify yaml file") args = parser.parse_args() yaml_file = args.yaml with open(yaml_file) as yml: cdict = yaml.load(yml, Loader = yaml.FullLoader) Ldict = {key : cdict[key] for key in cdict if (key in LikelihoodFg.__init__.__code__.co_varnames)} Lclass = LikelihoodFg(**Ldict) Gdict = {key : cdict[key] for key in cdict if (key in GibbsSampler.__init__.__code__.co_varnames)} Nstep = 30000 gibbs_sampler = GibbsSampler(Nstep, Lclass, args.resume, **Gdict) gibbs_sampler.run() def get_lag(id, kmax, accepted): a = accepted[:, id] a_avg = np.mean(a) N = len(a) denom = np.sum((a-a_avg)**2) lag = [] for k in range(1, kmax + 1): num = 0 for i in range(0, N - k): num += (a[i] - a_avg) * (a[i+k]-a_avg) lag.append(num) lag = np.array(lag)
import numpy as np
import isd
from gibbs import GibbsSampler
from csb.io import dump, load

def create_posterior(filename):
    # Execute the config script in its own namespace and pull out `posterior`.
    namespace = {}
    with open(filename) as script:
        exec(script.read(), namespace)
    return namespace['posterior']

n_particles = (100, 200)[0]
isdfile = 'posterior_betagal_{}.py'.format(n_particles)
posterior = create_posterior(isdfile)
diameter = posterior.priors[0].forcefield.d[0, 0]

gibbs = GibbsSampler(posterior, stepsize=1e-2)
samples = isd.Ensemble(posterior.params)

# run Gibbs sampler
with isd.take_time('Gibbs sampling'):
    while len(samples) < 500:
        with isd.take_time('Gibbs step'):
            next(gibbs)
        samples.update(posterior.params)

cc = []
for image in posterior.likelihoods:
    cc.append(isd.crosscorr(image.data, image.mock.get()))

print(np.round([np.mean(cc), np.min(cc), np.max(cc)], 3) * 100)

if False:
    # look at structure
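
# Note the driver loop above advances the chain with next(gibbs): this
# GibbsSampler is evidently an iterator whose __next__ performs one full Gibbs
# sweep. A minimal, self-contained example of that pattern on a toy bivariate
# normal with correlation rho (purely illustrative, unrelated to the isd
# posterior):
import random

class ToyGibbs(object):
    """Gibbs sampler for (x, y) ~ N(0, [[1, rho], [rho, 1]]) as an iterator."""
    def __init__(self, rho=0.8):
        self.rho = rho
        self.x, self.y = 0.0, 0.0

    def __iter__(self):
        return self

    def __next__(self):
        sd = (1.0 - self.rho**2) ** 0.5
        self.x = random.gauss(self.rho * self.y, sd)  # sample x | y
        self.y = random.gauss(self.rho * self.x, sd)  # sample y | x
        return self.x, self.y

# Usage: chain = ToyGibbs(); [next(chain) for _ in range(3)]  # three sweeps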
'''
This is an example using 3 input ensembles.

Adenylate Kinase
pdb_chainID: 1AKE_A, 4AKE_A, 1ANK_A
'''
import numpy as np
from gibbs import load_coordinates, GibbsSampler
from matplotlib import pyplot as plt

input_coordinate = load_coordinates('1AKE_A', '4AKE_A', '1ANK_A')

# Run Gibbs sampler using 2 priors
gibb = GibbsSampler(input_coordinate, prior=2)
gibb.run(500)

print 'Number of domains = ', np.unique(gibb.membership).shape[0]
print 'Membership', gibb.membership
print 'Log likelihood = ', gibb.log_likelihood

plt.plot(gibb.membership)
plt.title('Membership')
plt.show()
def main():
    res = []
    for width in range(1, 2):
        for height in range(width, 3):
            model_width = width * 2
            model_height = height * 2
            decimals = 2
            temperature = 1
            runs = 2
            print("{} x {}".format(model_width, model_height))

            # Create the model
            builder = RandomBuilder(model_width, model_height)
            model = builder.generate(decimals=decimals)

            # Compute the partition function of this model
            bruteforcer = BruteforceSampler()
            Z = bruteforcer.partition_function(model, temperature)

            # First sample with D-Wave: the probabilistic embedding, then
            # the fixed two- and four-qubit embeddings
            sampler = DWaveSampler()
            results_dwave_prob = [
                sampler.sample(model, 10000, temperature) for _ in range(runs)
            ]
            embedding = builder.embedding_two()
            results_dwave_two = [
                sampler.sample(model, 10000, temperature, embedding=embedding)
                for _ in range(runs)
            ]
            embedding = builder.embedding_four()
            results_dwave_four = [
                sampler.sample(model, 10000, temperature, embedding=embedding)
                for _ in range(runs)
            ]

            # Then with the Gibbs sampler
            sampler = GibbsSampler(n_variables=model_width * model_height)
            results_gibbs = [
                sampler.sample(model, 10000, temperature) for _ in range(runs)
            ]

            compute_kl = lambda r_list: [
                r.KL_divergence(Z, temperature) for r in r_list
            ]
            kl_gibbs = compute_kl(results_gibbs)
            kl_dwave_prob = compute_kl(results_dwave_prob)
            kl_dwave_two = compute_kl(results_dwave_two)
            kl_dwave_four = compute_kl(results_dwave_four)
            print("KL divergence (Gibbs): {}".format(kl_gibbs))
            print("KL divergence (D-Wave, prob): {}".format(kl_dwave_prob))
            print("KL divergence (D-Wave, two): {}".format(kl_dwave_two))
            print("KL divergence (D-Wave, four): {}".format(kl_dwave_four))

            res += [{'type': 'gibbs', 'width': model_width,
                     'height': model_height, 'kl': kl} for kl in kl_gibbs]
            res += [{'type': 'dwaveP', 'width': model_width,
                     'height': model_height, 'kl': kl} for kl in kl_dwave_prob]
            res += [{'type': 'dwave2', 'width': model_width,
                     'height': model_height, 'kl': kl} for kl in kl_dwave_two]
            res += [{'type': 'dwave4', 'width': model_width,
                     'height': model_height, 'kl': kl} for kl in kl_dwave_four]

    # One timestamp for both dumps so the CSV and JSON names always match
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    df = pandas.DataFrame.from_dict(res)
    df.to_csv("data_kl_{}.csv".format(stamp))
    df.to_json("data_kl_{}.json".format(stamp))
    save_experiment("kl", {
        'result': res,
    })
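
# What KL_divergence presumably measures: the divergence between the empirical
# distribution of the drawn samples and the exact Boltzmann distribution
#     p(s) = exp(-E(s) / T) / Z,
# i.e. KL(p_hat || p) = sum_s p_hat(s) * log(p_hat(s) / p(s)) over the states
# actually observed. A minimal sketch, assuming hashable states and an energy
# function (both names illustrative):
import math
from collections import Counter

def kl_to_boltzmann(samples, energy, Z, T):
    """KL(empirical || Boltzmann) over the states that were observed."""
    counts = Counter(samples)
    n = sum(counts.values())
    kl = 0.0
    for state, c in counts.items():
        p_hat = c / n
        p = math.exp(-energy(state) / T) / Z
        kl += p_hat * math.log(p_hat / p)
    return kl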
def main():
    # Parse command line options
    usage = "usage: %prog [options] datafile"
    cmdline_parser = OptionParser(usage=usage)
    cmdline_parser.add_option('-o', '--output', dest='output_filename',
                              metavar='FILE', default='results.pkl',
                              help='Serialize results to pickle FILE')
    cmdline_parser.add_option('-m', '--model', dest='model', metavar='FILE',
                              default='model', help='Choose model to use')
    cmdline_parser.add_option('-v', '--verbose', dest='loglevel',
                              default=logging.WARNING, action='store_const',
                              const=logging.DEBUG,
                              help='Debug logging level mode')
    cmdline_parser.add_option('-i', '--iterations', dest='iterations',
                              type='int', default=25000,
                              help='Number of Gibbs iterations')
    cmdline_parser.add_option('-b', '--burnin', dest='burnin', type='int',
                              default=500, help='Number of burn-in iterations')
    cmdline_parser.add_option('-s', '--subsample', dest='subsample',
                              type='int', default=10, help='Subsample rate')
    cmdline_parser.add_option('-t', '--start', dest='start', type='int',
                              default=None, help='start')
    cmdline_parser.add_option('-e', '--end', dest='end', type='int',
                              default=None, help='end')
    cmdline_parser.add_option('-g', '--visualize', dest='visualize',
                              action='store_true',
                              help='Visualize intermediate results')
    cmdline_parser.add_option('-G', '--visualize-priors',
                              dest='visualize_priors', action='store_true',
                              help='Visualize prior distributions')
    cmdline_parser.add_option('-p', '--parameter-file',
                              dest='parameter_filename',
                              help='Use known parameters in file (i.e. simulated file).')
    options, args = cmdline_parser.parse_args()
    logging.basicConfig(level=options.loglevel,
                        format='%(asctime)s %(message)s')

    data_filename = args[0]
    gibbs_iters = options.iterations
    burnin = options.burnin
    subsample = options.subsample
    model_module = __import__(options.model)
    known_params = None  # default when no parameter file is supplied
    if options.parameter_filename is not None:
        known_params = pickle.load(open(options.parameter_filename, 'rb'))

    # Build model and load data
    data = load_as_frame(data_filename, start=options.start, end=options.end)
    model = model_module.define_model(data, known_params)

    # Set up the Gibbs sampler
    sampler = GibbsSampler(model=model, iterations=gibbs_iters,
                           burnin=burnin, subsample=subsample)
    if options.visualize:
        sampler.add_visualizer(model_module.visualize_gibbs)
    if options.visualize_priors:
        model_module.visualize_priors(model.priors)

    # Begin sampling
    start_time = time.time()
    sampler.run()
    gibbs_time = time.time() - start_time
    print "Gibbs sampler ran %.1f minutes" % (gibbs_time / 60.)

    # Write out results
    results = {}
    results['options'] = options
    results['variable_names'] = model.variable_names
    results['known_params'] = model.known_params
    results['hyper_params'] = model.hyper_params
    results['filename'] = data_filename
    results['data'] = data
    results['gibbs_results'] = sampler.results()
    pickle.dump(results, open(options.output_filename, 'wb'))