def __init__(self, model):
    """Build the sampler and work queue, then launch the background thread.

    :param model: Forwarded unchanged to ``sampling.Sampler``.
    """
    self.sampler = sampling.Sampler(model)
    self.queue = queue.Queue()

    # The worker runs ``self.threadmain_h`` and is marked as a daemon thread.
    worker = Thread(target=self.threadmain_h)
    worker.daemon = True
    self.thread = worker

    # The flag is assigned before the thread is started, so it already
    # exists by the time ``threadmain_h`` begins executing.
    self.stopped = True
    worker.start()
def run() -> typing.List:
    """Collect demonstrations and compute a BIRL estimate of the weights.

    Prompts the user ``N_DEMOS`` times; each time the user answers ``'y'``
    the demonstrations returned by ``DOM.collect_dems()`` are recorded. The
    recorded demonstrations are unpickled from ``dempref_demonstrations/``,
    converted with ``DOM.fetch_to_mujoco``, featurised with
    ``DOM.np_features`` and loaded into the sampler.

    :return: A one-element list holding the unit-norm mean of the sampled
        weight vectors.
    """
    ### Creating sampler
    sampler = sampling.Sampler(n_query=2,
                               dim_features=DOM.feature_size,
                               update_func="approx",
                               beta_demo=BETA_DEMO,
                               beta_pref=10)

    ### Collecting and loading demos
    demo_names = []
    for _ in range(N_DEMOS):
        inp = input("When you are ready, input 'y', and you can start providing your demonstration.")
        # Any answer other than 'y' skips this demonstration slot.
        if inp == "y":
            demo_names.extend(DOM.collect_dems())

    demo_path = 'dempref_demonstrations/'
    demos = []
    for d in demo_names:
        # Open each file in a context manager so the handle is closed as soon
        # as the demonstration has been read.
        # NOTE(review): pickle executes arbitrary code on load -- only use
        # demonstration files from a trusted source.
        with open(f'{demo_path}{d}', 'rb') as demo_file:
            demos.append(pickle.load(demo_file, encoding='latin1'))
    demos = [DOM.fetch_to_mujoco(x) for x in demos]
    phi_demos = [DOM.np_features(x) for x in demos]
    sampler.load_demo(np.array(phi_demos))

    ### Computing BIRL estimate of w
    samples = sampler.sample(N=N_SAMPLES_SUMM)
    birl_w = np.mean(samples, axis=0)
    birl_w = birl_w / np.linalg.norm(birl_w)
    var_w = np.var(samples, axis=0)
    print("birl_w: ", birl_w)
    print("var_w: ", var_w)
    return [birl_w]
def generate_demos(dom: domain.Domain, weight: typing.List, name: str):
    """Simulate one demonstration for ``weight`` and pickle it to disk.

    The simulated trajectory is loaded into a fresh sampler as a
    demonstration; the normalised mean of 50000 sampled weight vectors and
    the mean cosine similarity between the samples and ``weight`` are
    printed. The trajectory itself is written to
    ``fetch_demos/<name>.pickle``.

    :param dom: Domain used to simulate and featurise the trajectory.
    :param weight: Weight vector the demonstration is simulated for.
    :param name: Base name (without extension) of the output pickle file.
    """
    trajectory = dom.simulate(weight, query_length=QUERY_LENGTH, iter_count=10)

    demo_sampler = sampling.Sampler(
        n_query=N_QUERY,
        dim_features=dom.feature_size,
        update_func=UPDATE_FUNC,
        beta_demo=BETA_DEMO,
        beta_pref=BETA_PREF,
    )
    demo_sampler.load_demo(dom.np_features(trajectory))
    posterior = demo_sampler.sample(50000)

    # Unit-norm mean of the sampled weights.
    raw_mean = np.mean(posterior, axis=0)
    unit_mean = raw_mean / np.linalg.norm(raw_mean)
    print(f"w = {unit_mean!s}")

    # Cosine similarity of every sample with the weight used for simulation.
    alignments = []
    for w in posterior:
        alignments.append(
            np.dot(w, weight) / np.linalg.norm(w) / np.linalg.norm(weight))
    alignment = np.mean(alignments)
    print(f"m = {alignment!s}")

    with open(f"fetch_demos/{name}.pickle", 'wb') as out_file:
        pickle.dump(trajectory, out_file)
def main(argv):
    """Build a crop sampler over the images in ``FLAGS.data_dir`` and open the GUI.

    :param argv: Command-line arguments; not read by this function.
    :return: ``0`` when no data directory was given, otherwise ``None``.
    """
    data_dir = FLAGS.data_dir
    if data_dir is None:
        print('No data_dir specified. See --help')
        return 0

    # Gather every path yielded by image_walk and visit them in random order.
    img_paths = list(image_walk(data_dir))
    random.shuffle(img_paths)

    samp = sampling.Sampler(
        img_paths,
        im_dims=(720, 1280),
        crop_size=FLAGS.crop_size,
        num_crop=FLAGS.crops_per_img,
    )
    loader = gui.AsyncImageLoader(samp)

    # Debugging leftovers kept for reference:
    # img_list, label_img_list = sampling.sample_img(720, 1280)
    # print(len(img_list))
    # newboi = list(samp)
    # print(len(newboi))

    gui_handler = gui.GUI(loader)
def run() -> pd.DataFrame:
    """Run one demonstration-plus-preference session with a terminal human.

    Demonstrations are collected through ``DOM.collect_dems()``, unpickled
    from ``dempref_demonstrations/`` and loaded into the sampler. Up to
    ``N_PREF_ITERS`` preference queries are then generated, answered through
    ``human.TerminalHuman`` and fed back into the sampler. The loop stops
    early once the summed variance of the sampled weights drops below
    ``VARIANCE_THRESH``.

    :return: Data frame with the columns ``pref_iter`` (iteration of the
        preference loop, 0 for the demonstration-only estimate), ``type``
        (``"mean"`` or ``"var"``) and ``value`` (the stored value).
    """
    ### Creating data frame to store data in
    # pref_iter corresponds to the iteration of the preference loop in the particular run
    # type is the type of data being stored; this function stores "mean" and "var"
    # value is the actual value being stored
    columns = ["pref_iter", "type", "value"]
    df = pd.DataFrame(columns=columns)

    ### Creating sampler
    sampler = sampling.Sampler(n_query=N_QUERY,
                               dim_features=DOM.feature_size,
                               update_func=UPDATE_FUNC,
                               beta_demo=BETA_DEMO,
                               beta_pref=BETA_PREF)

    ### Creating query generator
    qg = query_generation.ApproxQueryGenerator(
        dom=DOM,
        num_queries=N_QUERY,
        query_length=QUERY_LENGTH,
        num_expectation_samples=N_SAMPLES_EXP,
        include_previous_query=INC_PREV_QUERY,
        generate_scenario=GEN_SCENARIO,
        update_func=UPDATE_FUNC,
        beta_pref=BETA_PREF)

    ### Creating human object
    H = human.TerminalHuman(DOM, UPDATE_FUNC)

    ### Collecting and loading demonstrations
    demo_names = DOM.collect_dems()
    demo_path = 'dempref_demonstrations/'
    demos = []
    for demo_name in demo_names:
        # Context manager closes each file handle right after loading.
        # NOTE(review): pickle executes arbitrary code on load -- only use
        # demonstration files from a trusted source.
        with open(demo_path + f'{demo_name}', 'rb') as demo_file:
            demos.append(pickle.load(demo_file, encoding='latin1'))
    demos = [DOM.fetch_to_mujoco(x) for x in demos]
    if INC_PREV_QUERY:
        # The first demonstration seeds the "previous query" slot.
        last_query_picked = demos[0]
    phi_demos = [DOM.np_features(x) for x in demos]
    sampler.load_demo(np.array(phi_demos))

    ### Computing initial estimates
    samples = sampler.sample(N=N_SAMPLES_SUMM)
    mean_w = np.mean(samples, axis=0)
    mean_w = mean_w / np.linalg.norm(mean_w)
    var_w = np.var(samples, axis=0)
    print('Mean: ' + str(mean_w))
    print('Var: ' + str(sum(var_w)))
    data = [[0, "mean", mean_w], [0, "var", var_w]]
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # equivalent that works on old and new pandas alike.
    df = pd.concat([df, pd.DataFrame(data, columns=columns)],
                   ignore_index=True)
    if sum(var_w) < VARIANCE_THRESH:
        print("Variance is now below threshold; EXITING.")
        return df

    ### Preferences loop
    for j in range(N_PREF_ITERS):
        print("\n\n*** Preferences # %d\n" % (j + 1))

        ## Generate queries
        if INC_PREV_QUERY:
            queries = qg.queries(samples, last_query_picked)
        else:
            queries = qg.queries(samples)
        # NOTE(review): the converted queries are not used below; the call is
        # kept in case DOM.mujoco_to_fetch has side effects -- confirm.
        mujoco_queries = [DOM.mujoco_to_fetch(x) for x in queries]

        ## Querying human
        print('\a')  # terminal bell to get the operator's attention
        best = H.input(queries, on_real_robot=False)
        if INC_PREV_QUERY:
            last_query_picked = queries[best]

        ## Creating dictionary mapping rankings to features of queries and loading into sampler
        features = [DOM.np_features(x) for x in queries]
        phi = {k: features[k] for k in range(len(queries))}
        sampler.load_prefs(phi, best)

        ## Recording data from this run
        samples = sampler.sample(N=N_SAMPLES_SUMM)
        mean_w = np.mean(samples, axis=0)
        mean_w = mean_w / np.linalg.norm(mean_w)
        var_w = np.var(samples, axis=0)
        print('Mean: ' + str(mean_w))
        print('Var: ' + str(sum(var_w)))
        data = [[j + 1, "mean", mean_w], [j + 1, "var", var_w]]
        df = pd.concat([df, pd.DataFrame(data, columns=columns)],
                       ignore_index=True)
        if sum(var_w) < VARIANCE_THRESH:
            print("Variance is now below threshold; EXITING.")
            return df

    return df
# UPDATE_FUNC = "rank" # BETA_DEMO = 0.1 # BETA_PREF = 5 N_QUERY = 2 DIM_FEATURES = dom.feature_size ms = {} for i in range(25): weight = np.random.uniform(-1, 1, 4) t = dom.simulate(weight, query_length=QUERY_LENGTH, iter_count=10) sampler = sampling.Sampler(n_query=N_QUERY, dim_features=DIM_FEATURES, update_func=UPDATE_FUNC, beta_demo=BETA_DEMO, beta_pref=BETA_PREF) sampler.load_demo(dom.np_features(t)) samples = sampler.sample(50000) m = np.mean([ np.dot(w, true_weight) / np.linalg.norm(w) / np.linalg.norm(true_weight) for w in samples ]) ms[m] = t sorted = list(ms) sorted.sort() worst = sorted[0]
# Script setup: configure line editing, parse the command line, load the
# language model and build the sampling chains.
readline.parse_and_bind('tab: complete')
readline.parse_and_bind('set editing-mode vi')

parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint', default='models/test.json')
parser.add_argument('--maxlength', default=1024, type=int)
parser.add_argument('--temperature', default=1.0, type=float)
parser.add_argument('--savedir', default='')
args = parser.parse_args()

model = LanguageModel.LanguageModel()
model.load_json(args.checkpoint)
model.eval()

sampler = sampling.Sampler(model)
# The newline token is used both as the store's default token and as the end
# token of the get-chain.
stor = M.DefaultStateStore(model, default_token=model.token_to_idx[b'\n'])
pc = sampling.default_put_chains(stor)
gc = sampling.default_get_chains(stor,
                                 endtoken=[model.token_to_idx[b'\n']],
                                 maxlength=args.maxlength,
                                 temperature=args.temperature)

badword_mod = M.BlockBadWords(model, [])
path_bw = args.savedir + '/badwords'
# Restore a previously saved bad-word list only when a save directory was
# given and the file exists.
if args.savedir and os.path.exists(path_bw):
    # Context manager so the file handle is closed after loading.
    # NOTE(review): pickle executes arbitrary code on load -- only point
    # --savedir at trusted data.
    with open(path_bw, 'rb') as badwords_file:
        badword_mod.badwords = pickle.load(badwords_file)
gc.sample_post += [M.PrintSampledString(model), badword_mod]
def run(self, n_iters: int = 1) -> Tuple[pd.DataFrame, List]:
    """
    Runs the algorithm n_iters times and returns a data frame with all the data from the experiment.

    :param n_iters: Number of times to run the algorithm.
    :return: (df, self.config); df is a data frame containing all the data
        from the run and config contains the parameters of the run. When a
        terminal human answers a (non-rank) query with -1, the method returns
        immediately with the data gathered so far.
    :raises ValueError: If the domain is a ``domain.Car`` and
        ``self.update_func`` is not "pick_best", "approx" or "rank".
    """
    ### Creating data frame to store data in
    # run # corresponds to the iteration of the whole experiment
    # pref_iter corresponds to the iteration of the preference loop in the particular run
    # type is the type of data being stored; options are "mean", "var", "m"
    # value is the actual value being stored
    columns = ["run #", "pref_iter", "type", "value"]
    df = pd.DataFrame(columns=columns)

    ### Creating query generator
    if isinstance(self.domain, domain.Car):
        # using exact QG when dynamics is available
        objective_fns = {
            "pick_best": query_generation.pick_best,
            "approx": query_generation.approx,
            "rank": query_generation.rank,
        }
        if self.update_func not in objective_fns:
            # Previously this fell through and failed later with an unbound
            # local; fail early with a clear message instead.
            raise ValueError(
                "Unknown update_func: %r" % (self.update_func,))
        obj_fn = objective_fns[self.update_func]
        qg = query_generation.QueryGenerator(
            dom=self.domain,
            num_queries=self.n_query,
            query_length=self.query_length,
            num_expectation_samples=self.n_samples_exp,
            include_previous_query=self.inc_prev_query,
            generate_scenario=self.gen_scenario,
            objective_fn=obj_fn,
            beta_pref=self.beta_pref)
    else:
        # using approx QG when dynamics is not available
        qg = query_generation.ApproxQueryGenerator(
            dom=self.domain,
            num_queries=self.n_query,
            query_length=self.query_length,
            num_expectation_samples=self.n_samples_exp,
            include_previous_query=self.inc_prev_query,
            generate_scenario=self.gen_scenario,
            update_func=self.update_func,
            beta_pref=self.beta_pref)

    ### Creating human
    humans = {
        "opt":
            human.OptimalHuman(self.domain, self.update_func,
                               self.true_weight),
        "btz":
            human.BoltzmannHuman(self.domain, self.update_func,
                                 self.true_weight, self.beta_human),
        "term":
            human.TerminalHuman(self.domain, self.update_func)
    }
    H = humans[self.human_type]

    ### Iterating to build confidence intervals
    for i in range(n_iters):
        ### Processing demonstrations
        sampler = sampling.Sampler(n_query=self.n_query,
                                   dim_features=self.domain.feature_size,
                                   update_func=self.update_func,
                                   beta_demo=self.beta_demo,
                                   beta_pref=self.beta_pref)
        if self.n_demos > 0:
            if self.gen_demos:
                self.demos = [
                    self.domain.simulate(self.true_weight,
                                         iter_count=self.sim_iter_count)
                    for _ in range(self.n_demos)
                ]
            phi_demos = [self.domain.np_features(x) for x in self.demos]
            sampler.load_demo(np.array(phi_demos))
            if self.inc_prev_query and isinstance(self.domain, domain.Car):
                cleaned_demos = [
                    d.trim(self.query_length, self.trim_start)
                    for d in self.demos
                ]
            else:
                cleaned_demos = self.demos
            if self.inc_prev_query:
                # Shallow copy: entries get replaced by picked queries below.
                last_query_picked = list(cleaned_demos)
        else:
            # Without demonstrations the "previous query" is a single null
            # trajectory.
            last_query_picked = [
                traj.Trajectory(states=None, controls=None, null=True)
            ]

        ## Computing initial estimates
        samples = sampler.sample(N=self.n_samples_summ)
        mean_w = np.mean(samples, axis=0)
        mean_w = mean_w / np.linalg.norm(mean_w)
        var_w = np.var(samples, axis=0)
        data = [[i + 1, 0, "mean", mean_w], [i + 1, 0, "var", var_w]]
        print("Estimate of w: " + str(mean_w))  # TODO: Add different levels of verbose mode
        print("Estimate of variance: " + str(sum(var_w)))
        # computing convergence measure if we are in simulation
        if self.human_type != "term":
            m = np.mean([
                np.dot(w, self.true_weight) / np.linalg.norm(w) /
                np.linalg.norm(self.true_weight) for w in samples
            ])
            data.append([i + 1, 0, "m", m])
            print("Estimate of m: " + str(m) + "\n\n")
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # equivalent that works on old and new pandas alike.
        df = pd.concat([df, pd.DataFrame(data, columns=columns)],
                       ignore_index=True)

        ### Preferences loop
        for j in range(self.n_pref_iters):
            print("\n\n*** Preferences Loop %d\n" % (j))

            ## Get last_query
            if self.inc_prev_query:
                if len(self.demos) > 0:
                    random_scenario_index = np.random.randint(
                        len(self.demos))
                else:
                    random_scenario_index = 0
                last_query = last_query_picked[random_scenario_index]

            ## Generate queries while ensuring that features of queries are epsilon apart
            query_diff = 0
            print("Generating queries")
            while query_diff <= self.epsilon:
                if self.inc_prev_query:
                    if last_query.null:
                        queries = qg.queries(samples, blank_traj=True)
                    else:
                        queries = qg.queries(samples, last_query)
                else:
                    queries = qg.queries(samples)
                # Featurise every query once, then compare all pairs.
                query_features = [
                    self.domain.np_features(q) for q in queries
                ]
                query_diffs = []
                for hi in range(len(query_features)):
                    for lo in range(hi):
                        query_diffs.append(
                            np.linalg.norm(query_features[hi] -
                                           query_features[lo]))
                query_diff = max(query_diffs)

            ## Querying human
            if self.human_type == "term":
                print('\a')  # terminal bell to get the operator's attention
            rank = H.input(queries)
            if self.update_func == "rank":
                best = rank[0]
            else:
                if rank == -1:
                    # -1 from the human aborts the experiment early.
                    return df, self.config
                best = rank

            if self.inc_prev_query:
                last_query_picked[random_scenario_index] = queries[best]

            ## Creating dictionary mapping rankings to features of queries and loading into sampler
            features = [self.domain.np_features(x) for x in queries]
            phi = {k: features[k] for k in range(len(queries))}
            sampler.load_prefs(phi, rank)

            ## Recording data from this run
            samples = sampler.sample(N=self.n_samples_summ)
            mean_w = np.mean(samples, axis=0)
            mean_w = mean_w / np.linalg.norm(mean_w)
            var_w = np.var(samples, axis=0)
            data = [[i + 1, j + 1, "mean", mean_w],
                    [i + 1, j + 1, "var", var_w]]
            print("Estimate of w: " + str(mean_w))
            print("Estimate of variance: " + str(sum(var_w)))
            if self.human_type != "term":
                m = np.mean([
                    np.dot(w, self.true_weight) / np.linalg.norm(w) /
                    np.linalg.norm(self.true_weight) for w in samples
                ])
                data.append([i + 1, j + 1, "m", m])
                print("Estimate of m: " + str(m) + "\n\n")
            df = pd.concat([df, pd.DataFrame(data, columns=columns)],
                           ignore_index=True)

        ## Resetting for next run
        sampler.clear_pref()
        if self.inc_prev_query and self.n_demos > 0:
            last_query_picked = list(cleaned_demos)

    return df, self.config