def find_failure_case(num_bases, num_blocks, max_levels):
    # rejection sample instances until the abstract machine fails badly
    while True:
        # print("trying..")
        env = BlocksWorldEnv(show=False)
        thing_below, goal_thing_below = random_problem_instance(
            env, num_blocks, max_levels, num_bases)
        am = make_abstract_machine(env, num_bases, max_levels)
        am_results = run_machine(am, goal_thing_below, {"jnt": "rest"})
        env.close()
        ticks, running_time, sym_reward, spa_reward = am_results
        if sym_reward <= -2: break
        # print(sym_reward)
    return thing_below, goal_thing_below, sym_reward
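# Hedged usage sketch (not in the original file): sample one failure case
# and report it. The hyperparameter values mirror the __main__ blocks
# elsewhere in this repo; treat this driver as an illustrative assumption.
if __name__ == "__main__":
    thing_below, goal_thing_below, sym_reward = find_failure_case(
        num_bases=5, num_blocks=5, max_levels=3)
    print("failure case with symbolic reward %f:" % sym_reward)
    print("  start:", thing_below)
    print("  goal: ", goal_thing_below)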
def generate_data(num_blocks, base_name):
    # sample random start and goal configurations
    thing_below = random_thing_below(num_blocks, max_levels=3)
    goal_thing_below = random_thing_below(num_blocks, max_levels=3)

    dump = DataDump(goal_thing_below, hook_period=1)
    env = BlocksWorldEnv(pb.POSITION_CONTROL, show=False, control_period=12,
        step_hook=dump.step_hook)
    env.load_blocks(thing_below)

    restacker = Restacker(env, goal_thing_below, dump)
    restacker.run()
    reward = compute_symbolic_reward(env, goal_thing_below)
    final_thing_below = env.thing_below

    # save episode metadata
    commands = [frame["command"] for frame in dump.data]
    data_file = "%s/meta.pkl" % base_name
    data = (thing_below, goal_thing_below, final_thing_below, reward, commands)
    with open(data_file, "wb") as f: pk.dump(data, f)
    env.close()

    # save one tensor file per recorded frame
    for d, frame in enumerate(dump.data):
        _, (thing, block) = frame["command"]
        position, action, rgba, coords_of, _ = zip(*frame["records"])
        position = tr.tensor(np.stack(position)).float()
        action = tr.tensor(np.stack(action)).float()
        rgba = tr.tensor(np.stack(rgba))
        block_coords = tr.tensor(np.stack([co[block] for co in coords_of])).float()
        thing_coords = tr.tensor(np.stack([co[thing] for co in coords_of])).float()
        # preprocessing
        rgb, block_coords, thing_coords = preprocess(rgba, block_coords, thing_coords)
        data_file = "%s/%03d.pt" % (base_name, d)
        tr.save((position, action, rgb, block_coords, thing_coords), data_file)

    print(" success=%s (start, end, goal)" % (reward == 0))
    print(" ", thing_below)
    print(" ", env.thing_below)
    print(" ", goal_thing_below)
    return reward
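# Hedged driver sketch (assumption): generate a couple of episodes into
# numbered directories. The "episodes/%03d" layout matches the path
# "episodes/000/meta.pkl" loaded elsewhere in this repo.
if __name__ == "__main__":
    import os
    for ep in range(2):
        base_name = "episodes/%03d" % ep
        os.makedirs(base_name, exist_ok=True)
        reward = generate_data(num_blocks=5, base_name=base_name)
        print("episode %d: reward = %f" % (ep, reward))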
def run_trial(num_bases, num_blocks, max_levels):
    env = BlocksWorldEnv(show=False)

    # rejection sample non-trivial instance
    thing_below, goal_thing_below = random_problem_instance(
        env, num_blocks, max_levels, num_bases)

    am = make_abstract_machine(env, num_bases, max_levels)
    nvm = virtualize(am)

    am_results = run_machine(am, goal_thing_below, {"jnt": "rest"})
    env.reset()
    env.load_blocks(thing_below, num_bases)
    nvm_results = run_machine(nvm, goal_thing_below,
        {"jnt": tr.tensor(am.ik["rest"]).float()})

    env.close()
    return am_results, nvm_results, nvm.size(), thing_below, goal_thing_below
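# Hedged usage sketch (assumption): compare the abstract machine against
# its virtualization over a few trials. Each results tuple unpacks as
# (ticks, running_time, sym_reward, spa_reward), matching the unpacking
# used above; the loop bound is arbitrary.
if __name__ == "__main__":
    for rep in range(3):
        am_results, nvm_results, nvm_size, _, _ = run_trial(
            num_bases=5, num_blocks=5, max_levels=3)
        print("rep %d: am sym %f, nvm sym %f (nvm size %s)" % (
            rep, am_results[2], nvm_results[2], nvm_size))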
num_dual_iters = 16
primal_tol = 0.001
dual_tol = 0.001

max_levels = 3
num_blocks = 5
num_bases = 5

# prob_freq = "batch"
prob_freq = "once"

if run_exp:

    domain = bp.BlockStackingDomain(num_blocks, num_bases, max_levels)
    mp_tracker = MovementPenaltyTracker(period=5)
    env = BlocksWorldEnv(show=False, step_hook=mp_tracker.step_hook)

    # set up rvm and virtualize
    rvm = make_abstract_machine(env, domain)
    rvm.reset({"jnt": "rest"})
    rvm.mount("main")

    nvm = virtualize(rvm, σ=nv.default_activator, detach_gates=detach_gates)
    nvm.mount("main")
    W_init = {name: {0: nvm.net.batchify_weights(conn.W)}
        for name, conn in nvm.connections.items()}
    v_init = {name: {0: nvm.net.batchify_activities(reg.content)}
        for name, reg in nvm.registers.items()}
    v_init["jnt"][0] = nvm.net.batchify_activities(tr.tensor(rvm.ik["rest"]).float())

    # set up trainable connections
    inputable = ("obj", "loc", "goal")
    # trainable = ["ik", "to", "tc", "po", "pc", "right", "above", "base"]
import pickle as pk
import numpy as np
import matplotlib.pyplot as pt
import sys
sys.path.append('../../envs')
import pybullet as pb
from blocks_world import BlocksWorldEnv, random_thing_below

# render one camera image of a random configuration
thing_below = random_thing_below(num_blocks=7, max_levels=3)
env = BlocksWorldEnv(pb.POSITION_CONTROL, show=False, control_period=12)
env.load_blocks(thing_below)
rgba, view, proj, coords_of = env.get_camera_image()
env.close()

# round-trip the image through disk and display it
np.save("tmp.npy", rgba)
rgba = np.load("tmp.npy")
pt.imshow(rgba)
pt.show()
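# Optional check (assumption, not in the original script): overlay each
# block's projected pixel coordinates on the rendered image, using the
# coords_of dict (thing name -> image coordinates) returned above.
x, y = zip(*coords_of.values())
pt.imshow(rgba)
pt.scatter(x, y, c="red", marker="+")
pt.show()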
    return tr.tanh(v) / σ1
# def σ(v): return v

if run_exp:

    lr_results = {lr: list() for lr in learning_rates}
    for rep in range(num_repetitions):
        for learning_rate in learning_rates:
            results = lr_results[learning_rate]

            start_rep = time.perf_counter()
            results.append([])

            env = BlocksWorldEnv(show=showenv, step_hook=penalty_tracker.step_hook)
            env.load_blocks({"b%d" % n: "t%d" % n for n in range(num_bases)})  # placeholder for rvm construction

            # set up rvm and virtualize
            rvm = make_abstract_machine(env, num_bases, max_levels, gen_regs=["r0", "r1"])
            rvm.reset({"jnt": "rest"})
            rvm.mount("main")
            nvm = virtualize(rvm, σ)
            init_regs, init_conns = nvm.get_state()
    goal_thing_above)

penalty_tracker = PenaltyTracker(period=5)

if run_exp:

    lr_results = {lr: list() for lr in learning_rates}
    for rep in range(num_repetitions):
        for learning_rate in learning_rates:
            print("Starting lr=%f" % learning_rate)
            results = lr_results[learning_rate]

            start_rep = time.perf_counter()
            results.append([])

            env = BlocksWorldEnv(show=False, step_hook=penalty_tracker.step_hook)
            env.load_blocks(thing_below)

            # set up rvm and virtualize
            rvm = make_abstract_machine(env, num_bases, max_levels)
            rvm.reset({"jnt": "rest"})
            rvm.mount("main")
            nvm = virtualize(rvm, σ=nv.default_activator, detach_gates=detach_gates)
            nvm.mount("main")
            W_init = {
                name: {0: nvm.net.batchify_weights(conn.W)}
# goal_thing_below = random_thing_below(num_blocks, max_levels, num_bases)

# one failure case:
max_levels = 3
num_blocks = 5
num_bases = 5
thing_below = {'b0': 't1', 'b2': 'b0', 'b4': 'b2', 'b1': 't4', 'b3': 't2'}
goal_thing_below = {'b1': 't1', 'b2': 't3', 'b3': 'b2', 'b0': 't0', 'b4': 'b0'}

env = BlocksWorldEnv(show=True)
env.load_blocks(thing_below, num_bases)
am = make_abstract_machine(env, num_bases, max_levels)

goal_thing_above = env.invert(goal_thing_below)
for key, val in goal_thing_above.items():
    if val == "none": goal_thing_above[key] = "nil"
memorize_env(am, goal_thing_above)

# restack test
am.reset({"jnt": "rest"})
num_ticks = am.run(dbg=True)
input('...')
import sys
sys.path.append('../../envs')
import pybullet as pb
from blocks_world import BlocksWorldEnv, random_thing_below

# check that reset clears the environment for a fresh problem instance
env = BlocksWorldEnv()
thing_below = random_thing_below(num_blocks=4, max_levels=3)
env.load_blocks(thing_below)
input('.')

env.reset()
input('.')

thing_below = random_thing_below(num_blocks=4, max_levels=3)
env.load_blocks(thing_below)
input('.')
import pybullet as pb
import matplotlib.pyplot as pt  # needed for the replay below; missing in the original
from blocks_world import BlocksWorldEnv

# record (position, delta, image, coordinates) at every simulation step
step_log = []
def step_hook(env, action):
    if action is None: return
    position = env.get_position()
    delta = action - position
    rgb, _, _, coords_of = env.get_camera_image()
    step_log.append((position, delta, rgb, coords_of))

env = BlocksWorldEnv(pb.POSITION_CONTROL, step_hook=step_hook)
env.load_blocks({"b0": "t0", "b1": "t1", "b2": "t2"})

action = [0.] * env.num_joints
env.goto_position([0.5] * env.num_joints, 20 / 240)
env.close()

# replay the log frame by frame
position, delta, rgb, coords_of = zip(*step_log)
pt.ion()
for t in range(len(step_log)):
    print(t)
    print(position[t])
    print(delta[t])
    x, y = zip(*coords_of[t].values())
    pt.imshow(rgb[t])
    end_reward = calc_reward(sym_reward, spa_reward)
    rewards[-1] += end_reward

    return end_reward, log_prob, rewards, log_probs

if __name__ == "__main__":

    max_levels = 3
    num_blocks = 5
    num_bases = 5

    for rep in range(10):

        penalty_tracker = PenaltyTracker()
        env = BlocksWorldEnv(show=False, step_hook=penalty_tracker.step_hook)
        thing_below, goal_thing_below = random_problem_instance(
            env, num_blocks, max_levels, num_bases)
        goal_thing_above = invert(goal_thing_below, num_blocks, num_bases)
        for key, val in goal_thing_above.items():
            if val == "none": goal_thing_above[key] = "nil"

        # normalized tanh activator
        σ1 = tr.tensor(1.).tanh()
        def σ(v): return tr.tanh(v) / σ1

        # set up rvm and virtualize
        rvm = make_abstract_machine(env, num_bases, max_levels)
from blocks_world import BlocksWorldEnv, random_thing_below

if __name__ == "__main__":

    # thing_below = random_thing_below(num_blocks, max_levels=3)
    # goal_thing_below = random_thing_below(num_blocks, max_levels=3)
    # num_blocks = 7
    # thing_below = {("b%d" % n): ("t%d" % n) for n in range(num_blocks)}
    # block, thing = "b3", "b4"

    # replay the first command of a recorded episode
    with open("episodes/000/meta.pkl", "rb") as f:
        thing_below, _, _, _, commands = pk.load(f)
    _, (block, thing) = commands[0]

    env = BlocksWorldEnv(pb.POSITION_CONTROL, show=True, control_period=12)
    env.load_blocks(thing_below)
    _, _, _, coords_of = env.get_camera_image()
    block_coords = tr.tensor(np.stack([coords_of[block]])).float()
    thing_coords = tr.tensor(np.stack([coords_of[thing]])).float()

    # move to
    net = VisuoMotorNetwork()
    # net.load_state_dict(tr.load("net.pt"))
    net.load_state_dict(tr.load("net500.pt"))

    force_coords = False
    for t in range(100):
        position = env.get_position()
        am_results = run_machine(am, problem.goal_thing_below, {"jnt": "rest"})
        ticks, running_time, sym_reward, spa_reward = am_results
        if sym_reward <= sym_cutoff: break
        # print(sym_reward)
        env.reset()
    return problem, sym_reward

if __name__ == "__main__":

    num_bases, num_blocks, max_levels = 5, 5, 3
    domain = bp.BlockStackingDomain(num_bases, num_blocks, max_levels)

    find_new = True
    if find_new:
        env = BlocksWorldEnv(show=False)
        problem, _ = find_failure_case(env, domain)
        env.close()
        thing_below = problem.thing_below
        goal_thing_below = problem.goal_thing_below
        print(thing_below)
        print(goal_thing_below)
        # thing_below = {'b0': 't1', 'b1': 'b0', 'b2': 'b1', 'b3': 't2', 'b4': 'b3'}
        # goal_thing_below = {'b0': 't0', 'b1': 'b4', 'b2': 'b1', 'b3': 't1', 'b4': 't4'}
    else:
        # one failure case:
        thing_below = {'b0': 't1', 'b2': 'b0', 'b4': 'b2',
penalty_tracker = MovementPenaltyTracker(period=tracker_period)

if run_exp:

    lr_results = {lr: list() for lr in learning_rates}
    for rep in range(num_repetitions):
        for learning_rate in learning_rates:
            print("Starting lr=%f" % learning_rate)
            results = lr_results[learning_rate]

            start_rep = time.perf_counter()
            results.append([])

            if prob_freq != "once": problem = domain.random_problem_instance()
            env = BlocksWorldEnv(show=False, step_hook=penalty_tracker.step_hook)
            env.load_blocks(problem.thing_below)

            # set up rvm and virtualize
            rvm = make_abstract_machine(env, domain)
            rvm.reset({"jnt": "rest"})
            rvm.mount("main")
            nvm = virtualize(rvm, σ=nv.default_activator, detach_gates=detach_gates)
            nvm.mount("main")
            W_init = {
                name: {0: nvm.net.batchify_weights(conn.W)}
    comp.ret_if_nil()
    comp.put("b0", "r0")
    comp.ret()

def main(comp):
    comp.call("proc")

if __name__ == "__main__":

    max_levels = 3
    num_blocks = 5
    num_bases = 5
    domain = bp.BlockStackingDomain(num_blocks, num_bases, max_levels)
    env = BlocksWorldEnv(show=False)

    # # small example
    # am, compiler = setup_abstract_machine(env, domain, gen_regs=["r0"])
    # compiler.flash(proc)
    # compiler.flash(main)

    # restacking code
    am = make_abstract_machine(env, domain)
    code = am.machine_code()
    ipt, asm, mach, store, recall = zip(*code)
    store = [", ".join(conn) for conn in store]
    recall = [", ".join(conn) for conn in recall]
    width = [
        self.mp = []
        self.sym = []
        self.goal_thing_below = goal_thing_below
    def reset(self):
        self.mp = []
        self.sym = []
    def step_hook(self, env, action):
        # track movement penalty and symbolic reward at every step
        self.mp.append(env.movement_penalty())
        self.sym.append(compute_symbolic_reward(env, self.goal_thing_below))

# load
tracker = Tracker(goal_thing_below)
env = BlocksWorldEnv(show=False, step_hook=tracker.step_hook)
env.load_blocks(thing_below)

# run rvm
rvm = make_abstract_machine(env, num_bases, max_levels, gen_regs=["r0", "r1"])
nvm = virtualize(rvm, nv.default_activator)

# run
goal_thing_above = env.invert(goal_thing_below)
for key, val in goal_thing_above.items():
    if val == "none": goal_thing_above[key] = "nil"
memorize_env(rvm, goal_thing_above)
rvm.reset({"jnt": "rest"})
rvm.mount("main")
while True:
def run_trial(domain):
    env = BlocksWorldEnv(show=False)

    # rejection sample non-trivial instance
    problem = domain.random_problem_instance()
    env.reset()
    env.load_blocks(problem.thing_below, num_bases=domain.num_bases)

    # set up rvm and virtualize
    rvm = make_abstract_machine(env, domain)
    memorize_problem(rvm, problem)
    rvm.reset({"jnt": "rest"})
    rvm.mount("main")

    nvm = virtualize(rvm, σ=nv.default_activator, detach_gates=True)
    nvm.mount("main")
    W_init = {name: {0: nvm.net.batchify_weights(conn.W)}
        for name, conn in nvm.connections.items()}
    v_init = {name: {0: nvm.net.batchify_activities(reg.content)}
        for name, reg in nvm.registers.items()}
    v_init["jnt"][0] = nvm.net.batchify_activities(tr.tensor(rvm.ik["rest"]).float())

    # rvm_results = run_machine(rvm, problem.goal_thing_below, {"jnt": "rest"})
    start = time.perf_counter()
    tar_changed = False
    while True:
        done = rvm.tick()
        if tar_changed:
            position = rvm.ik[rvm.registers["jnt"].content]
            env.goto_position(position, speed=1.5)
        if done: break
        tar_changed = (rvm.registers["tar"].content != rvm.registers["tar"].old_content)
    rvm_ticks = rvm.tick_counter
    rvm_runtime = time.perf_counter() - start
    rvm_sym = compute_symbolic_reward(env, problem.goal_thing_below)
    rvm_spa = compute_spatial_reward(env, problem.goal_thing_below)
    rvm_results = rvm_ticks, rvm_runtime, rvm_sym, rvm_spa

    # nvm_results = run_machine(nvm, problem.goal_thing_below, {"jnt": tr.tensor(rvm.ik["rest"]).float()})
    env.reset()
    env.load_blocks(problem.thing_below, num_bases=domain.num_bases)
    start = time.perf_counter()
    while True:
        t = nvm.net.tick_counter
        if t > 0 and nvm.decode("ipt", t, 0) == nvm.decode("ipt", t - 1, 0): break
        nvm.net.tick(W_init, v_init)
        nvm.pullback(t)
        if t > 1 and nvm.decode("tar", t - 2, 0) != nvm.decode("tar", t - 1, 0):
            position = nvm.net.activities["jnt"][t][0, :, 0].detach().numpy()
            env.goto_position(position, speed=1.5)
    nvm_ticks = nvm.net.tick_counter
    nvm_runtime = time.perf_counter() - start
    nvm_sym = compute_symbolic_reward(env, problem.goal_thing_below)
    nvm_spa = compute_spatial_reward(env, problem.goal_thing_below)
    nvm_results = nvm_ticks, nvm_runtime, nvm_sym, nvm_spa

    env.close()
    return rvm_results, nvm_results, nvm.size(), problem
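# Hedged usage sketch (assumption): run a few trials and compare the
# symbolic rewards and tick counts of the reference machine (rvm) and its
# neural virtualization (nvm). The domain constructor arguments follow
# the (5, 5, 3) values used in the other scripts in this repo.
if __name__ == "__main__":
    domain = bp.BlockStackingDomain(5, 5, 3)
    for rep in range(3):
        rvm_results, nvm_results, size, problem = run_trial(domain)
        print("rep %d: rvm sym %f (%d ticks), nvm sym %f (%d ticks)" % (
            rep, rvm_results[2], rvm_results[0], nvm_results[2], nvm_results[0]))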
thing_below = {'b0': 't1', 'b2': 'b0', 'b4': 'b2', 'b1': 't4', 'b3': 't2'}
goal_thing_below = {'b1': 't1', 'b2': 't3', 'b3': 'b2', 'b0': 't0', 'b4': 'b0'}

# num_blocks = 4
# thing_below = {"b%d"%b: "t%d"%b for b in range(num_blocks)}
# thing_below.update({"b1": "b0", "b2": "b3"})
# goal_thing_below = {"b%d"%b: "t%d"%b for b in range(num_blocks)}
# goal_thing_below.update({"b1": "b2", "b2": "b0"})

# # thing_below = random_thing_below(num_blocks, max_levels=3)
# # goal_thing_below = random_thing_below(num_blocks, max_levels=3)

dump = DataDump(goal_thing_below, hook_period=1)
# env = BlocksWorldEnv(pb.POSITION_CONTROL, show=True, control_period=12, step_hook=dump.step_hook)
env = BlocksWorldEnv()
env.load_blocks(thing_below)

# from check/camera.py
pb.resetDebugVisualizerCamera(1.2000000476837158, 56.799964904785156, -22.20000648498535,
    (-0.6051651835441589, 0.26229506731033325, -0.24448847770690918))

restacker = Restacker(env, goal_thing_below, dump)
restacker.run()

reward = compute_symbolic_reward(env, goal_thing_below)
print("symbolic reward = %f" % reward)
reward = compute_spatial_reward(env, goal_thing_below)
print("spatial reward = %f" % reward)
num_repetitions = 1
num_episodes = 2
num_epochs = 3

run_exp = False
showresults = True

# tr.autograd.set_detect_anomaly(True)

if run_exp:
    results = []
    for rep in range(num_repetitions):
        start_rep = time.perf_counter()
        results.append([])

        env = BlocksWorldEnv(show=False)
        # placehold blocks for nvm init
        env.load_blocks({"b%d" % n: "t%d" % n for n in range(num_bases)})
        rvm = make_abstract_machine(env, num_bases, max_levels)
        nvm = virtualize(rvm)
        # print(nvm.size())
        # input('.')
        init_regs, init_conns = nvm.get_state()
        orig_ik_W = init_conns["ik"].clone()
        init_regs["jnt"] = tr.tensor(rvm.ik["rest"]).float()

        # set up trainable connections
        conn_params = {
            name: init_conns[name]
            # for name in ["ik", "to", "tc", "pc", "pc"]