def plot_constructability_over_time(logger):
    """Plot, per acquisition step, the height of the tallest stable prefix of each acquired tower.

    For every acquired tower, blocks are added one at a time until the partial
    tower becomes unconstructable; the height of the last stable prefix is
    tallied into a stacked bar chart saved to the logger's figure directory.

    :param logger: experiment logger providing args.max_acquisitions,
        load_acquisition_data(tx) and get_figure_path(name).
    """
    tower_keys = ['2block', '3block', '4block', '5block']
    # Column h-1 counts towers whose tallest stable subtower has h blocks.
    tallest_stable_over_time = np.zeros((logger.args.max_acquisitions, 5))
    tp = TowerPlanner(stability_mode='contains')
    for tx in range(logger.args.max_acquisitions):
        acquired_data, _ = logger.load_acquisition_data(tx)
        # For each tower, figure out when it fell over.
        for kx, k in enumerate(tower_keys):
            towers = acquired_data[k]['towers']
            for ix in range(0, towers.shape[0]):
                height = 1  # a single block is always stable
                for top_id in range(1, towers.shape[1]):
                    # Prefix of the tower up to and including block top_id.
                    block_tower = [Object.from_vector(towers[ix, bx, :])
                                   for bx in range(0, top_id+1)]
                    if not tp.tower_is_constructable(block_tower):
                        break
                    height += 1
                tallest_stable_over_time[tx, height-1] += 1
    # x axis in units of towers acquired; NOTE(review): the 40 offset and
    # stride of 10 look tied to the initial dataset / batch size — confirm.
    max_x = 40 + 10*logger.args.max_acquisitions
    xs = np.arange(40, max_x, 10)
    w = 10
    plt.figure(figsize=(20, 10))
    plt.bar(xs, tallest_stable_over_time[:, 0], width=w, label=1)
    for kx in range(1, 5):
        # Stack each height count on top of all shorter heights.
        plt.bar(xs, tallest_stable_over_time[:, kx],
                bottom=np.sum(tallest_stable_over_time[:, :kx], axis=1),
                width=w, label=kx+1)
    plt.xlabel('Acquisition Step')
    plt.ylabel('Height of tallest stable subtower')
    plt.legend()
    plt.savefig(logger.get_figure_path('tallest_breakdown.png'))
def __init__(self, logger, n_samples=None):
    """Set up the planner's tower-size keys, sample budgets and stability checker.

    :param logger: experiment logger used later to load ensembles.
    :param n_samples: currently unused — candidate-tower counts always come
        from the per-size table below. TODO(review): honor or remove it.
    """
    self.tower_keys = ['2block', '3block', '4block', '5block']
    # Number of candidate towers sampled per tower size (num blocks -> samples).
    self.n_samples = {
        2: 5000,
        3: 10000,
        4: 20000,
        5: 100000,
        6: 250000,
        7: 500000
    }
    self.tp = TowerPlanner(stability_mode='contains')
    self.logger = logger
    # Set to True once cached candidate towers have been loaded from disk.
    self.using_cache = False
def test_stability_tower_is_stable_with_sim(vis):
    """Analytic stability must agree with pybullet simulation on random 3-block stacks."""
    planner = TowerPlanner()
    for _trial in range(10):
        # Sample three random blocks.
        stack = [Object.random(str(idx)) for idx in range(3)]
        # Stack them by center of geometry (not COM), bottom to top.
        base_z = 0
        for blk in stack:
            blk.pose = Pose(Position(0, 0, blk.dimensions.z / 2 + base_z), ZERO_ROT)
            base_z += blk.dimensions.z
        simulated = tower_is_stable_in_pybullet(stack, vis=vis, T=50)
        assert planner.tower_is_stable(stack) == simulated
def inspect_validation_set(fname):
    """Print constructability info for each unstable 5-block validation tower.

    :param fname: path to a pickled dict keyed by '<n>block' with 'towers'
        (vectorized blocks) and 'labels' arrays.
    """
    with open(fname, 'rb') as handle:
        val_towers = pickle.load(handle)
    tp = TowerPlanner(stability_mode='contains')
    towers = val_towers['5block']['towers']
    labels = val_towers['5block']['labels']
    # Check how many towers fall over at a lower block.
    for tower_vec, label in zip(towers, labels):
        if label == 0:
            tower = [Object.from_vector(tower_vec[bx, :])
                     for bx in range(0, tower_vec.shape[0])]
            # NOTE(review): tower[:1] is a single-block prefix, which should
            # always be constructable — was tower[:-1] (all but the top
            # block) intended here? Confirm against the comment above.
            print(tp.tower_is_constructable(tower[:1]))
def check_validation_robustness(noise=0.001, n_attempts=10):
    """ Try adding noise to the placement of each block in the validation set
    to see how many of the towers are robust to placement noise.

    :param noise: std-dev (meters) of Gaussian x/y noise added per block.
    :param n_attempts: number of independent perturbations tried per tower.
    """
    with open('learning/data/validation_towers_robust.pkl', 'rb') as handle:
    #with open('learning/data/random_blocks_(x2000)_5blocks_uniform_mass.pkl', 'rb') as handle:
        val_towers = pickle.load(handle)
    # Count of robust towers per tower-size key.
    robust = {k: 0 for k in val_towers.keys()}
    tp = TowerPlanner(stability_mode='contains')
    stable_towers = copy.deepcopy(val_towers)
    unstable_towers = copy.deepcopy(val_towers)
    for k in robust.keys():
        stable_indices = []
        unstable_indices = []
        for ix in range(0, val_towers[k]['towers'].shape[0]):
            stable = True
            # NOTE(review): label-0 towers are skipped entirely, so they end
            # up in NEITHER stable_towers nor unstable_towers, yet still count
            # in the denominator printed below — confirm this is intended.
            if val_towers[k]['labels'][ix] == 0:
                continue
            for _ in range(n_attempts):
                tower = val_towers[k]['towers'][ix, :, :].copy()
                label = val_towers[k]['labels'][ix]
                # Indices 7:9 of the block vector hold the x/y position.
                tower[:, 7:9] += np.random.randn(2*tower.shape[0]).reshape(tower.shape[0], 2)*noise
                block_tower = [Object.from_vector(tower[kx, :])
                               for kx in range(tower.shape[0])]
                # Robust means the label survives every perturbation.
                if tp.tower_is_constructable(block_tower) != label:
                    stable = False
            if stable:
                robust[k] += 1
                stable_indices.append(ix)
            else:
                unstable_indices.append(ix)
        # Split the validation set into robust and non-robust subsets.
        stable_towers[k]['towers'] = stable_towers[k]['towers'][stable_indices,...]
        stable_towers[k]['labels'] = stable_towers[k]['labels'][stable_indices]
        unstable_towers[k]['towers'] = unstable_towers[k]['towers'][unstable_indices,...]
        unstable_towers[k]['labels'] = unstable_towers[k]['labels'][unstable_indices]
        # with open('learning/data/stable_val.pkl', 'wb') as handle:
        #     pickle.dump(stable_towers, handle)
        # with open('learning/data/unstable_val.pkl', 'wb') as handle:
        #     pickle.dump(unstable_towers, handle)
        print(k, ':', robust[k], '/', val_towers[k]['towers'].shape[0] )
def check_stable_bases(logger):
    """For the first 80 acquisition steps, report whether each acquired tower
    minus its top block forms a constructable (stable) base."""
    keys = ['2block', '3block', '4block', '5block']
    planner = TowerPlanner(stability_mode='contains')
    for step in range(80):
        print(step)
        acquired, _ = logger.load_acquisition_data(step)
        for key in keys:
            print(key)
            for vec in acquired[key]['towers']:
                blocks = [Object.from_vector(vec[ix, :]) for ix in range(vec.shape[0])]
                # Drop the top block and test the remaining base.
                base_ok = planner.tower_is_constructable(blocks[:-1])
                print('Stable Base' if base_ok else 'Unstable Base')
def main(args): NOISE = 0.00005 # get a bunch of random blocks blocks = get_adversarial_blocks(num_blocks=args.num_blocks) if args.agent == 'teleport': agent = TeleportAgent(blocks, NOISE) elif args.agent == 'panda': agent = PandaAgent(blocks, NOISE, use_platform=True, teleport=False) else: raise NotImplementedError() # construct a world containing those blocks beliefs = [ ParticleBelief(block, N=200, plot=True, vis_sim=False, noise=NOISE) for block in blocks ] agent._add_text('Ready?') input('Start?') # Gain information about the CoM of each block. for b_ix, (block, belief) in enumerate(zip(blocks, beliefs)): print('Running filter for', block.name) for interaction_num in range(5): print("Interaction number: ", interaction_num) agent._add_text('Planning action.') action = plan_action(belief, exp_type='reduce_var', action_type='place') observation = agent.simulate_action(action, b_ix, T=50) agent._add_text('Updating particle belief.') belief.update(observation) block.com_filter = belief.particles print(belief.estimated_coms[-1], block.com) # Find the tallest tower print('Finding tallest tower.') # agent._add_text('Planning tallest tower') tp = TowerPlanner(plan_mode='expectation') tallest_tower = tp.plan(blocks) # and execute the resulting plan. agent.simulate_tower(tallest_tower, vis=True, T=2500, save_tower=args.save_tower)
def test_tower_simulation(blocks):
    """Plan the tallest tower from `blocks` (with fresh particle beliefs) and visualize it.

    :param blocks: list of Object blocks to build with.
    """
    agent = PandaAgent(blocks, NOISE)
    # Attach an untrained particle filter to every block.
    # (Fixed: enumerate index was unused — plain iteration suffices.)
    for block in blocks:
        belief = ParticleBelief(block, N=200, plot=False, vis_sim=False, noise=NOISE)
        block.com_filter = belief.particles
    tp = TowerPlanner()
    tallest_tower = tp.plan(blocks, num_samples=10)
    # and visualize the result
    agent.simulate_tower(tallest_tower, vis=True, T=2500)
def test_stability_tower_is_constructible():
    """Constructibility requires every prefix of the tower to be stable as it is built."""
    planner = TowerPlanner()
    block_a = Object('a', Dimensions(0.1, 0.1, 0.1), 1, Position(0, 0, 0), Color(0, 1, 1))
    block_b = Object('b', Dimensions(0.3, 0.1, 0.1), 3, Position(0, 0, 0), Color(1, 0, 1))
    block_c = Object('c', Dimensions(0.1, 0.1, 0.2), 2, Position(0, 0, 0), Color(1, 1, 0))

    # A lone block can always be constructed.
    block_a.pose = Pose(Position(0, 0, 0.05), ZERO_ROT)
    assert planner.tower_is_constructible([block_a])

    # Second block centered over the first: constructible.
    block_b.pose = Pose(Position(0, 0, 0.15), ZERO_ROT)
    assert planner.tower_is_constructible([block_a, block_b])

    # Second block shifted too far along x: not constructible.
    block_b.pose = Pose(Position(0.06, 0, 0.15), ZERO_ROT)
    assert not planner.tower_is_constructible([block_a, block_b])

    # A counterweight on top can make the tower stable,
    # but it can never make it constructible.
    block_c.pose = Pose(Position(0.0, 0, 0.3), ZERO_ROT)
    assert not planner.tower_is_constructible([block_a, block_b, block_c])

    # Staircase in y: every prefix is stable (constructible) even though the
    # finished tower is not globally stable.
    block_b.pose = Pose(Position(0, 0.04, 0.15), ZERO_ROT)
    block_c.pose = Pose(Position(0, 0.08, 0.3), ZERO_ROT)
    assert planner.tower_is_constructible([block_a, block_b, block_c])
def test_stability_pair_is_stable():
    """pair_is_stable: COM over the support is stable; just past the edge is not."""
    planner = TowerPlanner()
    top = Object.random('a')
    bottom = Object.random('b')
    rest_z = top.dimensions.z / 2 + bottom.dimensions.z
    # Place the top block so its COM sits directly over the bottom block.
    top.pose = Pose(Position(-top.com.x, -top.com.y, rest_z), ZERO_ROT)
    assert planner.pair_is_stable(bottom, top)
    # Nudge the COM a tiny bit past the bottom block's +x edge.
    over_edge = Position(bottom.dimensions.x / 2 - top.com.x + 1e-5, -top.com.y, rest_z)
    top.pose = Pose(over_edge, ZERO_ROT)
    assert not planner.pair_is_stable(bottom, top)
def is_robust(orig_tower, n_attempts=10, noise=0.001):
    """ Perturb each block in the tower in x/y multiple times and make sure
    the constructability label does not change.

    :param orig_tower: list of (rotated) Object blocks, bottom first.
    :param n_attempts: number of random perturbations to test.
    :param noise: std-dev (meters) of the Gaussian x/y position noise.
    :return: True iff every perturbed copy keeps the original label.
    """
    tp = TowerPlanner(stability_mode='contains')
    tower_vec = np.array(
        [orig_tower[bx].vectorize() for bx in range(0, len(orig_tower))])
    label = tp.tower_is_constructable(orig_tower)
    for _ in range(n_attempts):
        tower = tower_vec.copy()
        # Indices 7:9 of the block vector hold the x/y position.
        tower[:, 7:9] += np.random.randn(2 * tower.shape[0]).reshape(
            tower.shape[0], 2) * noise
        block_tower = [
            Object.from_vector(tower[kx, :]) for kx in range(tower.shape[0])
        ]
        if tp.tower_is_constructable(block_tower) != label:
            # One disagreement decides the answer — no need to keep perturbing.
            return False
    return True
def active(strategy, vis=False):
    """Run one active-learning episode: eliminate stability hypotheses until one remains.

    :param strategy: 'random' (random towers) or 'entropy' (max-disagreement towers).
    :param vis: if True, visualize each chosen tower in simulation.
    :return: number of iterations (nx) used to isolate the true model.
    """
    hypotheses = get_all_hypotheses()
    tp = TowerPlanner(stability_mode='contains')
    for nx in range(1, MAX_N):
        # Generate a random set of 5 blocks.
        blocks = [Object.random(f'obj_{ix}') for ix in range(NUM_BLOCKS)]
        # Choose a tower to build.
        if strategy == 'random':
            num_blocks = np.random.randint(2, NUM_BLOCKS + 1)
            tower = sample_random_tower(blocks[:num_blocks])
            tower = [get_rotated_block(b) for b in tower]
            tower = [deepcopy(b) for b in tower]
        elif strategy == 'entropy':
            tower = find_entropy_tower(blocks, hypotheses)
        else:
            raise NotImplementedError()
        # Keep only the hypotheses consistent with the ground-truth label.
        # (Fixed: the label was recomputed inside the hypothesis loop even
        # though it is invariant for a fixed tower — hoisted out.)
        true = tp.tower_is_stable(tower)
        hypotheses = [h for h in hypotheses if h(tower) == true]
        # Visualize the chosen tower and print the updated hypothesis list.
        if vis:
            TeleportAgent.simulate_tower(tower, vis=True, T=300)
        print(hypotheses)
        # Check if true model found.
        if len(hypotheses) == 1:
            break
    return nx
def evaluate_predictions(fname):
    """Break model accuracy down by stability type (global / COG / pairwise).

    Loads pickled (towers, labels, preds) tuples and, for each tower size,
    prints counts and accuracy bucketed by [stable][cog_stable][pw_stable].

    :param fname: path to a pickled list of (towers, labels, preds) tuples,
        one tuple per tower size starting at 2 blocks.
    """
    with open(fname, 'rb') as handle:
        results = pickle.load(handle)
    tp = TowerPlanner(stability_mode='contains')
    # Index this as [stable][cog_stable][pw_stable]
    for ix, (towers, labels, preds) in enumerate(results):
        correct = [[[0, 0], [0, 0]], [[0, 0], [0, 0]]]
        total = [[[0, 0], [0, 0]], [[0, 0], [0, 0]]]
        # Check the tower stability type.
        for tower, label, pred in zip(towers, labels, preds):
            blocks = to_blocks(tower)
            cog_stable = tp.tower_is_cog_stable(blocks)
            pw_stable = tp.tower_is_constructible(blocks)
            stable = tp.tower_is_stable(blocks)
            if stable != label:
                print('WAT', stable, label)
                #assert stable == label
            total[stable][cog_stable][pw_stable] += 1
            if (pred > 0.5) == label:
                correct[stable][cog_stable][pw_stable] += 1
        print(total)
        print('%d Towers' % (ix + 2))
        for stable in [0, 1]:
            for cog_stable in [0, 1]:
                for pw_stable in [0, 1]:
                    # For 2-block towers pairwise == global stability.
                    if ix == 0 and pw_stable != stable:
                        continue
                    # Fixed: skip empty buckets instead of dividing by zero.
                    if total[stable][cog_stable][pw_stable] == 0:
                        continue
                    acc = correct[stable][cog_stable][pw_stable] / total[
                        stable][cog_stable][pw_stable]
                    print(
                        'Stable: %d\tCOG_Stable: %d\tPW_Stable: %d\tAcc: %f' %
                        (stable, cog_stable, pw_stable, acc))
def get_labels(samples, exec_mode, agent, logger, xy_noise, save_tower=False, label_subtowers=False):
    """
    Takes as input a dictionary from the get_subset function.
    Augment it with stability labels.
    :param samples: dict keyed by '<n>block' with 'towers' arrays (and
        optionally 'block_ids').
    :param exec_mode: str in ['simple-model', 'noisy-model', 'sim', 'real']
    :param agent: PandaAgent or None (if exec_mode == 'simple-model' or 'noisy-model')
    :param logger: logger used to persist per-tower data.
    :param xy_noise: std-dev of x/y noise applied in 'noisy-model' mode.
    :param save_tower: if True, save each labeled tower via the logger.
    :param label_subtowers: if True, return per-subtower labels instead of
        the (mutated) input samples.
    :return: labeled_samples dict when label_subtowers, else samples.
    """
    # Accumulators for subtower labels, keyed by subtower size.
    labeled_samples = {'%dblock' % k: {} for k in [2, 3, 4, 5]}
    for k in labeled_samples:
        labeled_samples[k]['towers'] = []
        labeled_samples[k]['block_ids'] = []
        labeled_samples[k]['labels'] = []
    # Total block placements executed before towers fell (saved at the end).
    block_placements = 0
    tp = TowerPlanner(stability_mode='contains')
    for k in samples.keys():
        n_towers, n_blocks, _ = samples[k]['towers'].shape
        labels = np.ones((n_towers, ))
        for ix in range(0, n_towers):
            print(f'Collecting tower {ix+1}/{n_towers} for {k} towers...')
            # Add noise to blocks and convert tower to Block representation.
            block_tower = []
            for jx in range(n_blocks):
                vec_block = deepcopy(samples[k]['towers'][ix, jx, :])
                if exec_mode == 'noisy-model':
                    # Indices 7:9 of the block vector hold the x/y position.
                    vec_block[7:9] += np.random.randn(2) * xy_noise
                block = Object.from_vector(vec_block)  # block is already rotated
                if 'block_ids' in samples[k].keys():
                    block.name = 'obj_' + str(samples[k]['block_ids'][ix, jx])
                block_tower.append(block)
            # Use tp to check for stability.
            if exec_mode == 'simple-model' or exec_mode == 'noisy-model':
                # iterate through each subtower until it falls (is not constructable)
                subtowers = [
                    block_tower[:k_sub]
                    for k_sub in list(range(2, len(block_tower) + 1))
                ]
                for k_sub, subtower in enumerate(subtowers, 2):
                    if tp.tower_is_constructable(subtower):
                        label = 1.0
                    else:
                        label = 0.0
                    # add to labeled samples
                    labeled_samples['%dblock' % k_sub]['towers'].append(
                        samples[k]['towers'][ix, :k_sub, :])
                    # NOTE(review): this tests the labeled_samples dict, which
                    # ALWAYS has 'block_ids' — should it test samples[k]
                    # instead (as the save_tower branch below does)? If
                    # samples lacks block_ids this line raises KeyError.
                    if 'block_ids' in labeled_samples['%dblock' % k_sub]:
                        labeled_samples['%dblock' % k_sub]['block_ids'].append(
                            samples[k]['block_ids'][ix, :k_sub])
                    labeled_samples['%dblock' % k_sub]['labels'].append(label)
                    # save tower file
                    if save_tower:
                        if 'block_ids' in samples[k].keys():
                            logger.save_towers_data(
                                samples[k]['towers'][ix, :k_sub, :],
                                samples[k]['block_ids'][ix, :k_sub], label)
                        else:
                            logger.save_towers_data(
                                samples[k]['towers'][ix, :k_sub, :], None, label)
                    # stop when tower falls
                    if label == 0.0:
                        block_placements += k_sub
                        labels[ix] = 0.0
                        break
            else:
                # Execute the tower with an agent ('sim' or 'real' mode).
                vis = True
                success = False
                real = (exec_mode == 'real')
                # if planning fails, reset and try again
                while not success:
                    success, label = agent.simulate_tower(block_tower, vis, real=real)
                    print(f"Received success: {success}, label: {label}")
                    if not success:
                        if real:
                            input(
                                'Resolve conflict causing planning to fail, then press \
enter to try again.')
                            if isinstance(agent, PandaClientAgent):
                                agent.restart_services()
                        else:  # in sim
                            input(
                                'Should reset sim. Not yet handled. Exit and restart training.'
                            )
                labels[ix] = label
                if 'block_ids' in samples[k].keys():
                    logger.save_towers_data(samples[k]['towers'][ix, :, :],
                                            samples[k]['block_ids'][ix, :],
                                            labels[ix])
                else:
                    logger.save_towers_data(samples[k]['towers'][ix, :, :],
                                            None,
                                            labels[ix])
        samples[k]['labels'] = labels
    if save_tower:
        # save block placement data
        logger.save_block_placement_data(block_placements)
    if label_subtowers:
        # vectorize labeled samples and return
        for ki, k in enumerate(labeled_samples, 2):
            # Empty sizes get correctly-shaped empty arrays.
            if labeled_samples[k]['towers'] == []:
                labeled_samples[k]['towers'] = np.zeros((0, ki, 21))
                labeled_samples[k]['block_ids'] = np.zeros((0, ki))
                labeled_samples[k]['labels'] = np.zeros(0)
            labeled_samples[k]['towers'] = np.array(
                labeled_samples[k]['towers'])
            labeled_samples[k]['block_ids'] = np.array(
                labeled_samples[k]['block_ids'])
            labeled_samples[k]['labels'] = np.array(
                labeled_samples[k]['labels'])
        return labeled_samples
    else:
        return samples
def pairwise_stable(tower):
    """True iff the tower is constructible (every prefix stable pair-by-pair)."""
    return TowerPlanner(stability_mode='contains').tower_is_constructible(tower)
def geometric_stable(tower):
    """True iff the tower is stable by the center-of-geometry heuristic."""
    return TowerPlanner(stability_mode='contains').tower_is_cog_stable(tower)
def com_stable(tower):
    """True iff the tower is globally stable under the true COM model."""
    return TowerPlanner(stability_mode='contains').tower_is_stable(tower)
def evaluate_planner(logger, blocks, reward_fn, fname, args, save_imgs=False, img_prefix=''):
    """Evaluate tower planning regret/reward over acquisition steps.

    For each acquisition step (or a single step), plans towers of each size
    with an EnsemblePlanner, optionally perturbs/executes them under a noisy
    or simple execution model, and records regret = (max_reward - reward) /
    max_reward per tower.

    Fixes:
    - The 25th-percentile regret was computed as
      ``np.quantile(regrets[k][0][0], 0.25)`` — a quantile of a single
      scalar — instead of the whole list; now matches the 75th percentile.
    - Removed an ``if False:`` dead debug-visualization block.

    :param logger: experiment logger (ensembles, figure paths, eval towers).
    :param blocks: block set to plan with, or None to sample random blocks.
    :param reward_fn: maps a vectorized tower to its reward (e.g. height).
    :param fname: prefix for pickled regret/reward output files.
    :param args: namespace with tower_sizes, n_towers, exec_mode,
        planning_model, plan_xy_noise, exec_xy_noise, max_acquisitions,
        acquisition_step, problem.
    :param save_imgs: unused; kept for interface compatibility.
    :param img_prefix: unused; kept for interface compatibility.
    """
    tower_keys = [str(ts)+'block' for ts in args.tower_sizes]
    tp = TowerPlanner(stability_mode='contains')
    ep = EnsemblePlanner(logger)

    # Store regret for towers of each size.
    regrets = {k: [] for k in tower_keys}
    rewards = {k: [] for k in tower_keys}

    if args.max_acquisitions is not None:
        eval_range = range(0, args.max_acquisitions, 10)
    elif args.acquisition_step is not None:
        eval_range = [args.acquisition_step]

    for tx in eval_range:
        print('Acquisition step:', tx)
        ensemble = logger.get_ensemble(tx)
        if torch.cuda.is_available():
            ensemble = ensemble.cuda()
        for k, size in zip(tower_keys, args.tower_sizes):
            print('Tower size', k)
            num_failures, num_pw_failures = 0, 0
            curr_regrets = []
            curr_rewards = []
            for t in range(0, args.n_towers):
                print('Tower number', t)
                if blocks is not None:
                    plan_blocks = np.random.choice(blocks, size, replace=False)
                    plan_blocks = copy.deepcopy(plan_blocks)
                else:
                    plan_blocks = [Object.random() for _ in range(size)]

                tower, reward, max_reward, tower_block_ids = ep.plan(
                    plan_blocks, ensemble, reward_fn, args,
                    num_blocks=size, n_tower=t)
                block_tower = []
                for vec_block, block_id in zip(tower, tower_block_ids):
                    block = Object.from_vector(vec_block)
                    block.name = 'obj_%d' % block_id
                    block_tower.append(block)

                # save tower info to /evaluation_towers
                if args.exec_mode is None:
                    if args.planning_model == 'noisy-model':
                        logger.save_evaluation_tower(
                            block_tower, reward, max_reward, tx,
                            args.planning_model, args.problem,
                            noise=args.plan_xy_noise)
                    else:
                        logger.save_evaluation_tower(
                            block_tower, reward, max_reward, tx,
                            args.planning_model, args.problem)

                # perturb tower if evaluating with noisy model
                if args.exec_mode == 'noisy-model':
                    block_tower = []
                    for vec_block, block_id in zip(tower, tower_block_ids):
                        vec_block[7:9] += np.random.randn(2)*args.exec_xy_noise
                        block = Object.from_vector(vec_block)
                        block.name = 'obj_%d' % block_id
                        block_tower.append(block)

                # build found tower
                if args.exec_mode == 'noisy-model' or args.exec_mode == 'simple-model':
                    if not tp.tower_is_constructable(block_tower):
                        reward = 0
                        num_failures += 1
                        if tp.tower_is_pairwise_stable(block_tower):
                            num_pw_failures += 1
                        else:
                            # Diagnostic: which adjacent pair failed, and by how much.
                            pairs = []
                            dists = []
                            for i in range(len(tower) - 1):
                                # check that each pair of blocks is stable individually
                                top = block_tower[i+1]
                                bottom = block_tower[i]
                                pairs.append(tp.pair_is_stable(bottom, top))
                                top_rel_pos = np.array(top.pose.pos) - np.array(bottom.pose.pos)
                                top_rel_com = top_rel_pos + top.com
                                dists.append((np.abs(top_rel_com)*2 - bottom.dimensions)[:2])

                    # Note that in general max reward may not be the best
                    # possible due to sampling.
                    # Compare heights and calculate regret.
                    regret = (max_reward - reward)/max_reward
                    curr_regrets.append(regret)
                    curr_rewards.append(reward)

            if args.exec_mode == 'noisy-model' or args.exec_mode == 'simple-model':
                regrets[k].append(curr_regrets)
                rewards[k].append(curr_rewards)

            if args.max_acquisitions is not None:
                if args.exec_mode == 'noisy-model' or args.exec_mode == 'simple-model':
                    with open(logger.get_figure_path(fname+'_regrets.pkl'), 'wb') as handle:
                        pickle.dump(regrets, handle)
                    with open(logger.get_figure_path(fname+'_rewards.pkl'), 'wb') as handle:
                        pickle.dump(rewards, handle)

            # if just ran for one acquisition step, output final regret and reward
            if args.acquisition_step is not None:
                if args.exec_mode == 'noisy-model' or args.exec_mode == 'simple-model':
                    final_median_regret = np.median(regrets[k][0])
                    final_upper75_regret = np.quantile(regrets[k][0], 0.75)
                    # Fixed: was regrets[k][0][0] (quantile of one scalar).
                    final_lower25_regret = np.quantile(regrets[k][0], 0.25)
                    final_median_reward = np.median(rewards[k][0])
                    final_upper75_reward = np.quantile(rewards[k][0], 0.75)
                    final_lower25_reward = np.quantile(rewards[k][0], 0.25)
                    final_average_regret = np.average(regrets[k][0])
                    final_std_regret = np.std(regrets[k][0])
                    final_average_reward = np.average(rewards[k][0])
                    final_std_reward = np.std(rewards[k][0])
                    print('Final Median Regret: (%f) %f (%f)' % (final_lower25_regret, final_median_regret, final_upper75_regret))
                    print('Final Median Reward: (%f) %f (%f)' % (final_lower25_reward, final_median_reward, final_upper75_reward))
                    print('Final Average Regret: %f +/- %f' % (final_average_regret, final_std_regret))
                    print('Final Average Reward: %f +/- %f' % (final_average_reward, final_std_reward))
def inspect_2block_towers(logger):
    """ In the full dataset, show the distribution of features.

    Interactive debug utility: prints BALD scores and predictions for the
    most informative 2-block towers, then checks ensemble accuracy on a
    fixed validation set.
    """
    tower_keys = ['2block', '3block', '4block', '5block']
    # dataset = logger.load_dataset(logger.args.max_acquisitions - 1)
    # print(dataset.tower_tensors['2block'].shape)
    # plt.hist(dataset.tower_tensors['2block'][:,1,8], bins=10)
    # plt.show()
    #ensemble = logger.get_ensemble(10)
    ensemble = logger.get_ensemble(logger.args.max_acquisitions - 1)
    unlabeled = sample_unlabeled_data(10000)
    preds = get_predictions(unlabeled, ensemble)
    bald_scores = bald(preds).numpy()
    print('Best BALD')
    # Ten highest-scoring (most informative) towers overall.
    ixs = np.argsort(bald_scores)[::-1][:10]
    print(bald_scores[ixs])
    input()
    tp = TowerPlanner(stability_mode='contains')
    # 2-block towers occupy the first rows of the concatenated predictions.
    preds2 = preds[:unlabeled['2block']['towers'].shape[0], :]
    bald_scores2 = bald_scores[:unlabeled['2block']['towers'].shape[0]]
    acquire_indices = np.argsort(bald_scores2)[::-1][:50]
    # for ix in range(preds2.shape[0]):
    #     print(np.around(preds2[ix,:].numpy(), 2), np.around(bald_scores2[ix], 3))
    print('-----')
    for ix in acquire_indices:
        d = decision_distance(unlabeled['2block']['towers'][ix,:,:])
        tower = unlabeled['2block']['towers'][ix,:,:]
        l = tp.tower_is_constructable([Object.from_vector(tower[bx, :]) for bx in range(tower.shape[0])])
        print(np.around(preds2[ix,:].numpy(), 4), np.around(bald_scores2[ix], 3), d, l)
    # NOTE(review): adding 0.0 is a no-op — presumably a leftover knob for
    # perturbing the top block's x position; confirm intended offset.
    for ix in acquire_indices:
        unlabeled['2block']['towers'][ix,1,7:8] += 0.0
    new_preds = get_predictions(unlabeled, ensemble)
    print('-----')
    for ix in acquire_indices:
        d = decision_distance(unlabeled['2block']['towers'][ix,:,:])
        print(np.around(new_preds[ix,:].numpy(), 2))
    plt.hist(unlabeled['2block']['towers'][acquire_indices,1,0])
    plt.show()
    print('-----')
    start = 0
    # NOTE(review): unlike the loop at the bottom, this loop never advances
    # `start`, so every slice begins at row 0 — likely a missing `start = end`.
    for k in tower_keys:
        end = start + unlabeled[k]['towers'].shape[0]
        p, b = preds[start:end, :], bald_scores[start:end]
        informative = b[b > 0.3]
        print(p.shape, informative.shape)
    accs = {k: [] for k in tower_keys}
    with open('learning/data/random_blocks_(x2000)_5blocks_uniform_mass.pkl', 'rb') as handle:
        val_towers = pickle.load(handle)
    preds = get_predictions(val_towers, ensemble).mean(1).numpy()
    dists = []
    for ix in range(0, val_towers['2block']['towers'].shape[0]):
        d = decision_distance(val_towers['2block']['towers'][ix,:,:])
        dists.append(d)
    print(len(dists))
    plt.hist(dists, bins=100)
    plt.show()
    # Per-size accuracy on the validation set.
    start = 0
    for k in tower_keys:
        end = start + val_towers[k]['towers'].shape[0]
        acc = ((preds[start:end]>0.5) == val_towers[k]['labels']).mean()
        accs[k].append(acc)
        start = end
    print(accs)
from block_utils import Object
from learning.domains.towers.generate_tower_training_data import sample_random_tower
from learning.domains.towers.tower_data import TowerDataset, TowerSampler
from tower_planner import TowerPlanner
import pickle
import copy


if __name__ == '__main__':
    # Estimate how often a random 5-block tower built from the evaluation
    # block set is constructable (stable at every build step).
    with open('learning/domains/towers/eval_block_set_12.pkl', 'rb') as handle:
        blocks = pickle.load(handle)
    tp = TowerPlanner(stability_mode='contains')
    n_stable = 0
    # (Fixed: removed unused `towers` list and unused un-rotated `tower`.)
    for _ in range(0, 10000):
        copy_blocks = copy.deepcopy(blocks)
        _, rotated_tower = sample_random_tower(copy_blocks, num_blocks=5, \
            ret_rotated=True, discrete=False)
        n_stable += tp.tower_is_constructable(rotated_tower)
    print(f"n_stable: {n_stable}")
class Tallest(Problem):
    """Search problem: build the tallest stable tower of at most max_height blocks."""

    def __init__(self, max_height):
        # Candidate placements sampled per remaining block at each expansion.
        self.samples_per_block = 5
        self.max_height = max_height
        # Analytic stability checker used for ground-truth rewards.
        self.tp = TowerPlanner(stability_mode='contains')

    def sample_actions(self, parent_node, model, discrete=False):
        """Expand `parent_node`: sample placements for every remaining block.

        :param parent_node: search-node dict with 'tower' and 'blocks_remaining'.
        :param model: ensemble stability model used by reward_fn.
        :param discrete: forwarded to random_placement.
        :return: list of child node dicts with rewards and terminal flags.
        """
        new_towers = []
        new_blocks_remaining = []
        if len(parent_node['tower']) == 0:
            # first action: place each possible block at (0,0) at a random orientation
            for block in parent_node['blocks_remaining']:
                for _ in range(self.samples_per_block):
                    tower = random_placement(block, [], discrete=discrete)
                    # Pin the base block to the origin, keeping its orientation.
                    tower[0].pose = Pose(ZERO_POS, tower[0].pose.orn)
                    blocks_remaining = copy(parent_node['blocks_remaining'])
                    blocks_remaining.remove(block)
                    new_towers.append(tower)
                    new_blocks_remaining.append(blocks_remaining)
        else:
            # randomly sample a placement of a random block
            for block in parent_node['blocks_remaining']:
                blocks_remaining = copy(parent_node['blocks_remaining'])
                blocks_remaining.remove(block)
                for _ in range(self.samples_per_block):
                    tower = random_placement(block, parent_node['tower'], discrete=discrete)
                    new_towers.append(tower)
                    new_blocks_remaining.append(blocks_remaining)
        all_rewards = self.reward_fn(new_towers, model)
        terms = [False] * len(new_towers)
        for i, tower in enumerate(new_towers):
            # rewards of 0 are unstable --> terminal nodes
            # once max height is reached --> terminal nodes
            if all_rewards['exp_reward'][i] == 0 or len(
                    tower) == self.max_height:
                terms[i] = True
        new_nodes = []
        for i, (tower, blocks_remaining, term) in enumerate(zip(new_towers, new_blocks_remaining, terms)):
            new_node = {
                'parent': None,
                'children': [],
                'term': term,
                'leaf': True,
                'exp_reward': all_rewards['exp_reward'][i],
                'value': 0,
                'count': 0,
                'tower': tower,
                'blocks_remaining': blocks_remaining,
                'tower_height': all_rewards['reward'][i],
                'ground_truth': all_rewards['ground_truth'][i]
            }
            new_nodes.append(new_node)
        return new_nodes

    '''
    def sample_action(self, parent_node, model, discrete=False):
        block = np.random.choice(parent_node['blocks_remaining'])
        if len(parent_node['tower']) == 0:
            # first action: place block at (0,0)
            block.pose = Pose(ZERO_POS, block.pose.orn)
            tower = [block]
        else:
            # randomly sample a placement of a random block
            tower = random_placement(block, parent_node['tower'], discrete=discrete)
        blocks_remaining = copy(parent_node['blocks_remaining'])
        blocks_remaining.remove(block)
        all_rewards = self.reward_fn([tower], model)
        # rewards of 0 are unstable --> terminal nodes
        # once max height is reached --> terminal nodes
        term = False
        if all_rewards['exp_reward'][0] == 0 or len(tower) == self.max_height:
            term = True
        new_node = {'parent': None,
                    'children': [],
                    'term': term,
                    'leaf': True,
                    'exp_reward': all_rewards['exp_reward'][0],
                    'value': 0,
                    'count': 0,
                    'tower': tower,
                    'blocks_remaining': blocks_remaining,
                    'tower_height': all_rewards['reward'][0],
                    'ground_truth': all_rewards['ground_truth'][0]}
        return new_node
    '''

    def reward_fn(self, towers, model):
        """Score candidate towers: expected reward, raw height and ground truth.

        :param towers: list of towers (lists of blocks), all the same length.
        :param model: ensemble; forward() returns per-member stability probs.
        :return: dict with 'exp_reward', 'reward', 'ground_truth' lists.
        """
        all_rewards = {'exp_reward': [], 'reward': [], 'ground_truth': []}
        if len(towers[0]) == 1:
            # only single block towers, always stable
            reward = [tower[0].dimensions[2] for tower in towers]
            all_rewards['exp_reward'] = reward
            all_rewards['reward'] = reward
            all_rewards['ground_truth'] = reward
        else:
            tower_loader = make_tower_dataset(towers)
            # this assumes there is only one batch
            for tensor, _ in tower_loader:
                with torch.no_grad():
                    preds = model.forward(tensor)
            p_stables = preds.mean(dim=1)  # average ensemble output
            exp_rewards = []
            rewards = []
            ground_truths = []
            for ix, (p, tower) in enumerate(zip(p_stables, towers)):
                # Reward is the total height of the tower.
                tower_height = np.sum([block.dimensions[2] for block in tower])
                rewards += [tower_height]
                if p > 0.5:  # stable
                    exp_rewards += [float(p * tower_height)]
                else:
                    exp_rewards += [0]
                # Ground truth: height if analytically constructable, else 0.
                ground_truths += [
                    self.tp.tower_is_constructable(tower) * tower_height
                ]
            all_rewards['exp_reward'] = exp_rewards
            all_rewards['reward'] = rewards
            all_rewards['ground_truth'] = ground_truths
        return all_rewards
class EnsemblePlanner:
    """Plan the best tower from sampled candidates, scored by a model ensemble
    (or an analytic 'simple'/'noisy' constructability model)."""

    def __init__(self, logger, n_samples=None):
        """:param logger: experiment logger.
        :param n_samples: currently unused — budgets come from the table below."""
        self.tower_keys = ['2block', '3block', '4block', '5block']
        # Candidate towers sampled per tower size (num blocks -> samples).
        self.n_samples = {
            2: 5000,
            3: 10000,
            4: 20000,
            5: 100000,
            6: 250000,
            7: 500000
        }
        self.tp = TowerPlanner(stability_mode='contains')
        self.logger = logger
        # Set once cached candidate towers have been loaded from disk.
        self.using_cache = False

    def get_cached_towers(self, args, num_blocks, blocks, n_tower):
        """Load previously sampled candidate towers from the on-disk cache.

        :return: (towers, block_ids) or (None, None) when no usable cache.
        """
        key = '%dblock' % num_blocks
        if not self.using_cache:
            if args.block_set_fname == '':
                cache_name = 'random_set.pkl'
            else:
                cache_name = os.path.basename(args.block_set_fname)
            # see if cache for this block set exists
            try:
                with open(
                        os.path.join('learning/evaluate/cached_towers',
                                     cache_name), 'rb') as handle:
                    block_set_towers = pickle.load(handle)
            except:
                return None, None
            # if it cached file exists, save it
            self.cached_towers = block_set_towers
            if (key in block_set_towers) and \
               (n_tower in block_set_towers[key]) and \
               (len(block_set_towers[key][n_tower][0]) >= self.n_samples[num_blocks]):
                print('Using random planning towers from cache for tower %d' % n_tower)
                self.using_cache = True
                # NOTE(review): the entry is a (towers, block_ids) 2-tuple, so
                # slicing it with [:n_samples] returns the whole tuple rather
                # than truncating each list — confirm intended.
                return self.cached_towers[key][n_tower][:self.
                                                        n_samples[num_blocks]]
            else:
                return None, None
        # if already loaded cache, get relevant towers
        else:
            try:
                # NOTE(review): same tuple-slicing caveat as above.
                towers, block_ids = self.cached_towers[key][
                    n_tower][:self.n_samples[num_blocks]]
            except:
                return None, None
            print(
                'Using saved random planning towers from cache for tower %d'
                % n_tower)
            return towers, block_ids

    # NOTE: this will not cache towers if some towers have already previously been found in the cache
    def cache_towers(self, args, towers, tower_block_ids, n_tower):
        """Persist freshly sampled candidate towers to the on-disk cache."""
        if not self.using_cache:
            if args.block_set_fname == '':
                cache_name = 'random_set.pkl'
            else:
                cache_name = os.path.basename(args.block_set_fname)
            num_blocks = len(towers[0])
            # see if cache exists
            try:
                with open(
                        os.path.join('learning/evaluate/cached_towers',
                                     cache_name), 'rb') as handle:
                    block_set_towers = pickle.load(handle)
            except:
                block_set_towers = {}
            if '%dblock' % num_blocks not in block_set_towers:
                block_set_towers['%dblock' % num_blocks] = {}
            print('Saving randomly generated towers for tower %d' % n_tower)
            block_set_towers['%dblock' % num_blocks][n_tower] = (towers, tower_block_ids)
            with open(
                    os.path.join('learning/evaluate/cached_towers',
                                 cache_name), 'wb') as handle:
                pickle.dump(block_set_towers, handle)

    def generate_candidate_towers(self, blocks, args, num_blocks=None, n_tower=None):
        """Sample n_samples[num_blocks] random candidate towers from `blocks`.

        :return: (tower_vectors, tower_block_ids) lists, one entry per tower.
        """
        if num_blocks is None:
            num_blocks = len(blocks)
        # Caching of candidates is currently disabled (see commented lines).
        #tower_vectors, tower_block_ids = self.get_cached_towers(args, num_blocks, blocks, n_tower)
        tower_vectors = None
        if tower_vectors is None:
            tower_vectors = []
            tower_block_ids = []
            for _ in range(0, self.n_samples[num_blocks]):
                tower, rotated_tower = sample_random_tower(blocks, num_blocks=num_blocks, \
                    ret_rotated=True, discrete=args.discrete)
                tower_vectors.append([b.vectorize() for b in rotated_tower])
                tower_block_ids.append([b.get_id() for b in rotated_tower])
            #self.cache_towers(args, tower_vectors, tower_block_ids, n_tower)
        return tower_vectors, tower_block_ids

    def plan(self, blocks, ensemble, reward_fn, args, num_blocks=None, n_tower=None):
        """Pick the candidate tower maximizing expected reward p(stable)*reward.

        :param blocks: blocks available for building.
        :param ensemble: learned stability ensemble (used when
            args.planning_model == 'learned').
        :param reward_fn: maps a vectorized tower to its reward.
        :param args: namespace with planning_model, plan_xy_noise, discrete, ...
        :return: (max_tower, max_reward, ground_truth, max_reward_block_ids).
        """
        #n = len(blocks)
        #max_height = 0
        #max_tower = []

        # Step (1): Build dataset of potential towers.
        tower_vectors, tower_block_ids = self.generate_candidate_towers(
            blocks, args, num_blocks, n_tower)

        # Step (2): Get predictions for each tower.
        towers = np.array(tower_vectors)
        block_ids = np.array(tower_block_ids)
        if args.planning_model == 'learned':
            # Since we are only planning for towers of a single size,
            # always use the '2block' key for simplicity. The rest currently
            # need at least some data for the code to work.
            labels = np.zeros((towers.shape[0], ))
            tower_dict = {}
            for k in self.tower_keys:
                tower_dict[k] = {}
                if k == '2block':
                    tower_dict[k]['towers'] = towers
                    tower_dict[k]['labels'] = labels
                    tower_dict[k]['block_ids'] = block_ids
                else:
                    tower_dict[k]['towers'] = towers[:5, ...]
                    tower_dict[k]['labels'] = labels[:5]
                    tower_dict[k]['block_ids'] = block_ids[:5, ...]
            tower_dataset = TowerDataset(tower_dict, augment=False)
            tower_sampler = TowerSampler(dataset=tower_dataset,
                                         batch_size=64,
                                         shuffle=False)
            tower_loader = DataLoader(dataset=tower_dataset,
                                      batch_sampler=tower_sampler)
            preds = []
            if hasattr(self.logger.args, 'sampler') and self.logger.args.sampler == 'sequential':
                # Sequential sampler: a tower is stable only if every prefix
                # is stable, so multiply per-prefix predictions.
                for tensor, _ in tower_loader:
                    sub_tower_preds = []
                    for n_blocks in range(2, tensor.shape[1] + 1):
                        if torch.cuda.is_available():
                            tensor = tensor.cuda()
                        with torch.no_grad():
                            sub_tower_preds.append(
                                ensemble.forward(tensor[:, :n_blocks, :]))
                    sub_tower_preds = torch.stack(sub_tower_preds, dim=0)
                    preds.append(sub_tower_preds.prod(dim=0))
            else:
                for tensor, _ in tower_loader:
                    if torch.cuda.is_available():
                        tensor = tensor.cuda()
                    with torch.no_grad():
                        preds.append(ensemble.forward(tensor))
            # Mean over ensemble members -> probability of stability.
            p_stables = torch.cat(preds, dim=0).mean(dim=1)
        elif args.planning_model == 'noisy-model':
            n_estimate = 10
            p_stables = np.zeros(len(tower_vectors))
            for ti, tower_vec in enumerate(tower_vectors):
                # estimate prob of constructability
                results = np.ones(n_estimate)
                all_stable = 1.
                for n in range(n_estimate):
                    noisy_tower = []
                    for block_vec in tower_vec:
                        noisy_block = deepcopy(block_vec)
                        # Indices 7:9 of the block vector hold x/y position.
                        noisy_block[7:9] += np.random.randn(
                            2) * args.plan_xy_noise
                        noisy_tower.append(noisy_block)
                    block_tower = [
                        Object.from_vector(block) for block in noisy_tower
                    ]
                    if not self.tp.tower_is_constructable(block_tower):
                        results[n] = 0.
                        all_stable = 0.
                        break
                # All-or-nothing score: 1 only if every noisy copy stood.
                p_stables[ti] = all_stable  # np.mean(results)
        elif args.planning_model == 'simple-model':
            p_stables = np.zeros(len(tower_vectors))
            for ti, tower_vec in enumerate(tower_vectors):
                block_tower = [
                    Object.from_vector(block) for block in tower_vec
                ]
                if self.tp.tower_is_constructable(block_tower):
                    p_stables[ti] = 1.

        # Step (3): Find the tallest tower of a given height.
        max_reward, max_exp_reward, max_tower, max_stable = -100, -100, None, 0
        ground_truth = -100
        max_reward_block_ids = None
        for ix, (p, tower, tower_block_ids) in enumerate(
                zip(p_stables, towers, block_ids)):
            reward = reward_fn(tower)
            exp_reward = p * reward
            if exp_reward >= max_exp_reward:  # and p > 0.5:
                # Tie-break equal expected rewards by higher stability prob.
                if exp_reward > max_exp_reward or (exp_reward == max_exp_reward and p > max_stable):
                    max_tower = tower_vectors[ix]
                    max_reward = reward
                    max_exp_reward = exp_reward
                    max_stable = p
                    max_reward_block_ids = tower_block_ids
            # Check ground truth stability to find maximum reward.
            if self.tp.tower_is_constructable([Object.from_vector(tower[ix,:]) for ix in range(tower.shape[0])]) \
                and reward > ground_truth:
                ground_truth = reward
        if max_tower is None:
            print('None Found')
            max_tower = tower_vectors[0]
            max_reward = reward_fn(towers[0])
        return max_tower, max_reward, ground_truth, max_reward_block_ids
def __init__(self, max_height):
    """Tallest-tower search problem.

    :param max_height: maximum number of blocks in a tower (terminal depth).
    """
    # Candidate placements sampled per remaining block at each expansion.
    self.samples_per_block = 5
    self.max_height = max_height
    # Analytic stability checker used for ground-truth rewards.
    self.tp = TowerPlanner(stability_mode='contains')