Ejemplo n.º 1
0
def plot_constructability_over_time(logger):
    """Plot a stacked bar chart of the tallest stable subtower height per acquisition step.

    For every acquired tower, grows the tower block by block from the bottom
    and records the height reached before it first becomes non-constructable.
    Saves the figure as 'tallest_breakdown.png' via the logger.
    """
    tower_keys = ['2block', '3block', '4block', '5block']
    n_steps = logger.args.max_acquisitions
    height_counts = np.zeros((n_steps, 5))
    tp = TowerPlanner(stability_mode='contains')

    for tx in range(n_steps):
        acquired_data, _ = logger.load_acquisition_data(tx)
        # For each tower, figure out when it fell over.
        for key in tower_keys:
            towers = acquired_data[key]['towers']
            for ix in range(towers.shape[0]):
                height = 1
                for top_id in range(1, towers.shape[1]):
                    subtower = [Object.from_vector(towers[ix, bx, :])
                                for bx in range(top_id + 1)]
                    if not tp.tower_is_constructable(subtower):
                        break
                    height += 1
                height_counts[tx, height - 1] += 1

    xs = np.arange(40, 40 + 10 * n_steps, 10)
    bar_width = 10
    plt.figure(figsize=(20, 10))
    # Stack one bar segment per achievable height (1 through 5 blocks).
    plt.bar(xs, height_counts[:, 0], width=bar_width, label=1)
    for col in range(1, 5):
        plt.bar(xs, height_counts[:, col],
                bottom=height_counts[:, :col].sum(axis=1),
                width=bar_width, label=col + 1)

    plt.xlabel('Acquisition Step')
    plt.ylabel('Height of tallest stable subtower')
    plt.legend()
    plt.savefig(logger.get_figure_path('tallest_breakdown.png'))
Ejemplo n.º 2
0
def is_robust(orig_tower, n_attempts=10, noise=0.001):
    """Check whether a tower's constructability label survives placement noise.

    Perturb the (x, y) position of each block in the tower by Gaussian noise
    multiple times and make sure the label does not change.

    :param orig_tower: list of Object blocks, bottom to top.
    :param n_attempts: number of independently perturbed copies to test.
    :param noise: standard deviation of the xy perturbation (meters).
    :return: True iff the constructability label is unchanged for every attempt.
    """
    tp = TowerPlanner(stability_mode='contains')
    tower_vec = np.array(
        [orig_tower[bx].vectorize() for bx in range(0, len(orig_tower))])
    label = tp.tower_is_constructable(orig_tower)
    for _ in range(n_attempts):
        tower = tower_vec.copy()
        # Vector dims 7:9 hold each block's x, y position.
        tower[:, 7:9] += np.random.randn(tower.shape[0], 2) * noise

        block_tower = [
            Object.from_vector(tower[kx, :]) for kx in range(tower.shape[0])
        ]

        if tp.tower_is_constructable(block_tower) != label:
            # One label flip is enough: return early instead of continuing to
            # perturb (the original kept looping after the result was decided).
            return False

    return True
Ejemplo n.º 3
0
def inspect_validation_set(fname):
    """For each unstable 5-block validation tower, print whether its base block alone is constructable."""
    with open(fname, 'rb') as handle:
        val_towers = pickle.load(handle)

    tp = TowerPlanner(stability_mode='contains')

    data = val_towers['5block']

    # Check how many towers fall over at a lower block.
    for tower_vec, label in zip(data['towers'], data['labels']):
        if label != 0:
            continue
        blocks = [Object.from_vector(tower_vec[bx, :])
                  for bx in range(tower_vec.shape[0])]
        print(tp.tower_is_constructable(blocks[:1]))
Ejemplo n.º 4
0
def check_validation_robustness(noise=0.001, n_attempts=10):
    """
    Try adding noise to the placement of each block in the validation set
    to see how many of the towers are robust to placement noise.

    :param noise: std of the Gaussian xy perturbation applied to each block.
    :param n_attempts: number of perturbed copies tested per tower.

    Side effects: prints a robust/total count per tower size; also builds (but
    does not currently save — see commented-out dumps) stable/unstable splits.
    """
    with open('learning/data/validation_towers_robust.pkl', 'rb') as handle:
    #with open('learning/data/random_blocks_(x2000)_5blocks_uniform_mass.pkl', 'rb') as handle:
        val_towers = pickle.load(handle)
    # Count of robust towers per tower-size key.
    robust = {k: 0 for k in val_towers.keys()}
    tp = TowerPlanner(stability_mode='contains')
    # Deep copies so the filtered splits don't mutate the loaded dataset.
    stable_towers = copy.deepcopy(val_towers)
    unstable_towers = copy.deepcopy(val_towers)
    for k in robust.keys():
        stable_indices = []
        unstable_indices = []
        for ix in range(0, val_towers[k]['towers'].shape[0]):
            stable = True
            # Only towers labeled stable (1) are tested; labeled-unstable
            # towers are skipped entirely and end up in neither split.
            if val_towers[k]['labels'][ix] == 0:
                continue

            for _ in range(n_attempts):
                # Perturb a fresh copy each attempt; dims 7:9 are x, y position.
                tower = val_towers[k]['towers'][ix, :, :].copy()
                label = val_towers[k]['labels'][ix]
                tower[:, 7:9] += np.random.randn(2*tower.shape[0]).reshape(tower.shape[0], 2)*noise

                block_tower = [Object.from_vector(tower[kx, :]) for kx in range(tower.shape[0])]

                # NOTE(review): no early break here — all n_attempts run even
                # after the first label flip; result is the same, just slower.
                if tp.tower_is_constructable(block_tower) != label:
                    stable = False

            if stable:
                robust[k] += 1
                stable_indices.append(ix)
            else:
                unstable_indices.append(ix)
        
        # Keep only the towers whose label survived every perturbation.
        stable_towers[k]['towers'] = stable_towers[k]['towers'][stable_indices,...]
        stable_towers[k]['labels'] = stable_towers[k]['labels'][stable_indices]
        
        unstable_towers[k]['towers'] = unstable_towers[k]['towers'][unstable_indices,...]
        unstable_towers[k]['labels'] = unstable_towers[k]['labels'][unstable_indices]
        
        # with open('learning/data/stable_val.pkl', 'wb') as handle:
        #     pickle.dump(stable_towers, handle)

        # with open('learning/data/unstable_val.pkl', 'wb') as handle:
        #     pickle.dump(unstable_towers, handle)
        
        print(k, ':', robust[k], '/', val_towers[k]['towers'].shape[0] )
Ejemplo n.º 5
0
def check_stable_bases(logger):
    """For the first 80 acquisition steps, print whether each tower's base (all blocks but the top) is stable."""
    tower_keys = ['2block', '3block', '4block', '5block']
    tp = TowerPlanner(stability_mode='contains')
    for tx in range(80):
        print(tx)
        towers, _ = logger.load_acquisition_data(tx)

        for key in tower_keys:
            print(key)
            for tower_vec in towers[key]['towers']:
                blocks = [Object.from_vector(tower_vec[bx, :])
                          for bx in range(tower_vec.shape[0])]
                # The base is everything below the top block.
                if tp.tower_is_constructable(blocks[:-1]):
                    print('Stable Base')
                else:
                    print('Unstable Base')
Ejemplo n.º 6
0
def get_labels(samples,
               exec_mode,
               agent,
               logger,
               xy_noise,
               save_tower=False,
               label_subtowers=False):
    """ Takes as input a dictionary from the get_subset function. 
    Augment it with stability labels. 
    :param samples: dict mapping '<n>block' keys to {'towers': array, optionally 'block_ids': array}.
    :param exec_mode: str in ['simple-model', 'noisy-model', 'sim', 'real']
    :param agent: PandaAgent or None (if exec_mode == 'simple-model' or 'noisy-model')
    :param logger: used to persist tower/label/placement data.
    :param xy_noise: std of the xy perturbation applied in 'noisy-model' mode.
    :param save_tower: if True, save each labeled (sub)tower via the logger.
    :param label_subtowers: if True, return the per-subtower labeled dataset
        instead of the input samples.
    :return: labeled_samples (vectorized) if label_subtowers, else samples
        augmented with a 'labels' array per key.
    """
    labeled_samples = {'%dblock' % k: {} for k in [2, 3, 4, 5]}
    for k in labeled_samples:
        labeled_samples[k]['towers'] = []
        labeled_samples[k]['block_ids'] = []
        labeled_samples[k]['labels'] = []

    # Total number of block placements executed before towers fell (used as a
    # cost metric when save_tower is set).
    block_placements = 0

    tp = TowerPlanner(stability_mode='contains')
    for k in samples.keys():
        n_towers, n_blocks, _ = samples[k]['towers'].shape
        labels = np.ones((n_towers, ))

        for ix in range(0, n_towers):
            print(f'Collecting tower {ix+1}/{n_towers} for {k} towers...')
            # Add noise to blocks and convert tower to Block representation.
            block_tower = []
            for jx in range(n_blocks):
                vec_block = deepcopy(samples[k]['towers'][ix, jx, :])
                if exec_mode == 'noisy-model':
                    # Dims 7:9 are the block's x, y position.
                    vec_block[7:9] += np.random.randn(2) * xy_noise
                block = Object.from_vector(
                    vec_block)  # block is already rotated
                if 'block_ids' in samples[k].keys():
                    block.name = 'obj_' + str(samples[k]['block_ids'][ix, jx])
                block_tower.append(block)
            #  Use tp to check for stability.
            if exec_mode == 'simple-model' or exec_mode == 'noisy-model':

                # iterate through each subtower until it falls (is not constructable)
                subtowers = [
                    block_tower[:k_sub]
                    for k_sub in list(range(2,
                                            len(block_tower) + 1))
                ]
                for k_sub, subtower in enumerate(subtowers, 2):
                    if tp.tower_is_constructable(subtower):
                        label = 1.0
                    else:
                        label = 0.0

                    # add to labeled samples
                    labeled_samples['%dblock' % k_sub]['towers'].append(
                        samples[k]['towers'][ix, :k_sub, :])
                    # BUG FIX: previously this checked labeled_samples, whose
                    # 'block_ids' key always exists (initialized above), so the
                    # branch was always taken and raised a KeyError whenever
                    # the input samples had no block ids. Check the input dict,
                    # matching the other 'block_ids' checks in this function.
                    if 'block_ids' in samples[k].keys():
                        labeled_samples['%dblock' % k_sub]['block_ids'].append(
                            samples[k]['block_ids'][ix, :k_sub])
                    labeled_samples['%dblock' % k_sub]['labels'].append(label)

                    # save tower file
                    if save_tower:
                        if 'block_ids' in samples[k].keys():
                            logger.save_towers_data(
                                samples[k]['towers'][ix, :k_sub, :],
                                samples[k]['block_ids'][ix, :k_sub], label)
                        else:
                            logger.save_towers_data(
                                samples[k]['towers'][ix, :k_sub, :], None,
                                label)
                    # stop when tower falls
                    if label == 0.0:
                        block_placements += k_sub
                        labels[ix] = 0.0
                        break
            else:
                # Execute on the simulator or real robot via the agent.
                vis = True
                success = False
                real = (exec_mode == 'real')
                # if planning fails, reset and try again
                while not success:
                    success, label = agent.simulate_tower(block_tower,
                                                          vis,
                                                          real=real)
                    print(f"Received success: {success}, label: {label}")
                    if not success:
                        if real:
                            input(
                                'Resolve conflict causing planning to fail, then press \
                                    enter to try again.')
                            if isinstance(agent, PandaClientAgent):
                                agent.restart_services()
                        else:  # in sim
                            input(
                                'Should reset sim. Not yet handled. Exit and restart training.'
                            )
                labels[ix] = label
                if 'block_ids' in samples[k].keys():
                    logger.save_towers_data(samples[k]['towers'][ix, :, :],
                                            samples[k]['block_ids'][ix, :],
                                            labels[ix])
                else:
                    logger.save_towers_data(samples[k]['towers'][ix, :, :],
                                            None, labels[ix])
        samples[k]['labels'] = labels

    if save_tower:
        # save block placement data
        logger.save_block_placement_data(block_placements)

    if label_subtowers:
        # vectorize labeled samples and return
        for ki, k in enumerate(labeled_samples, 2):
            if labeled_samples[k]['towers'] == []:
                # Empty placeholder arrays keep downstream code shape-safe.
                labeled_samples[k]['towers'] = np.zeros((0, ki, 21))
                labeled_samples[k]['block_ids'] = np.zeros((0, ki))
                labeled_samples[k]['labels'] = np.zeros(0)
            labeled_samples[k]['towers'] = np.array(
                labeled_samples[k]['towers'])
            labeled_samples[k]['block_ids'] = np.array(
                labeled_samples[k]['block_ids'])
            labeled_samples[k]['labels'] = np.array(
                labeled_samples[k]['labels'])
        return labeled_samples
    else:
        return samples
Ejemplo n.º 7
0
def evaluate_planner(logger, blocks, reward_fn, fname, args, save_imgs=False, img_prefix=''):
    """Evaluate the ensemble planner's towers against ground-truth stability.

    For each evaluated acquisition step and tower size, plans args.n_towers
    towers, optionally perturbs them (exec_mode == 'noisy-model'), checks
    constructability with a TowerPlanner, and records reward and regret.

    :param logger: experiment logger providing ensembles and output paths.
    :param blocks: block set to plan with, or None to sample random blocks.
    :param reward_fn: maps a vectorized tower to a scalar reward.
    :param fname: prefix for the saved regret/reward pickle files.
    :param args: namespace with tower_sizes, n_towers, exec_mode,
        planning_model, max_acquisitions / acquisition_step, etc.
    :param save_imgs: unused here; kept for interface compatibility.
    :param img_prefix: unused here; kept for interface compatibility.
    """
    tower_keys = [str(ts)+'block' for ts in args.tower_sizes]
    tp = TowerPlanner(stability_mode='contains')
    ep = EnsemblePlanner(logger)

    # Store regret for towers of each size.
    regrets = {k: [] for k in tower_keys}
    rewards = {k: [] for k in tower_keys}

    if args.max_acquisitions is not None: 
        eval_range = range(0, args.max_acquisitions, 10)
    elif args.acquisition_step is not None: 
        eval_range = [args.acquisition_step]
    else:
        # Previously fell through to a NameError on eval_range; fail clearly.
        raise ValueError('Either args.max_acquisitions or args.acquisition_step must be set.')
    
    for tx in eval_range:
        print('Acquisition step:', tx)

        ensemble = logger.get_ensemble(tx)
        if torch.cuda.is_available():
            ensemble = ensemble.cuda()
            
        for k, size in zip(tower_keys, args.tower_sizes):
            print('Tower size', k)
            num_failures, num_pw_failures = 0, 0
            curr_regrets = []
            curr_rewards = []
            for t in range(0, args.n_towers):
                print('Tower number', t)
                
                if blocks is not None:
                    plan_blocks = np.random.choice(blocks, size, replace=False)	
                    plan_blocks = copy.deepcopy(plan_blocks)	
                else:
                    plan_blocks = [Object.random() for _ in range(size)]
                    
                tower, reward, max_reward, tower_block_ids = ep.plan(plan_blocks, 
                                                                ensemble, 
                                                                reward_fn,
                                                                args,
                                                                num_blocks=size,
                                                                n_tower=t)
                                
                block_tower = []
                for vec_block, block_id in zip(tower, tower_block_ids):
                    block = Object.from_vector(vec_block)
                    block.name = 'obj_%d' % block_id
                    block_tower.append(block)         
                                           
                # save tower info to /evaluation_towers
                if args.exec_mode is None:
                    if args.planning_model == 'noisy-model':
                        logger.save_evaluation_tower(block_tower, reward, max_reward, tx, args.planning_model, args.problem, noise=args.plan_xy_noise)
                    else:
                        logger.save_evaluation_tower(block_tower, reward, max_reward, tx, args.planning_model, args.problem)

                # perturb tower if evaluating with noisy model
                if args.exec_mode == 'noisy-model':
                    block_tower = []
                    for vec_block, block_id in zip(tower, tower_block_ids):
                        # Dims 7:9 are the block's x, y position.
                        vec_block[7:9] += np.random.randn(2)*args.exec_xy_noise
                        block = Object.from_vector(vec_block)
                        block.name = 'obj_%d' % block_id
                        block_tower.append(block)     
    
                # build found tower
                if args.exec_mode == 'noisy-model' or args.exec_mode == 'simple-model':
                    if not tp.tower_is_constructable(block_tower):
                        reward = 0
                        num_failures += 1
                        if tp.tower_is_pairwise_stable(block_tower):
                            num_pw_failures += 1
                        else:
                            # Diagnose which adjacent pair is unstable and by
                            # how much the top COM overhangs the bottom block.
                            pairs = []
                            dists = []
                            for i in range(len(tower) - 1):
                                # check that each pair of blocks is stably individually
                                top = block_tower[i+1]
                                bottom = block_tower[i]
                                if not tp.pair_is_stable(bottom, top): 
                                    pairs.append(False)
                                else:
                                    pairs.append(True)
                                top_rel_pos = np.array(top.pose.pos) - np.array(bottom.pose.pos)
                                top_rel_com = top_rel_pos + top.com
                                dists.append((np.abs(top_rel_com)*2 - bottom.dimensions)[:2])
                            #print('Pairs:', pairs, dists)
                            
                    #print('PW Stable:', tp.tower_is_pairwise_stable(block_tower))
                    #print('Global Stable:', tp.tower_is_stable(block_tower))
                    
                    if False and reward != 0:
                        print(reward, max_reward)
                        w = World(block_tower)
                        env = Environment([w], vis_sim=True, vis_frames=True)
                        input()
                        for tx in range(240):
                            env.step(vis_frames=True)
                            time.sleep(1/240.)
                        env.disconnect()
                
                    # Note that in general max reward may not be the best possible due to sampling.
                    #ground_truth = np.sum([np.max(b.dimensions) for b in blocks])
                    #print(max_reward, ground_truth)

                    # Compare heights and calculate regret.
                    regret = (max_reward - reward)/max_reward
                    #print(reward, max_reward)
                    #print(regret)
                    curr_regrets.append(regret)
                    curr_rewards.append(reward)

            if args.exec_mode == 'noisy-model' or args.exec_mode == 'simple-model':
                regrets[k].append(curr_regrets)
                rewards[k].append(curr_rewards)

        if args.max_acquisitions is not None:
            if args.exec_mode == 'noisy-model' or args.exec_mode == 'simple-model':
                with open(logger.get_figure_path(fname+'_regrets.pkl'), 'wb') as handle:
                    pickle.dump(regrets, handle)
                    
                with open(logger.get_figure_path(fname+'_rewards.pkl'), 'wb') as handle:
                    pickle.dump(rewards, handle)
            
    # if just ran for one acquisition step, output final regret and reward
    if args.acquisition_step is not None:
        if args.exec_mode == 'noisy-model' or args.exec_mode == 'simple-model':
            final_median_regret = np.median(regrets[k][0])
            final_upper75_regret = np.quantile(regrets[k][0], 0.75)
            # BUG FIX: previously np.quantile(regrets[k][0][0], 0.25) took the
            # quantile of a single scalar (the first regret), not the list.
            final_lower25_regret = np.quantile(regrets[k][0], 0.25)
            
            final_median_reward = np.median(rewards[k][0])
            final_upper75_reward = np.quantile(rewards[k][0], 0.75)
            final_lower25_reward = np.quantile(rewards[k][0], 0.25)
            
            final_average_regret = np.average(regrets[k][0])
            final_std_regret = np.std(regrets[k][0])
            
            final_average_reward = np.average(rewards[k][0])
            final_std_reward = np.std(rewards[k][0])
            
            print('Final Median Regret: (%f) %f (%f)' % (final_lower25_regret, final_median_regret, final_upper75_regret))
            print('Final Median Reward: (%f) %f (%f)' % (final_lower25_reward, final_median_reward, final_upper75_reward))
            
            print('Final Average Regret: %f +/- %f' % (final_average_regret, final_std_regret))
            print('Final Average Reward: %f +/- %f' % (final_average_reward, final_std_reward))
Ejemplo n.º 8
0
def inspect_2block_towers(logger):
    """
    In the full dataset, show the distribution of features.

    Debug/inspection utility: samples unlabeled towers, ranks them by BALD
    score, prints predictions and decision distances for the most informative
    2-block towers, then reports validation accuracy per tower size.
    """
    tower_keys = ['2block', '3block', '4block', '5block']

    # dataset = logger.load_dataset(logger.args.max_acquisitions - 1)
    # print(dataset.tower_tensors['2block'].shape)
    # plt.hist(dataset.tower_tensors['2block'][:,1,8], bins=10)
    # plt.show()

    #ensemble = logger.get_ensemble(10)
    ensemble = logger.get_ensemble(logger.args.max_acquisitions - 1)
    unlabeled = sample_unlabeled_data(10000)
    preds = get_predictions(unlabeled, ensemble)
    bald_scores = bald(preds).numpy()
    print('Best BALD')
    ixs = np.argsort(bald_scores)[::-1][:10]
    print(bald_scores[ixs])
    input()
    tp = TowerPlanner(stability_mode='contains')
    # The 2-block towers occupy the first rows of the concatenated predictions.
    preds2 = preds[:unlabeled['2block']['towers'].shape[0], :]
    bald_scores2 = bald_scores[:unlabeled['2block']['towers'].shape[0]]
    acquire_indices = np.argsort(bald_scores2)[::-1][:50]
    # for ix in range(preds2.shape[0]):
    #     print(np.around(preds2[ix,:].numpy(), 2), np.around(bald_scores2[ix], 3))
    print('-----')
    for ix in acquire_indices:
        d = decision_distance(unlabeled['2block']['towers'][ix,:,:])
        tower = unlabeled['2block']['towers'][ix,:,:]
        l = tp.tower_is_constructable([Object.from_vector(tower[bx, :]) for bx in range(tower.shape[0])])
        print(np.around(preds2[ix,:].numpy(), 4), np.around(bald_scores2[ix], 3), d, l)

    # NOTE(review): adding 0.0 is a no-op — looks like a placeholder for a
    # manual perturbation experiment; confirm before removing.
    for ix in acquire_indices:
        unlabeled['2block']['towers'][ix,1,7:8] += 0.0
    new_preds = get_predictions(unlabeled, ensemble)
    print('-----')
    for ix in acquire_indices:
        d = decision_distance(unlabeled['2block']['towers'][ix,:,:])
        print(np.around(new_preds[ix,:].numpy(), 2))
    plt.hist(unlabeled['2block']['towers'][acquire_indices,1,0])
    plt.show()
    print('-----')
    start = 0
    for k in tower_keys:
        end = start + unlabeled[k]['towers'].shape[0]
        p, b = preds[start:end, :], bald_scores[start:end]
        informative = b[b > 0.3]
        print(p.shape, informative.shape)
        # BUG FIX: previously `start` was never advanced, so every tower size
        # printed the slice belonging to the 2-block towers. Mirror the
        # analogous accuracy loop below.
        start = end

    accs = {k: [] for k in tower_keys}
    with open('learning/data/random_blocks_(x2000)_5blocks_uniform_mass.pkl', 'rb') as handle:
        val_towers = pickle.load(handle)

    preds = get_predictions(val_towers, ensemble).mean(1).numpy()
    dists = []
    for ix in range(0, val_towers['2block']['towers'].shape[0]):
        d = decision_distance(val_towers['2block']['towers'][ix,:,:])
        dists.append(d)
    print(len(dists))
    plt.hist(dists, bins=100)
    plt.show()

    start = 0
    for k in tower_keys:
        end = start + val_towers[k]['towers'].shape[0]
        acc = ((preds[start:end]>0.5) == val_towers[k]['labels']).mean()
        accs[k].append(acc)
        start = end
    print(accs)
Ejemplo n.º 9
0
class EnsemblePlanner:
    """Plans the best tower from a block set by scoring sampled candidate
    towers with a stability model (learned ensemble, noisy simulation of a
    TowerPlanner, or the TowerPlanner directly) and picking the tower with the
    highest expected reward."""

    def __init__(self, logger, n_samples=None):
        # NOTE(review): the n_samples parameter is accepted but never used —
        # the per-size sample counts below always apply. Confirm whether it
        # should override this table.
        self.tower_keys = ['2block', '3block', '4block', '5block']
        # Number of candidate towers to sample, keyed by tower size.
        self.n_samples = {
            2: 5000,
            3: 10000,
            4: 20000,
            5: 100000,
            6: 250000,
            7: 500000
        }
        self.tp = TowerPlanner(stability_mode='contains')
        self.logger = logger
        # True once cached towers have been loaded from disk.
        self.using_cache = False

    def get_cached_towers(self, args, num_blocks, blocks, n_tower):
        """Try to load previously sampled candidate towers from the on-disk
        cache for this block set; return (None, None) on any miss."""
        key = '%dblock' % num_blocks
        if not self.using_cache:
            if args.block_set_fname == '':
                cache_name = 'random_set.pkl'
            else:
                cache_name = os.path.basename(args.block_set_fname)

            # see if cache for this block set exists
            try:
                with open(
                        os.path.join('learning/evaluate/cached_towers',
                                     cache_name), 'rb') as handle:
                    block_set_towers = pickle.load(handle)
            # NOTE(review): bare except treats any failure (not just a missing
            # file) as a cache miss; consider narrowing to (OSError,
            # pickle.UnpicklingError).
            except:
                return None, None

            # if it cached file exists, save it
            self.cached_towers = block_set_towers
            if (key in block_set_towers) and \
              (n_tower in block_set_towers[key]) and \
              (len(block_set_towers[key][n_tower][0]) >= self.n_samples[num_blocks]):
                print('Using random planning towers from cache for tower %d' %
                      n_tower)
                self.using_cache = True
                # NOTE(review): this slices the (towers, block_ids) *tuple*,
                # not the towers themselves — since n_samples >= 2 it returns
                # the whole pair untruncated. Probably intended to take the
                # first n_samples of each array; confirm before changing.
                return self.cached_towers[key][n_tower][:self.
                                                        n_samples[num_blocks]]
            else:
                return None, None
        # if already loaded cache, get relevant towers
        else:
            try:
                # NOTE(review): same tuple-slice pattern as above — verify the
                # intended truncation semantics.
                towers, block_ids = self.cached_towers[key][
                    n_tower][:self.n_samples[num_blocks]]
            except:
                return None, None
            print(
                'Using saved random planning towers from cache for tower %d' %
                n_tower)
            return towers, block_ids

    # NOTE: this will not cache towers if some towers have already previously been found in the cache
    def cache_towers(self, args, towers, tower_block_ids, n_tower):
        """Persist freshly sampled candidate towers to the on-disk cache,
        merging into any existing cache file for this block set."""
        if not self.using_cache:
            if args.block_set_fname == '':
                cache_name = 'random_set.pkl'
            else:
                cache_name = os.path.basename(args.block_set_fname)

            num_blocks = len(towers[0])
            # see if cache exists
            try:
                with open(
                        os.path.join('learning/evaluate/cached_towers',
                                     cache_name), 'rb') as handle:
                    block_set_towers = pickle.load(handle)
            # Any read failure starts a fresh cache dict.
            except:
                block_set_towers = {}
            if '%dblock' % num_blocks not in block_set_towers:
                block_set_towers['%dblock' % num_blocks] = {}
            print('Saving randomly generated towers for tower %d' % n_tower)
            block_set_towers['%dblock' %
                             num_blocks][n_tower] = (towers, tower_block_ids)

            with open(
                    os.path.join('learning/evaluate/cached_towers',
                                 cache_name), 'wb') as handle:
                pickle.dump(block_set_towers, handle)

    def generate_candidate_towers(self,
                                  blocks,
                                  args,
                                  num_blocks=None,
                                  n_tower=None):
        """Sample n_samples[num_blocks] random candidate towers from the block
        set; returns (tower_vectors, tower_block_ids).

        The cache lookup/save calls are currently disabled (commented out).
        """
        if num_blocks is None:
            num_blocks = len(blocks)
        #tower_vectors, tower_block_ids = self.get_cached_towers(args, num_blocks, blocks, n_tower)
        tower_vectors = None
        if tower_vectors is None:
            tower_vectors = []
            tower_block_ids = []
            for _ in range(0, self.n_samples[num_blocks]):
                tower, rotated_tower = sample_random_tower(blocks, num_blocks=num_blocks, \
                                            ret_rotated=True, discrete=args.discrete)
                tower_vectors.append([b.vectorize() for b in rotated_tower])
                tower_block_ids.append([b.get_id() for b in rotated_tower])
            #self.cache_towers(args, tower_vectors, tower_block_ids, n_tower)
        return tower_vectors, tower_block_ids

    def plan(self,
             blocks,
             ensemble,
             reward_fn,
             args,
             num_blocks=None,
             n_tower=None):
        """Pick the candidate tower with the highest expected reward
        p(stable) * reward.

        :param blocks: block set to sample candidate towers from.
        :param ensemble: learned stability model (used when
            args.planning_model == 'learned').
        :param reward_fn: maps a vectorized tower to a scalar reward.
        :param args: namespace providing planning_model, discrete, and (for
            'noisy-model') plan_xy_noise.
        :param num_blocks: tower size; defaults to len(blocks).
        :param n_tower: index of this tower (used for caching).
        :return: (max_tower vectors, its true reward, ground-truth best reward
            among candidates, block ids of max_tower).
        """
        #n = len(blocks)
        #max_height = 0
        #max_tower = []

        # Step (1): Build dataset of potential towers.
        tower_vectors, tower_block_ids = self.generate_candidate_towers(
            blocks, args, num_blocks, n_tower)

        # Step (2): Get predictions for each tower.
        towers = np.array(tower_vectors)
        block_ids = np.array(tower_block_ids)
        if args.planning_model == 'learned':
            # Since we are only planning for towers of a single size,
            # always use the '2block' key for simplicity. The rest currently
            # need at least some data for the code to work.
            labels = np.zeros((towers.shape[0], ))
            tower_dict = {}
            for k in self.tower_keys:
                tower_dict[k] = {}
                if k == '2block':
                    tower_dict[k]['towers'] = towers
                    tower_dict[k]['labels'] = labels
                    tower_dict[k]['block_ids'] = block_ids
                else:
                    # Dummy entries so TowerDataset accepts the dict.
                    tower_dict[k]['towers'] = towers[:5, ...]
                    tower_dict[k]['labels'] = labels[:5]
                    tower_dict[k]['block_ids'] = block_ids[:5, ...]

            tower_dataset = TowerDataset(tower_dict, augment=False)
            tower_sampler = TowerSampler(dataset=tower_dataset,
                                         batch_size=64,
                                         shuffle=False)
            tower_loader = DataLoader(dataset=tower_dataset,
                                      batch_sampler=tower_sampler)
            preds = []
            if hasattr(self.logger.args,
                       'sampler') and self.logger.args.sampler == 'sequential':
                # Sequential model: a tower is stable iff every prefix
                # subtower is predicted stable, so multiply prefix predictions.
                for tensor, _ in tower_loader:
                    sub_tower_preds = []
                    for n_blocks in range(2, tensor.shape[1] + 1):
                        if torch.cuda.is_available():
                            tensor = tensor.cuda()
                        with torch.no_grad():
                            sub_tower_preds.append(
                                ensemble.forward(tensor[:, :n_blocks, :]))
                    sub_tower_preds = torch.stack(sub_tower_preds, dim=0)
                    preds.append(sub_tower_preds.prod(dim=0))
            else:
                for tensor, _ in tower_loader:
                    if torch.cuda.is_available():
                        tensor = tensor.cuda()
                    with torch.no_grad():
                        preds.append(ensemble.forward(tensor))

            # Mean over ensemble members -> per-tower stability probability.
            p_stables = torch.cat(preds, dim=0).mean(dim=1)

        elif args.planning_model == 'noisy-model':
            n_estimate = 10
            p_stables = np.zeros(len(tower_vectors))
            for ti, tower_vec in enumerate(tower_vectors):
                # estimate prob of constructability
                results = np.ones(n_estimate)
                all_stable = 1.
                for n in range(n_estimate):
                    noisy_tower = []
                    for block_vec in tower_vec:
                        noisy_block = deepcopy(block_vec)
                        # Dims 7:9 are the block's x, y position.
                        noisy_block[7:9] += np.random.randn(
                            2) * args.plan_xy_noise
                        noisy_tower.append(noisy_block)
                    block_tower = [
                        Object.from_vector(block) for block in noisy_tower
                    ]
                    if not self.tp.tower_is_constructable(block_tower):
                        results[n] = 0.
                        all_stable = 0.
                        break
                # Binary estimate: 1 only if all noisy trials were stable.
                p_stables[ti] = all_stable  # np.mean(results)

        elif args.planning_model == 'simple-model':
            # Exact constructability check, no noise.
            p_stables = np.zeros(len(tower_vectors))
            for ti, tower_vec in enumerate(tower_vectors):
                block_tower = [
                    Object.from_vector(block) for block in tower_vec
                ]
                if self.tp.tower_is_constructable(block_tower):
                    p_stables[ti] = 1.

        # Step (3): Find the tallest tower of a given height.
        max_reward, max_exp_reward, max_tower, max_stable = -100, -100, None, 0
        ground_truth = -100
        max_reward_block_ids = None
        # NOTE(review): the loop variable tower_block_ids shadows the outer
        # list of the same name; harmless here since the outer one is no
        # longer used, but rename if this method is extended.
        for ix, (p, tower, tower_block_ids) in enumerate(
                zip(p_stables, towers, block_ids)):
            reward = reward_fn(tower)
            exp_reward = p * reward
            if exp_reward >= max_exp_reward:  # and p > 0.5:
                # Tie-break equal expected rewards by higher stability.
                if exp_reward > max_exp_reward or (exp_reward == max_exp_reward
                                                   and p > max_stable):
                    max_tower = tower_vectors[ix]
                    max_reward = reward
                    max_exp_reward = exp_reward
                    max_stable = p
                    max_reward_block_ids = tower_block_ids

            # Check ground truth stability to find maximum reward.
            if self.tp.tower_is_constructable([Object.from_vector(tower[ix,:]) for ix in range(tower.shape[0])]) \
                and reward > ground_truth:
                ground_truth = reward

        if max_tower is None:
            print('None Found')
            max_tower = tower_vectors[0]
            max_reward = reward_fn(towers[0])

        return max_tower, max_reward, ground_truth, max_reward_block_ids
Ejemplo n.º 10
0
class Tallest(Problem):
    """Search problem: build the tallest stable tower of at most `max_height` blocks.

    Children of a search node are candidate placements of the remaining blocks;
    each child is scored with a learned stability model (expected reward) and
    with an analytic stability checker (ground truth).
    """

    def __init__(self, max_height):
        # Number of random placements sampled per available block when expanding.
        self.samples_per_block = 5
        self.max_height = max_height
        # Analytic stability checker used for ground-truth rewards.
        self.tp = TowerPlanner(stability_mode='contains')

    def sample_actions(self, parent_node, model, discrete=False):
        """Expand `parent_node` by sampling candidate placements of remaining blocks.

        :param parent_node: Search-node dict with at least 'tower' and
            'blocks_remaining' entries.
        :param model: Ensemble stability model passed through to reward_fn.
        :param discrete: Forwarded to random_placement.
        :return: List of child node dicts (same schema as parent_node).
        """
        new_towers = []
        new_blocks_remaining = []
        if len(parent_node['tower']) == 0:
            # First action: place each possible block at (0, 0) at a random orientation.
            for block in parent_node['blocks_remaining']:
                for _ in range(self.samples_per_block):
                    tower = random_placement(block, [], discrete=discrete)
                    tower[0].pose = Pose(ZERO_POS, tower[0].pose.orn)
                    blocks_remaining = copy(parent_node['blocks_remaining'])
                    blocks_remaining.remove(block)
                    new_towers.append(tower)
                    new_blocks_remaining.append(blocks_remaining)
        else:
            # Otherwise randomly sample placements of each remaining block
            # on top of the current tower.
            for block in parent_node['blocks_remaining']:
                blocks_remaining = copy(parent_node['blocks_remaining'])
                blocks_remaining.remove(block)
                for _ in range(self.samples_per_block):
                    tower = random_placement(block,
                                             parent_node['tower'],
                                             discrete=discrete)
                    new_towers.append(tower)
                    new_blocks_remaining.append(blocks_remaining)

        all_rewards = self.reward_fn(new_towers, model)

        # A node is terminal when it is predicted unstable (expected reward 0)
        # or the tower has reached the maximum height.
        terms = [
            all_rewards['exp_reward'][i] == 0 or len(tower) == self.max_height
            for i, tower in enumerate(new_towers)
        ]

        new_nodes = []
        for i, (tower, blocks_remaining, term) in enumerate(
                zip(new_towers, new_blocks_remaining, terms)):
            new_nodes.append({
                'parent': None,
                'children': [],
                'term': term,
                'leaf': True,
                'exp_reward': all_rewards['exp_reward'][i],
                'value': 0,
                'count': 0,
                'tower': tower,
                'blocks_remaining': blocks_remaining,
                'tower_height': all_rewards['reward'][i],
                'ground_truth': all_rewards['ground_truth'][i]
            })
        return new_nodes

    def reward_fn(self, towers, model):
        """Score each candidate tower.

        :param towers: List of towers (each a list of block objects). Assumes all
            towers in the batch have the same number of blocks.
        :param model: Ensemble model whose forward output is averaged into a
            per-tower stability probability.
        :return: Dict with parallel lists 'exp_reward' (probability-weighted
            height, zeroed when predicted unstable), 'reward' (raw height), and
            'ground_truth' (height if analytically constructable, else 0).
        """
        all_rewards = {'exp_reward': [], 'reward': [], 'ground_truth': []}
        if len(towers[0]) == 1:
            # Single-block towers are always stable: reward is the block height.
            reward = [tower[0].dimensions[2] for tower in towers]
            all_rewards['exp_reward'] = reward
            all_rewards['reward'] = reward
            all_rewards['ground_truth'] = reward
        else:
            tower_loader = make_tower_dataset(towers)
            # NOTE(review): assumes the loader yields exactly one batch; with more
            # batches only the last one's predictions would be used — confirm.
            for tensor, _ in tower_loader:
                with torch.no_grad():
                    preds = model.forward(tensor)
            p_stables = preds.mean(dim=1)  # average ensemble output

            exp_rewards = []
            rewards = []
            ground_truths = []
            for p, tower in zip(p_stables, towers):
                tower_height = np.sum([block.dimensions[2] for block in tower])
                rewards.append(tower_height)
                # Expected reward: zero out towers the model predicts unstable.
                if p > 0.5:  # stable
                    exp_rewards.append(float(p * tower_height))
                else:
                    exp_rewards.append(0)
                # Ground truth from the analytic checker (bool * height).
                ground_truths.append(
                    self.tp.tower_is_constructable(tower) * tower_height)
            all_rewards['exp_reward'] = exp_rewards
            all_rewards['reward'] = rewards
            all_rewards['ground_truth'] = ground_truths
        return all_rewards
Ejemplo n.º 11
0
from block_utils import Object
from learning.domains.towers.generate_tower_training_data import sample_random_tower
from learning.domains.towers.tower_data import TowerDataset, TowerSampler
from tower_planner import TowerPlanner
import pickle
import copy

if __name__ == '__main__':
    # Estimate how often randomly sampled 5-block towers from a fixed
    # evaluation block set are stable according to the analytic checker.
    with open('learning/domains/towers/eval_block_set_12.pkl', 'rb') as handle:
        blocks = pickle.load(handle)
    tp = TowerPlanner(stability_mode='contains')

    n_stable = 0
    for _ in range(10000):
        # Deep-copy so the sampler can mutate block poses without
        # corrupting the original block set across iterations.
        copy_blocks = copy.deepcopy(blocks)
        # Only the rotated tower is needed for the stability check.
        _, rotated_tower = sample_random_tower(copy_blocks, num_blocks=5,
                                               ret_rotated=True, discrete=False)
        # tower_is_constructable returns a bool; True counts as 1.
        n_stable += tp.tower_is_constructable(rotated_tower)

    print(f"n_stable: {n_stable}")