Example #1
0
    def __init__(self, data_root, split, image_ext='.jpg'):
        """Sample a fixed set of (task, action) pairs from the PHYRE cache.

        For every task of the configured fold, up to ``num_pos`` cached
        actions with status ``1`` and up to ``num_neg`` with status ``-1``
        are drawn at random. The result is stored in ``self.video_info``,
        one row per sample: ``[task index, action components...]``.

        Args:
            data_root: Stored as ``self.data_root``.
            split: ``'train'`` selects the train + dev tasks, the training
                prediction length and the larger sample budget; any other
                value selects the test tasks and the test settings.
            image_ext: Stored as ``self.image_ext``.
        """
        self.data_root = data_root
        self.split = split
        self.image_ext = image_ext
        is_train = split == 'train'

        self.input_size = C.RPIN.INPUT_SIZE  # number of input images
        # Plain attribute lookup; no need to eval() a formatted string.
        self.pred_size = getattr(
            C.RPIN, f'PRED_SIZE_{"TRAIN" if is_train else "TEST"}')
        self.seq_size = self.input_size + self.pred_size
        self.input_height, self.input_width = C.RPIN.INPUT_HEIGHT, C.RPIN.INPUT_WIDTH

        protocal = C.PHYRE_PROTOCAL
        fold = C.PHYRE_FOLD

        # Per-task sampling budget for positive / negative actions.
        num_pos = 400 if is_train else 100
        num_neg = 1600 if is_train else 400

        eval_setup = f'ball_{protocal}_template'
        train_tasks, dev_tasks, test_tasks = phyre.get_fold(eval_setup, fold)
        tasks = (train_tasks + dev_tasks) if is_train else test_tasks
        action_tier = phyre.eval_setup_to_action_tier(eval_setup)

        # all the actions
        cache = phyre.get_default_100k_cache('ball')
        training_data = cache.get_sample(tasks, None)
        # (100000 x 3)
        actions = training_data['actions']
        # (num_tasks x 100000)
        sim_statuses = training_data['simulation_statuses']

        self.simulator = phyre.initialize_simulator(tasks, action_tier)

        # Collect the per-task tables and join them once at the end instead
        # of re-copying the growing array on every iteration. The empty
        # seed keeps the (0, 4) result when there are no tasks.
        per_task_info = [np.zeros((0, 4))]
        for t_id, _task in enumerate(tqdm(tasks)):
            sim_status = sim_statuses[t_id]
            # Boolean-mask indexing already returns a fresh array, so the
            # in-place shuffles below never touch ``actions``.
            pos_acts = actions[sim_status == 1]
            neg_acts = actions[sim_status == -1]
            np.random.shuffle(pos_acts)
            np.random.shuffle(neg_acts)
            acts = np.concatenate([pos_acts[:num_pos], neg_acts[:num_neg]])

            video_info = np.zeros((acts.shape[0], 4))
            video_info[:, 0] = t_id
            video_info[:, 1:] = acts
            per_task_info.append(video_info)
        self.video_info = np.concatenate(per_task_info)
def train_kde(tasks, tier):
    """Fit one Gaussian KDE per task template over cached solving actions.

    Args:
        tasks: Iterable of task ids of the form ``"<template>:<variant>"``.
        tier: Action tier name passed to ``phyre.get_default_100k_cache``.

    Returns:
        Dict mapping the template part of the task id (the text before the
        first ``":"``) to a ``gaussian_kde`` fitted with Silverman's
        bandwidth on all cached actions marked ``SOLVED`` for that template.
    """
    cache = phyre.get_default_100k_cache(tier)

    # Gather the solved-action arrays per template and join each group once;
    # concatenating inside the loop re-copies the accumulated data per task.
    solved_chunks = {}
    for task_id in tasks:
        task_type = task_id.split(":")[0]
        statuses = cache.load_simulation_states(task_id)
        solved_actions = cache.action_array[
            statuses == phyre.simulation_cache.SOLVED, :]
        solved_chunks.setdefault(task_type, []).append(solved_actions)

    # gaussian_kde expects (dims, samples), hence the transpose.
    return {
        task_type: gaussian_kde(np.transpose(np.concatenate(chunks, 0)),
                                bw_method="silverman")
        for task_type, chunks in solved_chunks.items()
    }
def count_ball_sizes(task_ids, tier, ball_sizes, num_pos):
    """Count tasks that stay solvable with a restricted set of ball sizes.

    For every task, the third component of each cached solving action is
    replaced by the closest value in ``ball_sizes``. The modified actions
    are then re-simulated until one of them solves the task.

    Args:
        task_ids: Sequence of task ids to evaluate.
        tier: Action tier name used for both the cache and the simulator.
        ball_sizes: 1-D array-like of allowed values for the third action
            component.
        num_pos: Unused; kept so existing callers keep working.

    Returns:
        Number of tasks for which at least one snapped action is solved.
    """
    cache = phyre.get_default_100k_cache(tier)
    simulator = phyre.initialize_simulator(task_ids, tier)
    ball_sizes = np.asarray(ball_sizes)
    num_solved = 0
    for task_index, task_id in tqdm(enumerate(task_ids),
                                    desc='Evaluate Tasks',
                                    total=len(task_ids)):
        statuses = cache.load_simulation_states(task_id)
        # Boolean-mask indexing yields a copy, so the cache is not modified.
        solved_actions = cache.action_array[
            statuses == phyre.simulation_cache.SOLVED, :]
        # Index of the nearest allowed size for every action.
        nearest = np.abs(solved_actions[:, 2][None, :] -
                         ball_sizes[:, None]).argmin(axis=0)
        solved_actions[:, 2] = ball_sizes[nearest]
        # any() stops at the first action that still solves the task.
        if any(
                simulator.simulate_action(
                    task_index, solved_action,
                    need_images=False).status.is_solved()
                for solved_action in solved_actions):
            num_solved += 1
    return num_solved
Example #4
0
    def gen_proposal(self, start_id=0, end_id=25):
        """Render ground-truth and predicted rollouts for sampled actions.

        For each task of the configured fold whose template id lies in
        ``[start_id, end_id)``, up to 200 solving and 800 non-solving cached
        actions are simulated. The resized ground-truth frames and the
        frames rendered from ``self.generate_trajs`` are written to
        ``data/PHYRE_proposal/<proposal_dir>/<split>/<pos|neg>/<task>/``.
        Sampled actions and first-frame boxes are cached per task under
        ``data/PHYRE_proposal/cache``.

        Args:
            start_id: First template id (inclusive) to process.
            end_id: Last template id (exclusive) to process.
        """
        random.seed(0)
        np.random.seed(0)
        protocal = C.PHYRE_PROTOCAL
        fold_id = C.PHYRE_FOLD
        print(f'generate proposal for {protocal} fold {fold_id}')
        max_p_acts, max_n_acts, max_acts = 200, 800, 100000
        self.proposal_dir = f'{self.output_dir.split("/")[-1]}_' \
                            f'p{max_p_acts}n{max_n_acts}a{max_acts // 1000}'
        eval_setup = f'ball_{protocal}_template'
        action_tier = phyre.eval_setup_to_action_tier(eval_setup)
        train_tasks, dev_tasks, test_tasks = phyre.get_fold(
            eval_setup, fold_id)
        # filter task
        train_tasks = train_tasks + dev_tasks
        candidates = {f'{i:05d}' for i in range(start_id, end_id)}

        # Build both task lists once and select by key; this replaces the
        # eval(f'{split}_list') lookups and the per-iteration rebuild.
        tasks_by_split = {
            'train': [
                task for task in train_tasks
                if task.split(':')[0] in candidates
            ],
            'test': [
                task for task in test_tasks
                if task.split(':')[0] in candidates
            ],
        }

        cache_dir = 'data/PHYRE_proposal/cache'
        # Scene-pixel -> network-input-pixel scale factors (loop invariant).
        scale_x = (self.input_width - 1) / (phyre.SCENE_WIDTH - 1)
        scale_y = (self.input_height - 1) / (phyre.SCENE_HEIGHT - 1)

        for split in ['train', 'test']:
            split_tasks = tasks_by_split[split]
            if not split_tasks:
                # NOTE(review): an empty 'train' list also skips 'test';
                # kept as in the original - confirm this is intended.
                return

            simulator = phyre.initialize_simulator(split_tasks, action_tier)
            cache = phyre.get_default_100k_cache('ball')
            training_data = cache.get_sample(split_tasks, None)
            actions = cache.action_array[:max_acts]

            for task_id, task in enumerate(tqdm(split_tasks, 'Task')):
                task_tag = task.replace(":", "_")
                box_cache_name = f'{cache_dir}/{task_tag}_box.hkl'
                act_cache_name = f'{cache_dir}/{task_tag}_act.hkl'
                use_cache = os.path.exists(box_cache_name) and os.path.exists(
                    act_cache_name)
                if use_cache:
                    acts = hickle.load(act_cache_name)
                    all_boxes = hickle.load(box_cache_name)
                    num_pos_acts = None  # not needed: status checks skipped
                else:
                    sim_statuses = training_data['simulation_statuses'][
                        task_id]
                    pos_acts = actions[sim_statuses == 1]
                    neg_acts = actions[sim_statuses == -1]
                    np.random.shuffle(pos_acts)
                    np.random.shuffle(neg_acts)
                    pos_acts = pos_acts[:max_p_acts]
                    neg_acts = neg_acts[:max_n_acts]
                    num_pos_acts = len(pos_acts)
                    acts = np.concatenate([pos_acts, neg_acts])
                    # The dump fails if the cache folder does not exist yet.
                    os.makedirs(cache_dir, exist_ok=True)
                    hickle.dump(acts,
                                act_cache_name,
                                mode='w',
                                compression='gzip')
                    all_boxes = []

                for act_id, act in enumerate(
                        tqdm(acts, 'Candidate Action', leave=False)):
                    sim = simulator.simulate_action(
                        task_id,
                        act,
                        stride=60,
                        need_images=True,
                        need_featurized_objects=True)
                    if not use_cache:
                        # Freshly sampled actions are ordered positives
                        # first; the simulator must agree with the cache.
                        if act_id < num_pos_acts:
                            assert sim.status == phyre.SimulationStatus.SOLVED
                        else:
                            assert sim.status == phyre.SimulationStatus.NOT_SOLVED

                    assert sim.status != phyre.SimulationStatus.INVALID_INPUT
                    raw_images = sim.images

                    # Ground-truth frames: resized and flipped vertically.
                    rst_images = np.stack([
                        np.ascontiguousarray(
                            cv2.resize(rst_image,
                                       (self.input_width, self.input_height),
                                       interpolation=cv2.INTER_NEAREST)[::-1])
                        for rst_image in raw_images
                    ])

                    # prepare input for network:
                    image = cv2.resize(raw_images[0],
                                       (self.input_width, self.input_height),
                                       interpolation=cv2.INTER_NEAREST)
                    image = phyre.observations_to_float_rgb(image)
                    # parse object: drop objects whose colour name contains
                    # BLACK or PURPLE
                    objs_color = sim.featurized_objects.colors
                    objs_valid = [('BLACK' not in obj_color)
                                  and ('PURPLE' not in obj_color)
                                  for obj_color in objs_color]
                    objs = sim.featurized_objects.features[:, objs_valid, :]
                    objs_color = np.array(objs_color)[objs_valid]
                    num_objs = objs.shape[1]

                    if use_cache:
                        # Cached boxes are stored in action order.
                        boxes = all_boxes[act_id]
                    else:
                        boxes = np.zeros((1, num_objs, 5))
                        for o_id in range(num_objs):
                            mask = phyre.objects_util.featurized_objects_vector_to_raster(
                                objs[0][[o_id]])
                            mask_im = phyre.observations_to_float_rgb(mask)
                            mask_im[mask_im == 1] = 0
                            mask_im = mask_im.sum(-1) > 0

                            [h, w] = np.where(mask_im)
                            x1, x2, y1, y2 = w.min(), w.max(), h.min(), h.max()
                            boxes[0, o_id] = [
                                o_id, x1 * scale_x, y1 * scale_y,
                                x2 * scale_x, y2 * scale_y
                            ]
                        all_boxes.append(boxes)

                    data = image.transpose((2, 0, 1))[None, None, :]
                    data = torch.from_numpy(data.astype(np.float32))
                    rois = torch.from_numpy(boxes[..., 1:].astype(
                        np.float32))[None, :]

                    # Background = first frame with these ids zeroed.
                    bg_image = rst_images[0].copy()
                    for fg_id in [1, 2, 3, 5]:
                        bg_image[bg_image == fg_id] = 0
                    pred_boxes, pred_masks = self.generate_trajs(data, rois)
                    rst_masks = np.stack([
                        self.render_mask_to_image(pred_boxes[0, i],
                                                  pred_masks[0, i],
                                                  images=bg_image.copy(),
                                                  color=objs_color).astype(
                                                      np.uint8)
                        for i in range(self.pred_rollout)
                    ])

                    output_dir = f'data/PHYRE_proposal/{self.proposal_dir}/{split}/'
                    output_dir = output_dir + 'pos/' if sim.status == phyre.SimulationStatus.SOLVED else output_dir + 'neg/'
                    output_dir = output_dir + f'{task.replace(":", "_")}/'
                    os.makedirs(output_dir, exist_ok=True)
                    rst_dict = {'gt_im': rst_images, 'pred_im': rst_masks}
                    hickle.dump(rst_dict,
                                f'{output_dir}/{act_id}.hkl',
                                mode='w',
                                compression='gzip')

                if not use_cache:
                    all_boxes = np.stack(all_boxes)
                    hickle.dump(all_boxes,
                                box_cache_name,
                                mode='w',
                                compression='gzip')
Example #5
0
        plt.show()
    if inspect == -2:
        for i in range(len(X)):
            plt.imsave(f"result/flownet/{i}_mask.png", local_masks[i, 0])
    #x = tmp.reshape_as(X)
    return X * local_masks + points.reshape_as(X)


if __name__ == "__main__":
    ## TESTING HANDCRAFTED ACTION EXTRACTOR WITH GROUNDTRUTH ACTION PATH
    # SETUP of phyre simulator
    SAVE_IMAGES = False
    eval_setup = 'ball_within_template'
    fold_id = 0
    train_tasks, dev_tasks, test_tasks = phyre.get_fold(eval_setup, fold_id)
    cache = phyre.get_default_100k_cache("ball")
    actions = cache.action_array
    print(cache.task_ids)
    tasks = train_tasks  #+dev_tasks+test_tasks
    print(f"{len(tasks)} tasks")
    sim = phyre.initialize_simulator(tasks, 'ball')
    init_scenes = sim.initial_scenes
    X = T.tensor(scenes_to_channels(init_scenes)).float()
    print("Init Scenes Shape:\n", X.shape)

    # COLLECT action path
    action_paths = []
    for i, t in enumerate(tasks):
        while True:
            action = actions[cache.load_simulation_states(t) == 1]
            if len(action) == 0:
# Timing script fragment: each section below sets t0, runs one step and
# prints the elapsed wall-clock time. The first t0, task_dict, task_str,
# tier and stride are defined outside this excerpt.

# Step 1: look up the compiled task for task_str.
task = task_dict[task_str]
t1 = time.time()
print(t1-t0,"Load Single Task Time")

# Step 2: simulate the task with a default-constructed UserInput and keep
# the rendered frames.
t0 = time.time()
_, _, images,_ = phyre.simulator.magic_ponies(task, phyre.simulator.scene_if.UserInput(),need_images=True,stride = stride)
t1 = time.time()
print(t1-t0,"Sim Time")

# Step 3: extract the object/goal sequence from the frames.
t0 = time.time()
seq_data = ImgToObj.getObjectAndGoalSequence(images)
t1 = time.time()
print(t1-t0,"Sequence Contour Finding Time")

# Step 4: load the 100k action cache and this task's simulation statuses.
t0 = time.time()
cache = phyre.get_default_100k_cache(tier)
statuses = cache.load_simulation_states(task_str)
t1 = time.time()
print(t1-t0,"Cache Load Time")


# Step 5 (continues past this excerpt): set up counters and the cached
# action list as plain Python lists.
t0 = time.time()
good_actions = []
discrete_actions = cache.action_array.tolist()
good_action_count = 0
solved_action_count = 0

# Use the dynamic-goal layer value if it occurs in the first frame,
# otherwise fall back to the static-goal layer value.
goal_type = ImgToObj.Layer.dynamic_goal.value
if goal_type not in images[0]:
  goal_type = ImgToObj.Layer.static_goal.value
    return parser.parse_args()


# Script entry point: parse the command line, create the output folder and
# define the training configuration. The code that consumes these values
# is not part of this excerpt.
if __name__ == '__main__':
    args = arg_parse()
    output_dir = f'outputs/phys/PHYRECls/within/{args.output}'
    os.makedirs(output_dir, exist_ok=True)

    action_tier_name = 'ball'

    # Fold 1 of the within-template setup: the train and dev splits are
    # merged into the training set, the test split is bound to
    # dev_tasks_ids.
    eval_setup = 'ball_within_template'
    train_tasks, dev_tasks, test_ids = phyre.get_fold(eval_setup, 1)
    task_ids = train_tasks + dev_tasks
    dev_tasks_ids = test_ids

    cache = phyre.get_default_100k_cache(action_tier_name)

    # Hyper-parameters (meanings presumably follow the PHYRE neural-agent
    # options of the same names - TODO confirm against the consumer code).
    train_batch_size = 64
    learning_rate = 0.0003
    max_train_actions = None
    updates = 100000
    negative_sampling_prob = 1.0
    save_checkpoints_every = 10000
    fusion_place = 'last'
    network_type = 'resnet18'
    balance_classes = 1
    num_auccess_actions = 10000
    eval_every = 20000
    action_layers = 1
    action_hidden_size = 256
    cosine_scheduler = 1
Example #8
0
 def train(cls, task_ids: TaskIds, tier: str, **kwargs) -> State:
     """Train on the default 100k action cache of ``tier``.

     Loads the cache and forwards it, together with ``task_ids``, ``tier``
     and any extra keyword arguments, to ``cls._train_with_cache``.
     """
     default_cache = phyre.get_default_100k_cache(tier)
     return cls._train_with_cache(default_cache,
                                  task_ids,
                                  tier=tier,
                                  **kwargs)
    def train(cls, train_task_ids, dev_task_ids, full_eval_fn, output_dir,
              summary_writer, cfg):
        """Load or train the forward model described by ``cfg``.

        If ``cfg.agent.weights_folder`` is set, the model is loaded from
        that folder (the value ``'last'`` is replaced by ``output_dir``) and
        no training happens. Otherwise a ``PhyreDataset`` is built from the
        cached simulations of ``train_task_ids`` and ``trainer.train`` is
        run on it.

        Returns:
            Dict with keys ``model``, ``trainer``, ``num_actions`` and
            ``cache``.
        """
        cache = phyre.get_default_100k_cache(cfg.tier)
        trainer = hydra.utils.instantiate(cfg.agent.trainer)
        num_actions = (cfg.max_train_actions
                       if cfg.max_train_actions else len(cache))

        def bundle(model):
            # State handed both to the evaluation callback and the caller.
            return dict(model=model,
                        trainer=trainer,
                        num_actions=num_actions,
                        cache=cache)

        def full_eval_from_model(model):
            return full_eval_fn(bundle(model))

        init_model = trainer.gen_model(cfg)

        # Pre-trained weights requested: load them and skip training.
        if cfg.agent.weights_folder is not None:
            if cfg.agent.weights_folder == 'last':
                # Just use the last trained model
                cfg.agent.weights_folder = output_dir
            return bundle(
                trainer.load_agent_from_folder(init_model,
                                               cfg.agent.weights_folder))

        # Optional warm start; strict=False is forwarded to the loader.
        if cfg.agent.init_weights_folder is not None:
            init_model = trainer.load_agent_from_folder(
                init_model, cfg.agent.init_weights_folder, strict=False)

        loader_cfg = cfg.train.data_loader
        training_data = cache.get_sample(train_task_ids,
                                         loader_cfg.max_train_actions)
        task_indices, is_solved, actions, _, _ = (
            neural_agent.compact_simulation_data_to_trainset(
                cfg.tier, **training_data))

        obj_fwd_model = None
        if loader_cfg.fwd_model.use_obj_fwd_model:
            obj_fwd_model = obj_fwd_agent.ObjTrainer.gen_model(cfg)
            if loader_cfg.fwd_model.weights is not None:
                obj_fwd_model = trainer.load_agent_from_folder(
                    obj_fwd_model, loader_cfg.fwd_model.weights)
            # Unwrap ``.module`` and move the model to the CPU before it is
            # handed to the dataset.
            obj_fwd_model = obj_fwd_model.module.cpu()

        dataset = PhyreDataset(
            cfg.tier,
            train_task_ids,
            task_indices,
            is_solved,
            actions,
            cfg.simulator,
            mode='train',
            balance_classes=loader_cfg.balance_classes,
            hard_negatives=loader_cfg.hard_negatives,
            init_clip_ratio_to_sim=cfg.train.init_clip_ratio_to_sim,
            frames_per_clip=cfg.train.frames_per_clip,
            n_hist_frames=cfg.train.n_hist_frames,
            shuffle_indices=cfg.train.shuffle_indices,
            drop_objs=cfg.train.drop_objs,
            obj_fwd_model=obj_fwd_model)
        trained_model = trainer.train(init_model, dataset, output_dir,
                                      summary_writer, full_eval_from_model,
                                      cfg)
        return bundle(trained_model)
Example #10
0
 def train(cls, task_ids, tier, simulation_cache_size=None, **kwargs):
     """Train on the default 100k action cache of ``tier``.

     Args:
         task_ids: Task ids forwarded to ``cls._train_with_cache``.
         tier: Action tier name; selects the cache and is forwarded.
         simulation_cache_size: Must be ``None``; other cache sizes are
             not supported.
         **kwargs: Forwarded unchanged to ``cls._train_with_cache``.

     Raises:
         AssertionError: If ``simulation_cache_size`` is not ``None``.
     """
     # Raised explicitly (same exception type as before) so the check is
     # not stripped when Python runs with -O.
     if simulation_cache_size is not None:
         raise AssertionError('Non-default cache size is not supported.')
     cache = phyre.get_default_100k_cache(tier)
     return cls._train_with_cache(cache, task_ids, tier=tier, **kwargs)
def _rollout_score(images, goal):
    """Return the heuristic score of a simulated rollout.

    The score is one minus the last-frame distance between the object and
    goal centroids (divided by 256), plus the value reported by
    ``ImgToObj.objectTouchGoalSequence`` divided by ``goal``.
    """
    seq_data = ImgToObj.getObjectAndGoalSequence(images)
    centroid_gap = np.linalg.norm(seq_data['object'][-1]['centroid'] -
                                  seq_data['goal'][-1]['centroid'])
    return (1.0 - centroid_gap / 256.0 +
            ImgToObj.objectTouchGoalSequence(images) / goal)


def _simulate_and_score(simulator, evaluator, task_index, action, stride,
                        goal):
    """Simulate ``action``, log the attempt and return ``(score, solved)``.

    Invalid actions yield ``(0, False)``.
    """
    sim_result = simulator.simulate_action(task_index,
                                           action,
                                           need_images=True,
                                           stride=stride)
    evaluator.maybe_log_attempt(task_index, sim_result.status)
    if sim_result.status.is_invalid():
        return 0, False
    return (_rollout_score(sim_result.images, goal),
            sim_result.status.is_solved())


def evaluate_agent(task_ids, tier, solved_actions_pdf):
    """Evaluate a sample-then-refine agent on ``task_ids``.

    Phase 1 draws random actions (75% of the time from the per-template
    KDE in ``solved_actions_pdf`` when one exists), skips most candidates
    that lie close to an already tested action, and simulates those that
    pass ``ImgToObj.check_seq_action_intersect``. Phase 2 starts from the
    best-scoring tested action and refines it with antithetic Gaussian
    perturbations while attempts remain.

    Args:
        task_ids: Sequence of task ids to evaluate.
        tier: Action tier name used to initialise the simulator.
        solved_actions_pdf: Dict mapping template id to an object with a
            ``resample(size=...)`` method.

    Returns:
        Tuple ``(AUCCESS, number of solved tasks, number of tasks)``.
    """
    evaluator = phyre.Evaluator(task_ids)
    simulator = phyre.initialize_simulator(task_ids, tier)
    task_data_dict = phyre.loader.load_compiled_task_dict()
    stride = 100
    eval_stride = 2
    goal = 3.0 * 60.0 / eval_stride
    empty_action = phyre.simulator.scene_if.UserInput()
    tasks_solved = 0
    alpha = 1.0
    N = 5

    for task_index, task_id in enumerate(tqdm(task_ids,
                                              desc='Evaluate tasks')):
        task_type = task_id.split(":")[0]
        # Rollout with the default-constructed (empty) user input.
        _, _, images, _ = phyre.simulator.magic_ponies(task_data_dict[task_id],
                                                       empty_action,
                                                       need_images=True,
                                                       stride=stride)

        # One attempt is logged as NOT_SOLVED before any action is tried.
        evaluator.maybe_log_attempt(task_index,
                                    phyre.simulation_cache.NOT_SOLVED)

        seq_data = ImgToObj.getObjectAndGoalSequence(images)

        goal_type = ImgToObj.Layer.dynamic_goal.value
        if goal_type not in images[0]:
            goal_type = ImgToObj.Layer.static_goal.value

        # Rows: [x, y, r, exclusion radius, score]. The first row is a
        # placeholder that is removed before phase 2.
        tested_actions = np.array([[-1, -1, -1, 1, 0]])

        solved_task = False
        max_score = 0
        # ---- Phase 1: guided random sampling -------------------------
        while evaluator.get_attempts_for_task(
                task_index
        ) < phyre.MAX_TEST_ATTEMPTS and not solved_task and max_score < 1.0:
            random_action = np.random.random_sample((1, 5))
            if task_type in solved_actions_pdf and np.random.random_sample(
            ) >= .25:
                random_action[0, 0:3] = np.squeeze(
                    solved_actions_pdf[task_type].resample(size=1))

            test_action_dist = np.linalg.norm(tested_actions[:, 0:3] -
                                              random_action[:, 0:3],
                                              axis=1)
            # Candidates near a tested action are skipped 25% of the time.
            if np.any(test_action_dist <= tested_actions[:, 3]
                      ) and np.random.random_sample() >= .75:
                continue
            # Pass only the three action components; the previous
            # ``random_action[0:3]`` sliced rows and handed over all five
            # columns.
            if ImgToObj.check_seq_action_intersect(images[0], seq_data,
                                                   stride, goal_type,
                                                   random_action[0, 0:3]):

                sim_result = simulator.simulate_action(
                    task_index,
                    np.squeeze(random_action[:, 0:3]),
                    need_images=True,
                    stride=eval_stride)
                evaluator.maybe_log_attempt(task_index, sim_result.status)
                if not sim_result.status.is_invalid():
                    touch = ImgToObj.objectTouchGoalSequence(
                        sim_result.images)
                    random_action[0, 3] = .1
                    # NOTE(review): the distance term uses ``seq_data`` from
                    # the empty-action rollout, not this simulation - kept
                    # as in the original; confirm this is intended.
                    random_action[0, 4] = 1.0 - np.linalg.norm(
                        seq_data['object'][-1]['centroid'] -
                        seq_data['goal'][-1]['centroid']) / 256.0
                    random_action[0, 4] += touch / goal
                    if random_action[0, 4] > max_score:
                        max_score = random_action[0, 4]
                    tested_actions = np.concatenate(
                        (tested_actions, random_action), 0)
                    solved_task = sim_result.status.is_solved()

        # ---- Phase 2: refinement around the best tested action -------
        if not solved_task and evaluator.get_attempts_for_task(
                task_index) < phyre.MAX_TEST_ATTEMPTS:
            tested_actions = np.delete(tested_actions, 0, 0)
            theta = tested_actions[np.argmax(tested_actions[:, 4]), 0:3]
            # Each round uses up to 2 * N + 1 attempts.
            while evaluator.get_attempts_for_task(
                    task_index
            ) + 2 * N + 1 < phyre.MAX_TEST_ATTEMPTS and not solved_task:
                delta = np.random.normal(0, .2, (N, 3))
                test_actions_pos = theta + delta
                test_actions_neg = theta - delta
                for i in range(N):
                    # Stop as soon as any probe solves the task; previously
                    # a later probe could overwrite the solved flag and the
                    # task could be counted more than once.
                    pos_score, solved_task = _simulate_and_score(
                        simulator, evaluator, task_index,
                        test_actions_pos[i], eval_stride, goal)
                    if solved_task:
                        break
                    neg_score, solved_task = _simulate_and_score(
                        simulator, evaluator, task_index,
                        test_actions_neg[i], eval_stride, goal)
                    if solved_task:
                        break
                    theta = theta + alpha / N * (pos_score -
                                                 neg_score) * delta[i, :]
                else:
                    # No probe solved the task: try the updated estimate.
                    _, solved_task = _simulate_and_score(
                        simulator, evaluator, task_index, theta, eval_stride,
                        goal)

        # Count every task at most once.
        tasks_solved += int(solved_task)

    print(tasks_solved, "Tasks solved out of ", len(task_ids), "Total Tasks")
    return (evaluator.get_aucess(), tasks_solved, len(task_ids))
Example #12
0
def gen_single_task(task_list_item, action_tier_name, cache_name):
    """Simulate sampled actions of one task and dump frames, boxes, masks.

    Up to ``max_p_acts`` cached actions with status ``1`` and ``max_n_acts``
    with status ``-1`` are simulated. For each action the first resized
    frame is saved under ``<cache_name>/images``, all resized frames under
    ``<cache_name>/full``, and the label, per-frame boxes and masks under
    ``<cache_name>/labels``.

    Relies on the module-level names ``max_p_acts``, ``max_n_acts``,
    ``mask_size``, ``input_h`` and ``input_w``.
    """
    # NOTE(review): only ``random`` is seeded here, but the shuffles below
    # use ``np.random`` - confirm whether the sampling should be seeded.
    random.seed(0)
    cache = phyre.get_default_100k_cache('ball')
    training_data = cache.get_sample([task_list_item], None)
    actions = cache.action_array
    sim_statuses = training_data['simulation_statuses'][0]

    simulator = phyre.initialize_simulator([task_list_item], action_tier_name)
    pos_acts = actions[sim_statuses == 1]
    neg_acts = actions[sim_statuses == -1]
    print(
        f'{simulator.task_ids[0].replace(":", "_")}: success: {len(pos_acts)}, fail: {len(neg_acts)}'
    )

    task_id = simulator.task_ids[0]
    im_save_root = f'{cache_name}/images/{task_id.split(":")[0]}/{task_id.split(":")[1]}'
    fim_save_root = f'{cache_name}/full/{task_id.split(":")[0]}/{task_id.split(":")[1]}'
    bm_save_root = f'{cache_name}/labels/{task_id.split(":")[0]}/{task_id.split(":")[1]}'
    for save_root in (im_save_root, fim_save_root, bm_save_root):
        os.makedirs(save_root, exist_ok=True)

    np.random.shuffle(pos_acts)
    np.random.shuffle(neg_acts)
    acts = np.concatenate([pos_acts[:max_p_acts], neg_acts[:max_n_acts]])

    for act_id, action in enumerate(tqdm(acts)):
        sim = simulator.simulate_action(0,
                                        action,
                                        stride=60,
                                        need_images=True,
                                        need_featurized_objects=True)
        images = sim.images
        assert sim.status != 0

        # filter out static objects: drop colours containing BLACK / PURPLE
        keep = [
            not ('BLACK' in colour or 'PURPLE' in colour)
            for colour in sim.featurized_objects.colors
        ]
        objs = sim.featurized_objects.features[:, keep, :]

        num_frames = len(images)
        num_objs = objs.shape[1]
        boxes = np.zeros((num_frames, num_objs, 5))
        masks = np.zeros((num_frames, num_objs, mask_size, mask_size))
        full_images = np.zeros((num_frames, input_h, input_w))

        for im_id, (raw_image, frame_objs) in enumerate(zip(images, objs)):
            frame_h, frame_w = raw_image.shape[0], raw_image.shape[1]
            # Raw-frame pixel -> resized-frame pixel scale factors.
            scale_x = (input_w - 1) / (frame_w - 1)
            scale_y = (input_h - 1) / (frame_h - 1)

            image = cv2.resize(raw_image, (input_w, input_h),
                               interpolation=cv2.INTER_NEAREST)
            if im_id == 0:
                np.save(f'{im_save_root}/{act_id:03d}.npy', image)
            full_images[im_id] = image

            for o_id in range(num_objs):
                # Rasterise this object alone and binarise the render.
                raster = phyre.objects_util.featurized_objects_vector_to_raster(
                    frame_objs[[o_id]])
                obj_mask = phyre.observations_to_float_rgb(raster)
                obj_mask[obj_mask == 1] = 0
                obj_mask = obj_mask.sum(-1) > 0

                rows, cols = np.where(obj_mask)
                assert len(rows) > 0 and len(cols) > 0
                x1, x2 = cols.min(), cols.max()
                y1, y2 = rows.min(), rows.max()

                # Fixed-size mask cropped to the tight bounding box.
                crop = obj_mask[y1:y2 + 1, x1:x2 + 1].astype(np.float32)
                masks[im_id, o_id] = cv2.resize(
                    crop, (mask_size, mask_size)) >= 0.5

                boxes[im_id, o_id] = [
                    o_id, x1 * scale_x, y1 * scale_y, x2 * scale_x,
                    y2 * scale_y
                ]

        # save frames, label, bounding boxes and masks (in this order)
        outputs = (
            (full_images, f'{fim_save_root}/{act_id:03d}_image.hkl'),
            (int(sim.status == 1), f'{bm_save_root}/{act_id:03d}_label.hkl'),
            (boxes, f'{bm_save_root}/{act_id:03d}_boxes.hkl'),
            (masks, f'{bm_save_root}/{act_id:03d}_masks.hkl'),
        )
        for payload, target in outputs:
            hickle.dump(payload, target, mode='w', compression='gzip')
Example #13
0
def evaluate_agent(task_ids, tier, solved_actions_pdf):
    """Try to solve each task by guided random sampling and report AUCCESS.

    For every task the scene is first rolled out with an empty action; that
    rollout is logged as one unsolved attempt and converted by ``ImgToObj``
    into object/goal sequence data.  Candidate actions are then drawn at
    random (with probability 0.75 from ``solved_actions_pdf`` when it has an
    entry for the task's template), filtered with
    ``ImgToObj.check_seq_action_intersect`` and simulated until the task is
    solved or ``phyre.MAX_TEST_ATTEMPTS`` attempts have been logged.

    Args:
        task_ids: Sequence of task id strings; the text before the first
            ':' is used as the key into ``solved_actions_pdf``.
        tier: Action tier handed to ``phyre.initialize_simulator``.
        solved_actions_pdf: Mapping from that key to an object whose
            ``resample(size=1)`` yields one 3-component action sample.

    Returns:
        Tuple ``(evaluator.get_aucess(), tasks_solved, len(task_ids))``.
    """
    evaluator = phyre.Evaluator(task_ids)
    simulator = phyre.initialize_simulator(task_ids, tier)
    task_data_dict = phyre.loader.load_compiled_task_dict()
    stride = 5  # frame stride of the empty-action rollout
    eval_stride = 10  # frame stride of the candidate rollouts
    empty_action = phyre.simulator.scene_if.UserInput()
    tasks_solved = 0

    for task_index, task_id in enumerate(tqdm(task_ids,
                                              desc='Evaluate tasks')):
        task_type = task_id.split(":")[0]
        _, _, images, _ = phyre.simulator.magic_ponies(
            task_data_dict[task_id],
            empty_action,
            need_images=True,
            stride=stride)

        # The empty-action rollout counts as the first (failed) attempt.
        evaluator.maybe_log_attempt(task_index,
                                    phyre.simulation_cache.NOT_SOLVED)

        seq_data = ImgToObj.getObjectAndGoalSequence(images)

        goal_type = ImgToObj.Layer.dynamic_goal.value
        if goal_type not in images[0]:
            goal_type = ImgToObj.Layer.static_goal.value

        # Each row: three action values followed by the radius inside which
        # new samples are considered "already tried".  The seed row sits
        # outside the unit cube, farther than its radius from any sample.
        tested_actions = np.array([[-1, -1, -1, 1]])
        solved_task = False

        while (evaluator.get_attempts_for_task(task_index) <
               phyre.MAX_TEST_ATTEMPTS and not solved_task):
            random_action = np.random.random_sample((1, 4))
            if (task_type in solved_actions_pdf
                    and np.random.random_sample() >= .25):
                random_action[0, 0:3] = np.squeeze(
                    solved_actions_pdf[task_type].resample(size=1))
            # Select columns 0..2 of the single row.  Slicing rows
            # (``random_action[0:3]``) would hand over all four values.
            candidate = random_action[0, 0:3]

            test_action_dist = np.linalg.norm(tested_actions[:, 0:3] -
                                              candidate,
                                              axis=1)
            # Near an earlier attempt: resample a quarter of the time.
            if (np.any(test_action_dist <= tested_actions[:, 3])
                    and np.random.random_sample() >= .75):
                continue
            if not ImgToObj.check_seq_action_intersect(
                    images[0], seq_data, stride, goal_type, candidate):
                continue

            sim_result = simulator.simulate_action(task_index,
                                                   candidate,
                                                   need_images=True,
                                                   stride=eval_stride)
            evaluator.maybe_log_attempt(task_index, sim_result.status)
            if sim_result.status.is_invalid():
                continue

            score = ImgToObj.objectTouchGoalSequence(sim_result.images)
            # A zero score widens the region treated as already explored.
            random_action[0, 3] = .25 * (score == 0) + .1
            tested_actions = np.concatenate((tested_actions, random_action),
                                            0)
            solved_task = sim_result.status.is_solved()
            tasks_solved += solved_task

    print(tasks_solved, "Tasks solved out of ", len(task_ids), "Total Tasks")
    return (evaluator.get_aucess(), tasks_solved, len(task_ids))
def count_good_actions(task_ids, tier):
    """Count the valid random actions tried per task until one solves it.

    Each task is rolled out once with an empty action to obtain the
    object/goal sequence data.  Uniform random actions are then drawn,
    filtered with ``ImgToObj.check_seq_action_intersect`` and simulated;
    sampling for a task stops after 100 valid simulations or at the first
    solving action.

    Args:
        task_ids: Sequence of task ids, used as keys into the compiled task
            dict and as the task list of the simulator.
        tier: Action tier used for both the 100k cache and the simulator.

    Returns:
        A list with one dict per task holding ``'num_good'`` (valid actions
        simulated), ``'num_solved'`` (solving actions found) and
        ``'num_total'`` (number of rows in the cache's ``action_array``).
    """
    cache = phyre.get_default_100k_cache(tier)
    task_data_dict = phyre.loader.load_compiled_task_dict()
    simulator = phyre.initialize_simulator(task_ids, tier)
    # Identical for every task, so it is measured once up front.
    num_total = len(cache.action_array)
    results = []
    stride = 50  # frame stride of the empty-action rollout
    eval_stride = 5  # frame stride of the candidate rollouts
    empty_action = phyre.simulator.scene_if.UserInput()
    max_actions = 100

    for task_index, task_id in enumerate(tqdm(task_ids,
                                              desc='Evaluate tasks')):
        _, _, images, _ = phyre.simulator.magic_ponies(
            task_data_dict[task_id],
            empty_action,
            need_images=True,
            stride=stride)

        seq_data = ImgToObj.getObjectAndGoalSequence(images)

        goal_type = ImgToObj.Layer.dynamic_goal.value
        if goal_type not in images[0]:
            goal_type = ImgToObj.Layer.static_goal.value

        good_action_count = 0
        solved_action_count = 0
        # Each row: three action values followed by a match radius.  The
        # seed row sits outside the unit cube, so it never matches a sample.
        tested_actions = np.array([[-1, -1, -1, 1]])

        while good_action_count < max_actions and solved_action_count <= 0:
            random_action = np.random.random_sample((1, 4))
            # Select columns 0..2 of the single row.  Slicing rows
            # (``random_action[0:3]``) would hand over all four values.
            candidate = random_action[0, 0:3]

            test_action_dist = np.linalg.norm(tested_actions[:, 0:3] -
                                              candidate,
                                              axis=1)
            # Stored radii are 0, so only an exact repeat can match here.
            if (np.any(test_action_dist <= tested_actions[:, 3])
                    and np.random.random_sample() >= .25):
                continue

            if not ImgToObj.check_seq_action_intersect(
                    images[0], seq_data, stride, goal_type, candidate):
                continue

            sim_result = simulator.simulate_action(task_index,
                                                   candidate,
                                                   need_images=True,
                                                   stride=eval_stride)
            if sim_result.status.is_invalid():
                continue

            good_action_count += 1
            random_action[0, 3] = 0.0
            tested_actions = np.concatenate((tested_actions, random_action),
                                            0)
            solved_action_count += sim_result.status.is_solved()

        results.append({
            'num_good': good_action_count,
            'num_solved': solved_action_count,
            'num_total': num_total
        })

    return results
Example #15
0
def _rollout_score(images, touch_norm):
    """Score one rollout from its image sequence.

    The score is ``1 - d / 256``, where ``d`` is the distance between the
    object and goal centroids in the last entry of the sequence data, plus
    ``ImgToObj.objectTouchGoalSequence(images) / touch_norm``.
    """
    seq = ImgToObj.getObjectAndGoalSequence(images)
    gap = np.linalg.norm(seq['object'][-1]['centroid'] -
                         seq['goal'][-1]['centroid'])
    return 1.0 - gap / 256.0 + ImgToObj.objectTouchGoalSequence(
        images) / touch_norm


def count_good_actions(task_ids, tier):
    """Count valid actions tried per task: random search, then refinement.

    Phase 1 draws uniform random actions, filters them with
    ``ImgToObj.check_seq_action_intersect``, simulates them and scores every
    valid rollout.  It stops after 100 valid simulations, at the first
    solving action, or once a score of at least 1.0 is seen.

    Phase 2 runs only if nothing solved the task.  Starting from the
    best-scored phase-1 action it repeatedly evaluates mirrored
    perturbations ``theta +/- delta``, keeps the ``b`` perturbations with
    the highest score and moves ``theta`` along their score differences,
    scaled by ``alpha / (b * std)`` of the scores used.  It stops when the
    remaining simulation budget is too small for another round or an action
    solves the task.

    Args:
        task_ids: Sequence of task ids, used as keys into the compiled task
            dict and as the task list of the simulator.
        tier: Action tier used for both the 100k cache and the simulator.

    Returns:
        A list with one dict per task holding ``'num_good'`` (valid actions
        simulated), ``'num_solved'`` (solving actions found) and
        ``'num_total'`` (number of rows in the cache's ``action_array``).
    """
    cache = phyre.get_default_100k_cache(tier)
    task_data_dict = phyre.loader.load_compiled_task_dict()
    simulator = phyre.initialize_simulator(task_ids, tier)
    # Identical for every task, so it is measured once up front.
    num_total = len(cache.action_array)
    results = []
    stride = 5  # frame stride of the empty-action rollout
    empty_action = phyre.simulator.scene_if.UserInput()
    max_actions = 100
    alpha = 1.0  # step size of the refinement update
    N = 5  # perturbation pairs evaluated per refinement round
    b = 3  # best perturbations used for the update (must be <= N)
    eval_stride = 1
    goal = 3.0 * 60.0 / eval_stride  # normaliser for the touch term

    for task_index, task_id in enumerate(tqdm(task_ids,
                                              desc='Evaluate tasks')):
        _, _, images, _ = phyre.simulator.magic_ponies(
            task_data_dict[task_id],
            empty_action,
            need_images=True,
            stride=stride)

        seq_data = ImgToObj.getObjectAndGoalSequence(images)

        goal_type = ImgToObj.Layer.dynamic_goal.value
        if goal_type not in images[0]:
            goal_type = ImgToObj.Layer.static_goal.value

        good_action_count = 0
        solved_action_count = 0
        tested_actions_count = 0
        # Each row: three action values, a match radius and the score.  The
        # seed row sits outside the unit cube and is dropped before phase 2.
        tested_actions = np.array([[-1, -1, -1, 1, 0]])
        max_score = 0

        # ---- Phase 1: filtered random sampling -------------------------
        while (tested_actions_count < max_actions
               and solved_action_count <= 0 and max_score < 1.0):
            random_action = np.random.random_sample((1, 5))
            # Select columns 0..2 of the single row.  Slicing rows
            # (``random_action[0:3]``) would hand over all five values.
            candidate = random_action[0, 0:3]

            test_action_dist = np.linalg.norm(tested_actions[:, 0:3] -
                                              candidate,
                                              axis=1)
            if (np.any(test_action_dist <= tested_actions[:, 3])
                    and np.random.random_sample() >= .25):
                continue

            if not ImgToObj.check_seq_action_intersect(
                    images[0], seq_data, stride, goal_type, candidate):
                continue

            sim_result = simulator.simulate_action(task_index,
                                                   candidate,
                                                   need_images=True,
                                                   stride=eval_stride)
            if sim_result.status.is_invalid():
                continue

            good_action_count += 1
            tested_actions_count += 1
            random_action[0, 3] = .05
            # Score the rollout produced by this action, not the
            # empty-action rollout held in ``seq_data``.
            random_action[0, 4] = _rollout_score(sim_result.images, goal)
            if random_action[0, 4] > max_score:
                max_score = random_action[0, 4]
            tested_actions = np.concatenate((tested_actions, random_action),
                                            0)
            solved_action_count += sim_result.status.is_solved()

        # ---- Phase 2: refine the best action with mirrored perturbations
        if solved_action_count <= 0:
            tested_actions = np.delete(tested_actions, 0, 0)
            theta = tested_actions[np.argmax(tested_actions[:, 4]), 0:3]
            while (tested_actions_count + 2 * N + 1 < max_actions
                   and solved_action_count <= 0):
                old_theta = np.copy(theta)
                scores = np.zeros((N, 2))  # columns: +delta, -delta
                deltas = np.zeros((N, 3))
                i = 0
                # NOTE(review): if every perturbed action is invalid, neither
                # counter advances and this loop does not end -- confirm that
                # this cannot happen in practice.
                while i < N and tested_actions_count + 2 * N + 1 < max_actions:
                    delta = np.random.normal(0, .2, (1, 3))

                    pair_scores = []
                    for perturbed in (theta + delta, theta - delta):
                        rollout = simulator.simulate_action(
                            task_index,
                            np.squeeze(perturbed),
                            need_images=True,
                            stride=eval_stride)
                        if rollout.status.is_invalid():
                            pair_scores.append(0)
                            continue
                        tested_actions_count += 1
                        good_action_count += 1
                        pair_scores.append(
                            _rollout_score(rollout.images, goal))
                        solved_action_count += rollout.status.is_solved()

                    # Keep the pair only if at least one side was scored.
                    if pair_scores[0] != 0 or pair_scores[1] != 0:
                        deltas[i, :] = delta[0]
                        scores[i, 0] = pair_scores[0]
                        scores[i, 1] = pair_scores[1]
                        i += 1

                max_scores = np.amax(scores, axis=1)
                # kth=-b puts the b largest entries in the last b slots;
                # kth=b would only guarantee the last N - b of them.
                top = np.argpartition(max_scores, -b)[-b:]

                # Guard the same quantity that is used as the divisor.
                spread = np.std(scores[top])
                if spread == 0:
                    print(task_id)
                    step = alpha / (b)
                else:
                    step = alpha / (b * spread)
                for k in top:
                    theta = theta + step * (scores[k, 0] -
                                            scores[k, 1]) * deltas[k, :]

                sim_result = simulator.simulate_action(task_index,
                                                       np.squeeze(theta),
                                                       need_images=True,
                                                       stride=eval_stride)
                if sim_result.status.is_invalid():
                    # Reject an update that leaves the valid action space.
                    theta = old_theta
                else:
                    good_action_count += 1
                    tested_actions_count += 1
                    solved_action_count += sim_result.status.is_solved()

        results.append({
            'num_good': good_action_count,
            'num_solved': solved_action_count,
            'num_total': num_total
        })

    return results
Example #16
0
    def test(self, start_id=0, end_id=25):
        """Score candidate actions on PHYRE test tasks and show running AUCCESS.

        The test tasks of fold ``C.PHYRE_FOLD`` under protocol
        ``C.PHYRE_PROTOCAL`` are restricted to templates whose 5-digit id lies
        in ``range(start_id, end_id)``.  For every remaining task the first
        10000 actions of the default 'ball' cache are simulated; each valid
        action is turned into a resized first frame plus per-object boxes and
        scored in batches through ``self.batch_score``.  After each task the
        running AUCCESS is written to the progress-bar description.

        Side effects visible here: seeds ``random`` and ``np.random`` with 0,
        creates the ``cache`` directory and writes one ``.hkl`` box file per
        task that has none yet.  Nothing is returned.

        Args:
            start_id: First template id (inclusive) to evaluate.
            end_id: Last template id (exclusive) to evaluate.
        """
        random.seed(0)
        np.random.seed(0)
        protocal, fold_id = C.PHYRE_PROTOCAL, C.PHYRE_FOLD
        self.score_model.eval()
        print(f'testing using protocal {protocal} and fold {fold_id}')

        # setup the PHYRE evaluation split
        eval_setup = f'ball_{protocal}_template'
        action_tier = phyre.eval_setup_to_action_tier(eval_setup)
        _, _, test_tasks = phyre.get_fold(eval_setup, fold_id)  # PHYRE setup
        candidate_list = [f'{i:05d}'
                          for i in range(start_id, end_id)]  # filter tasks
        test_list = [
            task for task in test_tasks if task.split(':')[0] in candidate_list
        ]
        simulator = phyre.initialize_simulator(test_list, action_tier)

        # the action candidates are provided by the author of PHYRE benchmark
        num_actions = 10000
        cache = phyre.get_default_100k_cache('ball')
        acts = cache.action_array[:num_actions]
        training_data = cache.get_sample(test_list, None)

        # some statistics variable when doing the evaluation
        # auccess[t, k] is 1 when task t has a solving action among its
        # k + 1 highest-scored actions
        auccess = np.zeros((len(test_list), 100))
        batched_pred = C.SOLVER.BATCH_SIZE
        objs_color = None
        # pending batch; flushed through self.batch_score and then reset
        all_data, all_acts, all_rois, all_image = [], [], [], []

        # cache the initial bounding boxes from the simulator
        os.makedirs('cache', exist_ok=True)

        t_list = tqdm(test_list, 'Task')
        for task_id, task in enumerate(t_list):
            sim_statuses = training_data['simulation_statuses'][task_id]
            # confs: scores from batch_score; successes: solved flags.
            # Both get one entry per valid action, in action order.
            confs, successes = [], []

            # boxes are loaded from disk when a file for this task exists,
            # otherwise computed below and written after the action loop
            boxes_cache_name = f'cache/{task.replace(":", "_")}.hkl'
            use_cache = os.path.exists(boxes_cache_name)
            all_boxes = hickle.load(boxes_cache_name) if use_cache else []

            # index into the cached boxes; advances only on valid actions
            valid_act_id = 0
            for act_id, act in enumerate(
                    tqdm(acts, 'Candidate Action', leave=False)):
                sim = simulator.simulate_action(task_id,
                                                act,
                                                stride=60,
                                                need_images=True,
                                                need_featurized_objects=True)
                # the fresh simulation must agree with the cached status
                assert sim.status == sim_statuses[
                    act_id], 'sanity check not passed'
                if sim.status == phyre.SimulationStatus.INVALID_INPUT:
                    # invalid actions are not scored, but a partial batch
                    # still has to be flushed when this is the last action
                    if act_id == len(acts) - 1 and len(
                            all_data) > 0:  # final action is invalid
                        conf_t = self.batch_score(all_data, all_rois,
                                                  all_image, objs_color)
                        confs = confs + conf_t
                        all_data, all_acts, all_rois, all_image = [], [], [], []
                    continue
                successes.append(sim.status == phyre.SimulationStatus.SOLVED)

                # parse object, prepare input for network, the logic is the same as tools/gen_phyre.py
                image = cv2.resize(sim.images[0],
                                   (self.input_width, self.input_height),
                                   interpolation=cv2.INTER_NEAREST)
                # keep a copy with the row order reversed
                all_image.append(image[::-1])
                image = phyre.observations_to_float_rgb(image)
                objs_color = sim.featurized_objects.colors
                # drop objects whose colour name contains BLACK or PURPLE
                objs_valid = [('BLACK' not in obj_color)
                              and ('PURPLE' not in obj_color)
                              for obj_color in objs_color]
                objs = sim.featurized_objects.features[:, objs_valid, :]
                objs_color = np.array(objs_color)[objs_valid]
                num_objs = objs.shape[1]

                if use_cache:
                    boxes = all_boxes[valid_act_id]
                    valid_act_id += 1
                else:
                    # each row is [object index, x1, y1, x2, y2]
                    boxes = np.zeros((1, num_objs, 5))
                    for o_id in range(num_objs):
                        # rasterize this object alone from its first-frame
                        # features and mark its non-background pixels
                        mask = phyre.objects_util.featurized_objects_vector_to_raster(
                            objs[0][[o_id]])
                        mask_im = phyre.observations_to_float_rgb(mask)
                        mask_im[mask_im == 1] = 0
                        mask_im = mask_im.sum(-1) > 0

                        # tight box around the mask, rescaled from scene
                        # resolution to the network input resolution
                        [h, w] = np.where(mask_im)
                        x1, x2, y1, y2 = w.min(), w.max(), h.min(), h.max()
                        x1 *= (self.input_width - 1) / (phyre.SCENE_WIDTH - 1)
                        x2 *= (self.input_width - 1) / (phyre.SCENE_WIDTH - 1)
                        y1 *= (self.input_height - 1) / (phyre.SCENE_HEIGHT -
                                                         1)
                        y2 *= (self.input_height - 1) / (phyre.SCENE_HEIGHT -
                                                         1)
                        boxes[0, o_id] = [o_id, x1, y1, x2, y2]
                    all_boxes.append(boxes)

                # HWC -> CHW with two leading singleton axes added
                data = image.transpose((2, 0, 1))[None, None, :]
                data = torch.from_numpy(data.astype(np.float32))
                # drop the object-index column, keep the box coordinates
                rois = torch.from_numpy(boxes[...,
                                              1:].astype(np.float32))[None, :]

                all_data.append(data)
                all_rois.append(rois)

                # flush on a full batch or on the last candidate action
                if len(all_data) % batched_pred == 0 or act_id == len(
                        acts) - 1:
                    conf_t = self.batch_score(all_data, all_rois, all_image,
                                              objs_color)
                    confs = confs + conf_t
                    all_data, all_rois, all_image = [], [], []

            if not use_cache:
                all_boxes = np.stack(all_boxes)
                hickle.dump(all_boxes,
                            boxes_cache_name,
                            mode='w',
                            compression='gzip')

            info = f'current AUCESS: '
            # solved flags ordered from highest to lowest score
            top_acc = np.array(successes)[np.argsort(confs)[::-1]]
            for i in range(100):
                auccess[task_id, i] = int(np.sum(top_acc[:i + 1]) > 0)
            # weights log(k + 1) - log(k) for k = 1..100
            w = np.array([np.log(k + 1) - np.log(k) for k in range(1, 101)])
            # per-k success rate over the tasks evaluated so far
            s = auccess[:task_id + 1].sum(0) / auccess[:task_id + 1].shape[0]
            info += f'{np.sum(w * s) / np.sum(w) * 100:.2f}'
            t_list.set_description(info)
Example #17
0
 def enumerate_actions():
     """Return the first 10000 actions of PHYRE's default 100k 'ball' cache."""
     ball_cache = phyre.get_default_100k_cache('ball')
     return ball_cache.action_array[:10000]
Example #18
0
    def test(self, start_id=0, end_id=25, fold_id=0, protocal='within'):
        """Score candidate actions on PHYRE test tasks and pickle the stats.

        The test tasks of fold ``fold_id`` under ``protocal`` are restricted
        to templates whose 5-digit id lies in ``range(start_id, end_id)``.
        For every remaining task the first 10000 actions of the default 'ball'
        cache are simulated; each valid action is converted into the input
        selected by the ``self.score_with_*`` flags and scored in batches
        through ``self.batch_score``.  Per task, the accuracy of thresholding
        the scores at 0.5 and the running AUCCESS for several action budgets
        are printed.  The accumulated statistics are pickled to
        ``<cache_output_dir>/<start_id>_<end_id>.pkl``.

        Args:
            start_id: First template id (inclusive) to evaluate.
            end_id: Last template id (exclusive) to evaluate.
            fold_id: Fold passed to ``phyre.get_fold``.
            protocal: Inserted into the eval-setup name
                ``ball_<protocal>_template``.

        Raises:
            NotImplementedError: If none of the ``score_with_*`` flags read
                here is set.
        """
        random.seed(0)
        print(f'testing {protocal} fold {fold_id}')
        eval_setup = f'ball_{protocal}_template'
        action_tier = phyre.eval_setup_to_action_tier(eval_setup)
        _, _, test_tasks = phyre.get_fold(eval_setup, fold_id)  # PHYRE setup
        candidate_list = [f'{i:05d}' for i in range(start_id, end_id)]  # filter tasks
        test_list = [task for task in test_tasks if task.split(':')[0] in candidate_list]
        simulator = phyre.initialize_simulator(test_list, action_tier)
        # PHYRE evaluation
        # AUCCESS is reported separately for each of these action budgets
        num_all_actions = [1000, 2000, 5000, 8000, 10000]
        auccess = np.zeros((len(num_all_actions), len(test_list), 100))
        batched_pred = C.SOLVER.BATCH_SIZE
        # DATA for network:
        # pending batch; flushed through self.batch_score and then reset
        all_data, all_acts, all_rois, all_image = [], [], [], []
        cache = phyre.get_default_100k_cache('ball')
        acts = cache.action_array[:10000]
        # actions = cache.action_array[:100000]
        # training_data = cache.get_sample(test_list, None)

        # running totals over all tasks for the 0.5-threshold accuracy
        pos_all, neg_all, pos_correct, neg_correct = 0, 0, 0, 0

        objs_color = None
        for task_id, task in enumerate(test_list):
            # confs / successes: one entry per valid action;
            # num_valid_act_idx: one 0/1 validity flag per candidate action
            confs, successes, num_valid_act_idx = [], [], []

            # boxes are loaded from disk when a file for this task exists
            boxes_cache_name = f'cache/{task.replace(":", "_")}.hkl'
            use_cache = os.path.exists(boxes_cache_name)
            all_boxes = hickle.load(boxes_cache_name) if use_cache else []

            # index into the cached boxes; advances only on valid actions
            valid_act_cnt = 0

            # sim_statuses = training_data['simulation_statuses'][task_id]
            # pos_acts = actions[sim_statuses == 1]
            # neg_acts = actions[sim_statuses == -1]
            # np.random.shuffle(pos_acts)
            # np.random.shuffle(neg_acts)
            # pos_acts = pos_acts[:50]
            # neg_acts = neg_acts[:200]
            # acts = np.concatenate([pos_acts, neg_acts])

            for act_id, act in enumerate(acts):
                if act_id == 0:
                    pprint(f'{task}: {task_id} / {len(test_list)}')
                sim = simulator.simulate_action(task_id, act, stride=60, need_images=True, need_featurized_objects=True)
                if sim.status == phyre.SimulationStatus.INVALID_INPUT:
                    num_valid_act_idx.append(0)
                    # invalid actions are not scored, but a partial batch
                    # still has to be flushed when this is the last action
                    if act_id == len(acts) - 1 and len(all_data) > 0:  # final action is invalid
                        conf_t = self.batch_score(all_data, all_acts, all_rois, all_image, objs_color, task)
                        confs = confs + conf_t
                        all_data, all_acts, all_rois, all_image = [], [], [], []
                    continue
                num_valid_act_idx.append(1)
                successes.append(sim.status == phyre.SimulationStatus.SOLVED)

                if self.score_with_heuristic or self.score_with_mask:
                    # parse object, prepare input for network:
                    image = cv2.resize(sim.images[0], (self.input_width, self.input_height),
                                       interpolation=cv2.INTER_NEAREST)
                    all_image.append(image[::-1])  # for heuristic method to detect goal location, need to flip
                    image = phyre.observations_to_float_rgb(image)
                    objs_color = sim.featurized_objects.colors
                    # drop objects whose colour name contains BLACK or PURPLE
                    objs_valid = [('BLACK' not in obj_color) and ('PURPLE' not in obj_color) for obj_color in objs_color]
                    objs = sim.featurized_objects.features[:, objs_valid, :]
                    objs_color = np.array(objs_color)[objs_valid]
                    num_objs = objs.shape[1]

                    if use_cache:
                        boxes = all_boxes[valid_act_cnt]
                        valid_act_cnt += 1
                    else:
                        # each row is [object index, x1, y1, x2, y2]
                        boxes = np.zeros((1, num_objs, 5))
                        for o_id in range(num_objs):
                            # rasterize this object alone and mark its
                            # non-background pixels
                            mask = phyre.objects_util.featurized_objects_vector_to_raster(objs[0][[o_id]])
                            mask_im = phyre.observations_to_float_rgb(mask)
                            mask_im[mask_im == 1] = 0
                            mask_im = mask_im.sum(-1) > 0

                            # tight box around the mask, rescaled from scene
                            # resolution to the network input resolution
                            [h, w] = np.where(mask_im)
                            x1, x2, y1, y2 = w.min(), w.max(), h.min(), h.max()
                            x1 *= (self.input_width - 1) / (phyre.SCENE_WIDTH - 1)
                            x2 *= (self.input_width - 1) / (phyre.SCENE_WIDTH - 1)
                            y1 *= (self.input_height - 1) / (phyre.SCENE_HEIGHT - 1)
                            y2 *= (self.input_height - 1) / (phyre.SCENE_HEIGHT - 1)
                            boxes[0, o_id] = [o_id, x1, y1, x2, y2]
                        all_boxes.append(boxes)

                    # HWC -> CHW with two leading singleton axes added
                    data = image.transpose((2, 0, 1))[None, None, :]
                    data = torch.from_numpy(data.astype(np.float32))
                    # drop the object-index column, keep the box coordinates
                    rois = torch.from_numpy(boxes[..., 1:].astype(np.float32))[None, :]

                    all_data.append(data)
                    all_rois.append(rois)
                elif self.score_with_act:
                    # input is the row-reversed initial scene plus the action
                    init = np.ascontiguousarray(simulator.initial_scenes[task_id][::-1])
                    init128 = cv2.resize(init, (self.input_width, self.input_height),
                                         interpolation=cv2.INTER_NEAREST)
                    all_data.append(torch.from_numpy(init128))
                    all_acts.append(torch.from_numpy(act[None, :]))
                elif self.score_with_vid_cls:
                    # input is every returned frame, resized and row-reversed
                    rst_images = np.stack([np.ascontiguousarray(
                        cv2.resize(rst_image, (self.input_width, self.input_height),
                                   interpolation=cv2.INTER_NEAREST)[::-1]
                    ) for rst_image in sim.images])
                    all_data.append(torch.from_numpy(rst_images))
                else:
                    raise NotImplementedError

                # flush on a full batch or on the last candidate action
                if len(all_data) % batched_pred == 0 or act_id == len(acts) - 1:
                    conf_t = self.batch_score(all_data, all_acts, all_rois, all_image, objs_color, task)
                    confs = confs + conf_t
                    all_data, all_acts, all_rois, all_image = [], [], [], []

            if self.score_with_heuristic or self.score_with_mask:
                if not use_cache:
                    all_boxes = np.stack(all_boxes)
                    hickle.dump(all_boxes, boxes_cache_name, mode='w', compression='gzip')
                else:
                    # every cached box must have been consumed exactly once
                    assert valid_act_cnt == len(all_boxes)

            # binary prediction: a score of at least 0.5 counts as "solves"
            pred = np.array(confs) >= 0.5
            labels = np.array(successes)

            pos_all += (labels == 1).sum()
            neg_all += (labels == 0).sum()
            pos_correct += (pred == labels)[labels == 1].sum()
            neg_correct += (pred == labels)[labels == 0].sum()

            # per-task accuracy on solving / non-solving actions
            # NOTE(review): the denominators are 0 when a task has no action
            # of that class -- confirm the resulting nan output is acceptable
            pos_acc = (pred == labels)[labels == 1].sum() / (labels == 1).sum()
            neg_acc = (pred == labels)[labels == 0].sum() / (labels == 0).sum()
            info = f'{pos_acc * 100:.1f} / {neg_acc * 100:.1f} '
            # info = f'{task}: '
            for j, num_acts in enumerate(num_all_actions):
                # number of valid actions among the first num_acts candidates
                num_valid = np.sum(num_valid_act_idx[:num_acts])
                # solved flags ordered from highest to lowest score
                top_acc = np.array(successes[:num_valid])[np.argsort(confs[:num_valid])[::-1]]
                for i in range(100):
                    auccess[j, task_id, i] = int(np.sum(top_acc[:i + 1]) > 0)
                # weights log(k + 1) - log(k) for k = 1..100
                w = np.array([np.log(k + 1) - np.log(k) for k in range(1, 101)])
                # per-k success rate over the tasks evaluated so far
                s = auccess[j, :task_id + 1].sum(0) / auccess[j, :task_id + 1].shape[0]
                info += f'{np.sum(w * s) / np.sum(w) * 100:.2f} {np.sum(successes[:num_valid])}/{num_acts // 1000}k | '
            pprint(info)
        pprint(pos_correct, pos_all, pos_correct / pos_all)
        pprint(neg_correct, neg_all, neg_correct / neg_all)
        cache_output_dir = f'{self.output_dir.replace("figures/", "")}/' \
                           f'{self.proposal_setting}_{self.method}_{protocal}_fold_{fold_id}/'
        os.makedirs(cache_output_dir, exist_ok=True)
        print(cache_output_dir)
        stats = {
            'auccess': auccess,
            'p_c': pos_correct,
            'p_a': pos_all,
            'n_c': neg_correct,
            'n_a': neg_all,
        }
        with open(f'{cache_output_dir}/{start_id}_{end_id}.pkl', 'wb') as f:
            pickle.dump(stats, f, pickle.HIGHEST_PROTOCOL)