Example #1
def validate_step(self, feed_dict, metric, meters=None):
    feed_dict_np = as_numpy(feed_dict)
    # Legacy (pre-PyTorch-0.4) way to run inference without autograd:
    # wrap the inputs as volatile Variables.
    feed_dict = mark_volatile(as_variable(feed_dict))
    output_dict = self._model(feed_dict)
    output_dict_np = as_numpy(output_dict)
    result = as_float(metric(feed_dict_np, output_dict_np))
    if meters is not None:
        meters.update(result)
    return result
Example #2
def validate_step(self, feed_dict, metric, meters=None):
    feed_dict_np = as_numpy(feed_dict)
    feed_dict = as_tensor(feed_dict)
    # Run the forward pass without building an autograd graph.
    with torch.no_grad():
        output_dict = self._model(feed_dict)
    output_dict_np = as_numpy(output_dict)
    result = as_float(metric(feed_dict_np, output_dict_np))
    if meters is not None:
        meters.update(result)
    return result
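
Examples #1 and #2 implement the same validation step: #1 uses the legacy volatile-Variable idiom (as_variable/mark_volatile, PyTorch <= 0.3), while #2 uses the torch.no_grad() context that replaced it. Below is a minimal, self-contained sketch of the modern pattern; model, batch and metric are placeholder names, not taken from the examples above.

import torch

def validate_step(model, batch, metric):
    # Keep a NumPy copy of the inputs for the metric, run the forward pass
    # without building an autograd graph, then convert the outputs back.
    batch_np = {k: v.detach().cpu().numpy() for k, v in batch.items()}
    with torch.no_grad():
        outputs = model(batch)
    outputs_np = {k: v.detach().cpu().numpy() for k, v in outputs.items()}
    return float(metric(batch_np, outputs_np))
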
Example #3
def main():
    logger.critical('Loading the word embedding.')
    vocab, word_embeddings = load_word_embedding(args.vse)

    logger.critical('Building up the model.')
    model = CompletionModel(word_embeddings)
    if args.use_gpu:
        model.cuda()
    model.eval()
    # Disable the cudnn benchmark.
    cudnn.benchmark = False

    logger.critical('Loading the dataset.')

    dev_dataset = CompletionDataset(vocab, pjoin(args.data_dir, args.dev_img), pjoin(args.data_dir, args.dev_cap), mode=args.mode)
    test_dataset = CompletionDataset(vocab, pjoin(args.data_dir, args.test_img), pjoin(args.data_dir, args.test_cap), mode=args.mode)

    logger.critical('Building up the data loader.')
    dev_dataloader = make_dataloader(dev_dataset, num_workers=args.data_workers, batch_size=64, shuffle=False, drop_last=False, pin_memory=True)
    test_dataloader = make_dataloader(test_dataset, num_workers=args.data_workers, batch_size=64, shuffle=False, drop_last=False, pin_memory=True)

    for epoch_id in range(1, 11):
        load_weights(model, pjoin(args.load, 'epoch_{}.pth'.format(epoch_id)))

        for loader in [dev_dataloader, test_dataloader]:
            meters = GroupMeters()

            end = time.time()
            with tqdm_pbar(total=len(loader), leave=False) as pbar:
                for i, data in enumerate(loader):
                    feed_dict = data
                    feed_dict = mark_volatile(feed_dict)

                    if args.use_gpu:
                        feed_dict = async_copy_to(feed_dict, 0)

                    data_time = time.time() - end; end = time.time()

                    output_dict = model(feed_dict)
                    output_dict = as_numpy(output_dict)

                    gpu_time = time.time() - end; end = time.time()

                    meters.update({k: float(v) for k, v in output_dict.items() if k.startswith('top')}, n=len(feed_dict['image']))
                    meters.update({'time/data': data_time, 'time/gpu': gpu_time})

                    pbar.set_description(format_meters('sentid={}'.format(i), meters.val, '{}={:.4f}', ', '))
                    pbar.update()

                    end = time.time()

            print(epoch_id, sorted(meters.avg.items()))
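
Example #3 follows the usual evaluation setup: the model is switched to eval mode and the cuDNN autotuner is disabled so that timing stays comparable across batches. A minimal sketch of that setup in plain PyTorch (the nn.Linear model is just a stand-in):

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn

model = nn.Linear(16, 4)
if torch.cuda.is_available():
    model.cuda()
model.eval()             # dropout off, BatchNorm uses running statistics
cudnn.benchmark = False  # no cuDNN autotuning, keeps timings reproducible
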
Example #4
def _inference_model(self, feed_dict):
    feed_dict = as_variable(feed_dict)
    # Legacy volatile-Variable idiom; superseded by torch.no_grad().
    mark_volatile(feed_dict)
    return as_numpy(self._model(feed_dict))
Example #5
def run_episode(env,
                model,
                mode,
                number,
                play_name='',
                dump=False,
                dataset=None,
                eval_only=False,
                use_argmax=False,
                need_restart=False,
                entropy_beta=0.0):
    """Run one episode using the model with $number blocks."""
    is_over = False
    traj = collections.defaultdict(list)
    score = 0
    if need_restart:
        env.restart()

    optimal = None
    if args.task == 'path':
        optimal = env.unwrapped.dist
        relation = env.unwrapped.graph.get_edges()
        relation = np.stack([relation, relation.T], axis=-1).astype(dtype=np.float32)
        st, ed = env.current_state
        nodes_trajectory = [int(st)]
        destination = int(ed)
        policies = []
    elif args.task == 'sort':
        optimal = env.unwrapped.optimal
        array = [str(i) for i in env.unwrapped.array]

    # If dump_play=True, store the states and actions in a json file
    # for visualization.
    dump_play = args.dump_play and dump
    if dump_play:
        nr_objects = number + 1
        array = env.unwrapped.current_state
        moves, new_pos, policies = [], [], []

    if args.model == 'dlm':
        # By default the network is not in training mode during data collection,
        # but with DLM we do not want argmax-only behaviour, except in two cases:
        # testing interpretability, or the final mining phase (to obtain an
        # interpretable policy).
        if ('inter' in mode) or (('mining' in mode) or ('inherit' in mode) and number == args.curriculum_graduate):
            model.lowernoise()
        else:
            model.train(True)

            if args.dlm_noise == 1 and (('mining' in mode) or ('inherit' in mode) or ('test' in mode)):
                model.lowernoise()
            elif args.dlm_noise == 2:
                model.lowernoise()

    step = 0
    while not is_over:
        if args.task == 'path':
            st, ed = env.current_state
            state = np.zeros((relation.shape[0], 2), dtype=np.float32)
            state[st, 0] = 1
            state[ed, 1] = 1
            feed_dict = dict(states=[np.array([state]), np.array([relation])])
        else:
            state = env.current_state
            if 'nlrl' not in args.task or args.task == 'sort':
                feed_dict = dict(states=np.array([state]))
            else:
                feed_dict = dict(states=state)
        feed_dict['entropy_beta'] = as_tensor(entropy_beta).float()
        feed_dict['training'] = as_tensor(False)
        feed_dict = as_tensor(feed_dict)

        with torch.set_grad_enabled(False):
            output_dict = model(feed_dict)
        policy = output_dict['policy']
        p = as_numpy(policy.data[0])
        action = p.argmax() if use_argmax else random.choice(len(p), p=p)
        if args.pred_weight != 0.0:
            # Need to ensure that the env.utils.MapActionProxy is the outermost class.
            mapped_x, mapped_y = env.mapping[action]
            # env.unwrapped to get the innermost Env class.
            valid = env.unwrapped.world.moveable(mapped_x, mapped_y)
        reward, is_over = env.action(action)
        step += 1
        if dump_play:
            moves.append([mapped_x, mapped_y])
            res = tuple(env.current_state[mapped_x][2:])
            new_pos.append((int(res[0]), int(res[1])))

            logits = as_numpy(output_dict['logits'].data[0])
            tops = np.argsort(p)[-10:][::-1]
            tops = list(
                map(lambda x: (env.mapping[x], float(p[x]), float(logits[x])), tops))
            policies.append(tops)
        # For now, assume reward=1 only on success and reward=0 otherwise.
        # Adjust the reward and derive the success flag from it.
        if reward == 0 and args.penalty is not None:
            reward = args.penalty
        succ = 1 if is_over and reward > 0.99 else 0

        score += reward

        if type(feed_dict['states']) is list:
            traj['states'].append([f for f in feed_dict['states']])
        else:
            traj['states'].append(state)

        traj['rewards'].append(reward)
        traj['actions'].append(action)

        if args.pred_weight != 0.0:
            if not eval_only and dataset is not None and mapped_x != mapped_y:
                dataset.append(nr_objects, state, action, valid)

    # Dump json file as record of the playing.
    if dump_play and not (args.dump_fail_only and succ):
        array = array[:, 2:].astype('int32').tolist()
        array = [array[:nr_objects], array[nr_objects:]]
        json_str = json.dumps(
            # Pass indent= to json.dumps for a pretty-printed file.
            dict(array=array, moves=moves, new_pos=new_pos,
                 policies=policies))
        dump_file = os.path.join(
            args.current_dump_dir,
            '{}_blocks{}.json'.format(play_name, env.unwrapped.nr_blocks))
        with open(dump_file, 'w') as f:
            f.write(json_str)

    length = step

    if args.model == 'dlm':
        model.restorenoise()

    return succ, score, traj, length, optimal
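
Action selection in this rollout either takes the argmax of the policy or samples from it; random.choice(len(p), p=p) matches NumPy's signature, so random here is presumably numpy.random or a seeded wrapper around it. A small sketch of the same step; select_action is a name introduced here, not part of the code above.

import numpy as np

def select_action(policy, use_argmax=False):
    # policy: 1-D torch tensor of action probabilities that sum to 1.
    p = policy.detach().cpu().numpy()
    if use_argmax:
        return int(p.argmax())
    return int(np.random.choice(len(p), p=p))
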
Example #6
def _inference_model(self, feed_dict):
    feed_dict = as_tensor(feed_dict)
    # Forward pass with autograd disabled; convert the outputs back to NumPy.
    with torch.no_grad():
        return as_numpy(self._model(feed_dict))
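
The as_tensor/as_numpy helpers used throughout these examples convert between (possibly nested) NumPy and tensor structures. The following is a hedged illustration of what such helpers can look like, under the made-up names as_tensor_like/as_numpy_like; it is not the actual Jacinle implementation.

import numpy as np
import torch

def as_tensor_like(obj):
    # Recursively convert NumPy arrays in nested dicts/lists to torch tensors.
    if isinstance(obj, dict):
        return {k: as_tensor_like(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return type(obj)(as_tensor_like(v) for v in obj)
    if isinstance(obj, np.ndarray):
        return torch.from_numpy(obj)
    return obj

def as_numpy_like(obj):
    # Recursively convert torch tensors in nested dicts/lists back to NumPy.
    if isinstance(obj, dict):
        return {k: as_numpy_like(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return type(obj)(as_numpy_like(v) for v in obj)
    if isinstance(obj, torch.Tensor):
        return obj.detach().cpu().numpy()
    return obj
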
Example #7
def run_episode(env,
                model,
                number,
                play_name='',
                dump=False,
                dataset=None,
                eval_only=False,
                use_argmax=False,
                need_restart=False,
                entropy_beta=0.0):
    """Run one episode using the model with $number blocks."""
    is_over = False
    traj = collections.defaultdict(list)
    score = 0
    if need_restart:
        env.restart()
    nr_objects = number + 1
    # If dump_play=True, store the states and actions in a json file
    # for visualization.
    dump_play = args.dump_play and dump
    if dump_play:
        array = env.unwrapped.current_state
        moves, new_pos, policies = [], [], []

    while not is_over:
        state = env.current_state
        feed_dict = dict(states=np.array([state]))
        feed_dict['entropy_beta'] = as_tensor(entropy_beta).float()
        feed_dict = as_tensor(feed_dict)
        if args.use_gpu:
            feed_dict = as_cuda(feed_dict)

        with torch.set_grad_enabled(not eval_only):
            output_dict = model(feed_dict)
        policy = output_dict['policy']
        p = as_numpy(policy.data[0])
        action = p.argmax() if use_argmax else random.choice(len(p), p=p)
        # Need to ensure that the env.utils.MapActionProxy is the outermost class.
        mapped_x, mapped_y = env.mapping[action]
        # env.unwrapped to get the innermost Env class.
        valid = env.unwrapped.world.moveable(mapped_x, mapped_y)
        reward, is_over = env.action(action)
        if dump_play:
            moves.append([mapped_x, mapped_y])
            res = tuple(env.current_state[mapped_x][2:])
            new_pos.append((int(res[0]), int(res[1])))

            logits = as_numpy(output_dict['logits'].data[0])
            tops = np.argsort(p)[-10:][::-1]
            tops = list(
                map(lambda x: (env.mapping[x], float(p[x]), float(logits[x])),
                    tops))
            policies.append(tops)

        # For now, assume reward=1 only on success and reward=0 otherwise.
        # Adjust the reward and derive the success flag from it.
        if reward == 0 and args.penalty is not None:
            reward = args.penalty
        succ = 1 if is_over and reward > 0.99 else 0

        score += reward
        traj['states'].append(state)
        traj['rewards'].append(reward)
        traj['actions'].append(action)
        if not eval_only and dataset is not None and mapped_x != mapped_y:
            dataset.append(nr_objects, state, action, valid)

    # Dump json file as record of the playing.
    if dump_play and not (args.dump_fail_only and succ):
        array = array[:, 2:].astype('int32').tolist()
        array = [array[:nr_objects], array[nr_objects:]]
        json_str = json.dumps(
            # Pass indent= to json.dumps for a pretty-printed file.
            dict(array=array, moves=moves, new_pos=new_pos, policies=policies))
        dump_file = os.path.join(
            args.current_dump_dir,
            '{}_blocks{}.json'.format(play_name, env.unwrapped.nr_blocks))
        with open(dump_file, 'w') as f:
            f.write(json_str)

    length = len(traj['rewards'])
    return succ, score, traj, length
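
Unlike Example #5, which always disables gradients during the rollout, Examples #7 and #8 run the forward pass under torch.set_grad_enabled(not eval_only), so the collected trajectory can later drive policy-gradient updates. A minimal sketch of that conditional context; forward_step is a name introduced here for illustration.

import torch

def forward_step(model, feed_dict, eval_only):
    # Build the autograd graph only when the rollout is used for training.
    with torch.set_grad_enabled(not eval_only):
        return model(feed_dict)
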
Example #8
def run_episode(env,
                model,
                number,
                play_name='',
                dump=False,
                eval_only=False,
                use_argmax=False,
                need_restart=False,
                entropy_beta=0.0):
    """Run one episode using the model with $number nodes/numbers."""
    is_over = False
    traj = collections.defaultdict(list)
    score = 0
    moves = []
    # If dump_play=True, store the states and actions in a json file
    # for visualization.
    dump_play = args.dump_play and dump

    if need_restart:
        env.restart()

    if args.is_path_task:
        optimal = env.unwrapped.dist
        relation = env.unwrapped.graph.get_edges()
        relation = np.stack([relation, relation.T], axis=-1)
        st, ed = env.current_state
        nodes_trajectory = [int(st)]
        destination = int(ed)
        policies = []
    elif args.is_sort_task:
        optimal = env.unwrapped.optimal
        array = [str(i) for i in env.unwrapped.array]

    while not is_over:
        if args.is_path_task:
            st, ed = env.current_state
            state = np.zeros((relation.shape[0], 2))
            state[st, 0] = 1
            state[ed, 1] = 1
            feed_dict = dict(states=np.array([state]),
                             relations=np.array([relation]))
        elif args.is_sort_task:
            state = env.current_state
            feed_dict = dict(states=np.array([state]))
        feed_dict['entropy_beta'] = as_tensor(entropy_beta).float()
        feed_dict = as_tensor(feed_dict)
        if args.use_gpu:
            feed_dict = as_cuda(feed_dict)

        with torch.set_grad_enabled(not eval_only):
            output_dict = model(feed_dict)

        policy = output_dict['policy']
        p = as_numpy(policy.data[0])
        action = p.argmax() if use_argmax else random.choice(len(p), p=p)
        reward, is_over = env.action(action)

        # collect moves information
        if dump_play:
            if args.is_path_task:
                moves.append(int(action))
                nodes_trajectory.append(int(env.current_state[0]))
                logits = as_numpy(output_dict['logits'].data[0])
                tops = np.argsort(p)[-10:][::-1]
                tops = list(
                    map(lambda x: (int(x), float(p[x]), float(logits[x])),
                        tops))
                policies.append(tops)
            if args.is_sort_task:
                # Need to ensure that env.utils.MapActionProxy is the outermost class.
                mapped_x, mapped_y = env.mapping[action]
                moves.append([mapped_x, mapped_y])

        # For now, assume reward=1 only on success and reward=0 otherwise.
        # Adjust the reward and derive the success flag from it.
        if reward == 0 and args.penalty is not None:
            reward = args.penalty
        succ = 1 if is_over and reward > 0.99 else 0

        score += reward
        traj['states'].append(state)
        if args.is_path_task:
            traj['relations'].append(relation)
        traj['rewards'].append(reward)
        traj['actions'].append(action)

    # dump json file storing information of playing
    if dump_play and not (args.dump_fail_only and succ):
        if args.is_path_task:
            num = env.unwrapped.nr_nodes
            graph = relation[:, :, 0].tolist()
            coordinates = env.unwrapped.graph.get_coordinates().tolist()
            json_str = json.dumps(
                dict(graph=graph,
                     coordinates=coordinates,
                     policies=policies,
                     destination=destination,
                     current=nodes_trajectory,
                     moves=moves))
        if args.is_sort_task:
            num = env.unwrapped.nr_numbers
            json_str = json.dumps(dict(array=array, moves=moves))
        dump_file = os.path.join(args.current_dump_dir,
                                 '{}_size{}.json'.format(play_name, num))
        with open(dump_file, 'w') as f:
            f.write(json_str)

    length = len(traj['rewards'])
    return succ, score, traj, length, optimal
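
All run_episode variants above return at least the success flag, total score, trajectory and episode length (Examples #5 and #8 also return the optimal value). A hedged sketch of how those returns might be aggregated over an evaluation run, using Example #8's signature; evaluate and the episode count are introduced here for illustration.

def evaluate(env, model, number, episodes=100):
    # Average success rate and total reward over a fixed number of eval episodes.
    succ_total, score_total = 0, 0.0
    for _ in range(episodes):
        succ, score, traj, length, optimal = run_episode(
            env, model, number, eval_only=True, use_argmax=True)
        succ_total += succ
        score_total += score
    return succ_total / episodes, score_total / episodes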