Example #1
    def _add_attributes(self, env, dataset):
        """ Adds attributes to Agent """
        self._sample_timer = Timer('sample')
        self._learn_timer = Timer('train')

        self._return_stats = getattr(self, '_return_stats', False)

        self.RECORD = getattr(self, 'RECORD', False)
        self.N_EVAL_EPISODES = getattr(self, 'N_EVAL_EPISODES', 1)

        # interval between calls to self._summary
        self._to_summary = Every(self.LOG_PERIOD, self.LOG_PERIOD)
Example #2
    def toggle_convert_map(self) -> None:
        if isinstance(self._services.algorithm.map, DenseMap):
            self._services.debug.write("Converting map to SparseMap",
                                       DebugLevel.BASIC)
            mp = self._services.algorithm.map.convert_to_sparse_map()
        elif isinstance(self._services.algorithm.map, SparseMap):
            self._services.debug.write("Converting map to DenseMap",
                                       DebugLevel.BASIC)
            mp = self._services.algorithm.map.convert_to_dense_map()
        else:
            self._services.debug.write("Map conversion not applicable",
                                       DebugLevel.BASIC)
            return

        if mp is None:
            self._services.debug.write("Map conversion not applicable",
                                       DebugLevel.BASIC)
            return

        self.reset()
        timer: Timer = Timer()
        self._services.algorithm.map = mp
        self._services.debug.write(
            "Done converting. Total time: " + str(timer.stop()),
            DebugLevel.BASIC)
        self._services.debug.write(self._services.algorithm.map,
                                   DebugLevel.MEDIUM)
        self._services.ev_manager.post(KeyFrameEvent())
Example #3
 def wrapper(*args, **kwargs):
     self.write("Started: {}".format(func.__name__), debug_level)
     timer = Timer()
     res = func(*args, **kwargs)
     exec_time = timer.stop()
     self.write("Finished: {}, execution time: {} seconds".format(func.__name__, round(exec_time, 2)), debug_level)
     return res
Example #4
 def algorithm_start(self) -> None:
     """
     Marks the start of the algorithm
     """
     self._services.debug.write(
         "Algorithm " + str(self._services.algorithm.algorithm_type) +
         " started..", DebugLevel.BASIC)
     self.timer = Timer()
Example #5
 def __init__(self, services: Services, model: Model,
              root_view: Optional[View]) -> None:
     super().__init__(services, model, root_view)
     self._services.ev_manager.register_tick_listener(self)
     self.__frame_timer = Timer()
     self.__frame_count = 0
     self.__frame_mvg_average = 0
     self.__fps = 0
     self.__screen = None
     self._initialised = False
Example #6
    def __init__(self, services: Services) -> None:
        super().__init__(services)
        self._services.ev_manager.register_tick_listener(self)
        self.last_thread = None
        self.key_frame_is_paused = False
        self.cv = Condition()
        self.processing_key_frame = False
        self.frame_timer = Timer()
        self.speed = 1

        self._services.algorithm.set_root()
Example #7
        def _run(self, replay):
            def collect(*args, **kwargs):
                self._collect(*args, **kwargs)
                if self.buffer.is_full():
                    self._send_data(replay)

            start_step = self.runner.step
            with Timer('run') as rt:
                self.env_step = self.runner.run(step_fn=collect)
            self._info['time/run'] = rt.average()

            return self.env_step - start_step
Example #8
    def __init__(self, cmd_router):
        self.cmd_router = cmd_router

        self.broadcast_interval = 3
        self.broadcast_timer = Timer()

        self.player_dict = {}

        self.cmd_router.register_cmd_handler(Cmd.player_join,
                                             self.recv_player_join)
        self.cmd_router.register_cmd_handler(Cmd.player_transform,
                                             self.recv_player_transform)
        pass
Example #9
        def _act_loop(self, workers, learner, monitor):
            objs = {
                workers[wid].env_output.remote(eid): (wid, eid)
                for wid in range(self._wpa) for eid in range(self._n_envvecs)
            }

            self.env_step = 0
            while True:
                # retrieve ready objs
                with Timer('wait') as wt:
                    ready_objs, _ = ray.wait(list(objs),
                                             num_returns=self._action_batch)
                n_ready = len(ready_objs)
                wids, eids = zip(*[objs.pop(i) for i in ready_objs])
                env_output = EnvOutput(*[
                    np.concatenate(v, axis=0)
                    for v in zip(*ray.get(ready_objs))
                ])
                if self._act_eps_mapping is not None:
                    self._act_eps = np.reshape(
                        self._act_eps_mapping[wids, eids],
                        (-1) if self._action_shape == () else (-1, 1))
                assert len(wids) == len(eids) == n_ready, \
                    (len(wids), len(eids), n_ready)

                actions, terms = self(wids, eids, env_output)

                # distribute action and terms
                actions = np.split(actions, n_ready)
                terms = [
                    list(itertools.product([k], np.split(v, n_ready)))
                    for k, v in terms.items()
                ]
                terms = [dict(v) for v in zip(*terms)]

                # environment step
                objs.update({
                    workers[wid].env_step.remote(eid, a, t): (wid, eid)
                    for wid, eid, a, t in zip(wids, eids, actions, terms)
                })

                self.env_step += n_ready * self._n_envs
                if self._to_sync(self.env_step):
                    self.pull_weights(learner)
                    monitor.record_run_stats.remote(
                        **{
                            'time/wait_env': wt.average(),
                            'n_ready': n_ready
                        })
Example #10
    def tick(self) -> None:
        if self.processing_key_frame:
            self.processing_key_frame = False
            with self.cv:
                self.requires_key_frame = False
                self.cv.notify_all()

        if not self.key_frame_is_paused and self.last_thread is not None:
            MAX_FRAME_DT = 1 / 16
            dt = self.frame_timer.stop()
            if self.requires_key_frame or (dt < MAX_FRAME_DT):
                with self.cv:
                    if cond_var_wait_for(self.cv,
                                         lambda: self.requires_key_frame or
                                         self.last_thread is None,
                                         timeout=(MAX_FRAME_DT - dt)):
                        self.processing_key_frame = True
                        self._services.ev_manager.post(KeyFrameEvent())
                self.frame_timer = Timer()
Example #11
    def update(self) -> None:
        if self.__frame_timer.stop() >= 1:
            if self.__frame_mvg_average == 0:
                self.__fps = self.__frame_count
            else:
                self.__fps += (self.__frame_count -
                               self.__fps) / self.__frame_mvg_average
            self.__frame_mvg_average = min(self.__frame_mvg_average + 1,
                                           self.MVG_AVG_SIZE)
            self.__frame_count = 0
            self.__frame_timer = Timer()
            self._services.debug.write("FPS: " + str(self.__fps),
                                       DebugLevel.MEDIUM)

        for child in self._children:
            child.update()

        # update window
        self._services.graphics.window.update()
        self.__frame_count += 1
Example #12
def train(agent, env, eval_env, replay):
    collect_fn = pkg.import_module('agent', algo=agent.name).collect
    collect = functools.partial(collect_fn, replay)

    env_step = agent.env_step
    runner = Runner(env, agent, step=env_step, nsteps=agent.TRAIN_PERIOD)
    while not replay.good_to_learn():
        env_step = runner.run(
            # NOTE: random action below makes a huge difference for Mujoco tasks
            # by default, we don't use it as it's not a conventional practice.
            # action_selector=env.random_action,
            step_fn=collect)

    to_log = Every(agent.LOG_PERIOD, agent.LOG_PERIOD)
    to_eval = Every(agent.EVAL_PERIOD)
    to_record = Every(agent.EVAL_PERIOD * 10)
    rt = Timer('run')
    tt = Timer('train')
    et = Timer('eval')
    lt = Timer('log')
    print('Training starts...')
    while env_step <= int(agent.MAX_STEPS):
        with rt:
            env_step = runner.run(step_fn=collect)
        with tt:
            agent.learn_log(env_step)

        if to_eval(env_step):
            with TempStore(agent.get_states, agent.reset_states):
                with et:
                    record = agent.RECORD and to_record(env_step)
                    eval_score, eval_epslen, video = evaluate(
                        eval_env,
                        agent,
                        n=agent.N_EVAL_EPISODES,
                        record=agent.RECORD,
                        size=(64, 64))
                    if record:
                        video_summary(f'{agent.name}/sim',
                                      video,
                                      step=env_step)
                    agent.store(eval_score=eval_score, eval_epslen=eval_epslen)

        if to_log(env_step):
            with lt:
                fps = rt.average() * agent.TRAIN_PERIOD
                tps = tt.average() * agent.N_UPDATES

                agent.store(
                    env_step=agent.env_step,
                    train_step=agent.train_step,
                    fps=fps,
                    tps=tps,
                )
                agent.store(
                    **{
                        'train_step': agent.train_step,
                        'time/run': rt.total(),
                        'time/train': tt.total(),
                        'time/eval': et.total(),
                        'time/log': lt.total(),
                        'time/run_mean': rt.average(),
                        'time/train_mean': tt.average(),
                        'time/eval_mean': et.average(),
                        'time/log_mean': lt.average(),
                    })
                agent.log(env_step)
                agent.save()
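Several of the training loops above gate evaluation and logging with an Every helper, constructed as Every(period) or Every(period, start) and then called with the current step. The following is a minimal sketch of such a helper, inferred only from these call sites; it is an assumption, not the actual implementation used by these examples.

class Every:
    """Sketch of a step-based scheduler: calling it returns True at most once
    per `period` steps, starting from `start` (names inferred from usage above)."""

    def __init__(self, period, start=0):
        self._period = period
        self._next = start

    def __call__(self, step):
        if self._period is None:
            return False            # a None period disables the trigger
        if step >= self._next:
            # advance the next trigger point past the current step
            while self._next <= step:
                self._next += self._period
            return True
        return False

Under this reading, Every(agent.LOG_PERIOD, agent.LOG_PERIOD) triggers for the first time once env_step reaches LOG_PERIOD, and at most once per LOG_PERIOD thereafter.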
Example #13
def train(agent, env, eval_env, buffer):
    collect_fn = pkg.import_module('agent', algo=agent.name).collect
    collect = functools.partial(collect_fn, buffer)

    step = agent.env_step
    runner = Runner(env, agent, step=step, nsteps=agent.N_STEPS)
    exp_buffer = get_expert_data(f'{buffer.DATA_PATH}-{env.name}')

    if step == 0 and agent.is_obs_normalized:
        print('Start to initialize running stats...')
        for _ in range(10):
            runner.run(action_selector=env.random_action, step_fn=collect)
            agent.update_obs_rms(np.concatenate(buffer['obs']))
            agent.update_reward_rms(buffer['reward'], buffer['discount'])
            buffer.reset()
        buffer.clear()
        agent.save(print_terminal_info=True)

    runner.step = step
    # print("Initial running stats:", *[f'{k:.4g}' for k in agent.get_running_stats() if k])
    to_log = Every(agent.LOG_PERIOD, agent.LOG_PERIOD)
    to_eval = Every(agent.EVAL_PERIOD)
    rt = Timer('run')
    tt = Timer('train')
    et = Timer('eval')
    lt = Timer('log')
    print('Training starts...')
    while step < agent.MAX_STEPS:
        start_env_step = agent.env_step
        agent.before_run(env)
        with rt:
            step = runner.run(step_fn=collect)
        agent.store(fps=(step - start_env_step) / rt.last())
        buffer.reshape_to_sample()
        agent.disc_learn_log(exp_buffer)
        buffer.compute_reward_with_func(agent.compute_reward)
        buffer.reshape_to_store()

        # NOTE: normalizing rewards here may introduce some inconsistency
        # if normalized rewards are fed as an input to the network.
        # One can reconcile this by moving normalization to collect
        # or feeding the network with unnormalized rewards.
        # The latter is adopted in our implementation.
        # However, the following line currently doesn't store
        # a copy of unnormalized rewards
        agent.update_reward_rms(buffer['reward'], buffer['discount'])
        buffer.update('reward',
                      agent.normalize_reward(buffer['reward']),
                      field='all')
        agent.record_last_env_output(runner.env_output)
        value = agent.compute_value()
        buffer.finish(value)

        start_train_step = agent.train_step
        with tt:
            agent.learn_log(step)
        agent.store(tps=(agent.train_step - start_train_step) / tt.last())
        buffer.reset()

        if to_eval(agent.train_step) or step > agent.MAX_STEPS:
            with TempStore(agent.get_states, agent.reset_states):
                with et:
                    eval_score, eval_epslen, video = evaluate(
                        eval_env,
                        agent,
                        n=agent.N_EVAL_EPISODES,
                        record=agent.RECORD,
                        size=(64, 64))
                if agent.RECORD:
                    video_summary(f'{agent.name}/sim', video, step=step)
                agent.store(eval_score=eval_score, eval_epslen=eval_epslen)

        if to_log(agent.train_step) and agent.contains_stats('score'):
            with lt:
                agent.store(
                    **{
                        'train_step': agent.train_step,
                        'time/run': rt.total(),
                        'time/train': tt.total(),
                        'time/eval': et.total(),
                        'time/log': lt.total(),
                        'time/run_mean': rt.average(),
                        'time/train_mean': tt.average(),
                        'time/eval_mean': et.average(),
                        'time/log_mean': lt.average(),
                    })
                agent.log(step)
                agent.save()
Example #14
def get_mockingjay_args():

    parser = argparse.ArgumentParser(
        description='Argument Parser for the mockingjay project.')

    # setting
    parser.add_argument('--config',
                        default='config/mockingjay_libri.yaml',
                        type=str,
                        help='Path to experiment config.')
    parser.add_argument('--seed',
                        default=1337,
                        type=int,
                        help='Random seed for reproducible results.',
                        required=False)

    # Logging
    parser.add_argument('--logdir',
                        default='log/log_mockingjay/',
                        type=str,
                        help='Logging path.',
                        required=False)
    parser.add_argument('--name',
                        default=None,
                        type=str,
                        help='Name for logging.',
                        required=False)

    # model ckpt
    parser.add_argument(
        '--load',
        action='store_true',
        help=
        'Load pre-trained model to restore training, no need to specify this during testing.'
    )
    parser.add_argument('--ckpdir',
                        default='result/result_mockingjay/',
                        type=str,
                        help='Checkpoint/Result path.',
                        required=False)
    parser.add_argument(
        '--ckpt',
        default='mockingjay_libri_sd1337_LinearLarge/mockingjay-500000.ckpt',
        type=str,
        help='path to mockingjay model checkpoint.',
        required=False)
    # parser.add_argument('--ckpt', default='mockingjay_libri_sd1337_MelBase/mockingjay-500000.ckpt', type=str, help='path to mockingjay model checkpoint.', required=False)
    parser.add_argument(
        '--dckpt',
        default=
        'baseline_sentiment_libri_sd1337/baseline_sentiment-500000.ckpt',
        type=str,
        help='path to downstream checkpoint.',
        required=False)
    parser.add_argument(
        '--apc_path',
        default='./result/result_apc/apc_libri_sd1337_standard/apc-500000.ckpt',
        type=str,
        help='path to the apc model checkpoint.',
        required=False)

    # mockingjay
    parser.add_argument('--train',
                        action='store_true',
                        help='Train the model.')
    parser.add_argument(
        '--run_mockingjay',
        action='store_true',
        help=
        'train and test the downstream tasks using mockingjay representations.'
    )
    parser.add_argument(
        '--run_apc',
        action='store_true',
        help='train and test the downstream tasks using apc representations.')
    parser.add_argument(
        '--fine_tune',
        action='store_true',
        help='fine tune the mockingjay model with downstream task.')
    parser.add_argument('--plot',
                        action='store_true',
                        help='Plot model generated results during testing.')

    # phone task
    parser.add_argument(
        '--train_phone',
        action='store_true',
        help='Train the phone classifier on mel or mockingjay representations.'
    )
    parser.add_argument(
        '--test_phone',
        action='store_true',
        help=
        'Test mel or mockingjay representations using the trained phone classifier.'
    )

    # sentiment task
    parser.add_argument(
        '--train_sentiment',
        action='store_true',
        help=
        'Train the sentiment classifier on mel or mockingjay representations.')
    parser.add_argument(
        '--test_sentiment',
        action='store_true',
        help=
        'Test mel or mockingjay representations using the trained sentiment classifier.'
    )

    # speaker verification task
    parser.add_argument(
        '--train_speaker',
        action='store_true',
        help=
        'Train the speaker classifier on mel or mockingjay representations.')
    parser.add_argument(
        '--test_speaker',
        action='store_true',
        help=
        'Test mel or mockingjay representations using the trained speaker classifier.'
    )

    # Options
    parser.add_argument(
        '--with_head',
        action='store_true',
        help=
        'inference with the spectrogram head, the model outputs spectrogram.')
    parser.add_argument('--output_attention',
                        action='store_true',
                        help='plot attention')
    parser.add_argument(
        '--load_ws',
        default=
        'result/result_mockingjay_sentiment/10111754-10170300-weight_sum/best_val.ckpt',
        help='load weighted-sum weights from trained downstream model')
    parser.add_argument('--cpu',
                        action='store_true',
                        help='Disable GPU training.')
    parser.add_argument('--no-msg',
                        action='store_true',
                        help='Hide all messages.')

    args = parser.parse_args()
    setattr(args, 'gpu', not args.cpu)
    setattr(args, 'verbose', not args.no_msg)
    config = yaml.load(open(args.config, 'r'), Loader=yaml.FullLoader)
    config['timer'] = Timer()

    return config, args
Example #15
        # when computing usage in Freeness.usage
        tensors_to_check = [
            inputs, initial_state.access_output, 
            initial_state.access_state.memory, 
            initial_state.access_state.read_weights,
            initial_state.access_state.linkage.link, 
            initial_state.access_state.linkage.precedence_weights,
            initial_state.controller_state
        ]

        theoretical, numerical = tf.test.compute_gradient(
            forward,
            tensors_to_check,
            delta=1e-5)
        err = 0
        for a1, a2 in zip(theoretical, numerical):
            err = np.maximum(err, np.max(np.abs(a1-a2)))

        assert err < .1

if __name__ == '__main__':
    from utility.timer import Timer
    writer = tf.summary.create_file_writer(f'logs/dnc_profiler')
    writer.set_as_default()
    test = TestClass()
    tf.summary.trace_on(profiler=True)
    with Timer('gradient', 1):
        test.test_gradients()
    tf.summary.trace_export('grad', step=0, profiler_outdir='logs/dnc_profiler')
    writer.flush()
Example #16
 def initialise(self) -> None:
     self.__frame_timer = Timer()
Example #17
    def generate_map_from_image(self,
                                image_name: str,
                                rand_entities: bool = False,
                                entity_radius: int = None,
                                house_expo_flag: bool = False) -> Map:
        """
        Generate a map from an image
        Load the image from the default location and save the map in the default location
        :param image_name: The image name
        :param rand_entities: If True, place the agent and goal at random positions instead of reading them from the image
        :param entity_radius: If given, overrides the agent/goal radius detected from the image
        :param house_expo_flag: If True, treat the image as a HouseExpo map (walls only, no agent/goal colors)
        :return: The map
        """
        self.__services.debug.write(
            "Started map generation from image: " + str(image_name) +
            " With House_expo = " + str(house_expo_flag), DebugLevel.BASIC)
        timer: Timer = Timer()

        # loading image
        if house_expo_flag:
            surface: np.ndarray = self.__services.resources.house_expo_dir.load(
                image_name)
        else:
            surface: np.ndarray = self.__services.resources.images_dir.load(
                image_name)
        height, width, _ = surface.shape
        self.__services.debug.write(
            "Image loaded with Resolution:" + str(width) + " x " + str(height),
            DebugLevel.HIGH)

        grid = np.full(surface.shape[:-1], Map.CLEAR_ID, dtype=np.uint8)
        agent_avg_location: np.ndarray = np.array([.0, .0])
        agent_avg_count: int = 1
        goal_avg_location: np.ndarray = np.array([.0, .0])
        goal_avg_count: int = 1

        if house_expo_flag:
            '''
            We can optimize for the house_expo dataset by skipping the check for the goal and agent at each pixel;
            instead, we only need to identify obstacles
            '''
            self.__services.debug.write("Begin iteration through map",
                                        DebugLevel.HIGH)
            for idx in np.ndindex(surface.shape[:-1]):
                if Generator.is_in_color_range(surface[idx],
                                               Generator.WALL_COLOR):
                    grid[idx] = DenseMap.WALL_ID
        else:
            for idx in np.ndindex(surface.shape[:-1]):
                if Generator.is_in_color_range(surface[idx],
                                               Generator.AGENT_COLOR, 5):
                    agent_avg_location, agent_avg_count = \
                        Generator.increment_moving_average(agent_avg_location, agent_avg_count, np.array(idx[::-1]))
                elif Generator.is_in_color_range(surface[idx],
                                                 Generator.GOAL_COLOR, 5):
                    goal_avg_location, goal_avg_count = \
                        Generator.increment_moving_average(goal_avg_location, goal_avg_count, np.array(idx[::-1]))
                if Generator.is_in_color_range(surface[idx],
                                               Generator.WALL_COLOR):
                    grid[idx] = DenseMap.WALL_ID

        agent_avg_location = np.array(agent_avg_location, dtype=int)
        goal_avg_location = np.array(goal_avg_location, dtype=int)
        agent_radius: float = 0

        if rand_entities:
            self.__place_random_agent_and_goal(grid, Size(height, width))
            self.__services.debug.write("Placed random agent and goal ",
                                        DebugLevel.HIGH)
        else:
            grid[tuple(agent_avg_location[::-1])] = DenseMap.AGENT_ID
            grid[tuple(goal_avg_location[::-1])] = DenseMap.GOAL_ID

        if not house_expo_flag:
            '''
            We can optimize the house_expo generation by skipping this step, 
            as we have already defined the agent radius
            '''
            self.__services.debug.write(
                "Skipped agent_radius change checking ", DebugLevel.HIGH)

            for idx in np.ndindex(surface.shape[:-1]):
                if Generator.is_in_color_range(surface[idx],
                                               Generator.AGENT_COLOR, 5):
                    '''
                    If the color at (x, y) is red (agent), set the agent radius to the max of
                    the previous radius and the distance between the agent location and this point.
                    This effectively defines the agent radius as the largest red region, although it is
                    not needed when we supply our own radius.
                    '''
                    agent_radius = max(
                        agent_radius,
                        np.linalg.norm(agent_avg_location -
                                       np.array(idx[::-1])))

        agent_radius = int(agent_radius)

        if entity_radius:
            agent_radius = entity_radius

        res_map: DenseMap = DenseMap(grid)
        res_map.agent.radius = agent_radius
        res_map.goal.radius = agent_radius

        self.__services.debug.write(
            "Generated initial dense map in " + str(timer.stop()) + " seconds",
            DebugLevel.BASIC)
        timer = Timer()
        res_map.extend_walls()
        self.__services.debug.write(
            "Extended walls in " + str(timer.stop()) + " seconds",
            DebugLevel.BASIC)
        map_name: str = str(image_name.split('.')[0]) + ".pickle"
        if house_expo_flag:
            path = os.path.join(os.path.join(DATA_PATH, "maps"), "house_expo")
            self.__services.resources.house_expo_dir.save(
                map_name, res_map, path)
        else:
            self.__services.resources.maps_dir.save(map_name, res_map)
        self.__services.debug.write(
            "Finished generation. Map is in resources folder",
            DebugLevel.BASIC)
        return res_map
Example #18
def train(agent, env, eval_env, buffer):
    def collect(env, step, reset, next_obs, **kwargs):
        buffer.add(**kwargs)

    step = agent.env_step
    runner = Runner(env, agent, step=step, nsteps=agent.N_STEPS)
    actsel = lambda *args, **kwargs: np.random.randint(
        0, env.action_dim, size=env.n_envs)
    if not agent.rnd_rms_restored():
        print('Start to initialize observation running stats...')
        for _ in range(50):
            runner.run(action_selector=actsel, step_fn=collect)
            agent.update_obs_rms(buffer['obs'])
            buffer.reset()
        buffer.clear()
        agent.save()
        runner.step = step

    to_log = Every(agent.LOG_PERIOD, agent.LOG_PERIOD)
    to_eval = Every(agent.EVAL_PERIOD)
    print('Training starts...')
    while step < agent.MAX_STEPS:
        start_env_step = agent.env_step
        with Timer('env') as rt:
            step = runner.run(step_fn=collect)
        agent.store(fps=(step - start_env_step) / rt.last())

        agent.record_last_env_output(runner.env_output)
        value_int, value_ext = agent.compute_value()
        obs = buffer.get_obs(runner.env_output.obs)
        assert obs.shape[:2] == (env.n_envs, agent.N_STEPS + 1)
        assert obs.dtype == np.uint8
        agent.update_obs_rms(obs[:, :-1])
        norm_obs = agent.normalize_obs(obs)
        # compute intrinsic reward from the next normalized obs
        reward_int = agent.compute_int_reward(norm_obs[:, 1:])
        agent.update_int_reward_rms(reward_int)
        reward_int = agent.normalize_int_reward(reward_int)
        buffer.finish(reward_int, norm_obs[:, :-1], value_int, value_ext)
        agent.store(
            reward_int_max=np.max(reward_int),
            reward_int_min=np.min(reward_int),
            reward_int=np.mean(reward_int),
            reward_int_std=np.std(reward_int),
        )

        start_train_step = agent.train_step
        with Timer('train') as tt:
            agent.learn_log(step)
        agent.store(tps=(agent.train_step - start_train_step) / tt.last())
        buffer.reset()

        if to_eval(agent.train_step):
            with TempStore(agent.get_states, agent.reset_states):
                scores, epslens, video = evaluate(eval_env,
                                                  agent,
                                                  record=True,
                                                  video_len=4500)
                video_summary(f'{agent.name}/sim', video, step=step)
                if eval_env.n_envs == 1:
                    rews_int, rews_ext = agent.retrieve_eval_rewards()
                    assert len(rews_ext) == len(rews_int) == video.shape[1], (
                        len(rews_ext), len(rews_int), video.shape[1])
                    n = 10
                    idxes_int = rews_int.argsort()[::-1][:n]
                    idxes_ext = rews_ext.argsort()[::-1][:n]
                    assert idxes_int.shape == idxes_ext.shape, (
                        idxes_int.shape, idxes_ext.shape)

                    imgs_int = video[0, idxes_int]
                    imgs_ext = video[0, idxes_ext]
                    rews_int = rews_int[idxes_int]
                    rews_ext = rews_ext[idxes_ext]
                    terms = {
                        **{
                            f'eval/reward_int_{i}': rews_int[i]
                            for i in range(0, n)
                        },
                        **{
                            f'eval/reward_ext_{i}': rews_ext[i]
                            for i in range(0, n)
                        },
                    }
                    agent.store(**terms)
                    imgs = np.concatenate([imgs_int[:n], imgs_ext[:n]], 0)
                    image_summary(f'{agent.name}/img', imgs, step=step)

                    # info = eval_env.info()[0]
                    # episode = info.get('episode', {'visited_rooms': 1})
                    # agent.store(visited_rooms_max=len(episode['visited_rooms']))
                    agent.histogram_summary(
                        {'eval/action': agent.retrieve_eval_actions()},
                        step=step)
                agent.store(eval_score=scores, eval_epslen=epslens)

        if to_log(agent.train_step) and agent.contains_stats('score'):
            agent.store(
                **{
                    'episodes': runner.episodes,
                    'train_step': agent.train_step,
                    'time/run': rt.total(),
                    'time/train': tt.total()
                })
            agent.log(step)
            agent.save()
Example #19
def train(agent, env, eval_env, replay):
    collect_fn = pkg.import_module('agent', algo=agent.name).collect
    collect = functools.partial(collect_fn, replay)

    em = pkg.import_module(env.name.split("_")[0], pkg='env')
    info_func = em.info_func if hasattr(em, 'info_func') else None

    env_step = agent.env_step
    runner = Runner(env,
                    agent,
                    step=env_step,
                    run_mode=RunMode.TRAJ,
                    info_func=info_func)
    agent.TRAIN_PERIOD = env.max_episode_steps
    while not replay.good_to_learn():
        env_step = runner.run(step_fn=collect)
        replay.finish_episodes()

    to_log = Every(agent.LOG_PERIOD, agent.LOG_PERIOD)
    to_eval = Every(agent.EVAL_PERIOD)
    to_record = Every(agent.EVAL_PERIOD * 10)
    rt = Timer('run')
    tt = Timer('train')
    # et = Timer('eval')
    lt = Timer('log')
    print('Training starts...')
    while env_step <= int(agent.MAX_STEPS):
        with rt:
            env_step = runner.run(step_fn=collect)
        replay.finish_episodes()
        assert np.all(runner.env_output.reset), \
            (runner.env_output.reset, env.info().get('score', 0), env.info().get('epslen', 0))
        with tt:
            agent.learn_log(env_step)

        # if to_eval(env_step):
        #     with TempStore(agent.get_states, agent.reset_states):
        #         with et:
        #             record = agent.RECORD and to_record(env_step)
        #             eval_score, eval_epslen, video = evaluate(
        #                 eval_env, agent, n=agent.N_EVAL_EPISODES,
        #                 record=agent.RECORD, size=(64, 64))
        #             if record:
        #                 video_summary(f'{agent.name}/sim', video, step=env_step)
        #             agent.store(
        #                 eval_score=eval_score,
        #                 eval_epslen=eval_epslen)

        if to_log(env_step):
            with lt:
                fps = rt.average() * agent.TRAIN_PERIOD
                tps = tt.average() * agent.N_UPDATES

                agent.store(
                    env_step=agent.env_step,
                    train_step=agent.train_step,
                    fps=fps,
                    tps=tps,
                )
                agent.store(
                    **{
                        'train_step': agent.train_step,
                        'time/run': rt.total(),
                        'time/train': tt.total(),
                        # 'time/eval': et.total(),
                        'time/log': lt.total(),
                        'time/run_mean': rt.average(),
                        'time/train_mean': tt.average(),
                        # 'time/eval_mean': et.average(),
                        'time/log_mean': lt.average(),
                    })
                agent.log(env_step)
                agent.save()
Example #20
    def exec(self):
        ''' Testing of downstream tasks'''
        self.verbose('Testing set total ' + str(len(self.dataloader)) + ' batches.')
        timer = Timer()
        timer.start()

        valid_count = 0
        correct_count = 0
        loss_sum = 0
        all_logits = []

        oom_counter = 0
        for features, labels in tqdm(self.dataloader, desc="Iteration"):
            with torch.no_grad():
                try:
                    # features: (1, batch_size, seq_len, feature)
                    # the dimension of labels depends on the task and dataset, but the first dimension is always trivial due to bucketing
                    labels = labels.squeeze(0).to(device=self.device)

                    if self.run_mockingjay and self.paras.with_head:
                        # representations shape: (batch_size, seq_len, feature)
                        representations = self.mockingjay.forward_with_head(features, process_from_loader=True)
                        features = self.up_sample_frames(features[0].squeeze(0))
                    elif self.run_mockingjay and self.fine_tune:
                        # representations shape: (batch_size, seq_len, feature)
                        representations = self.mockingjay.forward_fine_tune(features, tile=False if 'speaker' in self.task else True, process_from_loader=True)
                        features = self.up_sample_frames(features[0].squeeze(0)) if 'speaker' not in self.task else features[0].squeeze(0)
                    elif self.run_mockingjay:
                        # representations shape: (batch_size, layer, seq_len, feature)
                        representations = self.mockingjay.forward(features, tile=False if 'speaker' in self.task else True, process_from_loader=True)
                        features = self.up_sample_frames(features[0].squeeze(0)) if 'speaker' not in self.task else features[0].squeeze(0)
                    elif self.run_apc:
                        # representations shape: (batch_size, layer, seq_len, feature)
                        representations = self.apc.forward(features)
                        features = features.squeeze(0)
                    else:
                        # representations shape: (batch_size, seq_len, feature)
                        features = features.squeeze(0)
                        representations = features.to(device=self.device, dtype=torch.float32)

                    # Because of zero padding, some timestamps of the features are not valid.
                    # For each timestamp, we mark 1 if it is valid and 0 otherwise.
                    # This variable is useful for frame-wise metrics, such as phoneme recognition or speaker verification.
                    # label_mask: (batch_size, seq_len), LongTensor
                    label_mask = (features.sum(dim=-1) != 0).type(torch.LongTensor).to(device=self.device, dtype=torch.long)
                    valid_lengths = label_mask.sum(dim=1)

                    if self.model_type == 'linear':
                        # labels: (batch_size, seq_len)
                        loss, logits, correct, valid = self.classifier(representations, labels, label_mask)
                    elif self.model_type == 'rnn':
                        # labels: (batch_size, )
                        loss, logits, correct, valid = self.classifier(representations, labels, valid_lengths)
                    else:
                        raise NotImplementedError
                    
                    loss_sum += loss.detach().cpu().item()
                    all_logits.append(logits)
                    correct_count += correct.item()
                    valid_count += valid.item()

                except RuntimeError:
                    if oom_counter > 10: break
                    else: oom_counter += 1
                    print('CUDA out of memory during testing, aborting after ' + str(10 - oom_counter) + ' more tries...')
                    torch.cuda.empty_cache()

        average_loss = loss_sum / len(self.dataloader)
        test_acc = correct_count * 1.0 / valid_count
        self.verbose(f'Test result: loss {average_loss}, acc {test_acc}')

        timer.end()
        timer.report()
        
        return average_loss, test_acc, all_logits
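Across these examples the Timer utility is used in several ways: constructed with an optional name (and, in Example #15, a second numeric argument), used as a context manager around timed blocks, and queried through stop(), last(), average(), and total(); Example #20 also calls start(), end(), and report(). The sketch below reconstructs an interface consistent with those usages; the names and behaviour are inferred from the examples only and are not the actual utility.timer implementation.

import time


class Timer:
    """Sketch of a timer matching the usage patterns above (assumed, not the real one)."""

    def __init__(self, name=None, period=1):
        self._name = name
        self._period = period          # second constructor argument seen in Example #15; its role is assumed
        self._start = time.time()      # several examples call stop() without an explicit start()
        self._last = 0.0
        self._total = 0.0
        self._count = 0

    def start(self):
        self._start = time.time()

    def stop(self):
        """Record and return the seconds elapsed since construction or start()."""
        self._last = time.time() - self._start
        self._total += self._last
        self._count += 1
        return self._last

    end = stop                         # Example #20 pairs start()/end()

    def report(self):
        print(f"[{self._name}] total: {self._total:.2f}s over {self._count} interval(s)")

    def last(self):
        return self._last

    def total(self):
        return self._total

    def average(self):
        return self._total / self._count if self._count else 0.0

    def __enter__(self):               # `with Timer('run') as rt:` times the block body
        self.start()
        return self

    def __exit__(self, *exc):
        self.stop()
        return False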