Example #1
 def track_absolute(self, value):
     """
     Returns: True if we enter the next period
     """
     U.assert_type(value, int)
     self.value = value
     return self._update_endpoint()
Example #2
    def __init__(self, env,
                 agent_id,
                 session_config,
                 separate_plots=True):
        """
        Display "reward" and "step_per_s" curves on Tensorboard

        Args:
            env:
            agent_id: int.
            session_config: to construct AgentTensorplex
            - interval: log to Tensorplex every N episodes.
            - average_episodes: average rewards/speed over the last N episodes
            separate_plots: True to put reward plot in a separate section on
                Tensorboard, False to put all plots together
        """
        super().__init__(env)
        U.assert_type(agent_id, int)
        self.tensorplex = get_tensorplex_client(
            '{}/{}'.format('agent', agent_id),
            session_config
        )
        interval = session_config['tensorplex']['update_schedule']['training_env']
        self._periodic = PeriodicTracker(interval)
        self._avg = interval
        self._separate_plots = separate_plots
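
The logging interval is read from a nested key of `session_config`. A minimal sketch of the fragment this lookup assumes; only the key path comes from the code above, and the value 10 is an arbitrary placeholder:

session_config = {
    'tensorplex': {
        'update_schedule': {
            'training_env': 10,   # log to Tensorplex every 10 episodes
        },
    },
}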
Example #3
 def __init__(self, obs_spec, action_spec, gamma):
     U.assert_type(obs_spec, dict)
     U.assert_type(action_spec, dict)
     self.action_type = ActionType[action_spec['type']]
     self.action_spec = action_spec
     self.obs_spec = obs_spec
     self.gamma = gamma
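
A sketch of the argument shapes this constructor expects; only `action_spec['type']` is actually read here, and the concrete keys and values below are invented placeholders ('discrete' is assumed to be a member of `ActionType`):

obs_spec = {'shape': (84, 84, 3)}             # placeholder contents; only asserted to be a dict
action_spec = {'type': 'discrete', 'dim': 4}  # 'type' must name an ActionType member
gamma = 0.99                                  # discount factor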
Example #4
 def track_increment(self, incr=1):
     """
     Returns: True if we enter the next period
     """
     U.assert_type(incr, int)
     self.value += incr
     return self._update_endpoint()
Example #5
    def init_dueling(self, *, action_dim, prelinear_size, fc_hidden_sizes,
                     dueling):
        """
        Args:
            - prelinear_size: size of feature vector before the linear layers,
                like flattened conv or LSTM features
            - fc_hidden_sizes: list of fully connected layer sizes before `action_dim` softmax
        """
        self.dueling = dueling
        self.prelinear_size = prelinear_size
        U.assert_type(fc_hidden_sizes, list)
        hiddens = [prelinear_size] + fc_hidden_sizes
        self.fc_action_layers = nn.ModuleList()
        hidden_list = hiddens + [action_dim]
        for prev_h, next_h in zip(hidden_list[:-1], hidden_list[1:]):
            lin = nn.Linear(prev_h, next_h)
            U.conv_fc_init(lin)
            self.fc_action_layers.append(lin)

        if dueling:
            self.fc_state_layers = nn.ModuleList()
            # output a single state value
            hidden_list = hiddens + [1]
            for prev_h, next_h in zip(hidden_list[:-1], hidden_list[1:]):
                lin = nn.Linear(prev_h, next_h)
                U.conv_fc_init(lin)
                self.fc_state_layers.append(lin)
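
A worked trace of the layer sizes this method builds, using hypothetical values prelinear_size=512, fc_hidden_sizes=[256], action_dim=4 and dueling=True:

# hiddens = [512, 256]
# action head: hidden_list = [512, 256, 4]
#   fc_action_layers = [Linear(512, 256), Linear(256, 4)]   # one output per action
# state head:  hidden_list = [512, 256, 1]
#   fc_state_layers  = [Linear(512, 256), Linear(256, 1)]   # single state value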
Example #6
 def __init__(self, tensorplex, min_update_interval):
     U.assert_type(tensorplex, TensorplexClient)
     self.tensorplex = tensorplex
     self.min_update_interval = min_update_interval
     self.history = U.AverageDictionary()
     self.lock = Lock()
     self.tracker = U.TimedTracker(self.min_update_interval)
     self.init_time = time.time()
Example #7
 def __init__(self, period, init_value=0, init_endpoint=0):
     """
     first: if True, triggers at the first time
     """
     U.assert_type(period, int)
     assert period > 0
     U.assert_type(init_value, int)
     self.period = period
     self.value = init_value
     self._endpoint = init_endpoint
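
Taken together with track_increment and track_absolute above, the tracker can be exercised like this. A sketch: it assumes _update_endpoint(), which is not shown, returns True exactly when the value crosses into the next period, as the docstrings state.

tracker = PeriodicTracker(period=5)
for step in range(1, 11):
    if tracker.track_increment():
        # expected to fire at step 5 and step 10
        print('entered a new period at step', step)

tracker.track_absolute(23)   # jump the counter directly to an absolute value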
Example #8
 def __init__(self, *,
              host,
              port,
              flush_iteration):
     """
     Args:
         flush_iteration: how many send() calls before we flush the buffer
     """
     U.assert_type(flush_iteration, int)
     self._client = ZmqSender(host=host,
                              port=port)
     self._exp_buffer = ExpBuffer()
     self._flush_tracker = PeriodicTracker(flush_iteration)
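
A hedged instantiation sketch; the enclosing class name is not shown in the snippet (ExpSender is assumed), and the host/port values are placeholders:

sender = ExpSender(host='localhost', port=7000, flush_iteration=100)
# Per the docstring, the internal PeriodicTracker fires every 100 send() calls,
# at which point the buffered experience is flushed through the ZmqSender.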
Example #9
 def add(self, hash_dict, nonhash_dict):
     """
     Args:
         hash_dict: {obs: [ .. can be nested .. ]}; each value is hashed and
             stored under its key with `_hash` appended
         nonhash_dict: {reward: -1.2, done: True, ...}
     """
     U.assert_type(hash_dict, dict)
     U.assert_type(nonhash_dict, dict)
     exp = {}
     for key, values in hash_dict.items():
         assert not key.endswith('_hash'), 'do not manually append `_hash`'
         exp[key + '_hash'] = self._hash_nested(values)
     exp.update(nonhash_dict)
     self.exp_list.append(exp)
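
A usage sketch based on the loop above; `buffer` is a hypothetical instance of the enclosing class:

buffer.add(
    hash_dict={'obs': [[0.1, 0.2], [0.3, 0.4]]},    # keys must NOT end in '_hash'
    nonhash_dict={'reward': -1.2, 'done': True},
)
# The stored entry becomes:
# {'obs_hash': <result of _hash_nested(...)>, 'reward': -1.2, 'done': True}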
Example #10
 def _sample_request_handler(self, req):
     """
     Handle requests to the learner
     https://stackoverflow.com/questions/29082268/python-time-sleep-vs-event-wait
     Since we don't have an external notification mechanism, we simply poll
     with sleep()
     """
     batch_size = U.deserialize(req)
     U.assert_type(batch_size, int)
     while not self.start_sample_condition():
         time.sleep(0.01)
     self.cumulative_sampled_count += batch_size
     self.cumulative_request_count += 1
     with self.sample_time.time():
         sample = self.sample(batch_size)
     with self.serialize_time.time():
         return U.serialize(sample)
Example #11
def extend_config(config, default_config):
    """
    default_config must specify all the expected keys. Use the following special
    values for required placeholders:

    * _req_: require a single value (not a list or dict)
    * _req_DICT_: require a dict
    * _req_LIST_: require a list

    Returns:
        AttributeDict: `config` with any unspecified keys filled in from
        `default_config`

    Raises:
        ConfigError if required placeholders are not satisfied
    """
    U.assert_type(config, dict)
    U.assert_type(default_config, dict)
    return Config(_fill_default_config(config, default_config, []))
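
A minimal usage sketch of the placeholder semantics documented above; all key names and values here are invented for illustration:

default_config = {
    'lr': 1e-3,               # plain default, used when `config` omits it
    'env_name': '_req_',      # caller must supply a single value
    'layers': '_req_LIST_',   # caller must supply a list
}
config = {
    'env_name': 'HalfCheetah-v2',
    'layers': [64, 64],
}
merged = extend_config(config, default_config)
# merged.lr == 1e-3, merged.env_name == 'HalfCheetah-v2', merged.layers == [64, 64]
# (assuming AttributeDict exposes keys as attributes, as the name suggests)
# Omitting 'env_name' or 'layers' from `config` would raise ConfigError.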
Example #12
 def get_exploration_schedule(self):
     C = self.learner_config.algo.exploration
     if C.schedule.lower() == 'linear':
         return U.LinearSchedule(
             initial_p=1.0,
             final_p=C.final_eps,
             schedule_timesteps=int(C.steps),
         )
     else:
         steps = C.steps
         final_epses = C.final_eps
         U.assert_type(steps, list)
         U.assert_type(final_epses, list)
         assert len(steps) == len(final_epses)
         endpoints = [(0, 1.0)]
         for step, eps in zip(steps, final_epses):
             endpoints.append((step, eps))
         return U.PiecewiseSchedule(
             endpoints=endpoints,
             outside_value=final_epses[-1]
         )
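
A worked trace of the non-linear branch, using hypothetical config values:

steps = [10000, 50000]
final_epses = [0.1, 0.02]
# The loop above then builds:
#   endpoints = [(0, 1.0), (10000, 0.1), (50000, 0.02)]
#   outside_value = 0.02   # final_epses[-1], used beyond the last step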
Example #13
 def __init__(self, module_dict):
     U.assert_type(module_dict, dict)
     for k, m in module_dict.items():
         U.assert_type(k, str, 'Key "{}" must be string.'.format(k))
         U.assert_type(m, nnx.Module,
                       'Value under key "{}" must be a torchx.nn.Module.'.format(k))
     self._module_dict = module_dict
Example #14
 def __init__(self,
              tensorplex,
              period,
              is_average=True,
              keep_full_history=False):
     """
     Args:
         tensorplex: TensorplexClient object
         period: when you call `update()`, it will only send to Tensorplex
             at the specified period.
         is_average: if True, send the averaged value over the last `period`.
         keep_full_history: if False, only keep the last `period` of history.
     """
     if tensorplex is not None:  # None to turn off tensorplex
         U.assert_type(tensorplex, TensorplexClient)
     U.assert_type(period, int)
     assert period > 0
     self._tplex = tensorplex
     self._period = period
     self._is_average = is_average
     self._keep_full_history = keep_full_history
     self._tracker = PeriodicTracker(period)
     self._history = {}
     self._max_deque_size = None if keep_full_history else period
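
The pieces above combine into a simple throttling scheme. A sketch of the implied behavior (the `update()` method mentioned in the docstring is not shown here):

# With period=10 and is_average=True:
#   - the PeriodicTracker lets only every 10th update() through to Tensorplex
#   - the value actually sent is the average over the last 10 updates
#   - with keep_full_history=False, _history keeps at most the last 10 values
#     (_max_deque_size == 10)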
Example #15
 def extend(self, default_config):
     U.assert_type(default_config, dict)
     return _fill_default_config(self, default_config, [])