Example #1
  def step(self, step_type, reward, obs, prev_state):
    """Step through and return an action.
    This function will only be called once for graph creation and
    the resulting graph will be run repeatedly for agent evaluation.

    All the fields below are expected to be batched along the first
    dimension (there is no time dimension).

    Args:
      step_type: [B,] Current step type.
      reward: [B,] Previous step reward.
      obs: Current observations.
      prev_state: Previous agent state.

    Returns:
      StepOutput
    """

    self._validate_observations(obs)
    with tf.variable_scope(self._name):
      # Convert the graph-feature dict to a GraphsTuple and flatten it
      # for the policy network.
      obs['graph_features'] = self._process_graph_features(obs['graph_features'])

      logits, _ = self._model.get_logits(self._model.compute_graph_embeddings(obs),
                                         obs['node_mask'])

      action = sample_from_logits(logits, self.seed)
      return StepOutput(
          action,
          logits,
          self._model.dummy_state(infer_shape(step_type)[0]),
          self._model.dummy_state(infer_shape(step_type)[0]),
      )
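The helper sample_from_logits is not shown in these examples. Below is a minimal sketch of what such a helper typically looks like in TF1, assuming it draws one action per batch element from the categorical distribution defined by the logits; the name and signature come from the calls above, the body is an assumption, not the source implementation.

import tensorflow as tf

def sample_from_logits(logits, seed=None):
  # Draw one sample per batch element from the [B, num_actions] logits.
  samples = tf.random.categorical(logits, num_samples=1, seed=seed)
  # Squeeze the sample dimension to get a [B,] vector of action indices.
  return tf.squeeze(tf.cast(samples, tf.int32), axis=-1)
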
Example #2
  def step(self, step_type, reward, obs, prev_state):
    """Step through and return an action.
    This function will only be called once for graph creation and
    the resulting graph will be run repeatedly for agent evaluation.

    All the fields below are expected to be batched along the first
    dimension (there is no time dimension).

    Args:
      step_type: [B,] Current step type.
      reward: [B,] Previous step reward.
      obs: Current observations.
      prev_state: Previous agent state.

    Returns:
      StepOutput
    """

    self._validate_observations(obs)
    with tf.variable_scope(self._name):
      # Convert the graph-feature dict to a GraphsTuple and flatten it
      # for the policy network.
      pack_as_structure = dict(**obs['graph_features'])
      obs['graph_features'] = self._process_graph_features(obs['graph_features'])
      ge = self._model.compute_graph_embeddings(obs)
      logitss, actions = self._model.get_actions(ge, obs)
      # Pack the graph embeddings by padding to the maximum number of nodes.
      packed_ge = self._model.pack_graph_embeddings(pack_as_structure, ge)
      return StepOutput(actions, logitss, self._model.dummy_state(infer_shape(step_type)[0]),
                        dict(**packed_ge._asdict()))
Example #3
 def step(self, step_type, reward, obs, prev_state):
     """Pick a random discrete action from action_spec."""
     with tf.variable_scope(self._name):
         with tf.name_scope('ur_step'):
             batch_size = tf.shape(step_type)[0]
             action = tf.fill((batch_size, ), 0)
             logits = tf.fill(tf.expand_dims(batch_size, 0), 0)
             return StepOutput(action, logits,
                               self._dummy_state(batch_size))
Example #4
    def testUpdate(self):
        agent = self._get_agent_instance()
        bs_ph = tf.placeholder_with_default(B, ())
        sess = self.session()

        init_state = agent.initial_state(bs=bs_ph)
        init_state_val = sess.run(init_state)

        step_type = np.zeros((T + 1, B), dtype=np.int32)
        reward = np.zeros((T + 1, B), dtype=np.float32)
        discount = np.zeros((T + 1, B), dtype=np.float32)

        var_type_mask = np.zeros((T + 1, B, N_NODES), dtype=np.int32)
        constraint_type_mask = np.zeros((T + 1, B, N_NODES), dtype=np.int32)
        obj_type_mask = np.zeros((T + 1, B, N_NODES), dtype=np.int32)
        var_type_mask[:, :, 0] = 1
        constraint_type_mask[:, :, 1] = 1
        obj_type_mask[:, :, 2] = 1

        obs = dict(features=np.zeros((T + 1, B, N_NODES), dtype=np.float32),
                   graph_features=self._get_graph_features_update(),
                   node_mask=np.ones(((T + 1), B, N_NODES), dtype=np.int32),
                   var_type_mask=var_type_mask,
                   constraint_type_mask=constraint_type_mask,
                   obj_type_mask=obj_type_mask)

        step_output = StepOutput(action=np.zeros((T, B), dtype=np.int32),
                                 logits=np.zeros((T, B, N_NODES),
                                                 dtype=np.float32),
                                 next_state=np.zeros_like(
                                     np.vstack([init_state_val] * T)))

        step_output, _, step_type, reward, obs, discount = agent.update_preprocess(
            step_output, None, step_type, reward, obs, discount)

        def f(np_arr):
            return tf.constant(np_arr)

        with tf.variable_scope('update', reuse=tf.AUTO_REUSE):
            agent.build_update_ops(
                nest.map_structure(f, step_output),
                tf.zeros_like(np.vstack([init_state_val] * (T + 1))),
                nest.map_structure(f, step_type),
                nest.map_structure(f, reward), nest.map_structure(f, obs),
                nest.map_structure(f, discount))

        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        for _ in range(3):
            agent.update(sess, {}, {})
            print('.', end='')
        print('')
        print('Done!')
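nest.map_structure applies a function to every leaf of an arbitrarily nested structure and returns a structure of the same shape; here it wraps every numpy array of the update batch in a tf.constant. A small standalone illustration, assuming the TF1 nest module (tf.contrib.framework.nest, later tf.nest):

from tensorflow.contrib.framework import nest  # tf.nest in newer TF versions

structure = {'a': 1, 'b': (2, 3)}
doubled = nest.map_structure(lambda x: x * 2, structure)
# doubled == {'a': 2, 'b': (4, 6)}; the dict/tuple nesting is preserved.
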
Example #5
  def step(self, step_type, reward, obs, prev_state):
    """Step through and return an action.
    This function will only be called once for graph creation and
    the resulting graph will be run repeatedly for agent evaluation.

    All the fields below are expected to be batched along the first
    dimension (there is no time dimension).

    Args:
      step_type: [B,] Current step type.
      reward: [B,] Previous step reward.
      obs: Current observations.
      prev_state: Previous agent state.

    Returns:
      StepOutput
    """
    with tf.variable_scope(self._name):
      if self.config.evaluation_mode:
        # Evaluate the GCN agent on the evaluators.
        # Convert the graph-feature dict to a GraphsTuple and flatten it
        # for the policy network.
        graph_features = gn.graphs.GraphsTuple(**obs['graph_features'])
        obs['graph_features'] = flatten_graphs(graph_features)

        logits, _ = self._model.get_logits(
            self._model.compute_graph_embeddings(obs), obs['node_mask'])

        action = sample_from_logits(logits, self.seed)
        return StepOutput(action, logits,
                          self._model.dummy_state(infer_shape(step_type)[0]))
      else:
        # Use the MLP model to imitate during training mode in the actors.
        logits, next_state, _ = self._mlp_model.get_logits_and_next_state(
            step_type, reward, obs, prev_state)
        action = sample_from_logits(logits, self.seed)
        return StepOutput(action, logits, next_state)
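infer_shape is used throughout these examples to recover the batch size from step_type. A common TF1 idiom for such a helper returns statically known dimensions where available and dynamic tensors otherwise; the sketch below follows that idiom and is an assumption, not the source implementation.

import tensorflow as tf

def infer_shape(x):
  # Prefer statically known dimensions; fall back to dynamic ones.
  static = x.shape.as_list()
  dynamic = tf.shape(x)
  return [dynamic[i] if dim is None else dim for i, dim in enumerate(static)]
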
Example #6
    def step(self, step_type, reward, obs, prev_state):
        """Step through and return an action.
    This function will only be called once for graph creation and
    the resulting graph will be run repeatedly for agent evaluation.

    All the fields below are expected to be batched along the first
    dimension (there is no time dimension).

    Args:
      step_type: [B,] Current step type.
      reward: [B,] Previous step reward.
      obs: Current observations.
      prev_state: Previous agent state.

    Returns:
      StepOutput
    """
        with tf.variable_scope(self._name):
            logits, next_state, _ = self._model.get_logits_and_next_state(
                step_type, reward, obs, prev_state)
            action = sample_from_logits(logits, self.seed)
            return StepOutput(action, logits, next_state)
Example #7
    def step(self, step_type, reward, obs, prev_state):
        """Pick a random discrete action from action_spec."""
        with tf.variable_scope(self._name):
            with tf.name_scope('ur_step'):
                batch_size = tf.shape(step_type)[0]
                if 'mask' in obs:
                    logits = tf.cast(tf.identity(obs['mask']), tf.float32)
                    logits *= 1e9  # scale the 0/1 mask so valid actions dominate the softmax
                    action = sample_from_logits(logits, self.seed)
                else:
                    base = tf.random.uniform(self._action_spec.shape,
                                             dtype=tf.float32,
                                             minval=0,
                                             maxval=1)

                    L = self._action_spec.minimum
                    R = self._action_spec.maximum

                    action = tf.cast(L + (base * (R - L)),
                                     self._action_spec.dtype)
                    logits = tf.fill(tf.expand_dims(batch_size, 0), 0)
                return StepOutput(action, logits,
                                  self._dummy_state(batch_size))
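The masking trick above works because a 0/1 mask scaled by a large constant (1e9) becomes a logits vector in which masked-out actions receive essentially zero probability under a softmax, so sampling only ever picks valid actions. A quick standalone check with hypothetical values:

import tensorflow as tf

mask = tf.constant([[1., 0., 1., 0.]])  # only actions 0 and 2 are valid
logits = mask * 1e9
probs = tf.nn.softmax(logits)
# probs evaluates to roughly [[0.5, 0.0, 0.5, 0.0]]: the invalid
# actions get negligible probability mass.
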
Example #8
 def start(self, step_type, reward, discount, observation, next_state):
     self.add(step_type, reward, discount, observation,
              StepOutput(next_state=next_state, action=None, logits=None))
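Across these examples StepOutput behaves like a namedtuple: most call sites pass (action, logits, next_state) positionally, Example #8 passes the same three fields by name, and Examples #1 and #2 pass a fourth value, which suggests an extended variant on that code path. A minimal sketch of the three-field form, as an assumption rather than the source definition:

import collections

StepOutput = collections.namedtuple('StepOutput',
                                    ['action', 'logits', 'next_state'])
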
Example #9
    def testUpdate(self):
        self._setup()
        agent = self._get_agent_instance()
        bs_ph = tf.placeholder_with_default(B, ())
        sess = self.session()

        init_state = agent.initial_state(bs=bs_ph)
        init_state_val = sess.run(init_state)

        step_type = np.zeros((T + 1, B), dtype=np.int32)
        reward = np.zeros((T + 1, B), dtype=np.float32)
        discount = np.zeros((T + 1, B), dtype=np.float32)
        obs = dict(features=np.zeros((T + 1, B, N_NODES), dtype=np.float32),
                   graph_features=self._get_graph_features_update(),
                   node_mask=np.ones(((T + 1), B, N_NODES), dtype=np.int32))

        step_output = StepOutput(action=np.zeros((T, B), dtype=np.int32),
                                 logits=np.zeros((T, B, N_NODES),
                                                 dtype=np.float32),
                                 next_state=np.zeros_like(
                                     np.vstack([init_state_val] * T)))

        step_output, _, step_type, reward, obs, discount = agent.update_preprocess(
            step_output, None, step_type, reward, obs, discount)

        feed_dict = {}

        def f(np_arr):
            ph = tf.placeholder(shape=np_arr.shape, dtype=np_arr.dtype)
            feed_dict[ph] = np_arr
            return ph

        with tf.variable_scope('update', reuse=tf.AUTO_REUSE):
            agent.build_update_ops(
                nest.map_structure(f, step_output),
                tf.zeros_like(np.vstack([init_state_val] * (T + 1))),
                nest.map_structure(f, step_type),
                nest.map_structure(f, reward), nest.map_structure(f, obs),
                nest.map_structure(f, discount))

        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        N_ITERS = 50
        for i in range(N_ITERS):
            profile_kwargs = {}
            if i == N_ITERS - 1:
                run_metadata = tf.RunMetadata()
                profile_kwargs = dict(options=tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE),
                                      run_metadata=run_metadata)

            agent.update(sess, feed_dict, profile_kwargs)
            print('.', end='')

        print('')

        # save the final timeline
        tl = timeline.Timeline(run_metadata.step_stats)
        ctf = tl.generate_chrome_trace_format()
        export_path = '/tmp/'
        with open(os.path.join(export_path, 'timeline.json'), 'w') as f:
            f.write(ctf)
        print('Done!')
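The exported file follows the Chrome trace-event JSON format, so it can be loaded directly in chrome://tracing (or Perfetto) to inspect per-op timing. A small sanity check on the export, assuming the standard layout with a top-level 'traceEvents' list:

import json

with open('/tmp/timeline.json') as f:
    trace = json.load(f)
print('number of trace events:', len(trace['traceEvents']))
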