def create_sampling_ops(self, use_staging):
  """Creates the ops necessary to sample from the replay buffer.

  Creates the transition dictionary containing the sampling tensors.

  Args:
    use_staging: bool, when True, uses a staging area to prefetch the next
      sampling batch.
  """
  with tf.name_scope('sample_replay'):
    with tf.device('/cpu:*'):
      transition_type = self.memory.get_transition_elements()
      transition_tensors = tf.py_func(
          self.memory.sample_transition_batch, [],
          [return_entry.type for return_entry in transition_type],
          name='replay_sample_py_func')
      self._set_transition_shape(transition_tensors, transition_type)
      if use_staging:
        transition_tensors = self._set_up_staging(transition_tensors)
        self._set_transition_shape(transition_tensors, transition_type)

      # Unpack sample transition into member variables.
      self.unpack_transition(transition_tensors, transition_type)
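# For reference, a minimal sketch of what _set_transition_shape does above
# (hedged: the actual helper is defined elsewhere in this class). It assumes
# each entry of transition_type carries a `shape` attribute alongside the
# `type` attribute used above; tf.py_func outputs carry no static shape, so
# the shapes must be re-attached by hand:
#
#   def _set_transition_shape(self, transition, transition_type):
#     # Pair each sampled tensor with its type descriptor and restore the
#     # static shape that tf.py_func discarded.
#     for element, element_type in zip(transition, transition_type):
#       element.set_shape(element_type.shape)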
def __init__(self,
             num_actions,
             observation_size,
             stack_size,
             use_staging=True,
             replay_capacity=1000000,
             batch_size=32,
             update_horizon=1,
             gamma=1.0,
             wrapped_memory=None):
  """Initializes a graph wrapper for the python replay memory.

  Args:
    num_actions: int, number of possible actions.
    observation_size: int, size of an input frame.
    stack_size: int, number of frames to use in state stack.
    use_staging: bool, when True, uses a staging area to prefetch the next
      sampling batch.
    replay_capacity: int, number of transitions to keep in memory.
    batch_size: int, batch size for sampling.
    update_horizon: int, length of update ('n' in n-step update).
    gamma: float, the discount factor.
    wrapped_memory: The 'inner' memory data structure. Defaults to None,
      which creates the standard DQN replay memory.

  Raises:
    ValueError: If update_horizon is not positive.
    ValueError: If discount factor is not in [0, 1].
  """
  if replay_capacity < update_horizon + 1:
    raise ValueError(
        'Update horizon (%i) should be significantly smaller '
        'than replay capacity (%i).' % (update_horizon, replay_capacity))
  if update_horizon < 1:
    raise ValueError('Update horizon must be positive.')
  if not 0.0 <= gamma <= 1.0:
    raise ValueError('Discount factor (gamma) must be in [0, 1].')

  # Allow subclasses to create self.memory.
  if wrapped_memory is not None:
    self.memory = wrapped_memory
  else:
    self.memory = OutOfGraphReplayMemory(num_actions, observation_size,
                                         stack_size, replay_capacity,
                                         batch_size, update_horizon, gamma)

  with tf.name_scope('replay'):
    with tf.name_scope('add_placeholders'):
      self.add_obs_ph = tf.placeholder(tf.uint8, [observation_size],
                                       name='add_obs_ph')
      self.add_action_ph = tf.placeholder(tf.int32, [], name='add_action_ph')
      self.add_reward_ph = tf.placeholder(tf.float32, [],
                                          name='add_reward_ph')
      self.add_terminal_ph = tf.placeholder(tf.uint8, [],
                                            name='add_terminal_ph')
      self.add_legal_actions_ph = tf.placeholder(
          tf.float32, [num_actions], name='add_legal_actions_ph')

    add_transition_ph = [
        self.add_obs_ph, self.add_action_ph, self.add_reward_ph,
        self.add_terminal_ph, self.add_legal_actions_ph
    ]

    with tf.device('/cpu:*'):
      self.add_transition_op = tf.py_func(
          self.memory.add, add_transition_ph, [], name='replay_add_py_func')

      self.transition = tf.py_func(
          self.memory.sample_transition_batch, [], [
              tf.uint8, tf.int32, tf.float32, tf.uint8, tf.uint8, tf.int32,
              tf.float32
          ],
          name='replay_sample_py_func')

      if use_staging:
        # To hide the py_func latency, use a staging area to pre-fetch the
        # next batch of transitions.
        (states, actions, rewards, next_states, terminals, indices,
         next_legal_actions) = self.transition
        # StagingArea requires all the shapes to be defined.
        states.set_shape([batch_size, observation_size, stack_size])
        actions.set_shape([batch_size])
        rewards.set_shape([batch_size])
        next_states.set_shape([batch_size, observation_size, stack_size])
        terminals.set_shape([batch_size])
        indices.set_shape([batch_size])
        next_legal_actions.set_shape([batch_size, num_actions])

        # Create the staging area on CPU.
        prefetch_area = tf.contrib.staging.StagingArea([
            tf.uint8, tf.int32, tf.float32, tf.uint8, tf.uint8, tf.int32,
            tf.float32
        ])

        self.prefetch_batch = prefetch_area.put(
            (states, actions, rewards, next_states, terminals, indices,
             next_legal_actions))
      else:
        self.prefetch_batch = tf.no_op()

    if use_staging:
      # Get the sampled transition batch on GPU. This performs the copy
      # from CPU to GPU.
      self.transition = prefetch_area.get()

    (self.states, self.actions, self.rewards, self.next_states,
     self.terminals, self.indices,
     self.next_legal_actions) = self.transition

    # Since these are py_func tensors, no information about their shape is
    # present. Set the shape only for the tensors that need it.
    self.states.set_shape([None, observation_size, stack_size])
    self.next_states.set_shape([None, observation_size, stack_size])
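# A usage sketch for this wrapper (hedged: the enclosing class name
# `WrappedReplayMemory` and the feed values are illustrative assumptions).
# Transitions are added and sampled by running the ops built above inside a
# session; with use_staging=True, prefetch_batch must run at least once
# before the first sample so the staging area is non-empty:
#
#   replay = WrappedReplayMemory(num_actions=4, observation_size=10,
#                                stack_size=1)
#   with tf.Session() as sess:
#     sess.run(replay.add_transition_op,
#              feed_dict={replay.add_obs_ph: observation,
#                         replay.add_action_ph: 0,
#                         replay.add_reward_ph: 1.0,
#                         replay.add_terminal_ph: 0,
#                         replay.add_legal_actions_ph: legal_actions})
#     sess.run(replay.prefetch_batch)  # fill the staging area once
#     states, actions = sess.run([replay.states, replay.actions])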
def __init__(self,
             num_actions=None,
             observation_size=None,
             num_players=None,
             gamma=0.99,
             update_horizon=1,
             min_replay_history=500,
             update_period=4,
             stack_size=1,
             target_update_period=500,
             epsilon_fn=linearly_decaying_epsilon,
             epsilon_train=0.02,
             epsilon_eval=0.001,
             epsilon_decay_period=1000,
             graph_template=dqn_template,
             tf_device='/cpu:*',
             use_staging=True,
             optimizer=tf.train.RMSPropOptimizer(
                 learning_rate=.0025,
                 decay=0.95,
                 momentum=0.0,
                 epsilon=1e-6,
                 centered=True)):
  """Initializes the agent and constructs its graph.

  Args:
    num_actions: int, number of actions the agent can take at any state.
    observation_size: int, size of observation vector.
    num_players: int, number of players playing this game.
    gamma: float, discount factor as commonly used in the RL literature.
    update_horizon: int, horizon at which updates are performed, the 'n' in
      n-step update.
    min_replay_history: int, number of stored transitions before training.
    update_period: int, period between DQN updates.
    stack_size: int, number of observations to use as state.
    target_update_period: int, update period for the target network.
    epsilon_fn: function expecting 4 parameters: (decay_period, step,
      warmup_steps, epsilon), and which returns the epsilon value used for
      exploration during training.
    epsilon_train: float, final epsilon for training.
    epsilon_eval: float, epsilon during evaluation.
    epsilon_decay_period: int, number of steps for epsilon to decay.
    graph_template: function for building the neural network graph.
    tf_device: str, Tensorflow device on which to run computations.
    use_staging: bool, when True, uses a staging area to prefetch the next
      sampling batch.
    optimizer: Optimizer instance used for learning.
  """
  self.partial_reload = False

  tf.logging.info('Creating %s agent with the following parameters:',
                  self.__class__.__name__)
  tf.logging.info('\t gamma: %f', gamma)
  tf.logging.info('\t update_horizon: %d', update_horizon)
  tf.logging.info('\t min_replay_history: %d', min_replay_history)
  tf.logging.info('\t update_period: %d', update_period)
  tf.logging.info('\t target_update_period: %d', target_update_period)
  tf.logging.info('\t epsilon_train: %f', epsilon_train)
  tf.logging.info('\t epsilon_eval: %f', epsilon_eval)
  tf.logging.info('\t epsilon_decay_period: %d', epsilon_decay_period)
  tf.logging.info('\t tf_device: %s', tf_device)
  tf.logging.info('\t use_staging: %s', use_staging)
  tf.logging.info('\t optimizer: %s', optimizer)

  # Global variables.
  self.num_actions = num_actions
  self.observation_size = observation_size
  self.num_players = num_players
  self.gamma = gamma
  self.update_horizon = update_horizon
  self.cumulative_gamma = math.pow(gamma, update_horizon)
  self.min_replay_history = min_replay_history
  self.target_update_period = target_update_period
  self.epsilon_fn = epsilon_fn
  self.epsilon_train = epsilon_train
  self.epsilon_eval = epsilon_eval
  self.epsilon_decay_period = epsilon_decay_period
  self.update_period = update_period
  self.eval_mode = False
  self.training_steps = 0
  self.batch_staged = False
  self.optimizer = optimizer

  with tf.device(tf_device):
    # Calling online_convnet will generate a new graph as defined in
    # graph_template using whatever input is passed, but will always share
    # the same weights.
    online_convnet = tf.make_template('Online', graph_template)
    target_convnet = tf.make_template('Target', graph_template)
    # The state of the agent. The last axis is the number of past
    # observations that make up the state.
    states_shape = (1, observation_size, stack_size)
    self.state = np.zeros(states_shape)
    self.state_ph = tf.placeholder(tf.uint8, states_shape, name='state_ph')
    self.legal_actions_ph = tf.placeholder(tf.float32, [self.num_actions],
                                           name='legal_actions_ph')
    self._q = online_convnet(state=self.state_ph,
                             num_actions=self.num_actions)
    self._replay = self._build_replay_memory(use_staging)
    self._replay_qs = online_convnet(self._replay.states, self.num_actions)
    self._replay_next_qt = target_convnet(self._replay.next_states,
                                          self.num_actions)
    self._train_op = self._build_train_op()
    self._sync_qt_ops = self._build_sync_op()

    self._q_argmax = tf.argmax(self._q + self.legal_actions_ph, axis=1)[0]

  # Set up a session and initialize variables.
  self._sess = tf.Session(
      '', config=tf.ConfigProto(allow_soft_placement=True))
  self._init_op = tf.global_variables_initializer()
  self._sess.run(self._init_op)

  self._saver = tf.train.Saver(max_to_keep=3)

  # This keeps track of the observed transitions during play, for each
  # player.
  self.transitions = [[] for _ in range(num_players)]
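# For reference, a sketch of what the default epsilon_fn above computes
# (hedged: the real linearly_decaying_epsilon is defined elsewhere in this
# module; this mirrors its documented (decay_period, step, warmup_steps,
# epsilon) signature). Epsilon stays at 1.0 for warmup_steps, decays
# linearly over decay_period steps, then remains at the final epsilon:
#
#   def linearly_decaying_epsilon(decay_period, step, warmup_steps, epsilon):
#     steps_left = decay_period + warmup_steps - step
#     bonus = (1.0 - epsilon) * steps_left / decay_period
#     bonus = np.clip(bonus, 0., 1. - epsilon)
#     return epsilon + bonus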