Example #1
from math import sqrt

import tensorflow as tf
from tensorflow.contrib import graph_editor  # TF 1.x contrib graph editor


def convert_to_noisy_variables(variables, activation=None):
    """ Converts a list of variables to noisy variables
        :param variables: A list of variables to make noisy
        :param activation: Optional. The activation function to use on the linear noisy transformation
        :return: Nothing, but modifies the graph in-place

        Reference: arXiv:1706.10295 - Noisy Networks for Exploration
    """
    if tf.get_collection(tf.GraphKeys.TRAIN_OP):
        raise RuntimeError(
            'You must call convert_to_noisy_variables before applying an optimizer on the graph.'
        )

    graph = tf.get_default_graph()
    if not isinstance(variables, list):
        variables = list(variables)

    # Replacing each variable
    for variable in variables:
        variable_read_op = _get_variable_read_op(variable, graph)
        variable_outputs = _get_variable_outputs(variable_read_op, graph)
        variable_scope = variable.name.split(':')[0]
        variable_shape = variable.shape.as_list()
        fan_in = variable_shape[0]

        # Creating noisy variables
        with tf.variable_scope(variable_scope + '_noisy'):
            with tf.device(variable.device):
                s_init = tf.constant_initializer(0.5 / sqrt(fan_in))

                noisy_u = tf.identity(variable, name='mu')
                noisy_s = tf.get_variable(
                    name='sigma',
                    shape=variable.shape,
                    dtype=tf.float32,
                    initializer=s_init,
                    caching_device=variable._caching_device)  # pylint: disable=protected-access
                noise = tf.random.normal(shape=variable_shape)

                replaced_var = noisy_u + noisy_s * noise
                replaced_var = activation(replaced_var) if activation else replaced_var

        # Replacing in-place
        inputs_index = [
            var_index for var_index, var_input in enumerate(
                graph_editor.sgv(*variable_outputs).inputs) if
            var_input.name.split(':')[0] == variable_read_op.name.split(':')[0]
        ]
        graph_editor.connect(
            graph_editor.sgv(replaced_var.op),
            graph_editor.sgv(*variable_outputs).remap_inputs(inputs_index),
            disconnect_first=True)
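
A minimal usage sketch (the network and the layer name 'head' below are illustrative, not from the source): the conversion must run after the layers are built but strictly before any optimizer creates a train op, since the function refuses to rewrite a graph that already has one.

import tensorflow as tf

inputs = tf.placeholder(tf.float32, shape=[None, 128], name='inputs')
logits = tf.layers.dense(inputs, units=10, name='head')

# Replace the layer's weight reads with mu + sigma * epsilon
head_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='head')
convert_to_noisy_variables(head_vars, activation=None)

# Only now is it safe to build the optimizer / train op
loss = tf.reduce_mean(logits)
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)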
Example #2
    def _encode_board(self, board_state, name, reuse=None):
        """ Encodes a board state or prev orders state
            :param board_state: The board state / prev orders state to encode - (batch, NB_NODES, initial_features)
            :param name: The name to use for the encoding
            :param reuse: Whether to reuse or not the weights from another encoding operation
            :return: The encoded board state / prev_orders state
        """
        from diplomacy_research.utils.tensorflow import tf
        from diplomacy_research.models.layers.graph_convolution import GraphConvolution, preprocess_adjacency
        from diplomacy_research.models.state_space import get_adjacency_matrix, NB_FEATURES  # assumed home of these helpers

        # Shorthands for hparam lookup and the ReLU activation
        hps = lambda hparam_name: self.hparams[hparam_name]
        relu = tf.nn.relu

        # Computing the normalized adjacency matrix and tiling it across the batch
        norm_adjacency = preprocess_adjacency(get_adjacency_matrix())
        norm_adjacency = tf.tile(tf.expand_dims(norm_adjacency, axis=0), [tf.shape(board_state)[0], 1, 1])

        # Building scope
        scope = tf.VariableScope(name='policy/%s' % name, reuse=reuse)
        with tf.variable_scope(scope):
            batch_size = tf.shape(board_state)[0]

            # Adding noise to break symmetry
            board_state = board_state + tf.random_normal(tf.shape(board_state), stddev=0.01)

            # Projecting (if needed) to 'gcn_size'
            if board_state.shape[-1].value == NB_FEATURES:
                with tf.variable_scope('proj', reuse=tf.AUTO_REUSE):
                    proj_w = tf.get_variable('W', shape=[1, NB_FEATURES, hps('gcn_size')], dtype=tf.float32)
                graph_conv = relu(tf.matmul(board_state, tf.tile(proj_w, [batch_size, 1, 1])))
            else:
                graph_conv = board_state

            # First and intermediate layers
            for _ in range(hps('nb_graph_conv') - 1):
                graph_conv = GraphConvolution(input_dim=hps('gcn_size'),                    # (b, NB_NODES, gcn_size)
                                              output_dim=hps('gcn_size'),
                                              norm_adjacency=norm_adjacency,
                                              activation_fn=relu,
                                              residual=True,
                                              bias=True)(graph_conv)

            # Last Layer
            graph_conv = GraphConvolution(input_dim=hps('gcn_size'),                        # (b, NB_NODES, final_size)
                                          output_dim=hps('attn_size') // 2,
                                          norm_adjacency=norm_adjacency,
                                          activation_fn=relu,
                                          residual=False,
                                          bias=True)(graph_conv)

        # Returning
        return graph_conv
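
For orientation, a hedged shape walk-through of the encoder above, with an illustrative call site (the name 'board' is an assumption; the hparam names come from the snippet):

# Illustrative shapes through _encode_board:
#   board_state:             (batch, NB_NODES, NB_FEATURES)
#   after the 'proj' matmul: (batch, NB_NODES, gcn_size)
#   after nb_graph_conv - 1 residual GraphConvolution layers: unchanged
#   after the final layer:   (batch, NB_NODES, attn_size // 2)
board_embedding = self._encode_board(board_state, name='board', reuse=tf.AUTO_REUSE)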
Example #3
def constant(name, shape, value, dtype=tf.float32, **kwargs):
    """ Creates a variable which is initiated to a constant value
        :param name: The name of the variable
        :param shape: The shape of the variable
        :param value: The constant value of the tensor
        :param dtype: The data type
        :return: A constant-initialized variable
    """
    initial = tf.constant_initializer(value=value, dtype=dtype)
    return tf.get_variable(name=name,
                           shape=shape,
                           initializer=initial,
                           **kwargs)
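
A short usage sketch (both variable names and shapes are illustrative):

b = constant('b', shape=[256], value=0.)       # zero-initialized bias vector
temperature = constant('temperature',          # non-trainable scalar, with
                       shape=(), value=1.,     # trainable=False forwarded to
                       trainable=False)        # tf.get_variable via **kwargs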
Example #4
def _create_version_step():
    """ Creates the version step tensor if it doesn't exist """
    from diplomacy_research.utils.tensorflow import tf
    if BaseAlgorithm._get_version_step() is not None:
        raise ValueError('"version_step" already exists.')
    with tf.get_default_graph().name_scope(None):
        return tf.get_variable(
            VERSION_STEP,
            shape=(),
            dtype=tf.int64,
            initializer=tf.zeros_initializer(),
            trainable=False,
            collections=[tf.GraphKeys.GLOBAL_VARIABLES, VERSION_STEP])
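
A hedged sketch of how the returned counter might be bumped once per parameter update (the assign op below is an assumption, not part of the source):

version_step = _create_version_step()   # scalar int64 variable
increment_version = tf.assign_add(version_step, 1, name='increment_version_step')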
Example #5
def uniform(name, shape, scale=0.05, **kwargs):
    """ Creates a variable which is randomly initiated between -scale and +scale
        :param name: The name of the variable
        :param shape: The shape of the variable to create
        :param scale: The minimum and maximum value of the random uniform distribution
        :return: A variable initialized uniformly at random between -scale and +scale
    """
    initial = tf.random_uniform_initializer(minval=-scale,
                                            maxval=scale,
                                            dtype=tf.float32)
    return tf.get_variable(name=name,
                           shape=shape,
                           initializer=initial,
                           **kwargs)
Example #6
def he(name, shape, **kwargs):
    """ Creates a variable which is initiated using the He normal method
        :param name: The name of the variable
        :param shape: The shape of the variable to create
        :return: A variable initialized using the He normal method
    """
    # pylint: disable=invalid-name
    fan_in = shape[-2] if len(shape) >= 2 else shape[-1]
    init_range = tf.sqrt(2. / fan_in)
    initial = tf.random_normal_initializer(mean=0.,
                                           stddev=init_range,
                                           dtype=tf.float32)
    return tf.get_variable(name=name,
                           shape=shape,
                           initializer=initial,
                           **kwargs)
Example #7
def glorot(name, shape, **kwargs):
    """ Creates a variable which is initiated using the Glorot & Bengio uniform method
        :param name: The name of the variable
        :param shape: The shape of the variable to create
        :return: A variable initialized using the Glorot uniform method
    """
    if len(shape) >= 2:
        fan_in, fan_out = shape[-2], shape[-1]
    else:
        fan_in, fan_out = shape[-1], shape[-1]
    init_range = tf.sqrt(6. / (fan_in + fan_out))
    initial = tf.random_uniform_initializer(minval=-1. * init_range,
                                            maxval=init_range,
                                            dtype=tf.float32)
    return tf.get_variable(name=name,
                           shape=shape,
                           initializer=initial,
                           **kwargs)
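
And a combined usage sketch for the initializer helpers above (the scope name and shapes are illustrative):

with tf.variable_scope('mlp'):
    w1 = glorot('w1', shape=[784, 256])          # U(-sqrt(6/(784+256)), +sqrt(6/(784+256)))
    w2 = he('w2', shape=[256, 10])               # N(0, sqrt(2/256)); fan_in is shape[-2]
    b2 = uniform('b2', shape=[10], scale=0.01)   # U(-0.01, +0.01)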