Example #1
    def __init__(self, component):
        """Initializes layers.

        Args:
          component: Parent ComponentBuilderBase object.
        """
        layers = [
            network_units.Layer(self, 'lengths', -1),
            network_units.Layer(self, 'scores', -1),
            network_units.Layer(self, 'logits', -1),
            network_units.Layer(self, 'arcs', -1),
        ]
        super(MstSolverNetwork, self).__init__(component, init_layers=layers)

        self._attrs = network_units.get_attrs_with_defaults(
            component.spec.network_unit.parameters,
            defaults={
                'forest': False,
                'loss': 'softmax',
                'crf_max_dynamic_range': 20,
            })

        check.Eq(len(self._fixed_feature_dims.items()), 0,
                 'Expected no fixed features')
        check.Eq(len(self._linked_feature_dims.items()), 2,
                 'Expected two linked features')

        check.In('lengths', self._linked_feature_dims,
                 'Missing required linked feature')
        check.In('scores', self._linked_feature_dims,
                 'Missing required linked feature')
Example #2
 def create_hidden_layers(self, component, hidden_layer_sizes):
   """See base class."""
   # Construct the layer meta info for the DRAGNN builder. Note that the order
   # of h and c is reversed compared to the vanilla DRAGNN LSTM cell, as
   # this is the standard in tf.contrib.rnn.
   #
   # NB: The h activations of the last LSTM must be the last layer, in order
   # for _append_base_layers() to work.
   layers = []
   for index, num_units in enumerate(hidden_layer_sizes):
     layers.append(
         dragnn.Layer(component, name='state_c_%d' % index, dim=num_units))
     layers.append(
         dragnn.Layer(component, name='state_h_%d' % index, dim=num_units))
   context_layers = list(layers)  # copy |layers|, don't alias it
   return layers, context_layers
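
For a concrete picture of the ordering constraint noted in the comment above, the layer names produced for two hidden layers look as follows. The namedtuple stands in for dragnn.Layer so the snippet runs without the DRAGNN dependencies; it is illustrative only.

from collections import namedtuple

Layer = namedtuple('Layer', ['name', 'dim'])  # stand-in for dragnn.Layer

def hidden_layer_names(hidden_layer_sizes):
  layers = []
  for index, num_units in enumerate(hidden_layer_sizes):
    layers.append(Layer(name='state_c_%d' % index, dim=num_units))
    layers.append(Layer(name='state_h_%d' % index, dim=num_units))
  return [layer.name for layer in layers]

print(hidden_layer_names([256, 128]))
# ['state_c_0', 'state_h_0', 'state_c_1', 'state_h_1'] -- the h activations of
# the last LSTM ('state_h_1') come last, as _append_base_layers() requires.
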
Example #3
    def __init__(self, component):
        """Initializes weights and layers.

        Args:
          component: Parent ComponentBuilderBase object.
        """
        super(BiaffineLabelNetwork, self).__init__(component)

        parameters = component.spec.network_unit.parameters
        self._num_labels = int(parameters['num_labels'])

        check.Gt(self._num_labels, 0, 'Expected some labels')
        check.Eq(len(self._fixed_feature_dims.items()), 0,
                 'Expected no fixed features')
        check.Eq(len(self._linked_feature_dims.items()), 2,
                 'Expected two linked features')

        check.In('sources', self._linked_feature_dims,
                 'Missing required linked feature')
        check.In('targets', self._linked_feature_dims,
                 'Missing required linked feature')

        self._source_dim = self._linked_feature_dims['sources']
        self._target_dim = self._linked_feature_dims['targets']

        # TODO(googleuser): Make parameter initialization configurable.
        self._weights = []
        self._weights.append(
            tf.get_variable(
                'weights_pair',
                [self._num_labels, self._source_dim, self._target_dim],
                tf.float32,
                tf.random_normal_initializer(stddev=1e-4, seed=self._seed)))
        self._weights.append(
            tf.get_variable(
                'weights_source', [self._num_labels, self._source_dim],
                tf.float32,
                tf.random_normal_initializer(stddev=1e-4, seed=self._seed)))
        self._weights.append(
            tf.get_variable(
                'weights_target', [self._num_labels, self._target_dim],
                tf.float32,
                tf.random_normal_initializer(stddev=1e-4, seed=self._seed)))

        self._biases = []
        self._biases.append(
            tf.get_variable(
                'biases', [self._num_labels], tf.float32,
                tf.random_normal_initializer(stddev=1e-4, seed=self._seed)))

        self._params.extend(self._weights + self._biases)
        self._regularized_weights.extend(self._weights)

        self._layers.append(
            network_units.Layer(self, 'labels', self._num_labels))
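
The three weight tensors and the bias above are the usual ingredients of a biaffine label scorer: a pairwise bilinear term plus linear source and target terms. The sketch below shows one standard way to combine them into per-step label scores; it is an illustration of the technique under assumed [num_steps, dim] inputs, not this network's actual create() code.

import tensorflow as tf

def biaffine_label_scores(sources, targets, weights_pair, weights_source,
                          weights_target, biases):
  """Illustrative only. sources: [n, source_dim]; targets: [n, target_dim]."""
  # Pairwise term: for each step i and label l, sources[i] . W_l . targets[i].
  lhs = tf.einsum('ns,lst->nlt', sources, weights_pair)           # [n, L, target_dim]
  pair = tf.reduce_sum(lhs * tf.expand_dims(targets, 1), axis=2)  # [n, L]
  # Linear source/target terms plus the shared bias.
  source_term = tf.matmul(sources, weights_source, transpose_b=True)  # [n, L]
  target_term = tf.matmul(targets, weights_target, transpose_b=True)  # [n, L]
  return pair + source_term + target_term + biases
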
Example #4
    def __init__(self, component):
        """Initializes weights and layers.

        Args:
          component: Parent ComponentBuilderBase object.
        """
        super(BiaffineDigraphNetwork, self).__init__(component)

        check.Eq(len(self._fixed_feature_dims.items()), 0,
                 'Expected no fixed features')
        check.Eq(len(self._linked_feature_dims.items()), 2,
                 'Expected two linked features')

        check.In('sources', self._linked_feature_dims,
                 'Missing required linked feature')
        check.In('targets', self._linked_feature_dims,
                 'Missing required linked feature')
        self._source_dim = self._linked_feature_dims['sources']
        self._target_dim = self._linked_feature_dims['targets']

        # TODO(googleuser): Make parameter initialization configurable.
        self._weights = []
        self._weights.append(
            tf.get_variable(
                'weights_arc', [self._source_dim, self._target_dim],
                tf.float32,
                tf.random_normal_initializer(stddev=1e-4, seed=self._seed)))
        self._weights.append(
            tf.get_variable(
                'weights_source', [self._source_dim], tf.float32,
                tf.random_normal_initializer(stddev=1e-4, seed=self._seed)))
        self._weights.append(
            tf.get_variable(
                'root', [self._source_dim], tf.float32,
                tf.random_normal_initializer(stddev=1e-4, seed=self._seed)))

        self._params.extend(self._weights)
        self._regularized_weights.extend(self._weights)

        # Negative Layer.dim indicates that the dimension is dynamic.
        self._layers.append(network_units.Layer(self, 'adjacency', -1))
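
The 'adjacency' layer above is dynamically sized because its dimension depends on the number of tokens. One common way the three parameters combine into arc scores is sketched below: a bilinear source-target term, a per-source bias, and the 'root' vector treated as a learned embedding for the artificial root token. This is a hedged illustration of the biaffine digraph idea, not the network's actual create() implementation.

import tensorflow as tf

def arc_scores(sources, targets, weights_arc, weights_source, root):
  """Illustrative only. sources: [n, source_dim]; targets: [n, target_dim]."""
  # Bilinear term: sources . W . targets^T -> [num_source, num_target].
  arc = tf.matmul(tf.matmul(sources, weights_arc), targets, transpose_b=True)
  # Per-source bias, broadcast across all targets.
  arc += tf.expand_dims(tf.tensordot(sources, weights_source, axes=1), axis=1)
  # Score of attaching each target directly to the artificial root.
  root_row = tf.tensordot(root, weights_arc, axes=1)         # [target_dim]
  root_scores = tf.tensordot(targets, root_row, axes=1)      # [num_target]
  return arc, root_scores
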
Example #5
    def __init__(self, component):
        """Initializes weights and layers.

        Args:
          component: Parent ComponentBuilderBase object.
        """
        super(BiaffineDigraphNetwork, self).__init__(component)

        check.Eq(len(self._fixed_feature_dims.items()), 0,
                 'Expected no fixed features')
        check.Eq(len(self._linked_feature_dims.items()), 2,
                 'Expected two linked features')

        check.In('sources', self._linked_feature_dims,
                 'Missing required linked feature')
        check.In('targets', self._linked_feature_dims,
                 'Missing required linked feature')
        self._source_dim = self._linked_feature_dims['sources']
        self._target_dim = self._linked_feature_dims['targets']

        self._weights = []
        self._weights.append(
            tf.get_variable('weights_arc',
                            [self._source_dim, self._target_dim], tf.float32,
                            tf.orthogonal_initializer()))
        self._weights.append(
            tf.get_variable('weights_source', [self._source_dim], tf.float32,
                            tf.zeros_initializer()))
        self._weights.append(
            tf.get_variable('root', [self._source_dim], tf.float32,
                            tf.zeros_initializer()))

        self._params.extend(self._weights)
        self._regularized_weights.extend(self._weights)

        # Add runtime hooks for pre-computed weights.
        self._derived_params.append(self._get_root_weights)
        self._derived_params.append(self._get_root_bias)

        # Negative Layer.dim indicates that the dimension is dynamic.
        self._layers.append(network_units.Layer(component, 'adjacency', -1))
Example #6
  def __init__(self, component):
    super(PairwiseBilinearLabelNetwork, self).__init__(component)
    parameters = component.spec.network_unit.parameters

    self._num_labels = int(parameters['num_labels'])

    self._source_dim = self._linked_feature_dims['sources']
    self._target_dim = self._linked_feature_dims['targets']

    self._weights = []
    self._weights.append(
        network_units.add_var_initialized('bilinear',
                                          [self._source_dim,
                                           self._num_labels,
                                           self._target_dim],
                                          'xavier'))

    self._params.extend(self._weights)
    self._regularized_weights.extend(self._weights)
    self._layers.append(network_units.Layer(component,
                                            name='bilinear_scores',
                                            dim=self._num_labels))
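
Unlike the BiaffineLabelNetwork in Example #3, which scores one aligned (source, target) pair per step, the single 'bilinear' tensor here supports scoring every source against every target for every label. A minimal sketch, assuming batched [batch, n, dim] inputs and a tf.einsum that supports these equations (illustrative only, not the network's create() code):

import tensorflow as tf

def pairwise_bilinear_scores(sources, targets, bilinear):
  """Illustrative only. bilinear: [source_dim, num_labels, target_dim]."""
  # scores[b, i, j, l] = sources[b, i] . bilinear[:, l, :] . targets[b, j]
  lhs = tf.einsum('bis,slt->bilt', sources, bilinear)
  return tf.einsum('bilt,bjt->bijl', lhs, targets)
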
Example #7
    def __init__(self, component, additional_attr_defaults=None):
        """Initializes the LSTM base class.

        Parameters used:
          hidden_layer_sizes: Comma-delimited number of hidden units for each layer.
          input_dropout_rate (-1.0): Input dropout rate for each layer.  If < 0.0,
              use the global |dropout_rate| hyperparameter.
          recurrent_dropout_rate (0.8): Recurrent dropout rate.  If < 0.0, use the
              global |recurrent_dropout_rate| hyperparameter.
          layer_norm (True): Whether or not to use layer norm.

        Hyperparameters used:
          dropout_rate: Input dropout rate.
          recurrent_dropout_rate: Recurrent dropout rate.

        Args:
          component: parent ComponentBuilderBase object.
          additional_attr_defaults: Additional attributes for use by derived classes.
        """
        attr_defaults = additional_attr_defaults or {}
        attr_defaults.update({
            'layer_norm': True,
            'input_dropout_rate': -1.0,
            'recurrent_dropout_rate': 0.8,
            'hidden_layer_sizes': '256',
        })
        self._attrs = dragnn.get_attrs_with_defaults(
            component.spec.network_unit.parameters, defaults=attr_defaults)

        # Materialize as a list so it can be indexed later (under Python 3,
        # map() returns an iterator).
        self._hidden_layer_sizes = list(
            map(int, self._attrs['hidden_layer_sizes'].split(',')))

        self._input_dropout_rate = self._attrs['input_dropout_rate']
        if self._input_dropout_rate < 0.0:
            self._input_dropout_rate = component.master.hyperparams.dropout_rate

        self._recurrent_dropout_rate = self._attrs['recurrent_dropout_rate']
        if self._recurrent_dropout_rate < 0.0:
            self._recurrent_dropout_rate = (
                component.master.hyperparams.recurrent_dropout_rate)
        if self._recurrent_dropout_rate < 0.0:
            self._recurrent_dropout_rate = component.master.hyperparams.dropout_rate

        tf.logging.info('[%s] input_dropout_rate=%s recurrent_dropout_rate=%s',
                        component.name, self._input_dropout_rate,
                        self._recurrent_dropout_rate)

        layers, context_layers = self.create_hidden_layers(
            component, self._hidden_layer_sizes)
        last_layer_dim = layers[-1].dim
        layers.append(
            dragnn.Layer(component, name='last_layer', dim=last_layer_dim))
        layers.append(
            dragnn.Layer(component, name='logits', dim=component.num_actions))

        # Provide initial layers and context layers, so the base class constructor
        # can safely use accessors like get_layer_size().
        super(BaseLSTMNetwork,
              self).__init__(component,
                             init_layers=layers,
                             init_context_layers=context_layers)

        # Allocate parameters for the softmax.
        self._params.append(
            tf.get_variable(
                'weights_softmax', [last_layer_dim, component.num_actions],
                initializer=tf.random_normal_initializer(stddev=1e-4)))
        self._params.append(
            tf.get_variable('bias_softmax', [component.num_actions],
                            initializer=tf.zeros_initializer()))
Example #8
 def create_hidden_layers(self, component, hidden_layer_sizes):
     """See base class."""
     dim = 2 * hidden_layer_sizes[-1]
     return [dragnn.Layer(component, name='outputs', dim=dim)], []
  def __init__(self, component):
    """Initializes parameters for this Transformer unit.

    Args:
      component: parent ComponentBuilderBase object.

    Parameters used to construct the network:
      num_layers: number of transformer layers (attention + MLP)
      hidden_size: size of hidden layers in MLPs
      filter_size: filter width for each attention head
      num_heads: number of attention heads
      residual_dropout: dropout keep rate for residual layers
      attention_dropout: dropout keep rate for attention weights
      mlp_dropout: dropout keep rate for mlp layers
      initialization: initialization scheme to use for model parameters
      bias_init: initial value for bias parameters
      scale_attention: whether to scale attention parameters by filter_size^-0.5
      layer_norm_residuals: whether to perform layer normalization on residual
        layers
      timing_signal: whether to add a position-wise timing signal to the input
      kernel: kernel width in middle MLP layers
      mlp_layers: number of MLP layers. Must be >= 2.

    Raises:
      ValueError: if mlp_layers < 2.

    The input depth of the first layer is inferred from the total concatenated
    size of the input features, minus 1 to account for the sequence lengths.

    Hyperparameters used:
      dropout_rate: The probability that an input is not dropped. This is the
          default when the |dropout_keep_prob| parameter is unset.
    """

    super(TransformerEncoderNetwork, self).__init__(component)
    default_dropout_rate = component.master.hyperparams.dropout_rate
    self._attrs = network_units.get_attrs_with_defaults(
        component.spec.network_unit.parameters, defaults={
            'num_layers': 4,
            'hidden_size': 256,
            'filter_size': 64,
            'num_heads': 8,
            'residual_drop': default_dropout_rate,
            'attention_drop': default_dropout_rate,
            'mlp_drop': default_dropout_rate,
            'initialization': 'xavier',
            'bias_init': 0.001,
            'scale_attention': True,
            'layer_norm_residuals': True,
            'timing_signal': True,
            'kernel': 1,
            'mlp_layers': 2})

    self._num_layers = self._attrs['num_layers']
    self._hidden_size = self._attrs['hidden_size']
    self._filter_size = self._attrs['filter_size']
    self._num_heads = self._attrs['num_heads']
    self._residual_dropout = self._attrs['residual_drop']
    self._attention_dropout = self._attrs['attention_drop']
    self._mlp_dropout = self._attrs['mlp_drop']
    self._initialization = self._attrs['initialization']
    self._bias_init = self._attrs['bias_init']
    self._scale_attn = self._attrs['scale_attention']
    self._layer_norm_res = self._attrs['layer_norm_residuals']
    self._timing_signal = self._attrs['timing_signal']
    self._kernel = self._attrs['kernel']
    self._mlp_depth = self._attrs['mlp_layers']

    if self._mlp_depth < 2:
      raise ValueError('TransformerEncoderNetwork needs mlp_layers >= 2')

    self._combined_filters = self._num_heads * self._filter_size

    self._weights = []
    self._biases = []
    self._layer_norms = {}

    # Hacky: one dimension comes from the lengths input; subtract it.
    self._concatenated_input_dim -= 1

    # Initial projection of the inputs; this mainly projects the input down to
    # the right size for the residual layers.
    proj_shape = [1, 1, self._concatenated_input_dim, self._combined_filters]
    self._weights.append(
        network_units.add_var_initialized('init_proj', proj_shape,
                                          self._initialization))
    self._biases.append(tf.get_variable('init_bias',
                                        self._combined_filters,
                                        initializer=tf.constant_initializer(
                                            self._bias_init),
                                        dtype=tf.float32))

    for i in range(self._num_layers):
      with tf.variable_scope('transform_%d' % i):
        # Attention weights: 3 * self.combined_filters = (q, k, v)
        # We assume that q, k and v all have the same dimension
        attn_shape = [1, 1, self._combined_filters, 3 * self._combined_filters]
        self._weights.append(
            network_units.add_var_initialized('attn_weights',
                                              attn_shape,
                                              self._initialization))

        # Attention final projection weights
        proj_shape = [1, 1, self._combined_filters, self._combined_filters]
        self._weights.append(
            network_units.add_var_initialized('proj_weights',
                                              proj_shape,
                                              self._initialization))

        # MLP weights
        with tf.variable_scope('mlp'):
          ff_shape = [1, 1, self._combined_filters, self._hidden_size]
          self._weights.append(
              network_units.add_var_initialized('ff_weights_0',
                                                ff_shape,
                                                self._initialization))
          ff_shape = [1, self._kernel, self._hidden_size, self._hidden_size]
          for j in range(1, self._mlp_depth - 1):
            self._weights.append(
                network_units.add_var_initialized('ff_weights_%d' % j,
                                                  ff_shape,
                                                  self._initialization))
          ff_shape = [1, 1, self._hidden_size, self._combined_filters]
          self._weights.append(
              network_units.add_var_initialized('ff_weights_%d' %
                                                (self._mlp_depth - 1),
                                                ff_shape,
                                                self._initialization))

        # Layer normalization for residual layers
        if self._layer_norm_res:
          attn_layer_norm = network_units.LayerNorm(component,
                                                    'attn_layer_norm_%d' % i,
                                                    self._combined_filters,
                                                    tf.float32)
          self._layer_norms['attn_layer_norm_%d' % i] = attn_layer_norm

          ff_layer_norm = network_units.LayerNorm(component,
                                                  'ff_layer_norm_%d' % i,
                                                  self._combined_filters,
                                                  tf.float32)
          self._layer_norms['ff_layer_norm_%d' % i] = ff_layer_norm

          # Layer norm parameters are not added to self._weights,
          # which means that they are not L2 regularized.
          self._params.extend(attn_layer_norm.params + ff_layer_norm.params)

    self._params.extend(self._weights)
    self._params.extend(self._biases)
    self._regularized_weights.extend(self._weights)
    self._layers.append(
        network_units.Layer(component, name='transformer_output',
                            dim=self._combined_filters))
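
The attn_shape of [1, 1, combined_filters, 3 * combined_filters] above suggests a 1x1 convolution that projects each position to concatenated query, key, and value vectors, which are then split into num_heads heads of width filter_size. The sketch below shows that standard pattern with scaled dot-product attention; the helper name, the placement of the 1/sqrt(filter_size) scaling, and the omission of dropout are assumptions for illustration, not the network's actual create() code.

import tensorflow as tf

def multi_head_attention(inputs, attn_weights, num_heads, filter_size):
  """Illustrative only. inputs: [batch, 1, length, C]; attn_weights: [1, 1, C, 3*C]."""
  # 1x1 convolution: project each position to concatenated (q, k, v).
  qkv = tf.nn.conv2d(inputs, attn_weights, [1, 1, 1, 1], 'SAME')
  q, k, v = tf.split(qkv, 3, axis=3)              # each [batch, 1, length, C]

  def split_heads(x):
    # [batch, 1, length, C] -> [batch, num_heads, length, filter_size]
    x = tf.squeeze(x, axis=1)
    batch, length = tf.shape(x)[0], tf.shape(x)[1]
    x = tf.reshape(x, [batch, length, num_heads, filter_size])
    return tf.transpose(x, [0, 2, 1, 3])

  q, k, v = split_heads(q), split_heads(k), split_heads(v)
  # Scaled dot-product attention within each head.
  logits = tf.matmul(q, k, transpose_b=True) * (filter_size ** -0.5)
  context = tf.matmul(tf.nn.softmax(logits), v)   # [batch, heads, length, filter]
  context = tf.transpose(context, [0, 2, 1, 3])
  batch, length = tf.shape(context)[0], tf.shape(context)[1]
  out = tf.reshape(context, [batch, length, num_heads * filter_size])
  return tf.expand_dims(out, axis=1)              # [batch, 1, length, C]
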