def __init__(self, component): """Initializes layers. Args: component: Parent ComponentBuilderBase object. """ layers = [ network_units.Layer(self, 'lengths', -1), network_units.Layer(self, 'scores', -1), network_units.Layer(self, 'logits', -1), network_units.Layer(self, 'arcs', -1), ] super(MstSolverNetwork, self).__init__(component, init_layers=layers) self._attrs = network_units.get_attrs_with_defaults( component.spec.network_unit.parameters, defaults={ 'forest': False, 'loss': 'softmax', 'crf_max_dynamic_range': 20, }) check.Eq(len(self._fixed_feature_dims.items()), 0, 'Expected no fixed features') check.Eq(len(self._linked_feature_dims.items()), 2, 'Expected two linked features') check.In('lengths', self._linked_feature_dims, 'Missing required linked feature') check.In('scores', self._linked_feature_dims, 'Missing required linked feature')
def create_hidden_layers(self, component, hidden_layer_sizes):
  """See base class."""
  # Construct the layer meta info for the DRAGNN builder.  Note that the order
  # of h and c is reversed compared to the vanilla DRAGNN LSTM cell, as this
  # is the standard in tf.contrib.rnn.
  #
  # NB: The h activations of the last LSTM must be the last layer, in order
  # for _append_base_layers() to work.
  layers = []
  for index, num_units in enumerate(hidden_layer_sizes):
    layers.append(
        dragnn.Layer(component, name='state_c_%d' % index, dim=num_units))
    layers.append(
        dragnn.Layer(component, name='state_h_%d' % index, dim=num_units))
  context_layers = list(layers)  # copy |layers|, don't alias it
  return layers, context_layers
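# Illustration only (not from the original file): with
# hidden_layer_sizes = [256, 128], the method above returns layers named
#   state_c_0 (dim 256), state_h_0 (dim 256), state_c_1 (dim 128),
#   state_h_1 (dim 128)
# so the last LSTM layer's h activations land in the final slot, as
# _append_base_layers() requires.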
def __init__(self, component): """Initializes weights and layers. Args: component: Parent ComponentBuilderBase object. """ super(BiaffineLabelNetwork, self).__init__(component) parameters = component.spec.network_unit.parameters self._num_labels = int(parameters['num_labels']) check.Gt(self._num_labels, 0, 'Expected some labels') check.Eq(len(self._fixed_feature_dims.items()), 0, 'Expected no fixed features') check.Eq(len(self._linked_feature_dims.items()), 2, 'Expected two linked features') check.In('sources', self._linked_feature_dims, 'Missing required linked feature') check.In('targets', self._linked_feature_dims, 'Missing required linked feature') self._source_dim = self._linked_feature_dims['sources'] self._target_dim = self._linked_feature_dims['targets'] # TODO(googleuser): Make parameter initialization configurable. self._weights = [] self._weights.append( tf.get_variable( 'weights_pair', [self._num_labels, self._source_dim, self._target_dim], tf.float32, tf.random_normal_initializer(stddev=1e-4, seed=self._seed))) self._weights.append( tf.get_variable( 'weights_source', [self._num_labels, self._source_dim], tf.float32, tf.random_normal_initializer(stddev=1e-4, seed=self._seed))) self._weights.append( tf.get_variable( 'weights_target', [self._num_labels, self._target_dim], tf.float32, tf.random_normal_initializer(stddev=1e-4, seed=self._seed))) self._biases = [] self._biases.append( tf.get_variable( 'biases', [self._num_labels], tf.float32, tf.random_normal_initializer(stddev=1e-4, seed=self._seed))) self._params.extend(self._weights + self._biases) self._regularized_weights.extend(self._weights) self._layers.append( network_units.Layer(self, 'labels', self._num_labels))
def __init__(self, component): """Initializes weights and layers. Args: component: Parent ComponentBuilderBase object. """ super(BiaffineDigraphNetwork, self).__init__(component) check.Eq(len(self._fixed_feature_dims.items()), 0, 'Expected no fixed features') check.Eq(len(self._linked_feature_dims.items()), 2, 'Expected two linked features') check.In('sources', self._linked_feature_dims, 'Missing required linked feature') check.In('targets', self._linked_feature_dims, 'Missing required linked feature') self._source_dim = self._linked_feature_dims['sources'] self._target_dim = self._linked_feature_dims['targets'] # TODO(googleuser): Make parameter initialization configurable. self._weights = [] self._weights.append( tf.get_variable( 'weights_arc', [self._source_dim, self._target_dim], tf.float32, tf.random_normal_initializer(stddev=1e-4, seed=self._seed))) self._weights.append( tf.get_variable( 'weights_source', [self._source_dim], tf.float32, tf.random_normal_initializer(stddev=1e-4, seed=self._seed))) self._weights.append( tf.get_variable( 'root', [self._source_dim], tf.float32, tf.random_normal_initializer(stddev=1e-4, seed=self._seed))) self._params.extend(self._weights) self._regularized_weights.extend(self._weights) # Negative Layer.dim indicates that the dimension is dynamic. self._layers.append(network_units.Layer(self, 'adjacency', -1))
def __init__(self, component): """Initializes weights and layers. Args: component: Parent ComponentBuilderBase object. """ super(BiaffineDigraphNetwork, self).__init__(component) check.Eq(len(self._fixed_feature_dims.items()), 0, 'Expected no fixed features') check.Eq(len(self._linked_feature_dims.items()), 2, 'Expected two linked features') check.In('sources', self._linked_feature_dims, 'Missing required linked feature') check.In('targets', self._linked_feature_dims, 'Missing required linked feature') self._source_dim = self._linked_feature_dims['sources'] self._target_dim = self._linked_feature_dims['targets'] self._weights = [] self._weights.append( tf.get_variable('weights_arc', [self._source_dim, self._target_dim], tf.float32, tf.orthogonal_initializer())) self._weights.append( tf.get_variable('weights_source', [self._source_dim], tf.float32, tf.zeros_initializer())) self._weights.append( tf.get_variable('root', [self._source_dim], tf.float32, tf.zeros_initializer())) self._params.extend(self._weights) self._regularized_weights.extend(self._weights) # Add runtime hooks for pre-computed weights. self._derived_params.append(self._get_root_weights) self._derived_params.append(self._get_root_bias) # Negative Layer.dim indicates that the dimension is dynamic. self._layers.append(network_units.Layer(component, 'adjacency', -1))
def __init__(self, component):
  """Initializes weights and layers.

  Args:
    component: Parent ComponentBuilderBase object.
  """
  super(PairwiseBilinearLabelNetwork, self).__init__(component)
  parameters = component.spec.network_unit.parameters

  self._num_labels = int(parameters['num_labels'])
  self._source_dim = self._linked_feature_dims['sources']
  self._target_dim = self._linked_feature_dims['targets']

  self._weights = []
  self._weights.append(
      network_units.add_var_initialized(
          'bilinear',
          [self._source_dim, self._num_labels, self._target_dim],
          'xavier'))

  self._params.extend(self._weights)
  self._regularized_weights.extend(self._weights)

  self._layers.append(
      network_units.Layer(component, name='bilinear_scores',
                          dim=self._num_labels))
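# Sketch only (an assumed use of the 'bilinear' tensor above, reusing the
# module's TF 1.x import): a pairwise bilinear labeler scores every
# (source, target, label) triple as
#   scores[b, s, t, l] = sources[b, s]^T bilinear[:, l, :] targets[b, t]
def pairwise_bilinear_scores(sources, targets, bilinear):
  """Hypothetical helper; sources [b, n, source_dim], targets [b, n, target_dim]."""
  projected = tf.einsum('bsm,mlk->bslk', sources, bilinear)
  return tf.einsum('bslk,btk->bstl', projected, targets)  # [b, n, n, num_labels]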
def __init__(self, component, additional_attr_defaults=None):
  """Initializes the LSTM base class.

  Parameters used:
    hidden_layer_sizes: Comma-delimited number of hidden units for each layer.
    input_dropout_rate (-1.0): Input dropout rate for each layer.  If < 0.0,
        use the global |dropout_rate| hyperparameter.
    recurrent_dropout_rate (0.8): Recurrent dropout rate.  If < 0.0, use the
        global |recurrent_dropout_rate| hyperparameter.
    layer_norm (True): Whether or not to use layer norm.

  Hyperparameters used:
    dropout_rate: Input dropout rate.
    recurrent_dropout_rate: Recurrent dropout rate.

  Args:
    component: parent ComponentBuilderBase object.
    additional_attr_defaults: Additional attributes for use by derived class.
  """
  attr_defaults = additional_attr_defaults or {}
  attr_defaults.update({
      'layer_norm': True,
      'input_dropout_rate': -1.0,
      'recurrent_dropout_rate': 0.8,
      'hidden_layer_sizes': '256',
  })
  self._attrs = dragnn.get_attrs_with_defaults(
      component.spec.network_unit.parameters, defaults=attr_defaults)

  self._hidden_layer_sizes = list(
      map(int, self._attrs['hidden_layer_sizes'].split(',')))

  self._input_dropout_rate = self._attrs['input_dropout_rate']
  if self._input_dropout_rate < 0.0:
    self._input_dropout_rate = component.master.hyperparams.dropout_rate

  self._recurrent_dropout_rate = self._attrs['recurrent_dropout_rate']
  if self._recurrent_dropout_rate < 0.0:
    self._recurrent_dropout_rate = (
        component.master.hyperparams.recurrent_dropout_rate)
  if self._recurrent_dropout_rate < 0.0:
    self._recurrent_dropout_rate = component.master.hyperparams.dropout_rate

  tf.logging.info('[%s] input_dropout_rate=%s recurrent_dropout_rate=%s',
                  component.name, self._input_dropout_rate,
                  self._recurrent_dropout_rate)

  layers, context_layers = self.create_hidden_layers(component,
                                                     self._hidden_layer_sizes)
  last_layer_dim = layers[-1].dim
  layers.append(
      dragnn.Layer(component, name='last_layer', dim=last_layer_dim))
  layers.append(
      dragnn.Layer(component, name='logits', dim=component.num_actions))

  # Provide initial layers and context layers, so the base class constructor
  # can safely use accessors like get_layer_size().
  super(BaseLSTMNetwork, self).__init__(
      component, init_layers=layers, init_context_layers=context_layers)

  # Allocate parameters for the softmax.
  self._params.append(
      tf.get_variable(
          'weights_softmax', [last_layer_dim, component.num_actions],
          initializer=tf.random_normal_initializer(stddev=1e-4)))
  self._params.append(
      tf.get_variable(
          'bias_softmax', [component.num_actions],
          initializer=tf.zeros_initializer()))
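# Sketch only (an assumed use of the softmax parameters allocated above,
# reusing the module's TF 1.x import): the h activations of the last LSTM
# layer are mapped to per-action logits with a single affine transform.
def action_logits(last_layer, weights_softmax, bias_softmax):
  """Hypothetical helper; last_layer is [batch, last_layer_dim]."""
  return tf.nn.xw_plus_b(last_layer, weights_softmax, bias_softmax)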
def create_hidden_layers(self, component, hidden_layer_sizes):
  """See base class."""
  dim = 2 * hidden_layer_sizes[-1]
  return [dragnn.Layer(component, name='outputs', dim=dim)], []
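# Illustration only: with hidden_layer_sizes = [256, 128] this variant exposes
# a single 'outputs' layer of dim 256, presumably the concatenation of the
# forward and backward states of the last (128-unit) LSTM layer.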
def __init__(self, component): """Initializes parameters for this Transformer unit. Args: component: parent ComponentBuilderBase object. Parameters used to construct the network: num_layers: number of transformer layers (attention + MLP) hidden_size: size of hidden layers in MLPs filter_size: filter width for each attention head num_heads: number of attention heads residual_dropout: dropout keep rate for residual layers attention_dropout: dropout keep rate for attention weights mlp_dropout: dropout keep rate for mlp layers initialization: initialization scheme to use for model parameters bias_init: initial value for bias parameters scale_attention: whether to scale attention parameters by filter_size^-0.5 layer_norm_residuals: whether to perform layer normalization on residual layers timing_signal: whether to add a position-wise timing signal to the input kernel: kernel width in middle MLP layers mlp_layers: number of MLP layers. Must be >= 2. Raises: ValueError: if mlp_layers < 2. The input depth of the first layer is inferred from the total concatenated size of the input features, minus 1 to account for the sequence lengths. Hyperparameters used: dropout_rate: The probability that an input is not dropped. This is the default when the |dropout_keep_prob| parameter is unset. """ super(TransformerEncoderNetwork, self).__init__(component) default_dropout_rate = component.master.hyperparams.dropout_rate self._attrs = network_units.get_attrs_with_defaults( component.spec.network_unit.parameters, defaults={ 'num_layers': 4, 'hidden_size': 256, 'filter_size': 64, 'num_heads': 8, 'residual_drop': default_dropout_rate, 'attention_drop': default_dropout_rate, 'mlp_drop': default_dropout_rate, 'initialization': 'xavier', 'bias_init': 0.001, 'scale_attention': True, 'layer_norm_residuals': True, 'timing_signal': True, 'kernel': 1, 'mlp_layers': 2}) self._num_layers = self._attrs['num_layers'] self._hidden_size = self._attrs['hidden_size'] self._filter_size = self._attrs['filter_size'] self._num_heads = self._attrs['num_heads'] self._residual_dropout = self._attrs['residual_drop'] self._attention_dropout = self._attrs['attention_drop'] self._mlp_dropout = self._attrs['mlp_drop'] self._initialization = self._attrs['initialization'] self._bias_init = self._attrs['bias_init'] self._scale_attn = self._attrs['scale_attention'] self._layer_norm_res = self._attrs['layer_norm_residuals'] self._timing_signal = self._attrs['timing_signal'] self._kernel = self._attrs['kernel'] self._mlp_depth = self._attrs['mlp_layers'] if self._mlp_depth < 2: raise ValueError('TransformerEncoderNetwork needs mlp_layers >= 2') self._combined_filters = self._num_heads * self._filter_size self._weights = [] self._biases = [] self._layer_norms = {} # Hacky: one dimension comes from the lengths input; subtract it. 
self._concatenated_input_dim -= 1 # Initial projection of inputs, this is mainly to project input down to the # right size for residual layers proj_shape = [1, 1, self._concatenated_input_dim, self._combined_filters] self._weights.append( network_units.add_var_initialized('init_proj', proj_shape, self._initialization)) self._biases.append(tf.get_variable('init_bias', self._combined_filters, initializer=tf.constant_initializer( self._bias_init), dtype=tf.float32)) for i in range(self._num_layers): with tf.variable_scope('transform_%d' % i): # Attention weights: 3 * self.combined_filters = (q, k, v) # We assume that q, k and v all have the same dimension attn_shape = [1, 1, self._combined_filters, 3 * self._combined_filters] self._weights.append( network_units.add_var_initialized('attn_weights', attn_shape, self._initialization)) # Attention final projection weights proj_shape = [1, 1, self._combined_filters, self._combined_filters] self._weights.append( network_units.add_var_initialized('proj_weights', proj_shape, self._initialization)) # MLP weights with tf.variable_scope('mlp'): ff_shape = [1, 1, self._combined_filters, self._hidden_size] self._weights.append( network_units.add_var_initialized('ff_weights_0', ff_shape, self._initialization)) ff_shape = [1, self._kernel, self._hidden_size, self._hidden_size] for j in range(1, self._mlp_depth - 1): self._weights.append( network_units.add_var_initialized('ff_weights_%d' % j, ff_shape, self._initialization)) ff_shape = [1, 1, self._hidden_size, self._combined_filters] self._weights.append( network_units.add_var_initialized('ff_weights_%d' % (self._mlp_depth - 1), ff_shape, self._initialization)) # Layer normalization for residual layers if self._layer_norm_res: attn_layer_norm = network_units.LayerNorm(component, 'attn_layer_norm_%d' % i, self._combined_filters, tf.float32) self._layer_norms['attn_layer_norm_%d' % i] = attn_layer_norm ff_layer_norm = network_units.LayerNorm(component, 'ff_layer_norm_%d' % i, self._combined_filters, tf.float32) self._layer_norms['ff_layer_norm_%d' % i] = ff_layer_norm # Layer norm parameters are not added to self._weights, # which means that they are not l2 regularized self._params.extend(attn_layer_norm.params + ff_layer_norm.params) self._params.extend(self._weights) self._params.extend(self._biases) self._regularized_weights.extend(self._weights) self._layers.append( network_units.Layer(component, name='transformer_output', dim=self._combined_filters))
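# Sketch only (an assumed use of the per-layer 'attn_weights' above, not the
# network's actual create() method, and reusing the module's TF 1.x import):
# the [1, 1, D, 3D] tensor acts as a 1x1 convolution that produces packed
# queries, keys and values, which are split into num_heads heads for scaled
# dot-product attention.
def self_attention(inputs, attn_weights, num_heads, filter_size):
  """Hypothetical helper; inputs are [batch, 1, num_steps, D]."""
  qkv = tf.nn.conv2d(inputs, attn_weights, [1, 1, 1, 1], 'SAME')
  queries, keys, values = tf.split(qkv, 3, axis=3)

  def split_heads(x):
    # [batch, 1, steps, D] -> [batch, num_heads, steps, filter_size].
    shape = tf.shape(x)
    x = tf.reshape(x, [shape[0], shape[2], num_heads, filter_size])
    return tf.transpose(x, [0, 2, 1, 3])

  q, k, v = split_heads(queries), split_heads(keys), split_heads(values)
  q *= filter_size ** -0.5  # corresponds to the scale_attention option
  logits = tf.matmul(q, k, transpose_b=True)
  weights = tf.nn.softmax(logits)
  return tf.matmul(weights, v)  # [batch, num_heads, steps, filter_size]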