Example #1
def conv_layer(inputs,
               kernel_shape,
               biases_shape,
               stride=1,
               padding='SAME',
               activation='relu',
               norm=None,
               dropout=False,
               dropout_rate=None,
               regularizer=None,
               initializer=None,
               dimensions=2,
               is_training=True):
    if initializer is not None:
        initializer_obj = get_initializer(initializer)
        weights = tf.get_variable('weights',
                                  initializer=initializer_obj(kernel_shape),
                                  regularizer=regularizer)
    else:
        if activation == 'relu':
            initializer = get_initializer('he_uniform')
        elif activation == 'sigmoid' or activation == 'tanh':
            initializer = get_initializer('glorot_uniform')
        # if initializer is None, TensorFlow seems to use
        # a Glorot uniform initializer by default
        weights = tf.get_variable('weights',
                                  kernel_shape,
                                  regularizer=regularizer,
                                  initializer=initializer)
    logger.debug('  conv_weights: {0}'.format(weights))

    biases = tf.get_variable('biases',
                             biases_shape,
                             initializer=tf.constant_initializer(0.01))
    logger.debug('  conv_biases: {0}'.format(biases))

    if dimensions == 1:
        return conv_1d(inputs,
                       weights,
                       biases,
                       stride=stride,
                       padding=padding,
                       activation=activation,
                       norm=norm,
                       dropout=dropout,
                       dropout_rate=dropout_rate,
                       is_training=is_training)
    elif dimensions == 2:
        return conv_2d(inputs,
                       weights,
                       biases,
                       stride=stride,
                       padding=padding,
                       activation=activation,
                       norm=norm,
                       dropout=dropout,
                       dropout_rate=dropout_rate,
                       is_training=is_training)
    else:
        raise Exception('Unsupported number of dimensions', dimensions)
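A hypothetical call-site sketch (not part of the scraped example): it assumes conv_layer above and its helpers (get_initializer, conv_1d, conv_2d) are importable in the current module, and that a TF1-style graph is being built.

import tensorflow as tf

tf.compat.v1.disable_eager_execution()

# [batch, height, width, channels] grayscale images
images = tf.compat.v1.placeholder(tf.float32, [None, 28, 28, 1], name='images')

with tf.compat.v1.variable_scope('conv_1'):
    # 3x3 kernel, 1 input channel, 32 filters; biases_shape matches the filter count
    conv_output = conv_layer(images,
                             kernel_shape=[3, 3, 1, 32],
                             biases_shape=[32],
                             stride=1,
                             padding='SAME',
                             activation='relu',
                             dropout=True,
                             dropout_rate=0.2,
                             is_training=True)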
Example #2
def fc_layer(inputs,
             in_count,
             out_count,
             activation='relu',
             norm=None,
             is_training=True,
             weights=None,
             biases=None,
             dropout=False,
             dropout_rate=None,
             initializer=None,
             regularizer=None):
    if weights is None:
        if initializer is not None:
            initializer_obj = get_initializer(initializer)
            weights = tf.get_variable('weights',
                                      initializer=initializer_obj(
                                          [in_count, out_count]),
                                      regularizer=regularizer)
        else:
            if activation == 'relu':
                initializer = get_initializer('he_uniform')
            elif activation == 'sigmoid' or activation == 'tanh':
                initializer = get_initializer('glorot_uniform')
            # if initializer is None, TensorFlow seems to use
            # a Glorot uniform initializer by default
            weights = tf.compat.v1.get_variable('weights',
                                                [in_count, out_count],
                                                regularizer=regularizer,
                                                initializer=initializer)

    logger.debug('  fc_weights: {}'.format(weights))

    if biases is None:
        biases = tf.compat.v1.get_variable(
            'biases', [out_count], initializer=tf.constant_initializer(0.01))
    logger.debug('  fc_biases: {}'.format(biases))

    hidden = tf.matmul(inputs, weights) + biases

    if norm is not None:
        if norm == 'batch':
            hidden = tf.contrib.layers.batch_norm(hidden,
                                                  is_training=is_training)
        elif norm == 'layer':
            hidden = tf.contrib.layers.layer_norm(hidden)

    if activation:
        hidden = getattr(tf.nn, activation)(hidden)

    if dropout and dropout_rate is not None:
        hidden = tf.layers.dropout(hidden,
                                   rate=dropout_rate,
                                   training=is_training)
        logger.debug('  fc_dropout: {}'.format(hidden))

    return hidden
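A hypothetical usage sketch for fc_layer (assumes the function above, get_initializer and logger are in scope; norm='batch' relies on the TF 1.x tf.contrib API used in the body).

# [batch, 128] feature vectors
features = tf.compat.v1.placeholder(tf.float32, [None, 128], name='features')

with tf.compat.v1.variable_scope('fc_1'):
    fc_output = fc_layer(features,
                         in_count=128,
                         out_count=64,
                         activation='relu',
                         norm='batch',
                         dropout=True,
                         dropout_rate=0.5,
                         is_training=True)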
Example #3
def embedding_matrix(vocab,
                     embedding_size,
                     representation='dense',
                     embeddings_trainable=True,
                     pretrained_embeddings=None,
                     force_embedding_size=False,
                     initializer=None,
                     regularizer=None):
    vocab_size = len(vocab)
    if representation == 'dense':
        if pretrained_embeddings is not None and pretrained_embeddings is not False:
            embeddings_matrix = load_pretrained_embeddings(
                pretrained_embeddings, vocab)
            if embeddings_matrix.shape[-1] != embedding_size:
                raise ValueError(
                    'The pretrained embeddings have size {}, '
                    'but the specified embedding_size is {}. '
                    'Please change the embedding_size accordingly.'.format(
                        embeddings_matrix.shape[-1], embedding_size))
            initializer_obj = tf.constant(embeddings_matrix, dtype=tf.float32)
        else:
            if vocab_size < embedding_size and not force_embedding_size:
                logger.info(
                    '  embedding_size ({}) is greater than vocab_size ({}). '
                    'Setting embedding size to be equal to vocab_size.'.format(
                        embedding_size, vocab_size))
                embedding_size = vocab_size

            if initializer is not None:
                initializer_obj_ref = get_initializer(initializer)
            else:
                initializer_obj_ref = get_initializer({
                    'type': 'uniform',
                    'minval': -1.0,
                    'maxval': 1.0
                })
            initializer_obj = initializer_obj_ref([vocab_size, embedding_size])

        embeddings = tf.compat.v1.get_variable('embeddings',
                                               initializer=initializer_obj,
                                               trainable=embeddings_trainable,
                                               regularizer=regularizer)

    elif representation == 'sparse':
        embedding_size = vocab_size
        embeddings = tf.compat.v1.get_variable(
            'embeddings',
            initializer=get_initializer('identity')(
                [vocab_size, embedding_size]),
            trainable=False)

    else:
        raise Exception('Embedding representation {} not supported.'.format(
            representation))

    return embeddings, embedding_size
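A hypothetical usage sketch for embedding_matrix (assumes the function above, get_initializer, load_pretrained_embeddings and logger are in scope). With embedding_size=8 larger than the five-token vocabulary and force_embedding_size left False, the returned effective size shrinks to 5.

vocab = ['<PAD>', '<UNK>', 'the', 'cat', 'sat']

with tf.compat.v1.variable_scope('token_embeddings'):
    embeddings, effective_size = embedding_matrix(vocab,
                                                  embedding_size=8,
                                                  representation='dense',
                                                  embeddings_trainable=True)

# look up a batch of token id sequences against the embedding table
token_ids = tf.compat.v1.placeholder(tf.int32, [None, None], name='token_ids')
embedded_tokens = tf.nn.embedding_lookup(embeddings, token_ids)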
Example #4
    def _get_predictions(self, hidden, hidden_size, regularizer=None):
        if not self.regularize:
            regularizer = None

        with tf.variable_scope('predictions_{}'.format(self.name)):
            initializer_obj = get_initializer(self.initializer)
            weights = tf.get_variable('weights',
                                      initializer=initializer_obj(
                                          [hidden_size, self.num_classes]),
                                      regularizer=regularizer)
            logging.debug('  class_weights: {0}'.format(weights))

            biases = tf.get_variable('biases', [self.num_classes])
            logging.debug('  class_biases: {0}'.format(biases))

            logits = tf.matmul(hidden, weights) + biases
            logging.debug('  logits: {0}'.format(logits))

            probabilities = tf.nn.softmax(logits,
                                          name='probabilities_{}'.format(
                                              self.name))
            predictions = tf.argmax(logits,
                                    -1,
                                    name='predictions_{}'.format(self.name))

            with tf.device('/cpu:0'):
                top_k_predictions = tf.nn.top_k(
                    logits,
                    k=self.top_k,
                    sorted=True,
                    name='top_k_predictions_{}'.format(self.name))

        return (predictions, top_k_predictions, probabilities, logits, weights,
                biases)
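A hypothetical call-site sketch (not from the source): it assumes a category output-feature object category_feature that defines this method and carries name, initializer, num_classes, top_k and regularize attributes, plus a [batch, hidden_size] combiner output.

combiner_output = tf.compat.v1.placeholder(tf.float32, [None, 256])

(predictions, top_k_predictions, probabilities,
 logits, weights, biases) = category_feature._get_predictions(combiner_output,
                                                               hidden_size=256)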
Example #5
    def vector_predictions(
        self,
        hidden,
        hidden_size,
        regularizer=None,
    ):
        with tf.variable_scope('predictions_{}'.format(self.name)):
            initializer_obj = get_initializer(self.initializer)
            weights = tf.compat.v1.get_variable(
                'weights',
                initializer=initializer_obj([hidden_size, self.vector_size]),
                regularizer=regularizer)
            logger.debug('  projection_weights: {0}'.format(weights))

            biases = tf.compat.v1.get_variable('biases', [self.vector_size])
            logger.debug('  projection_biases: {0}'.format(biases))

            logits = tf.matmul(hidden, weights) + biases
            logger.debug('  logits: {0}'.format(logits))

            if self.softmax:
                predictions = tf.nn.softmax(logits)
            else:
                predictions = logits

        return logits, self.vector_size, predictions
Example #6
    def _get_predictions(self, hidden, hidden_size, regularizer=None):
        if not self.regularize:
            regularizer = None

        with tf.variable_scope('predictions_{}'.format(self.name)):
            initializer_obj = get_initializer(self.initializer)
            weights = tf.get_variable('weights',
                                      initializer=initializer_obj(
                                          [hidden_size, 1]),
                                      regularizer=regularizer)
            logger.debug('  regression_weights: {0}'.format(weights))

            biases = tf.get_variable('biases', [1])
            logger.debug('  regression_biases: {0}'.format(biases))

            logits = tf.reshape(tf.matmul(hidden, weights) + biases, [-1])
            logger.debug('  logits: {0}'.format(logits))

            probabilities = tf.nn.sigmoid(logits,
                                          name='probabilities_{}'.format(
                                              self.name))
            predictions = tf.greater_equal(probabilities,
                                           self.threshold,
                                           name='predictions_{}'.format(
                                               self.name))
        return predictions, probabilities, logits
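A hypothetical call-site sketch: it assumes a binary output-feature object binary_feature that defines this method along with name, initializer, threshold and regularize attributes.

combiner_output = tf.compat.v1.placeholder(tf.float32, [None, 256])

predictions, probabilities, logits = binary_feature._get_predictions(
    combiner_output, hidden_size=256)
# predictions is a boolean tensor: probabilities >= binary_feature.threshold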
Example #7
    def _get_predictions(self, hidden, hidden_size, regularizer=None):
        if not self.regularize:
            regularizer = None

        with tf.variable_scope('predictions_{}'.format(self.name)):
            initializer_obj = get_initializer(self.initializer)
            weights = tf.get_variable('weights',
                                      initializer=initializer_obj(
                                          [hidden_size, 1]),
                                      regularizer=regularizer)
            logger.debug('  regression_weights: {0}'.format(weights))

            biases = tf.get_variable('biases', [1])
            logger.debug('  regression_biases: {0}'.format(biases))

            predictions = tf.reshape(tf.matmul(hidden, weights) + biases, [-1])
            logger.debug('  predictions: {0}'.format(predictions))

            if self.clip is not None:
                if isinstance(self.clip,
                              (list, tuple)) and len(self.clip) == 2:
                    predictions = tf.clip_by_value(predictions, self.clip[0],
                                                   self.clip[1])
                    logger.debug(
                        '  clipped_predictions: {0}'.format(predictions))
                else:
                    raise ValueError(
                        'The clip parameter of {} is {}. '
                        'It must be a list or a tuple of length 2.'.format(
                            self.name, self.clip))

        return predictions
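A hypothetical call-site sketch: it assumes a numerical output-feature object numerical_feature that defines this method, with clip set to a two-element (min, max) list or tuple.

combiner_output = tf.compat.v1.placeholder(tf.float32, [None, 256])

predictions = numerical_feature._get_predictions(combiner_output, hidden_size=256)
# with numerical_feature.clip = [0.0, 100.0], predictions are clamped to that interval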
Example #8
    def _get_predictions(
            self,
            hidden,
            hidden_size,
            regularizer=None
    ):
        if not self.regularize:
            regularizer = None

        with tf.variable_scope('predictions_{}'.format(self.name)):
            initializer_obj = get_initializer(self.initializer)
            weights = tf.get_variable(
                'weights',
                initializer=initializer_obj([hidden_size, 1]),
                regularizer=regularizer
            )
            logging.debug('  regression_weights: {0}'.format(weights))

            biases = tf.get_variable('biases', [1])
            logging.debug('  regression_biases: {0}'.format(biases))

            predictions = tf.reshape(
                tf.matmul(hidden, weights) + biases,
                [-1]
            )
            logging.debug('  predictions: {0}'.format(predictions))

        return predictions
Example #9
    def __call__(self,
                 output_feature,
                 targets,
                 hidden,
                 hidden_size,
                 regularizer,
                 is_timeseries=False):
        logging.info('  hidden shape: {0}'.format(hidden.shape))
        if len(hidden.shape) != 3:
            raise ValueError(
                'Decoder inputs rank is {}, but should be 3 [batch x sequence x hidden] '
                'when using a tagger sequential decoder. '
                'Consider setting reduce_output to null / None if a sequential encoder / combiner is used.'
                .format(len(hidden.shape)))

        if is_timeseries:
            output_feature['num_classes'] = 1

        if not self.regularize:
            regularizer = None

        sequence_length = tf.shape(hidden)[1]

        if self.attention:
            hidden, hidden_size = feed_forward_memory_attention(
                hidden, hidden, hidden_size)
        targets_sequence_length = sequence_length_2D(targets)

        initializer_obj = get_initializer(self.initializer)
        class_weights = tf.get_variable('weights',
                                        initializer=initializer_obj([
                                            hidden_size,
                                            output_feature['num_classes']
                                        ]),
                                        regularizer=regularizer)
        logging.debug('  weights: {0}'.format(class_weights))

        class_biases = tf.get_variable('biases',
                                       [output_feature['num_classes']])
        logging.debug('  biases: {0}'.format(class_biases))

        hidden_reshape = tf.reshape(hidden, [-1, hidden_size])
        logits_to_reshape = tf.matmul(hidden_reshape,
                                      class_weights) + class_biases
        logits = tf.reshape(
            logits_to_reshape,
            [-1, sequence_length, output_feature['num_classes']])
        logging.debug('  logits: {0}'.format(logits))

        if is_timeseries:
            probabilities_sequence = tf.zeros_like(logits)
            predictions_sequence = tf.reshape(logits, [-1, sequence_length])
        else:
            probabilities_sequence = tf.nn.softmax(
                logits, name='probabilities_{}'.format(output_feature['name']))
            predictions_sequence = tf.argmax(logits,
                                             -1,
                                             name='predictions_{}'.format(
                                                 output_feature['name']),
                                             output_type=tf.int32)

        predictions_sequence_length = sequence_length_3D(hidden)

        return predictions_sequence, probabilities_sequence, \
               predictions_sequence_length, \
               probabilities_sequence, targets_sequence_length, \
               logits, hidden, class_weights, class_biases
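A hypothetical call-site sketch (not from the source): it assumes a tagger decoder object tagger that defines this __call__ (with attention, regularize and initializer attributes) and the helpers sequence_length_2D, sequence_length_3D and feed_forward_memory_attention in scope; hidden must be rank 3 [batch, sequence, hidden].

hidden = tf.compat.v1.placeholder(tf.float32, [None, None, 256])
targets = tf.compat.v1.placeholder(tf.int32, [None, None])

(predictions_sequence, probabilities_sequence,
 predictions_sequence_length, _probabilities_sequence,
 targets_sequence_length, logits, projected_hidden,
 class_weights, class_biases) = tagger(
     {'name': 'tags', 'num_classes': 10},
     targets,
     hidden,
     hidden_size=256,
     regularizer=None)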
Example #10
def recurrent_decoder(encoder_outputs,
                      targets,
                      max_sequence_length,
                      vocab_size,
                      cell_type='rnn',
                      state_size=256,
                      embedding_size=50,
                      num_layers=1,
                      attention_mechanism=None,
                      beam_width=1,
                      projection=True,
                      tied_target_embeddings=True,
                      embeddings=None,
                      initializer=None,
                      regularizer=None,
                      is_timeseries=False):
    with tf.variable_scope('rnn_decoder',
                           reuse=tf.AUTO_REUSE,
                           regularizer=regularizer):

        # ================ Setup ================
        if beam_width > 1 and is_timeseries:
            raise ValueError('Invalid beam_width {}: beam search is not '
                             'supported for timeseries outputs'.format(beam_width))

        GO_SYMBOL = vocab_size
        END_SYMBOL = 0
        batch_size = tf.shape(encoder_outputs)[0]

        # ================ Projection ================
        # Project the encoder outputs to the size of the decoder state
        encoder_outputs_size = encoder_outputs.shape[-1]
        if projection and encoder_outputs_size != state_size:
            with tf.variable_scope('projection'):
                encoder_output_rank = len(encoder_outputs.shape)
                if encoder_output_rank > 2:
                    sequence_length = tf.shape(encoder_outputs)[1]
                    encoder_outputs = tf.reshape(encoder_outputs,
                                                 [-1, encoder_outputs_size])
                    encoder_outputs = fc_layer(encoder_outputs,
                                               encoder_outputs.shape[-1],
                                               state_size,
                                               activation=None,
                                               initializer=initializer)
                    encoder_outputs = tf.reshape(
                        encoder_outputs, [-1, sequence_length, state_size])
                else:
                    encoder_outputs = fc_layer(encoder_outputs,
                                               encoder_outputs.shape[-1],
                                               state_size,
                                               activation=None,
                                               initializer=initializer)

        # ================ Targets sequence ================
        # Calculate the length of inputs and the batch size
        with tf.variable_scope('sequence'):
            targets_sequence_length = sequence_length_2D(targets)
            start_tokens = tf.tile([GO_SYMBOL], [batch_size])
            end_tokens = tf.tile([END_SYMBOL], [batch_size])
            if is_timeseries:
                start_tokens = tf.cast(start_tokens, tf.float32)
                end_tokens = tf.cast(end_tokens, tf.float32)
            targets_with_go_and_eos = tf.concat([
                tf.expand_dims(start_tokens, 1), targets,
                tf.expand_dims(end_tokens, 1)
            ], 1)
            logging.debug(
                '  targets_with_go: {0}'.format(targets_with_go_and_eos))
            targets_sequence_length_with_eos = targets_sequence_length + 1  # the EOS symbol is 0 so it's not increasing the real length of the sequence

        # ================ Embeddings ================
        if is_timeseries:
            targets_embedded = tf.expand_dims(targets_with_go_and_eos, -1)
            targets_embeddings = None
        else:
            with tf.variable_scope('embedding'):
                if embeddings is not None:
                    embedding_size = embeddings.shape.as_list()[-1]
                    if tied_target_embeddings:
                        state_size = embedding_size
                elif tied_target_embeddings:
                    embedding_size = state_size

                if embeddings is not None:
                    embedding_go = tf.get_variable(
                        'embedding_GO',
                        initializer=tf.random_uniform([1, embedding_size],
                                                      -1.0, 1.0))
                    targets_embeddings = tf.concat([embeddings, embedding_go],
                                                   axis=0)
                else:
                    initializer_obj = get_initializer(initializer)
                    targets_embeddings = tf.get_variable(
                        'embeddings',
                        initializer=initializer_obj(
                            [vocab_size + 1, embedding_size]),
                        regularizer=regularizer)
                logging.debug(
                    '  targets_embeddings: {0}'.format(targets_embeddings))

                targets_embedded = tf.nn.embedding_lookup(
                    targets_embeddings,
                    targets_with_go_and_eos,
                    name='decoder_input_embeddings')
        logging.debug('  targets_embedded: {0}'.format(targets_embedded))

        # ================ Class prediction ================
        if tied_target_embeddings:
            class_weights = tf.transpose(targets_embeddings)
        else:
            initializer_obj = get_initializer(initializer)
            class_weights = tf.get_variable('class_weights',
                                            initializer=initializer_obj(
                                                [state_size, vocab_size + 1]),
                                            regularizer=regularizer)
        logging.debug('  class_weights: {0}'.format(class_weights))
        class_biases = tf.get_variable('class_biases', [vocab_size + 1])
        logging.debug('  class_biases: {0}'.format(class_biases))
        projection_layer = Projection(class_weights, class_biases)

        # ================ RNN ================
        initial_state = encoder_outputs
        with tf.variable_scope('rnn_cells') as vs:
            # Cell
            cell_fun = get_cell_fun(cell_type)

            if num_layers == 1:
                cell = cell_fun(state_size)
                if cell_type.startswith('lstm'):
                    initial_state = LSTMStateTuple(c=initial_state,
                                                   h=initial_state)
            elif num_layers > 1:
                cell = MultiRNNCell(
                    [cell_fun(state_size) for _ in range(num_layers)],
                    state_is_tuple=True)
                if cell_type.startswith('lstm'):
                    initial_state = LSTMStateTuple(c=initial_state,
                                                   h=initial_state)
                initial_state = tuple([initial_state] * num_layers)
            else:
                raise ValueError(
                    'num_layers in recurrent decoder: {}. '
                    'Number of layers in a recurrent decoder cannot be <= 0'.
                    format(num_layers))

            # Attention
            if attention_mechanism is not None:
                if attention_mechanism == 'bahdanau':
                    attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                        num_units=state_size,
                        memory=encoder_outputs,
                        memory_sequence_length=sequence_length_3D(
                            encoder_outputs))
                elif attention_mechanism == 'luong':
                    attention_mechanism = tf.contrib.seq2seq.LuongAttention(
                        num_units=state_size,
                        memory=encoder_outputs,
                        memory_sequence_length=sequence_length_3D(
                            encoder_outputs))
                else:
                    raise ValueError(
                        'Attention mechanism {} not supported'.format(
                            attention_mechanism))
                cell = tf.contrib.seq2seq.AttentionWrapper(
                    cell, attention_mechanism, attention_layer_size=state_size)
                initial_state = cell.zero_state(dtype=tf.float32,
                                                batch_size=batch_size)
                initial_state = initial_state.clone(
                    cell_state=reduce_sequence(encoder_outputs, 'last'))

            for v in tf.global_variables():
                if v.name.startswith(vs.name):
                    logging.debug('  {}: {}'.format(v.name, v))

        # ================ Decoding ================
        def decode(initial_state,
                   cell,
                   helper,
                   beam_width=1,
                   projection_layer=None):
            # The decoder itself
            if beam_width > 1:
                # Tile inputs for beam search decoder
                beam_initial_state = tf.contrib.seq2seq.tile_batch(
                    initial_state, beam_width)
                decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                    cell=cell,
                    embedding=targets_embeddings,
                    start_tokens=start_tokens,
                    end_token=END_SYMBOL,
                    initial_state=beam_initial_state,
                    beam_width=beam_width,
                    output_layer=projection_layer)
            else:
                decoder = BasicDecoder(cell=cell,
                                       helper=helper,
                                       initial_state=initial_state,
                                       output_layer=projection_layer)

            # The decoding operation
            outputs = tf.contrib.seq2seq.dynamic_decode(
                decoder=decoder,
                output_time_major=False,
                impute_finished=False if beam_width > 1 else True,
                maximum_iterations=max_sequence_length)

            return outputs

        # ================ Decoding helpers ================
        if is_timeseries:
            train_helper = TimeseriesTrainingHelper(
                inputs=targets_embedded,
                sequence_length=targets_sequence_length_with_eos)
            final_outputs_pred, final_state_pred, final_sequence_lengths_pred = decode(
                initial_state,
                cell,
                train_helper,
                projection_layer=projection_layer)
            eval_logits = final_outputs_pred.rnn_output
            train_logits = final_outputs_pred.projection_input
            predictions_sequence = tf.reshape(eval_logits, [batch_size, -1])
            predictions_sequence_length_with_eos = final_sequence_lengths_pred

        else:
            train_helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=targets_embedded,
                sequence_length=targets_sequence_length_with_eos)
            final_outputs_train, final_state_train, final_sequence_lengths_train = decode(
                initial_state,
                cell,
                train_helper,
                projection_layer=projection_layer)
            eval_logits = final_outputs_train.rnn_output
            train_logits = final_outputs_train.projection_input
            # train_predictions = final_outputs_train.sample_id

            pred_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                embedding=targets_embeddings,
                start_tokens=start_tokens,
                end_token=END_SYMBOL)
            final_outputs_pred, final_state_pred, final_sequence_lengths_pred = decode(
                initial_state,
                cell,
                pred_helper,
                beam_width,
                projection_layer=projection_layer)

            if beam_width > 1:
                predictions_sequence = \
                    final_outputs_pred.beam_search_decoder_output.predicted_ids[:, :, 0]
                # final_outputs_pred.predicted_ids[:, :, 0] would work too,
                # but it contains -1s for padding
                predictions_sequence_scores = \
                    final_outputs_pred.beam_search_decoder_output.scores[:, :, 0]
                predictions_sequence_length_with_eos = \
                    final_sequence_lengths_pred[:, 0]
            else:
                predictions_sequence = final_outputs_pred.sample_id
                predictions_sequence_scores = final_outputs_pred.rnn_output
                predictions_sequence_length_with_eos = final_sequence_lengths_pred

    logging.debug('  train_logits: {0}'.format(train_logits))
    logging.debug('  eval_logits: {0}'.format(eval_logits))
    logging.debug('  predictions_sequence: {0}'.format(predictions_sequence))
    logging.debug('  predictions_sequence_scores: {0}'.format(
        predictions_sequence_scores))

    return predictions_sequence, predictions_sequence_scores, predictions_sequence_length_with_eos, \
           targets_sequence_length_with_eos, eval_logits, train_logits, class_weights, class_biases
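A hypothetical usage sketch (not from the source): it assumes recurrent_decoder above and its helpers (fc_layer, get_initializer, get_cell_fun, Projection, reduce_sequence, sequence_length_2D, sequence_length_3D, TimeseriesTrainingHelper, BasicDecoder) are in scope, and that TF 1.x with tf.contrib.seq2seq is available. Rank-3 encoder outputs are paired with Bahdanau attention so the decoder receives a valid initial state.

encoder_outputs = tf.compat.v1.placeholder(tf.float32, [None, None, 256])
targets = tf.compat.v1.placeholder(tf.int32, [None, None])

with tf.compat.v1.variable_scope('sequence_output'):
    (predictions_sequence, predictions_sequence_scores,
     predictions_sequence_length_with_eos, targets_sequence_length_with_eos,
     eval_logits, train_logits, class_weights, class_biases) = recurrent_decoder(
         encoder_outputs,
         targets,
         max_sequence_length=50,
         vocab_size=1000,
         cell_type='rnn',
         state_size=256,
         attention_mechanism='bahdanau',
         beam_width=1)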