Beispiel #1
0
def cross_layer(x0, cross_layers, cross_op='better'):
    """
    Stack `cross_layers` feature-crossing layers on top of x0.
    Input
        x0: input tensor; its last static dimension is used as feature_size
            (feature_size = n_feature * embedding_size)
        cross_layers: number of cross layers to stack
        cross_op: 'better' selects cross_op_better, any other value cross_op_raw
    Return
        output of the last cross layer (x0 itself when cross_layers is 0)
    """
    # pick the interaction implementation once, before building the graph
    interact = cross_op_better if cross_op == 'better' else cross_op_raw

    with tf.variable_scope('cross_layer'):
        width = x0.get_shape().as_list()[-1]
        current = x0
        for depth in range(cross_layers):
            layer_weight = tf.get_variable(
                name='cross_weight{}'.format(depth),
                shape=[width],
                initializer=tf.truncated_normal_initializer())
            layer_bias = tf.get_variable(
                name='cross_bias{}'.format(depth),
                shape=[width],
                initializer=tf.truncated_normal_initializer())

            crossed = interact(current, x0, layer_weight, width)

            # residual: interaction + bias + previous layer -> (batch, feature_size)
            current = crossed + layer_bias + current
            add_layer_summary('cross_{}'.format(depth), current)
    return current
Beispiel #2
0
    def decode(self, encoder_output, labels, mode):
        """
        Run the decoder over the target sequence.
        Input
            encoder_output: ENCODER_OUTPUT
            labels: {tokens:, seq_len:}; only read in TRAIN and EVAL mode
            mode: tf.estimator.ModeKeys
        Return
            DECODER_OUTPUT
        """
        with tf.variable_scope('decoding'):
            # Defaults cover the remaining (inference) mode: no target
            # embedding and no target length are handed to the decoder.
            target_emb = None
            target_len = None

            if mode == tf.estimator.ModeKeys.TRAIN:
                # batch_size * max_len * emb_size
                target_emb = tf.nn.embedding_lookup(self.embedding,
                                                    labels['tokens'])
                target_len = labels['seq_len']
            elif mode == tf.estimator.ModeKeys.EVAL:
                # evaluation passes the target length but no target embedding
                target_len = labels['seq_len']

            result = decoder(encoder_output, target_emb, target_len,
                             self.embedding, self.params, mode)

            add_layer_summary('decoder_output.state', result.state)
            add_layer_summary('decoder_output.output',
                              result.output.rnn_output)

        return result
Beispiel #3
0
    def encode(self, features, mode):
        """
        Stack of params['encode_attention_layers'] layers, each being
        multi-head self attention followed by a feed forward block.
        input
            features: dict {'tokens':, 'seq_len':}
            mode: tf.estimator.ModeKeys, forwarded to the sub layers
        output
            ENCODER_OUTPUT whose output is the final hidden sequence and whose
            state is the last time step of that sequence
        """
        with tf.variable_scope('encoding', reuse=tf.AUTO_REUSE):
            # batch * seq_len * emb_size
            hidden = self.embedding_func(features['tokens'], mode)
            attention_mask = seq_mask_gen(features, self.params)

            for layer_id in range(self.params['encode_attention_layers']):
                layer_scope = 'self_attention_layer_{}'.format(layer_id)
                with tf.variable_scope(layer_scope, reuse=tf.AUTO_REUSE):
                    # self attention: key, query and value are the same tensor
                    hidden = multi_head_attention(key=hidden,
                                                  query=hidden,
                                                  value=hidden,
                                                  mask=attention_mask,
                                                  params=self.params,
                                                  mode=mode)
                    add_layer_summary('output', hidden)

                    hidden = ffn(hidden, self.params, mode)
                    add_layer_summary('ffn', hidden)

        final_step = hidden[:, -1, :]
        return ENCODER_OUTPUT(output=hidden, state=final_step)
Beispiel #4
0
    def input_encode(self, features):
        """
        Encode the input features with self.general_encoder inside the
        'input_encoding' variable scope and log its state and output.
        """
        with tf.variable_scope('input_encoding', reuse=False):
            encoded = self.general_encoder(features)

            # summarise both fields of the encoder result, state first
            for field in ('state', 'output'):
                add_layer_summary(field, getattr(encoded, field))
        return encoded
Beispiel #5
0
def neighbour_cls_loss(encoder_output, decoder_output, labels, params):
    """
    Quick-thought like loss: the source is a run of consecutive sentences and
    the targets are the same sentences.
    For row i the positive samples are the columns j with
    1 <= |i - j| <= params['window_size']; every other sample in the batch is
    a negative sample.

    Input
        encoder_output: object whose .state[0] is the source representation
        decoder_output: object whose .state[0] is the target representation
        labels: unused, kept for a uniform loss signature
        params: dict, reads 'window_size' and 'dtype'
    Return
        scalar tensor: mean softmax cross entropy over the batch
    """
    sim_score = tf.matmul(encoder_output.state[0], decoder_output.state[0],
                          transpose_b=True)  # [batch, batch] sim score
    add_layer_summary(sim_score.name, sim_score)

    with tf.variable_scope('neighbour_similarity_loss'):
        # the target matrix is built with numpy, so the static batch size is used
        batch_size = sim_score.get_shape().as_list()[0]
        sim_score = tf.matrix_set_diag(sim_score, np.zeros(batch_size))  # ignore self-similarity

        # Targets: 1 wherever the column is within `offset` of the diagonal on
        # EITHER side, excluding the diagonal itself. The previous loop only
        # filled the single lower diagonal at -offset, dropping the lower
        # diagonals -offset+1 .. -1 whenever window_size > 1.
        offset = params['window_size']
        position = np.arange(batch_size)
        distance = np.abs(position[:, None] - position[None, :])
        targets = ((distance >= 1) & (distance <= offset)).astype(np.float64)

        # Normalize each row to a probability distribution. Rows without any
        # positive (batch of one, or window_size 0) stay all-zero instead of
        # turning into NaN through a division by zero.
        row_total = np.sum(targets, axis=1, keepdims=True)
        targets = targets / np.maximum(row_total, 1.0)

        targets = tf.constant(targets, dtype=params['dtype'])

        losses = tf.nn.softmax_cross_entropy_with_logits(labels=targets,
                                                         logits=sim_score)

        losses = tf.reduce_mean(losses)

    return losses
Beispiel #6
0
def scaled_dot_product_attention(key, value, query, mask):
    """
    Masked scaled dot product attention.
    input:
        key: batch_size * key_len * emb_size
        query: batch_size * query_len * emb_size
        value: batch_size * key_len * emb_size
        mask: 1 for positions to keep, 0 for positions to ignore; it is added
            to the batch_size * query_len * key_len score matrix, so it must
            broadcast against that shape
    output:
        weighted_val: batch_size * query_len * emb_size
    """
    with tf.variable_scope('scaled_dot_product_attention',
                           reuse=tf.AUTO_REUSE):
        depth = tf.cast(key.shape.as_list()[-1], tf.float32)  # emb_size

        # raw scores scaled by sqrt(emb_size): batch_size * query_len * key_len
        scores = tf.matmul(query, key, transpose_b=True)
        scores = scores / (depth**0.5)

        # masked positions receive a large negative score, which softmax maps to ~0
        scores = scores + (1 - mask) * (-2**32 + 1)

        # normalise over key_len so that the weights of each query sum to 1
        attention = tf.nn.softmax(scores, axis=-1)

        # image summary of the first sample only, with a trailing channel dim
        first_sample = tf.expand_dims(attention[:1], -1)
        tf.summary.image("attention", first_sample)
        add_layer_summary('attention', attention)

        # weighted value: batch_size * query_len * emb_size
        return tf.matmul(attention, value)
Beispiel #7
0
def model_fn(features, labels, mode, params):
    """
    Dense network whose first layer is initialised from an FM checkpoint.
    Input
        features: feature dict consumed by tf.feature_column.input_layer
        labels: unused here
        mode: tf.estimator.ModeKeys, switches batch norm / dropout behaviour
        params: dict, reads 'hidden_units' and 'dropout_rate'
    Return
        y: output of the final single-unit dense layer
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    fm_checkpoint = './checkpoint/FM'

    columns = build_features()
    net_input = tf.feature_column.input_layer(features, columns)

    with tf.variable_scope('init_fm_embedding'):
        # method1: load the pretrained FM parameters straight from the checkpoint
        fm_embedding = tf.Variable(
            tf.contrib.framework.load_variable(fm_checkpoint,
                                               'fm_interaction/v'))
        fm_weight = tf.Variable(
            tf.contrib.framework.load_variable(fm_checkpoint, 'linear/w'))

        embedded_part = tf.matmul(net_input, fm_embedding)
        linear_part = tf.matmul(net_input, fm_weight)
        dense = tf.add(embedded_part, linear_part)
        add_layer_summary('input', dense)

    with tf.variable_scope('Dense'):
        for layer_id, width in enumerate(params['hidden_units']):
            # dense -> batch norm -> dropout for every hidden layer
            dense = tf.layers.dense(dense,
                                    units=width,
                                    activation='relu',
                                    name='dense{}'.format(layer_id))
            dense = tf.layers.batch_normalization(dense,
                                                  center=True,
                                                  scale=True,
                                                  trainable=True,
                                                  training=is_training)
            dense = tf.layers.dropout(dense,
                                      rate=params['dropout_rate'],
                                      training=is_training)
            add_layer_summary(dense.name, dense)

    with tf.variable_scope('output'):
        y = tf.layers.dense(dense, units=1, name='output')
        tf.summary.histogram(y.name, y)

    return y
Beispiel #8
0
def multi_head_attention(key, value, query, mask, params, mode):
    """
    Multi-head attention with mask, followed by dropout and add & norm.
    input:
        key: batch_size * key_len * emb_size
        query: batch_size * query_len * emb_size
        value: batch_size * key_len * emb_size
        mask: rank-3 mask; it is tiled params['num_head'] times along axis 0
        params: dict, reads 'num_head' and 'dropout_rate'
        mode: tf.estimator.ModeKeys, dropout is active in TRAIN only
    output:
        weighted_val: batch_size * query_len * emb_size
    """
    with tf.variable_scope('multi_head_attention', reuse=tf.AUTO_REUSE):
        model_dim = value.shape.as_list()[-1]  # emb_size
        head_count = params['num_head']

        # linear projections that keep the dimension unchanged
        key_proj = tf.layers.dense(key, units=model_dim, activation=None)
        value_proj = tf.layers.dense(value, units=model_dim, activation=None)
        query_proj = tf.layers.dense(query, units=model_dim, activation=None)

        def split_heads(tensor):
            # batch * len * emb -> (batch * num_head) * len * (emb / num_head)
            pieces = tf.split(tensor, num_or_size_splits=head_count, axis=-1)
            return tf.concat(pieces, axis=0)

        key_heads = split_heads(key_proj)
        value_heads = split_heads(value_proj)
        query_heads = split_heads(query_proj)

        # every head sees the same mask
        head_mask = tf.tile(mask, [head_count, 1, 1])
        attended = scaled_dot_product_attention(key_heads, value_heads,
                                                query_heads, head_mask)

        # merge the heads back:
        # (batch_size * num_head) * query_len * (emb_size/num_head) -> batch_size * query_len * emb_size
        merged = tf.concat(
            tf.split(attended, num_or_size_splits=head_count, axis=0),
            axis=-1)

        # output projection followed by dropout
        projected = tf.layers.dense(merged, units=model_dim, activation=None)
        projected = tf.layers.dropout(
            projected,
            rate=params['dropout_rate'],
            training=(mode == tf.estimator.ModeKeys.TRAIN))
        add_layer_summary('raw_multi_head', projected)

        # residual connection with the un-projected query, then normalisation
        result = add_and_norm_layer(query, projected)

    return result
Beispiel #9
0
    def encode(self, features):
        """
        Encode features with self.general_encoder inside the 'encoding'
        variable scope and log the resulting state and output.
        """
        with tf.variable_scope('encoding'):
            encoded = self.general_encoder(features)

            # (summary tag, attribute) pairs, logged in this order
            summaries = (('encoder_output.state', 'state'),
                         ('encoder_output.output', 'output'))
            for tag, field in summaries:
                add_layer_summary(tag, getattr(encoded, field))

        return encoded
Beispiel #10
0
    def init(self):
        """
        Create the word embedding variable, initialised from
        params['pretrain_embedding'], and store it on self.embedding.
        """
        with tf.variable_scope('embedding', reuse=tf.AUTO_REUSE):
            embedding_dtype = self.params['dtype']
            pretrained = tf.constant(self.params['pretrain_embedding'])

            # the variable shape is taken from the pretrained initializer
            self.embedding = tf.get_variable(name='word_embedding',
                                             initializer=pretrained,
                                             dtype=embedding_dtype)

            add_layer_summary(self.embedding.name, self.embedding)
Beispiel #11
0
def model_fn(features, labels, mode, params):
    """
    Embed every sparse column with its own weight matrix, optionally append
    batch-normalised numeric features, then run an MLP.
    Input
        features: feature dict consumed by tf.feature_column.input_layer
        labels: unused here
        mode: tf.estimator.ModeKeys
        params: dict, reads 'numeric_handle', 'embedding_dim', 'hidden_units'
            and 'dropout_rate'
    Return
        y: output of the final single-unit dense layer
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    sparse_columns, dense_columns = build_features(params['numeric_handle'])

    with tf.variable_scope('EmbeddingInput'):
        embedded = []
        for column in sparse_columns:
            column_input = tf.feature_column.input_layer(features, column)
            column_width = column_input.get_shape().as_list()[-1]

            # one embedding matrix per sparse column, randomly initialised
            initial_value = tf.random_normal(
                shape=[column_width, params['embedding_dim']])
            table = tf.get_variable('w_{}'.format(column.name),
                                    dtype=tf.float32,
                                    initializer=initial_value)
            add_layer_summary(table.name, table)

            embedded.append(tf.matmul(column_input, table))

        dense = tf.concat(embedded, axis=1, name='embedding_concat')
        add_layer_summary(dense.name, dense)

        # numeric features handled as 'dense' are batch normalised and
        # concatenated with the embeddings; otherwise nothing is added here
        if params['numeric_handle'] == 'dense':
            numeric = tf.feature_column.input_layer(features, dense_columns)
            numeric = tf.layers.batch_normalization(numeric,
                                                    center=True,
                                                    scale=True,
                                                    trainable=True,
                                                    training=is_training)
            add_layer_summary(numeric.name, numeric)

            dense = tf.concat([dense, numeric],
                              axis=1,
                              name='numeric_concat')
            add_layer_summary(dense.name, dense)

    with tf.variable_scope('MLP'):
        for layer_id, width in enumerate(params['hidden_units']):
            dense = tf.layers.dense(dense,
                                    units=width,
                                    activation='relu',
                                    name='Dense_{}'.format(layer_id))
            if not is_training:
                # summary and dropout are only added to the training graph
                continue
            add_layer_summary(dense.name, dense)
            dense = tf.layers.dropout(dense,
                                      rate=params['dropout_rate'],
                                      training=is_training)

    with tf.variable_scope('output'):
        y = tf.layers.dense(dense, units=1, name='output')

    return y
Beispiel #12
0
    def encode(self, features, mode):
        """
        RNN encoder: embed features['tokens'] and run rnn_encoder over the
        embedded sequence with features['seq_len'].
        `mode` is accepted but not used in this method.
        """
        with tf.variable_scope('encoding', reuse=tf.AUTO_REUSE):
            token_embedding = self.embedding_func(features['tokens'])
            sequence_length = features['seq_len']

            encoded = rnn_encoder(token_embedding, sequence_length,
                                  self.params)

            add_layer_summary('encoder_output.state', encoded.state)
            add_layer_summary('encoder_output.output', encoded.output)

        return encoded
Beispiel #13
0
def sparse_embedding(feature_size, embedding_size, field_size, feat_ids, feat_vals, add_summary):
    """
    Look up one embedding per feature id and scale it by the feature value.
    Input
        feature_size: number of rows of the embedding table
        embedding_size: number of columns of the embedding table
        field_size: feat_vals is reshaped to [-1, field_size, 1]
        feat_ids: ids used for the lookup
        feat_vals: values multiplied onto the looked-up embeddings
        add_summary: whether to log the result with add_layer_summary
    Return
        value-scaled embeddings, batch * field_size * embedding_size
    """
    with tf.variable_scope('Sparse_Embedding'):
        table = tf.get_variable(name='embedding_weight',
                                shape=[feature_size, embedding_size],
                                initializer=tf.truncated_normal_initializer())

        # batch * field_size * embedding_size
        looked_up = tf.nn.embedding_lookup(table, feat_ids)

        # trailing unit axis lets the values broadcast over the embedding dim
        values = tf.reshape(feat_vals, [-1, field_size, 1])
        scaled = tf.multiply(looked_up, values)

        if add_summary:
            add_layer_summary('embedding_matrix', scaled)

    return scaled
Beispiel #14
0
def model_fn_sparse(features, labels, mode, params):
    """
    Sparse-input graph combining a linear part, a deep part and a CIN part.
    Input
        features: dict with 'feat_ids' and 'feat_vals'
        labels: unused here
        mode: tf.estimator.ModeKeys, forwarded to the dense stack
        params: dict, reads 'data_params', 'hidden_units', 'dropout_rate',
            'batch_norm' and 'cin_layer_size'
    Return
        y: output of the final single-unit dense layer
    """
    # hyper parameters describing the sparse input layout
    layout = params['data_params']
    field_size = layout['field_size']
    feature_size = layout['feature_size']
    embedding_size = layout['embedding_size']

    # both inputs are flattened to batch * field_size
    ids = tf.reshape(features['feat_ids'], shape=[-1, field_size])
    vals = tf.reshape(features['feat_vals'], shape=[-1, field_size])

    with tf.variable_scope('extract_embedding'):
        # (batch, field_size, embedding_size)
        embedded = sparse_embedding(feature_size,
                                    embedding_size,
                                    field_size,
                                    ids,
                                    vals,
                                    add_summary=True)
        # (batch, field_size * embedding_size)
        flat_embedded = tf.reshape(embedded,
                                   [-1, field_size * embedding_size])

    # linear part
    linear_part = sparse_linear(feature_size, ids, vals, add_summary=True)

    # deep part on the flattened embeddings
    deep_part = stack_dense_layer(flat_embedded,
                                  params['hidden_units'],
                                  params['dropout_rate'],
                                  params['batch_norm'],
                                  mode,
                                  add_summary=True)

    # CIN part on the un-flattened embeddings
    cin_part = cin_layer(embedded, params['cin_layer_size'], embedding_size,
                         field_size)

    # concatenate the three parts and project to a single output
    with tf.variable_scope('output'):
        merged = tf.concat([deep_part, cin_part, linear_part], axis=1)
        y = tf.layers.dense(merged, units=1)
        add_layer_summary('output', y)

    return y
Beispiel #15
0
def model_fn_dense(features, labels, mode, params):
    """
    Graph with a linear term, a second order FM term and a deep component
    that shares the embedding input.
    Input
        features: feature dict consumed by tf.feature_column.input_layer
        labels: unused here
        mode: tf.estimator.ModeKeys
        params: dict, reads 'hidden_units' and 'dropout_rate'
    Return
        y: sum of deep output, FM output and linear output
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    dense_feature, sparse_feature = build_features()
    dense = tf.feature_column.input_layer(features, dense_feature)
    sparse = tf.feature_column.input_layer(features, sparse_feature)

    with tf.variable_scope('FM_component'):
        with tf.variable_scope('Linear'):
            linear_output = tf.layers.dense(sparse, units=1)
            add_layer_summary('linear_output', linear_output)

        with tf.variable_scope('second_order'):
            # all features share one embedding dimension, read from the first column
            emb_size = dense_feature[0].variable_shape.as_list()[0]
            n_feature = len(dense_feature)

            # (batch_size, n_feature * emb_size) -> (batch_size, n_feature, emb_size)
            embedding_matrix = tf.reshape(dense, (-1, n_feature, emb_size))
            add_layer_summary('embedding_matrix', embedding_matrix)

            # Compared to FM, the embedding here is flatten(x * v), not v
            square_of_sum = tf.pow(tf.reduce_sum(embedding_matrix, axis=1), 2)
            sum_of_square = tf.reduce_sum(tf.pow(embedding_matrix, 2), axis=1)

            pairwise = tf.subtract(square_of_sum, sum_of_square) * 0.5
            fm_output = tf.reduce_sum(pairwise, axis=1, keepdims=True)
            add_layer_summary('fm_output', fm_output)

    with tf.variable_scope('Deep_component'):
        for layer_id, width in enumerate(params['hidden_units']):
            # dense -> batch norm -> dropout for every hidden layer
            dense = tf.layers.dense(dense,
                                    units=width,
                                    activation='relu',
                                    name='dense{}'.format(layer_id))
            dense = tf.layers.batch_normalization(dense,
                                                  center=True,
                                                  scale=True,
                                                  trainable=True,
                                                  training=is_training)
            dense = tf.layers.dropout(dense,
                                      rate=params['dropout_rate'],
                                      training=is_training)
            add_layer_summary(dense.name, dense)

    with tf.variable_scope('output'):
        # NOTE(review): fm_output and linear_output are (batch, 1) while dense
        # has the width of the last hidden layer; presumably hidden_units ends
        # with 1, otherwise this sum relies on broadcasting -- confirm.
        y = dense + fm_output + linear_output
        add_layer_summary('output', y)

    return y
Beispiel #16
0
def model_fn_dense(features, labels, mode, params):
    """
    Graph with a linear part plus attention-pooled pairwise element-wise
    interactions between the field embeddings.
    Input
        features: feature dict consumed by tf.feature_column.input_layer
        labels: unused here
        mode: unused here
        params: dict, reads 'attention_factor'
    Return
        y: interaction output + linear output, batch * 1
    """
    dense_feature, sparse_feature = build_features()
    dense = tf.feature_column.input_layer(
        features, dense_feature)  # linear concat of the embeddings
    sparse = tf.feature_column.input_layer(features, sparse_feature)

    field_size = len(dense_feature)
    embedding_size = dense_feature[0].variable_shape.as_list()[-1]
    embedding_matrix = tf.reshape(
        dense,
        [-1, field_size, embedding_size])  # batch * field_size * emb_size

    with tf.variable_scope('Linear_part'):
        linear_output = tf.layers.dense(sparse, units=1)
        add_layer_summary('linear_output', linear_output)

    with tf.variable_scope('Elementwise_Interaction'):
        # element-wise product of every unordered pair of field embeddings
        elementwise_list = []
        for i in range(field_size):
            for j in range(i + 1, field_size):
                vi = tf.gather(embedding_matrix,
                               indices=i,
                               axis=1,
                               batch_dims=0,
                               name='vi')  # batch * emb_size
                vj = tf.gather(embedding_matrix,
                               indices=j,
                               axis=1,
                               batch_dims=0,
                               name='vj')
                elementwise_list.append(tf.multiply(vi,
                                                    vj))  # batch * emb_size
        elementwise_matrix = tf.stack(
            elementwise_list)  # (N*(N-1)/2) * batch * emb_size
        elementwise_matrix = tf.transpose(
            elementwise_matrix, [1, 0, 2])  # batch * (N*(N-1)/2) * emb_size

    with tf.variable_scope('Attention_Net'):
        # 2 fully connected layers
        dense = tf.layers.dense(elementwise_matrix,
                                units=params['attention_factor'],
                                activation='relu')  # batch * (N*(N-1)/2) * t
        add_layer_summary(dense.name, dense)
        attention_logit = tf.layers.dense(
            dense, units=1, activation=None)  # batch * (N*(N-1)/2) * 1
        # The weights must be normalised ACROSS the interaction pairs (axis 1).
        # A 'softmax' activation on the dense layer normalises over the last
        # axis, which has size 1 here, so every weight would be exactly 1.
        attention_weight = tf.nn.softmax(attention_logit, axis=1)
        add_layer_summary(attention_weight.name, attention_weight)

    with tf.variable_scope('Attention_pooling'):
        # weighted sum over the pairs, then projection to a single output
        interaction_output = tf.reduce_sum(tf.multiply(elementwise_matrix,
                                                       attention_weight),
                                           axis=1)  # batch * emb_size
        interaction_output = tf.layers.dense(interaction_output,
                                             units=1)  # batch * 1

    with tf.variable_scope('output'):
        y = interaction_output + linear_output
        add_layer_summary('output', y)

    return y
Beispiel #17
0
    def output_encode(self, features, labels, mode):
        """
        For quick thought the "decoder" is a second encoder with its own
        parameters, built inside the 'output_encoding' variable scope.
        In PREDICT mode it encodes `features`, otherwise it encodes `labels`.
        Return
            the output of self.general_encoder for the selected input
        """
        with tf.variable_scope('output_encoding', reuse=False):
            if mode == tf.estimator.ModeKeys.PREDICT:
                source = features
            else:
                source = labels
            encoded = self.general_encoder(source)

            # summarise both fields of the encoder result, state first
            for field in ('state', 'output'):
                add_layer_summary(field, getattr(encoded, field))
        return encoded
Beispiel #18
0
def model_fn_dense(features, labels, mode, params):
    """
    Graph with a linear part plus a dense stack on top of bilinear
    interactions of both the original and the SENET re-weighted embeddings.
    Input
        features: feature dict consumed by tf.feature_column.input_layer
        labels: unused here
        mode: tf.estimator.ModeKeys, forwarded to the dense stack
        params: dict, reads 'pool_op', 'senet_ratio', 'model_type',
            'hidden_units', 'dropout_rate' and 'batch_norm'
    Return
        y: dense output + linear output
    """
    dense_feature, sparse_feature = build_features()
    embedded_input = tf.feature_column.input_layer(features, dense_feature)
    one_hot_input = tf.feature_column.input_layer(features, sparse_feature)

    # linear part
    with tf.variable_scope('Linear_component'):
        linear_output = tf.layers.dense(one_hot_input, units=1)
        add_layer_summary('linear_output', linear_output)

    field_size = len(dense_feature)
    emb_size = dense_feature[0].variable_shape.as_list()[-1]
    original_matrix = tf.reshape(embedded_input, [-1, field_size, emb_size])

    # SENET layer produces a second embedding matrix
    senet_matrix = SENET_layer(original_matrix,
                               field_size,
                               emb_size,
                               pool_op=params['pool_op'],
                               ratio=params['senet_ratio'])

    # bilinear interaction of both matrices, original first
    bilinear_outputs = [
        Bilinear_layer(matrix,
                       field_size,
                       emb_size,
                       type=params['model_type'],
                       name=tag)
        for matrix, tag in ((original_matrix, 'org'), (senet_matrix, 'senet'))
    ]
    combined = tf.concat(bilinear_outputs, axis=1)

    # deep part
    deep_output = stack_dense_layer(combined,
                                    params['hidden_units'],
                                    params['dropout_rate'],
                                    params['batch_norm'],
                                    mode,
                                    add_summary=True)

    with tf.variable_scope('output'):
        y = deep_output + linear_output
        add_layer_summary('output', y)

    return y
Beispiel #19
0
def ffn(x, params, mode):
    """
    Position-wise feed forward block followed by add & norm.
    input:
        x: input tensor; its last static dimension is restored by the second layer
        params: dict, reads 'ffn_hidden' and 'dropout_rate'
        mode: tf.estimator.ModeKeys, dropout is active in TRAIN only
    output:
        result of add_and_norm_layer(x, feed forward output)
    """
    with tf.variable_scope('ffn', reuse=tf.AUTO_REUSE):
        model_dim = x.shape.as_list()[-1]  # emb_size

        # expand to the hidden width with relu
        hidden = tf.layers.dense(x,
                                 units=params['ffn_hidden'],
                                 activation='relu')
        add_layer_summary('ffn_hidden1', hidden)

        # project back to the input width, then dropout
        projected = tf.layers.dense(hidden, units=model_dim, activation=None)
        projected = tf.layers.dropout(
            projected,
            rate=params['dropout_rate'],
            training=(mode == tf.estimator.ModeKeys.TRAIN))
        add_layer_summary('ffn_hidden2', projected)

        result = add_and_norm_layer(x, projected)
    return result
Beispiel #20
0
def model_fn_sparse(features, labels, mode, params):
    """
    Sparse-input graph: linear output plus a dense stack on top of a
    bi-interaction pooling of the embeddings.
    Input
        features: dict with 'feat_ids' and 'feat_vals'
        labels: unused here
        mode: tf.estimator.ModeKeys, forwarded to the dense stack
        params: dict, reads 'data_params', 'hidden_units', 'dropout_rate'
            and 'batch_norm'
    Return
        y: linear output + dense output
    """
    # hyper parameters describing the sparse input layout
    layout = params['data_params']
    field_size = layout['field_size']
    feature_size = layout['feature_size']
    embedding_size = layout['embedding_size']

    # both inputs are flattened to batch * field_size
    ids = tf.reshape(features['feat_ids'], shape=[-1, field_size])
    vals = tf.reshape(features['feat_vals'], shape=[-1, field_size])

    # value-scaled embeddings
    embedded = sparse_embedding(feature_size,
                                embedding_size,
                                field_size,
                                ids,
                                vals,
                                add_summary=True)

    # linear output
    linear_output = sparse_linear(feature_size, ids, vals, add_summary=True)

    with tf.variable_scope('BI_Pooling'):
        # (sum over fields)^2 - sum over fields of squares, per embedding dim
        square_of_sum = tf.pow(tf.reduce_sum(embedded, axis=1), 2)
        sum_of_square = tf.reduce_sum(tf.pow(embedded, 2), axis=1)
        pooled = tf.subtract(square_of_sum, sum_of_square)
        add_layer_summary(pooled.name, pooled)

    # fully connected stacked dense layers
    deep_output = stack_dense_layer(pooled,
                                    params['hidden_units'],
                                    dropout_rate=params['dropout_rate'],
                                    batch_norm=params['batch_norm'],
                                    mode=mode,
                                    add_summary=True)

    with tf.variable_scope('output'):
        y = linear_output + deep_output
        add_layer_summary('output', y)

    return y
Beispiel #21
0
def stack_dense_layer(dense, hidden_units, dropout_rate, batch_norm, mode, add_summary):
    """
    Stack relu dense layers with optional batch norm and dropout.
    Input
        dense: input tensor
        hidden_units: iterable with the width of every layer
        dropout_rate: dropout is added only when this is > 0
        batch_norm: truthy to add batch normalisation after every dense layer
        mode: tf.estimator.ModeKeys, TRAIN enables the training behaviour
        add_summary: whether to log every layer with add_layer_summary
    Return
        output of the last layer (the input itself if hidden_units is empty)
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    use_dropout = dropout_rate > 0

    with tf.variable_scope('Dense'):
        layer = dense
        for index, width in enumerate(hidden_units):
            layer = tf.layers.dense(layer,
                                    units=width,
                                    activation='relu',
                                    name='dense{}'.format(index))
            if batch_norm:
                layer = tf.layers.batch_normalization(layer,
                                                      center=True,
                                                      scale=True,
                                                      trainable=True,
                                                      training=is_training)
            if use_dropout:
                layer = tf.layers.dropout(layer,
                                          rate=dropout_rate,
                                          training=is_training)

            if add_summary:
                add_layer_summary(layer.name, layer)

    return layer
Beispiel #22
0
def sparse_linear(feature_size, feat_ids, feat_vals, add_summary):
    """
    Linear term for sparse input: sum over fields of weight[id] * value, plus bias.
    Input
        feature_size: length of the weight vector
        feat_ids: ids used to look up the weights
        feat_vals: values multiplied onto the looked-up weights
        add_summary: whether to log the result with add_layer_summary
    Return
        linear output, reduced over axis 1 with keepdims=True
    """
    with tf.variable_scope('Linear_output'):
        weight_vector = tf.get_variable(
            name='linear_weight',
            shape=[feature_size],
            initializer=tf.truncated_normal_initializer())
        bias_term = tf.get_variable(
            name='linear_bias',
            shape=[1],
            initializer=tf.glorot_uniform_initializer())

        # one scalar weight per feature id, scaled by the feature value
        picked = tf.nn.embedding_lookup(weight_vector, feat_ids)
        weighted = tf.multiply(picked, feat_vals)
        summed = tf.reduce_sum(weighted, axis=1, keepdims=True)
        result = tf.add(summed, bias_term)

        if add_summary:
            add_layer_summary('linear_output', result)

    return result
Beispiel #23
0
def Bilinear_layer(embedding_matrix, field_size, emb_size, type, name):
    """
    Bilinear interaction between every unordered pair of field embeddings:
    p_ij = (v_i . W) * v_j, where '.' is a matrix product and '*' is the
    element-wise product.
    Input
        embedding_matrix: batch * field_size * emb_size
        field_size: number of fields
        emb_size: embedding dimension, W is emb_size * emb_size
        type: how W is shared
            'field_all': one W for all pairs
            'field_each': one W per left field i
            'field_interaction': one W per pair (i, j)
        name: suffix for the variable scope and the variable names
    Return
        batch * (emb_size * field_size * (field_size - 1) / 2)
    Raise
        ValueError: if type is not one of the three supported values
    """
    # Fail early with a clear message; an unknown type would otherwise leave
    # `weight` undefined and surface as a NameError inside the loop.
    if type not in ('field_all', 'field_each', 'field_interaction'):
        raise ValueError(
            "type must be 'field_all', 'field_each' or 'field_interaction', "
            "got {!r}".format(type))

    interaction_list = []
    with tf.variable_scope('BI_interaction_{}'.format(name)):
        if type == 'field_all':
            weight = tf.get_variable(
                shape=(emb_size, emb_size),
                initializer=tf.truncated_normal_initializer(),
                name='Bilinear_weight_{}'.format(name))
        for i in range(field_size):
            if type == 'field_each':
                weight = tf.get_variable(
                    shape=(emb_size, emb_size),
                    initializer=tf.truncated_normal_initializer(),
                    name='Bilinear_weight_{}_{}'.format(i, name))
            for j in range(i + 1, field_size):
                if type == 'field_interaction':
                    weight = tf.get_variable(
                        shape=(emb_size, emb_size),
                        initializer=tf.truncated_normal_initializer(),
                        name='Bilinear_weight_{}_{}_{}'.format(i, j, name))
                vi = tf.gather(embedding_matrix,
                               indices=i,
                               axis=1,
                               batch_dims=0,
                               name='v{}'.format(i))  # batch * emb_size
                vj = tf.gather(embedding_matrix,
                               indices=j,
                               axis=1,
                               batch_dims=0,
                               name='v{}'.format(j))  # batch * emb_size
                # bilinear: project vi with W first, THEN take the
                # element-wise product with vj. (vi * vj) . W is a different
                # function and is not the bilinear interaction.
                pij = tf.multiply(tf.matmul(vi, weight), vj)
                interaction_list.append(pij)

        combination = tf.stack(
            interaction_list,
            axis=1)  # batch * (field_size * (field_size - 1) / 2) * emb_size
        # field_size * (field_size - 1) is always even, so // is exact
        n_pair = field_size * (field_size - 1) // 2
        combination = tf.reshape(combination,
                                 shape=[-1, emb_size * n_pair])  # batch * ~
        add_layer_summary('bilinear_output', combination)

    return combination
Beispiel #24
0
    def build_model(self, features, labels, mode):
        """
        Build model_fn for Quick Thought
        Input
            features: {tokens:, seq_len:}
            labels: {tokens:, seq_len:}
            mode: tf.estimator.ModeKeys
        Return
            tf.estimator.EstimatorSpec
                PREDICT: predictions from self.vectorize
                TRAIN: loss and train_op
                EVAL: loss only
        """

        input_encode = self.input_encode(features)

        output_encode = self.output_encode(features, labels, mode)

        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = self.vectorize(
                [input_encode.state[0], output_encode.state[0]], features)
            return tf.estimator.EstimatorSpec(
                mode=tf.estimator.ModeKeys.PREDICT, predictions=predictions)

        sim_score = tf.matmul(input_encode.state[0],
                              output_encode.state[0],
                              transpose_b=True)  # [batch, batch] sim score
        add_layer_summary('sim_score', sim_score)

        loss = self.compute_loss(sim_score)

        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = tf.train.AdamOptimizer(
                learning_rate=get_learning_rate(self.params))

            # run pending update ops (e.g. from normalisation layers) before
            # every training step
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

            with tf.control_dependencies(update_ops):
                if self.params['clip_gradient']:
                    train_op = gradient_clipping(optimizer, loss,
                                                 self.params['lower_gradient'],
                                                 self.params['upper_gradient'])
                else:
                    train_op = optimizer.minimize(
                        loss, global_step=tf.train.get_global_step())

            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op)

        # EVAL: the method used to fall through and return None here, which
        # is not a valid model_fn result. Evaluation only needs the loss.
        return tf.estimator.EstimatorSpec(mode, loss=loss)
Beispiel #25
0
def cross_op(xk, x0, layer_size_prev, layer_size_curr, layer, emb_size,
             field_size):
    """
    Cross the previous layer output with the base input along the embedding
    axis, then compress the result with a learned kernel.
    Input
        xk: previous layer output, assumed batch * HK-1 * D (TODO confirm with caller)
        x0: base input, assumed batch * H0 * D (TODO confirm with caller)
        layer_size_prev: HK-1, used to size the flattened interaction
        layer_size_curr: HK, number of output feature maps
        layer: layer index, only used to name the variable and summaries
        emb_size: D
        field_size: H0
    Return
        tensor of shape batch * HK * D
    """
    flat_width = field_size * layer_size_prev

    # Put D in front of the field axes so matmul forms, per embedding
    # dimension, the outer product (HK-1 * 1) x (1 * H0) -> HK-1 * H0
    xk_col = tf.expand_dims(tf.transpose(xk, perm=(0, 2, 1)), 3)
    x0_row = tf.expand_dims(tf.transpose(x0, perm=(0, 2, 1)), 2)
    pairwise = tf.matmul(xk_col, x0_row)  # batch * D * HK-1 * H0

    # Flatten the two field axes: batch * D * (HK-1 * H0)
    pairwise = tf.reshape(pairwise, [-1, emb_size, flat_width])
    add_layer_summary('zk_{}'.format(layer), pairwise)

    # Compress every flattened interaction into HK channels:
    # (batch * D * (HK-1*H0)) x ((HK-1*H0) * HK) -> batch * D * HK
    compress_kernel = tf.get_variable(name='kernel{}'.format(layer),
                                      shape=(flat_width, layer_size_curr))
    compressed = tf.matmul(pairwise, compress_kernel)

    # Restore the (batch, channel, embedding) layout: batch * HK * D
    compressed = tf.transpose(compressed, perm=[0, 2, 1])
    add_layer_summary('Xk_{}'.format(layer), compressed)
    return compressed
Beispiel #26
0
def model_fn_sparse(features, labels, mode, params):
    """
    Deep + cross network on sparse (id, value) input: embed the features,
    run a dense tower and a cross tower over the same flattened embedding,
    concatenate both and project to one output unit.
    Input
        features: {feat_ids:, feat_vals:}
        labels: not used here
        mode: forwarded to stack_dense_layer
        params: needs data_params {field_size, feature_size, embedding_size},
            hidden_units, dropout_rate, batch_norm, cross_layers
    Return
        output of the final 1-unit dense layer
    """
    # hyper parameter
    data_cfg = params['data_params']
    n_field = data_cfg['field_size']
    n_feature = data_cfg['feature_size']
    emb_dim = data_cfg['embedding_size']

    # raw ids / values, forced to (batch, field_size)
    ids = tf.reshape(features['feat_ids'], shape=[-1, n_field])
    vals = tf.reshape(features['feat_vals'], shape=[-1, n_field])

    with tf.variable_scope('extract_embedding'):
        # original comment gives (batch, field_size, embedding_size)
        embedded = sparse_embedding(n_feature,
                                    emb_dim,
                                    n_field,
                                    ids,
                                    vals,
                                    add_summary=True)
        # flatten fields and embedding: (batch, field_size * embedding_size)
        flat_input = tf.reshape(embedded, [-1, n_field * emb_dim])

    # deep tower
    deep_out = stack_dense_layer(flat_input,
                                 params['hidden_units'],
                                 params['dropout_rate'],
                                 params['batch_norm'],
                                 mode,
                                 add_summary=True)

    # cross tower, fed with the same flattened embedding
    cross_out = cross_layer(flat_input, params['cross_layers'])

    with tf.variable_scope('stack'):
        stacked = tf.concat([deep_out, cross_out], axis=1)

    with tf.variable_scope('output'):
        logit = tf.layers.dense(stacked, units=1)
        add_layer_summary('output', logit)

    return logit
Beispiel #27
0
def model_fn(features, labels, mode, params):
    """
    Stack of residual layers on top of the dense feature columns, followed
    by a 1-unit dense output.
    Input
        features: input dict consumed by tf.feature_column.input_layer
        labels: not used here
        mode: forwarded to residual_layer
        params: needs hidden_units, dropout_rate, batch_norm
    Return
        output of the final 1-unit dense layer
    """
    columns = build_features()
    net = tf.feature_column.input_layer(features, columns)

    # one residual layer per entry of hidden_units, chained in order
    with tf.variable_scope('Residual_layers'):
        for idx, width in enumerate(params['hidden_units']):
            net = residual_layer(net,
                                 width,
                                 dropout_rate=params['dropout_rate'],
                                 batch_norm=params['batch_norm'],
                                 mode=mode)
            add_layer_summary('residual_layer{}'.format(idx), net)

    with tf.variable_scope('output'):
        logit = tf.layers.dense(net, units=1)
        add_layer_summary('output', logit)

    return logit
Beispiel #28
0
def layer_norm(x):
    """
    Layer normalization: standardize x along its last axis, then apply a
    learned element-wise scale and shift.
    Input
        x: tensor whose last dimension is statically known
    Return
        tensor with the same shape as x
    """
    # AUTO_REUSE: repeated calls under the same outer scope share
    # norm_kernel / norm_bias instead of raising on the second call.
    with tf.variable_scope('layer_normalization', reuse=tf.AUTO_REUSE):
        feature_dim = x.shape.as_list()[-1]
        # float32 machine epsilon keeps the denominator away from zero
        eps = tf.constant(np.finfo(np.float32).eps)

        mean, variance = tf.nn.moments(x, axes=-1, keep_dims=True)
        centered = x - mean
        stddev = (variance + eps)**0.5
        x = centered / stddev  # do layer norm
        add_layer_summary('norm', x)

        # scale starts at 1 and shift at 0, so the transform is initially identity
        scale = tf.get_variable('norm_kernel',
                                shape=(feature_dim, ),
                                initializer=tf.ones_initializer())
        shift = tf.get_variable('norm_bias',
                                shape=(feature_dim, ),
                                initializer=tf.zeros_initializer())
        x = tf.multiply(scale, x) + shift
        add_layer_summary('norm_transform', x)
    return x
Beispiel #29
0
def model_fn_dense(features, labels, mode, params):
    """
    Linear part on the sparse columns plus a dense tower on top of a
    bi-interaction pooling of the embedded columns.
    Input
        features: input dict consumed by tf.feature_column.input_layer
        labels: not used here
        mode: forwarded to stack_dense_layer
        params: needs hidden_units, dropout_rate, batch_norm
    Return
        linear output + dense tower output
    """
    emb_columns, wide_columns = build_features()
    emb_input = tf.feature_column.input_layer(features, emb_columns)
    wide_input = tf.feature_column.input_layer(features, wide_columns)

    # every embedding column is assumed to share the width of the first one
    n_field = len(emb_columns)
    emb_dim = emb_columns[0].variable_shape.as_list()[-1]
    emb_matrix = tf.reshape(
        emb_input, [-1, n_field, emb_dim])  # batch * field_size * emb_size

    with tf.variable_scope('Linear_output'):
        linear_output = tf.layers.dense(wide_input, units=1)
        add_layer_summary('linear_output', linear_output)

    with tf.variable_scope('BI_Pooling'):
        # (sum over fields)^2 - sum over fields of squares -> batch * emb_size
        # NOTE(review): no 0.5 scaling is applied to the difference; confirm intended
        squared_of_sum = tf.pow(tf.reduce_sum(emb_matrix, axis=1), 2)
        sum_of_squared = tf.reduce_sum(tf.pow(emb_matrix, 2), axis=1)
        pooled = tf.subtract(squared_of_sum, sum_of_squared)
        add_layer_summary(pooled.name, pooled)

    deep_out = stack_dense_layer(pooled,
                                 params['hidden_units'],
                                 dropout_rate=params['dropout_rate'],
                                 batch_norm=params['batch_norm'],
                                 mode=mode,
                                 add_summary=True)

    with tf.variable_scope('output'):
        # NOTE(review): presumably the last hidden unit is 1 so this is a
        # plain sum rather than a broadcast; verify against params
        y = linear_output + deep_out
        add_layer_summary('output', y)

    return y
Beispiel #30
0
def model_fn_dense(features, labels, mode, params):
    """
    Three-branch model: linear part on the sparse columns, dense tower on the
    embedded columns, and a CIN branch on the embedding matrix. The branch
    outputs are concatenated and projected to one output unit.
    Input
        features: input dict consumed by tf.feature_column.input_layer
        labels: not used here
        mode: forwarded to stack_dense_layer
        params: needs hidden_units, dropout_rate, batch_norm, cin_layer_size
    Return
        output of the final 1-unit dense layer
    """
    emb_columns, wide_columns = build_features()
    emb_input = tf.feature_column.input_layer(features, emb_columns)
    wide_input = tf.feature_column.input_layer(features, wide_columns)

    # Linear branch
    with tf.variable_scope('Linear_component'):
        linear_output = tf.layers.dense(wide_input, units=1)
        add_layer_summary('linear_output', linear_output)

    # Deep branch
    deep_out = stack_dense_layer(emb_input,
                                 params['hidden_units'],
                                 params['dropout_rate'],
                                 params['batch_norm'],
                                 mode,
                                 add_summary=True)

    # CIN branch: every embedding column is assumed to share the width of
    # the first one, so the flat input can be viewed as field * embedding
    n_field = len(emb_columns)
    emb_dim = emb_columns[0].variable_shape.as_list()[-1]
    emb_matrix = tf.reshape(
        emb_input, [-1, n_field, emb_dim])  # batch * field_size * emb_size
    add_layer_summary('embedding_matrix', emb_matrix)

    cin_out = cin_layer(emb_matrix, params['cin_layer_size'], emb_dim,
                        n_field)

    with tf.variable_scope('output'):
        merged = tf.concat([deep_out, cin_out, linear_output], axis=1)
        logit = tf.layers.dense(merged, units=1)
        add_layer_summary('output', logit)

    return logit