Example #1
def data_pipeline_calls(features, params, mode, GPU):

    init = tf.initializers.truncated_normal(0.0, 0.01)

    # if you want to train the embeds from scratch
    # embedding_vectors = tf.get_variable(name='embedding_vectors', shape=(params['vocab_size'], 512), initializer=init)
    # embedded_input = tf.nn.embedding_lookup(embedding_vectors, features['encoder_inputs'])

    # If you don't want to train the embeddings:
    lookup_table, emb_vectors = load_embeddings(params['embedding_vectors'], params['vocab'])
    embedded_input = tf.nn.embedding_lookup(emb_vectors, features['encoder_inputs'])

    forget_bias = get_forget_bias(params, mode)
    with tf.device(GPU['sidekick']):
        # high_dim_embedding_vecs = tf.layers.dense(embedded_input, units=512, activation=tf.nn.relu)

        positional_embeddings = tf.get_variable('position_embedding', shape=(params['input_max_length'], 50))

        positions = tf.range(params['input_max_length'])
        positions = tf.reshape(tf.tile(positions, [params['batch_size']]), (-1, params['input_max_length']))
        position_embeddings = tf.cast(tf.nn.embedding_lookup(positional_embeddings, positions), tf.float32)


    transformer_input = tf.add(embedded_input, position_embeddings)
    transformer_input = tf.nn.dropout(transformer_input, keep_prob=0.5)

    return transformer_input
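The helpers load_embeddings and get_forget_bias are referenced throughout these examples but never shown. A minimal sketch of what load_embeddings might look like, assuming the pretrained vectors are stored as a NumPy array on disk and the vocabulary is an ordered list of words (the file format and names here are assumptions, not taken from the original project):

import numpy as np
import tensorflow as tf

def load_embeddings(embedding_path, vocab):
    # hypothetical: pretrained vectors saved as a (vocab_size, embedding_dim) array
    vectors = np.load(embedding_path)
    # word -> row index, matching the row order of the embedding matrix
    lookup_table = {word: idx for idx, word in enumerate(vocab)}
    emb_vectors = tf.constant(vectors, dtype=tf.float32, name='embedding_vectors')
    return lookup_table, emb_vectors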
Example #2
def model_fn(features, labels, mode, params):
    # particular to this project
    word2index = params['word2index']
    # index2word = params['index2word']

    GPUs = get_available_gpus()
    GPU = {
        'titan': GPUs[1],
        'sidekick': GPUs[0]}

    lookup_table, emb_vectors = load_embeddings(params['embedding_vectors'], params['vocab'])
    embedded_enc_input = tf.nn.embedding_lookup(emb_vectors, features['encoder_inputs'])
    forget_bias = get_forget_bias(params, mode)

    num_units = [2048, 2048]
    init = tf.initializers.truncated_normal(0.0, 0.01)

    with tf.device(GPU['titan']):
        encoder_cells = [tf.nn.rnn_cell.LSTMCell(num_units=num, forget_bias=forget_bias, initializer=init) for num in num_units]
        encoder_stacked_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(encoder_cells)

        enc_outputs, enc_final_state = tf.nn.dynamic_rnn(encoder_stacked_rnn_cell,
                                                         embedded_enc_input,
                                                         sequence_length=features['encoder_input_lengths'],
                                                         dtype=tf.float32)
    # Decoder model
    with tf.device(GPU['sidekick']):
        partial_embedding_helper = partial(embedding_helper, emb_vectors=emb_vectors)
        if mode == tf.estimator.ModeKeys.TRAIN:
            embed_dec_inputs = tf.nn.embedding_lookup(emb_vectors, features['decoder_inputs'])
            helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=embed_dec_inputs,
                sequence_length=features['decoder_input_lengths'],
            )
        else:
            helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                embedding=partial_embedding_helper,
                start_tokens=tf.tile([word2index['<GO>']],
                                     [tf.shape(features['encoder_inputs'])[0]]),
                end_token=word2index['<EOS>'])

        dec_cell = tf.nn.rnn_cell.LSTMCell(num_units=num_units[-1],  # needs to match size of last layer of encoder
                                           forget_bias=forget_bias,
                                           initializer=init)

        decoder = tf.contrib.seq2seq.BasicDecoder(
            cell=dec_cell,
            helper=helper,
            initial_state=enc_final_state[-1],
            output_layer=Dense(params['vocab_size'], use_bias=False))
        dec_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder=decoder,
            output_time_major=False,
            impute_finished=True,
            maximum_iterations=params['output_max_length'])
    logits = tf.identity(dec_outputs.rnn_output, 'logits')

    if mode == tf.estimator.ModeKeys.PREDICT:
        indices = predict_words(logits)
        predictions = {'sentence_tokens': indices}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    training_labels = labels['target_sequences']
    weights = tf.cast(tf.not_equal(training_labels, tf.constant(word2index['<PAD>'])), tf.float32)
    sequence_loss = tf.contrib.seq2seq.sequence_loss(logits=logits, targets=training_labels, weights=weights)

    tf.summary.scalar('sequence_loss', sequence_loss)

    if mode == tf.estimator.ModeKeys.EVAL:
        metrics = {'sequence_loss': tf.metrics.mean(sequence_loss)}
        return tf.estimator.EstimatorSpec(mode, loss=sequence_loss, eval_metric_ops=metrics)

    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    train_op = optimizer.minimize(sequence_loss, global_step=tf.train.get_global_step())

    return tf.estimator.EstimatorSpec(mode, loss=sequence_loss, train_op=train_op)
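A rough sketch of how a model_fn like the one above is typically wired into the Estimator API; the hyperparameter values, file names, and the train_input_fn below are illustrative assumptions, not taken from the original project:

def train(train_input_fn, vocab, word2index):
    params = {
        'word2index': word2index,
        'embedding_vectors': 'glove_vectors.npy',   # assumed file name
        'vocab': vocab,
        'vocab_size': len(vocab),
        'output_max_length': 30,                    # assumed value
    }
    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       model_dir='./seq2seq_model',
                                       params=params)
    estimator.train(input_fn=train_input_fn, steps=10000)
    return estimator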
Example #3
def model_fn(features, labels, mode, params):
    # particular to this project
    word2index = params['word2index']


    GPUs = get_available_gpus()
    CPUs = get_available_cpus()

    GPU = {
        'titan': GPUs[0],
        'sidekick': GPUs[1]}
    CPU = {
        'main_cpu': CPUs[0]
    }

    lookup_table, emb_vectors = load_embeddings(params['embedding_vectors'], params['vocab'])
    embedded_input = tf.nn.embedding_lookup(emb_vectors, features['encoder_inputs'])
    forget_bias = get_forget_bias(params, mode)

    positional_embeddings = tf.get_variable('position_embedding', shape=(params['input_max_length'], 50))
    # build one row of positions per example, as in Example #1 (tf.range needs a scalar limit)
    positions = tf.range(params['input_max_length'])
    positions = tf.reshape(tf.tile(positions, [params['batch_size']]), (-1, params['input_max_length']))
    position_embeddings = tf.cast(tf.nn.embedding_lookup(positional_embeddings, positions), tf.float32)

    num_units = [256]
    init = tf.initializers.truncated_normal(0.0, 0.01)

    embedded_enc_input = tf.add(embedded_input, position_embeddings)



    with tf.device(GPU['titan']):

        three_channel = tf.expand_dims(embedded_enc_input, axis=3)
        conv = tf.layers.conv2d(tf.cast(three_channel, tf.float32), 126, (5, 5), activation=tf.nn.relu, use_bias=True, kernel_initializer=init, name='conv1')
        conv = tf.layers.conv2d(conv, 32,  (3, 3), activation=tf.nn.relu, use_bias=True, kernel_initializer=init, name='conv2')
        conv = tf.layers.conv2d(conv, 16,  (3, 3), activation=tf.nn.relu, use_bias=True, kernel_initializer=init, name='conv3')
        conv = tf.layers.conv2d(conv, 8,  (3, 3), activation=tf.nn.relu, use_bias=True, kernel_initializer=init, name='conv4')
        conv = tf.layers.conv2d(conv, 16,  (3, 3), activation=tf.nn.relu, use_bias=True, kernel_initializer=init, name='conv5')
        conv = tf.layers.conv2d(conv, 32,  (3, 3), activation=tf.nn.relu, use_bias=True, kernel_initializer=init, name='conv6')


        # start_flat = tf.reduce_mean(start_transformed, axis=1, name='encoded_text')
        start_flat = tf.layers.flatten(conv, name='start_flatten')
        # end_flat = tf.layers.flatten(end_transformed, name='end_flatten')

        # start pred
        start_hidden = tf.layers.dense(start_flat, units=params['input_max_length'])
        start_predictions = tf.layers.dense(start_hidden, units=1, activation=tf.nn.sigmoid, kernel_initializer=init)
        start_predictions_transformed = transform_to_range(start_predictions, min_value=0, max_value=params['input_max_length'])

        # end pred
        # end_input = tf.concat((start_predictions, end_flat), 1, name='end_input')
        # end_hidden = tf.layers.dense(end_input, units=params['input_max_length'])
        # end_predictions = tf.layers.dense(end_hidden, activation=tf.nn.sigmoid, use_bias=True, units=1)
            # end_predictions_transformed = transform_to_range(end_predictions, min_value=0, max_value=params['input_max_length'])


    if mode == tf.estimator.ModeKeys.PREDICT:

        starts = tf.to_int32(start_predictions_transformed)
        # ends = tf.to_int32(end_predictions_transformed)  # end-prediction branch is commented out above

        predictions = {'question_starts': starts}  # , 'question_ends': ends
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # compute losses
    question_start_labels = tf.reshape(tf.to_float(labels['starts']), (-1, 1))
    # question_end_labels = tf.reshape(tf.to_float(labels['stops']), (-1, 1))

    start_loss = tf.losses.mean_squared_error(labels=question_start_labels, predictions=start_predictions_transformed)
    # end_loss = tf.losses.mean_squared_error(labels=question_end_labels, predictions=end_predictions_transformed)

    # order_penalty = tf.cast(
    #     tf.divide(
    #         tf.cast(
    #             tf.nn.relu(start_predictions_transformed - end_predictions_transformed),
    #             tf.float32),
    #         tf.constant(10.0, tf.float32)
    #     ), tf.float32
    # )
    # zero_spread_penalty = tf.cast(tf.reduce_sum(tf.abs(start_predictions_transformed - end_predictions_transformed)), tf.float32)

    combined_loss = start_loss# + end_loss + tf.reduce_mean(order_penalty)# + zero_spread_penalty

    tf.summary.scalar('start_loss', start_loss)
    # tf.summary.scalar('end_loss', end_loss)
    # tf.summary.scalar('penalty_loss', tf.reduce_mean(order_penalty))
    # tf.summary.scalar('zero_spread_penalty', zero_spread_penalty)

    if mode == tf.estimator.ModeKeys.EVAL:
        metrics = {
            'start_loss': tf.metrics.mean(combined_loss)
            }
        return tf.estimator.EstimatorSpec(mode, loss=combined_loss, eval_metric_ops=metrics)


    global_step = tf.train.get_global_step()

    # starter_learning_rate = 0.1
    # learning_rate = tf.train.exponential_decay(
    #     learning_rate=starter_learning_rate,
    #     global_step=global_step,
    #     decay_steps=100000,
    #     decay_rate=0.96,
    #     staircase=False,
    #     name='lr_decay_rate')
    learning_rate = 0.0001
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.98, epsilon=1e-09)
    gvs = optimizer.compute_gradients(combined_loss)
    capped_gvs = [(tf.clip_by_value(grad, -.5, .5), var) for grad, var in gvs]
    train_op = optimizer.apply_gradients(capped_gvs, global_step=global_step)

    return tf.estimator.EstimatorSpec(mode, loss=combined_loss, train_op=train_op)
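transform_to_range is another helper that is only referenced here. Given that it receives a sigmoid output and a min/max, a plausible sketch is a linear rescaling from [0, 1] onto [min_value, max_value] (this is an assumption about its behavior, not the original implementation):

def transform_to_range(x, min_value, max_value):
    # map a value in [0, 1] (e.g. a sigmoid activation) onto [min_value, max_value]
    return min_value + x * (max_value - min_value)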
Example #4
def model_fn(features, labels, mode, params):
    word2index = params['word2index']
    # index2word = params['index2word']
    lookup_table, emb_vectors = load_embeddings(params['embedding_vectors'], params['vocab'])
    embedded_enc_input = tf.nn.embedding_lookup(emb_vectors, features['encoder_inputs'])
    forget_bias = get_forget_bias(params, mode)

    init = tf.initializers.truncated_normal(0.0, 0.01)
    num_units = [params['num_rnn_units_1'], params['num_rnn_units_2']]
    cells = [tf.nn.rnn_cell.LSTMCell(num_units=n, forget_bias=forget_bias, initializer=init) for n in num_units]
    stacked_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(cells)

    enc_outputs, enc_final_state = tf.nn.dynamic_rnn(stacked_rnn_cell,
                                                     embedded_enc_input,
                                                     sequence_length=features['encoder_input_lengths'],
                                                     dtype=tf.float32)

    # Classifier
    init = tf.initializers.truncated_normal(0.0, .001)
    average_outputs = tf.reduce_mean(enc_outputs, axis=1)
    fc1 = tf.layers.dense(average_outputs,
                          params['dense_1'],
                          activation=tf.nn.relu,
                          kernel_initializer=init)
    fc2 = tf.layers.dense(fc1,
                          params['dense_2'],
                          activation=tf.nn.relu,
                          kernel_initializer=init)
    class_logits = tf.layers.dense(fc2,
                                   params['num_classes'],
                                   kernel_initializer=init)
    probabilities = tf.nn.softmax(class_logits)
    pred_num_q = tf.argmax(probabilities, axis=1)

    one_hot_labels = tf.one_hot(labels['num_questions_labels'], params['num_classes'])
    crossentropy_loss = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=one_hot_labels,
                                                   logits=class_logits))
    accuracy = tf.metrics.accuracy(labels=labels['num_questions_labels'], predictions=pred_num_q, name='accuracy_op')

    # Decoder model
    partial_embedding_helper = partial(embedding_helper, emb_vectors=emb_vectors)
    if mode == tf.estimator.ModeKeys.TRAIN:
        embed_dec_inputs = tf.nn.embedding_lookup(emb_vectors, features['decoder_inputs'])
        helper = tf.contrib.seq2seq.TrainingHelper(
            inputs=embed_dec_inputs,
            sequence_length=features['decoder_input_lengths'],
        )
    else:
        helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
            embedding=partial_embedding_helper,
            start_tokens=tf.tile([word2index['<GO>']],
                                 [tf.shape(features['encoder_inputs'])[0]]),
            end_token=word2index['<EOS>'])

    dec_cell = tf.nn.rnn_cell.LSTMCell(num_units=params['num_units'],
                                       forget_bias=forget_bias,
                                       initializer=init
                                       )
    decoder = tf.contrib.seq2seq.BasicDecoder(
        cell=dec_cell,
        helper=helper,
        initial_state=enc_final_state[1],
        output_layer=Dense(params['vocab_size'], use_bias=False))
    dec_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
        decoder=decoder,
        output_time_major=False,
        impute_finished=True,
        maximum_iterations=params['output_max_length'])
    logits = tf.identity(dec_outputs.rnn_output, 'logits')

    if mode == tf.estimator.ModeKeys.PREDICT:
        indices = predict_words(logits)
        predictions = {'sentence_tokens': indices}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    training_labels = labels['target_sequences']
    weights = tf.cast(tf.not_equal(training_labels, tf.constant(word2index['<PAD>'])), tf.float32)
    sequence_loss = tf.contrib.seq2seq.sequence_loss(logits=logits, targets=training_labels, weights=weights)

    tf.summary.scalar('sequence_loss', sequence_loss)
    tf.summary.scalar('crossentropy_loss', crossentropy_loss)
    tf.summary.scalar('accuracy', accuracy[1])  # accuracy is a (value, update_op) tuple; log the update op
    if mode == tf.estimator.ModeKeys.EVAL:
        metrics = {
            'accuracy': accuracy,
            'sequence_loss': tf.metrics.mean(sequence_loss),
            'cross_entropy_loss': tf.metrics.mean(crossentropy_loss)
        }
        return tf.estimator.EstimatorSpec(mode, loss=sequence_loss, eval_metric_ops=metrics)

    total_loss = sequence_loss + crossentropy_loss
    optimizer = tf.train.AdamOptimizer(learning_rate=params['learning_rate'])
    train_op = optimizer.minimize(total_loss, global_step=tf.train.get_global_step())

    return tf.estimator.EstimatorSpec(mode, loss=total_loss, train_op=train_op)
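predict_words and embedding_helper are also assumed helpers. Hedged sketches consistent with how they are used above (a greedy argmax over the vocabulary dimension, and an embedding lookup passed to GreedyEmbeddingHelper via functools.partial):

def predict_words(logits):
    # greedy decoding: take the highest-scoring vocabulary index at every time step
    return tf.argmax(logits, axis=-1, output_type=tf.int32)

def embedding_helper(ids, emb_vectors):
    # embeds the previously predicted token ids for GreedyEmbeddingHelper
    return tf.nn.embedding_lookup(emb_vectors, ids)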
Example #5
def model_fn(features, labels, mode, params):
    # particular to this project
    word2index = params['word2index']
    # index2word = params['index2word']

    GPUs = get_available_gpus()
    GPU = {'titan': GPUs[1], 'sidekick': GPUs[0]}

    lookup_table, emb_vectors = load_embeddings(params['embedding_vectors'],
                                                params['vocab'])
    embedded_enc_input = tf.nn.embedding_lookup(emb_vectors,
                                                features['encoder_inputs'])
    forget_bias = get_forget_bias(params, mode)

    num_units = [1024, 512]
    init = tf.initializers.truncated_normal(0.0, 0.01, seed=42)

    # forward_cells = [ for num in num_units]
    # reverse_cells = [tf.nn.rnn_cell.DropoutWrapper(tf.nn.rnn_cell.LSTMCell(num, initializer=init), input_keep_prob=forget_bias) for num in num_units]
    def make_cell(size, scope):
        with tf.variable_scope(scope):
            cell = tf.nn.rnn_cell.DropoutWrapper(tf.nn.rnn_cell.LSTMCell(
                size, initializer=init),
                                                 input_keep_prob=forget_bias)
        return cell

    with tf.device(GPU['titan']):
        output = embedded_enc_input
        for count, units in enumerate(num_units, 1):
            # separate forward and backward cells so their variables don't clash
            cell_fw = make_cell(units, scope='BLSTM_fw_' + str(count))
            cell_bw = make_cell(units, scope='BLSTM_bw_' + str(count))
            state_fw = cell_fw.zero_state(params['batch_size'], tf.float32)
            state_bw = cell_bw.zero_state(params['batch_size'], tf.float32)

            ((encoder_fw_outputs, encoder_bw_outputs),
             (encoder_fw_state,
              encoder_bw_state)) = tf.nn.bidirectional_dynamic_rnn(
                  cell_fw,
                  cell_bw,
                  output,
                  initial_state_fw=state_fw,
                  initial_state_bw=state_bw,
                  scope='bRNN_' + str(count),
                  dtype=tf.float32)
            output = tf.concat([encoder_fw_outputs, encoder_bw_outputs],
                               axis=2)

    encoder_state_c = tf.concat((encoder_fw_state.c, encoder_bw_state.c),
                                1,
                                name='bidirectional_concat_c')
    encoder_state_h = tf.concat((encoder_fw_state.h, encoder_bw_state.h),
                                1,
                                name='bidirectional_concat_h')

    encoder_state = tf.nn.rnn_cell.LSTMStateTuple(c=encoder_state_c,
                                                  h=encoder_state_h)

    # Decoder model
    with tf.device(GPU['sidekick']):
        partial_embedding_helper = partial(embedding_helper,
                                           emb_vectors=emb_vectors)
        if mode == tf.estimator.ModeKeys.TRAIN:
            embed_dec_inputs = tf.nn.embedding_lookup(
                emb_vectors, features['decoder_inputs'])
            helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=embed_dec_inputs,
                sequence_length=features['decoder_input_lengths'],
            )
        else:
            helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                embedding=partial_embedding_helper,
                start_tokens=tf.tile(
                    [word2index['<GO>']],
                    [tf.shape(features['encoder_inputs'])[0]]),
                end_token=word2index['<EOS>'])

        dec_cell = tf.nn.rnn_cell.LSTMCell(
            num_units=num_units[-1] * 2,  # must match the size of the concatenated fw+bw encoder state
            forget_bias=forget_bias,
            initializer=init)

        decoder = tf.contrib.seq2seq.BasicDecoder(cell=dec_cell,
                                                  helper=helper,
                                                  initial_state=encoder_state,
                                                  output_layer=Dense(
                                                      params['vocab_size'],
                                                      use_bias=False))

        dec_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder=decoder,
            output_time_major=False,
            impute_finished=True,
            maximum_iterations=params['output_max_length'])
    logits = tf.identity(dec_outputs.rnn_output, 'logits')

    if mode == tf.estimator.ModeKeys.PREDICT:
        indices = predict_words(logits)
        predictions = {'sentence_tokens': indices}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    training_labels = labels['target_sequences']
    weights = tf.cast(
        tf.not_equal(training_labels, tf.constant(word2index['<PAD>'])),
        tf.float32)
    sequence_loss = tf.contrib.seq2seq.sequence_loss(logits=logits,
                                                     targets=training_labels,
                                                     weights=weights)

    tf.summary.scalar('sequence_loss', sequence_loss)

    if mode == tf.estimator.ModeKeys.EVAL:
        metrics = {'sequence_loss': tf.metrics.mean(sequence_loss)}
        return tf.estimator.EstimatorSpec(mode,
                                          loss=sequence_loss,
                                          eval_metric_ops=metrics)

    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    train_op = optimizer.minimize(sequence_loss,
                                  global_step=tf.train.get_global_step())

    return tf.estimator.EstimatorSpec(mode,
                                      loss=sequence_loss,
                                      train_op=train_op)
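get_forget_bias is not shown either; since its return value is also used as an input_keep_prob in this example, a plausible (assumed) implementation is a mode-dependent constant that disables dropout outside of training. The params key name below is also an assumption:

def get_forget_bias(params, mode):
    # assumption: the value doubles as a keep probability, so return 1.0 unless training
    if mode == tf.estimator.ModeKeys.TRAIN:
        return params.get('forget_bias', 0.8)
    return 1.0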