Example No. 1
def trim_model_checkpoint(parameters_filepath, dataset_filepath, input_checkpoint_filepath, output_checkpoint_filepath):
    '''
    Remove all token embeddings except UNK.
    '''
    parameters, _ = main.load_parameters(parameters_filepath=parameters_filepath)
    dataset = pickle.load(open(dataset_filepath, 'rb'))
    model = EntityLSTM(dataset, parameters) 
    with tf.Session() as sess:
        model_saver = tf.train.Saver()  # defaults to saving all variables
        
        # Restore the pretrained model
        model_saver.restore(sess, input_checkpoint_filepath) # Works only when the dimensions of tensor variables are matched.
        
        # Get pretrained embeddings
        token_embedding_weights = sess.run(model.token_embedding_weights) 
    
        # Resize the token embedding weights down to a single row (the UNK row only)
        utils_tf.resize_tensor_variable(sess, model.token_embedding_weights, [1, parameters['token_embedding_dimension']])
            
        initial_weights = sess.run(model.token_embedding_weights)
        # Copy the pretrained UNK embedding into the trimmed matrix
        # (assumes UNK_TOKEN_INDEX is 0, so it fits in the single remaining row)
        initial_weights[dataset.UNK_TOKEN_INDEX] = token_embedding_weights[dataset.UNK_TOKEN_INDEX]
        sess.run(tf.assign(model.token_embedding_weights, initial_weights, validate_shape=False))
    
        token_embedding_weights = sess.run(model.token_embedding_weights) 
        print("token_embedding_weights: {0}".format(token_embedding_weights))
        
        model_saver.save(sess, output_checkpoint_filepath)
            
    dataset.__dict__['vocabulary_size'] = 1
    pickle.dump(dataset, open(dataset_filepath, 'wb'))
    pprint(dataset.__dict__)
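Every example on this page relies on utils_tf.resize_tensor_variable, whose body is not shown. Judging from the tf.assign(..., validate_shape=False) call above, a minimal sketch of such a helper might look like the following (an assumption about its implementation, not the project's actual code):

import tensorflow as tf  # TensorFlow 1.x

def resize_tensor_variable(sess, tensor_variable, shape):
    # Overwrite the variable with a zero tensor of the new shape.
    # validate_shape=False lets the assign change the variable's shape, which
    # is what makes it possible to restore a checkpoint whose embedding
    # matrices have a different size than the current graph's variables.
    sess.run(tf.assign(tensor_variable, tf.zeros(shape), validate_shape=False))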
Example No. 2
 def update_model(self, sess, pathfile, vocab):
     utils_tf.resize_tensor_variable(sess, self.token_embedding_weights,
                                     [None, None])
     utils_tf.resize_tensor_variable(sess, self.character_embedding_weights,
                                     [None, None])
     self.saver.restore(sess, pathfile)
     token_embedding_weights = sess.run(
         self.token_embedding_weights.read_value())
Example No. 3
    def restore_from_pretrained_model(self,
                                      parameters,
                                      dataset,
                                      sess,
                                      token_to_vector=None):
        pretraining_dataset = pickle.load(
            open(
                os.path.join(parameters['pretrained_model_folder'],
                             'dataset.pickle'), 'rb'))
        pretrained_model_checkpoint_filepath = os.path.join(
            parameters['pretrained_model_folder'], 'model.ckpt')

        # Assert that the label sets are the same
        # Test set should have the same label set as the pretrained dataset
        assert pretraining_dataset.index_to_label == dataset.index_to_label

        # If the token and character mappings are exactly the same
        if pretraining_dataset.index_to_token == dataset.index_to_token and pretraining_dataset.index_to_character == dataset.index_to_character:

            # Restore the pretrained model
            self.saver.restore(
                sess, pretrained_model_checkpoint_filepath
            )  # Works only when the dimensions of tensor variables are matched.

        # If the token and character mappings are different between the pretrained model and the current model
        else:

            # Resize the token and character embedding weights to match them with the pretrained model (required in order to restore the pretrained model)
            utils_tf.resize_tensor_variable(
                sess, self.character_embedding_weights, [
                    pretraining_dataset.alphabet_size,
                    parameters['character_embedding_dimension']
                ])
            utils_tf.resize_tensor_variable(
                sess, self.token_embedding_weights, [
                    pretraining_dataset.vocabulary_size,
                    parameters['token_embedding_dimension']
                ])

            # Restore the pretrained model
            self.saver.restore(
                sess, pretrained_model_checkpoint_filepath
            )  # Works only when the dimensions of tensor variables are matched.

            # Get pretrained embeddings
            character_embedding_weights, token_embedding_weights = sess.run([
                self.character_embedding_weights, self.token_embedding_weights
            ])

            # Restore the sizes of token and character embedding weights
            utils_tf.resize_tensor_variable(
                sess, self.character_embedding_weights, [
                    dataset.alphabet_size,
                    parameters['character_embedding_dimension']
                ])
            utils_tf.resize_tensor_variable(
                sess, self.token_embedding_weights, [
                    dataset.vocabulary_size,
                    parameters['token_embedding_dimension']
                ])

            # Re-initialize the token and character embedding weights
            sess.run(
                tf.variables_initializer([
                    self.character_embedding_weights,
                    self.token_embedding_weights
                ]))

            # Load embedding weights from pretrained token embeddings first
            self.load_pretrained_token_embeddings(
                sess, dataset, parameters, token_to_vector=token_to_vector)

            # Load embedding weights from pretrained model
            self.load_embeddings_from_pretrained_model(sess,
                                                       dataset,
                                                       pretraining_dataset,
                                                       token_embedding_weights,
                                                       embedding_type='token')
            self.load_embeddings_from_pretrained_model(
                sess,
                dataset,
                pretraining_dataset,
                character_embedding_weights,
                embedding_type='character')

            del pretraining_dataset
            del character_embedding_weights
            del token_embedding_weights

        # Get transition parameters
        transition_params_trained = sess.run(self.transition_parameters)

        if not parameters['reload_character_embeddings']:
            sess.run(
                tf.variables_initializer([self.character_embedding_weights]))
        if not parameters['reload_character_lstm']:
            sess.run(tf.variables_initializer(self.character_lstm_variables))
        if not parameters['reload_token_embeddings']:
            sess.run(tf.variables_initializer([self.token_embedding_weights]))
        if not parameters['reload_token_lstm']:
            sess.run(tf.variables_initializer(self.token_lstm_variables))
        if not parameters['reload_feedforward']:
            sess.run(tf.variables_initializer(self.feedforward_variables))
        if not parameters['reload_crf']:
            sess.run(tf.variables_initializer(self.crf_variables))

        return transition_params_trained
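The examples repeatedly call load_embeddings_from_pretrained_model after re-initializing the embedding matrices. Its body is not shown on this page; a hedged sketch of the token case, assuming the dataset objects expose a token_to_index mapping (an assumption), could look like this:

import tensorflow as tf  # TensorFlow 1.x

def load_token_embeddings_from_pretrained_model(sess, dataset, pretraining_dataset,
                                                pretrained_weights, embedding_variable):
    # Start from the freshly re-initialized embedding matrix of the current model.
    new_weights = sess.run(embedding_variable)
    # Copy the pretrained row for every token present in both vocabularies.
    for token, new_index in dataset.token_to_index.items():
        old_index = pretraining_dataset.token_to_index.get(token)
        if old_index is not None:
            new_weights[new_index] = pretrained_weights[old_index]
    sess.run(tf.assign(embedding_variable, new_weights))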
Example No. 4
def restore_pretrained_model(parameters, dataset, sess, model, model_saver):
    pretraining_dataset = pickle.load(
        open(
            os.path.join(parameters['pretrained_model_folder'],
                         'dataset.pickle'), 'rb'))
    pretrained_model_checkpoint_filepath = os.path.join(
        parameters['pretrained_model_folder'], 'best_model.ckpt')

    pretraining_parameters = \
        main.load_parameters(parameters_filepath=os.path.join(parameters['pretrained_model_folder'], 'parameters.ini'),
                             verbose=False)[0]
    for name in [
            'use_character_lstm', 'character_embedding_dimension',
            'character_lstm_hidden_state_dimension',
            'token_embedding_dimension', 'token_lstm_hidden_state_dimension',
            'use_crf'
    ]:
        if parameters[name] != pretraining_parameters[name]:
            print("Parameters of the pretrained model:")
            pprint(pretraining_parameters)
            raise AssertionError(
                "The parameter {0} ({1}) is different from the pretrained model ({2})."
                .format(name, parameters[name], pretraining_parameters[name]))

    if pretraining_dataset.index_to_token == dataset.index_to_token and pretraining_dataset.index_to_character == dataset.index_to_character:

        # Restore the pretrained model
        model_saver.restore(sess, pretrained_model_checkpoint_filepath)
    else:

        utils_tf.resize_tensor_variable(
            sess, model.character_embedding_weights, [
                pretraining_dataset.alphabet_size,
                parameters['character_embedding_dimension']
            ])
        utils_tf.resize_tensor_variable(sess, model.token_embedding_weights, [
            pretraining_dataset.vocabulary_size,
            parameters['token_embedding_dimension']
        ])

        # Restore the pretrained model
        model_saver.restore(sess, pretrained_model_checkpoint_filepath)
        # Get pretrained embeddings
        character_embedding_weights, token_embedding_weights = sess.run(
            [model.character_embedding_weights, model.token_embedding_weights])

        # Restore the sizes of token and character embedding weights
        utils_tf.resize_tensor_variable(
            sess, model.character_embedding_weights, [
                dataset.alphabet_size,
                parameters['character_embedding_dimension']
            ])
        utils_tf.resize_tensor_variable(
            sess, model.token_embedding_weights,
            [dataset.vocabulary_size, parameters['token_embedding_dimension']])

        # Re-initialize the token and character embedding weights
        sess.run(
            tf.variables_initializer([
                model.character_embedding_weights,
                model.token_embedding_weights
            ]))

        # Load embedding weights from pretrained token embeddings first
        model.load_pretrained_token_embeddings(sess, dataset, parameters)

        # Load embedding weights from pretrained model
        model.load_embeddings_from_pretrained_model(sess,
                                                    dataset,
                                                    pretraining_dataset,
                                                    token_embedding_weights,
                                                    embedding_type='token')
        model.load_embeddings_from_pretrained_model(
            sess,
            dataset,
            pretraining_dataset,
            character_embedding_weights,
            embedding_type='character')

        del pretraining_dataset
        del character_embedding_weights
        del token_embedding_weights

    # Get transition parameters
    transition_params_trained = sess.run(model.transition_parameters)

    if not parameters['reload_character_embeddings']:
        sess.run(tf.variables_initializer([model.character_embedding_weights]))
    if not parameters['reload_character_lstm']:
        sess.run(tf.variables_initializer(model.character_lstm_variables))
    if not parameters['reload_token_embeddings']:
        sess.run(tf.variables_initializer([model.token_embedding_weights]))
    if not parameters['reload_token_lstm']:
        sess.run(tf.variables_initializer(model.token_lstm_variables))
    if not parameters['reload_feedforward']:
        sess.run(tf.variables_initializer(model.feedforward_variables))
    if not parameters['reload_crf']:
        sess.run(tf.variables_initializer(model.crf_variables))

    return transition_params_trained
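Example No. 4 also calls model.load_pretrained_token_embeddings before copying rows from the checkpoint. A minimal sketch of that step, assuming an external token_to_vector dictionary (e.g. GloVe vectors keyed by surface form) and a dataset.token_to_index mapping, both of which are assumptions here:

import tensorflow as tf  # TensorFlow 1.x

def load_pretrained_token_embeddings(sess, model, dataset, token_to_vector):
    weights = sess.run(model.token_embedding_weights)
    # Overwrite the rows of tokens that have a pretrained vector; all other
    # rows keep their current initialization.
    for token, index in dataset.token_to_index.items():
        vector = token_to_vector.get(token)
        if vector is not None:
            weights[index] = vector
    sess.run(tf.assign(model.token_embedding_weights, weights))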
Example No. 5
def restore_model_parameters_from_pretrained_model(parameters, dataset, sess,
                                                   model, model_saver):
    pretraining_dataset = pickle.load(
        open(
            os.path.join(parameters['pretrained_model_folder'],
                         'dataset.pickle'), 'rb'))
    pretrained_model_checkpoint_filepath = os.path.join(
        parameters['pretrained_model_folder'], 'best_model.ckpt')
    print(len(pretraining_dataset.index_to_label))
    print(len(dataset.index_to_label))
    if not parameters['add_class']:
        assert len(pretraining_dataset.index_to_label) == len(
            dataset.index_to_label)
    elif parameters['tagging_format'] == 'bioes' and parameters['add_class']:
        assert len(pretraining_dataset.index_to_label) + 4 == len(
            dataset.index_to_label)
    else:
        assert len(pretraining_dataset.index_to_label) + 2 == len(
            dataset.index_to_label)
    pretraining_parameters = \
        main.load_parameters(parameters_filepath=os.path.join(parameters['pretrained_model_folder'],
                                                              'parameters.ini'), verbose=False)[0]
    for name in [
            'use_character_lstm', 'character_embedding_dimension',
            'character_lstm_hidden_state_dimension',
            'token_embedding_dimension', 'token_lstm_hidden_state_dimension',
            'use_crf'
    ]:
        if parameters[name] != pretraining_parameters[name]:
            print("Parameters of the pretrained model:")
            pprint(pretraining_parameters)
            raise AssertionError(
                "The parameter {0} ({1}) is different from the pretrained model ({2})."
                .format(name, parameters[name], pretraining_parameters[name]))

    # If the token and character mappings are exactly the same
    if pretraining_dataset.index_to_token == dataset.index_to_token and \
            pretraining_dataset.index_to_character == dataset.index_to_character:

        model_saver = tf.train.import_meta_graph(
            pretrained_model_checkpoint_filepath + '.meta')
        model_saver.restore(sess, pretrained_model_checkpoint_filepath)
        last_layer = tf.get_collection('crf/transitions')[0]
        last_layer_shape = last_layer.get_shape().as_list()

        if parameters['tagging_format'] == 'bioes':
            number_of_classes_new = 17
        else:
            number_of_classes_new = 9

        weights_new = tf.Variable(
            tf.truncated_normal([last_layer_shape[1], number_of_classes_new],
                                stddev=0.05))
        biases_new = tf.Variable(
            tf.constant(0.05, shape=[number_of_classes_new]))
        output_new = tf.matmul(last_layer, weights_new) + biases_new
        pred = tf.nn.softmax(output_new)

    else:
        # Resize the token and character embedding weights to match them with the pretrained model
        # (required in order to restore the pretrained model)
        utils_tf.resize_tensor_variable(
            sess, model.character_embedding_weights, [
                pretraining_dataset.alphabet_size,
                parameters['character_embedding_dimension']
            ])
        utils_tf.resize_tensor_variable(sess, model.token_embedding_weights, [
            pretraining_dataset.vocabulary_size,
            parameters['token_embedding_dimension']
        ])

        if parameters['tagging_format'] == 'bioes':
            n_new_neuron = 4  # BIOES adds four labels (B/I/E/S) per new class
        else:
            n_new_neuron = 2  # BIO adds two labels (B/I) per new class
        print("number of new neurons: {:d}".format(n_new_neuron))
        number_of_classes_new = dataset.number_of_classes + n_new_neuron
        print("number of classes (new): {:d}".format(number_of_classes_new))
        model_saver.restore(sess,
                            pretrained_model_checkpoint_filepath)  # [15 x 15]
        graph = tf.get_default_graph()

        model.input_label_indices_vector = tf.placeholder(
            tf.float32, [None, number_of_classes_new],
            name="input_label_indices_vector")
        model.input_label_indices_flat = tf.placeholder(
            tf.int32, [None], name="input_label_indices_flat")

        if parameters['use_adapter']:
            model.input_label_adapter_indices_vector = tf.placeholder(
                tf.float32, [None, len(dataset.index_to_label_adapter)],
                name="input_label_adapter_indices_vector")
            model.input_label_adapter_indices_flat = tf.placeholder(
                tf.int32, [None], name="input_label_adapter_indices_flat")
            model.adapter_keep_prob = tf.placeholder(tf.float32,
                                                     name="adapter_keep_prob")

            if parameters['include_pos']:
                model.input_label_pos_indices_vector = tf.placeholder(
                    tf.float32, [None, len(dataset.index_to_label_pos)],
                    name="input_label_pos_indices_vector")
                model.input_label_pos_indices_flat = tf.placeholder(
                    tf.int32, [None], name="input_label_pos_indices_flat")

        old_outputs_w_gradient = graph.get_tensor_by_name(
            'feedforward_after_lstm/output_after_tanh:0')
        old_outputs = old_outputs_w_gradient
        old_last_layer_W = graph.get_tensor_by_name(
            'feedforward_before_crf/W:0')
        old_last_layer_b = graph.get_tensor_by_name(
            'feedforward_before_crf/bias:0')
        print("old last layer W shape")
        print(old_last_layer_W.get_shape())  # .as_list())
        print("old last layer b shape")
        print(old_last_layer_b.get_shape())  # .as_list())

        last_layer_W_mean = np.mean(old_last_layer_W.eval())
        last_layer_W_stddev = np.std(old_last_layer_W.eval())
        last_layer_b_mean = np.mean(old_last_layer_b.eval())
        last_layer_b_stddev = np.std(old_last_layer_b.eval())

        old_W_width, old_W_height = old_last_layer_W.get_shape().as_list()

        last_layer_b_new_col = tf.truncated_normal([n_new_neuron],
                                                   mean=last_layer_b_mean,
                                                   stddev=last_layer_b_stddev)
        last_layer_b_new = tf.concat([old_last_layer_b, last_layer_b_new_col],
                                     0)

        adapter_new_dim = parameters['token_lstm_hidden_state_dimension']
        if parameters['use_adapter']:
            last_layer_W_new_row_adapter = tf.truncated_normal(
                [adapter_new_dim, dataset.number_of_classes])
            last_layer_W_new_rows = tf.concat(
                [old_last_layer_W, last_layer_W_new_row_adapter], 0)
            last_layer_W_new_col = tf.truncated_normal(
                [old_W_width + adapter_new_dim, n_new_neuron])
            last_layer_W_new = tf.concat(
                [last_layer_W_new_rows, last_layer_W_new_col], 1)

        else:
            last_layer_W_new_col = tf.truncated_normal(
                [old_W_width, n_new_neuron],
                mean=last_layer_W_mean,
                stddev=last_layer_W_stddev)
            last_layer_W_new = tf.concat(
                [old_last_layer_W, last_layer_W_new_col], 1)

        print("new last layer W shape")
        print(last_layer_W_new.get_shape())  # .as_list())
        print("new last layer b shape")
        print(last_layer_b_new.get_shape())  # .as_list())

        if parameters['hard_freeze']:
            last_layer_W_orig = last_layer_W_new[:,
                                                 0:dataset.number_of_classes]
            last_layer_b_orig = last_layer_b_new[0:dataset.number_of_classes]
            print("original last layer W (for resetting at each step) shape:")
            print(last_layer_W_orig.get_shape())  # .as_list())
            print("original last layer b (for resetting at each step) shape:")
            print(last_layer_b_orig.get_shape())  # .as_list())
        old_transition_parameters = graph.get_tensor_by_name(
            'crf/transitions:0')
        transition_parameters_mean = np.mean(old_transition_parameters.eval())
        transition_parameters_stddev = np.std(old_transition_parameters.eval())
        transition_new_cols = tf.truncated_normal(
            [old_transition_parameters.get_shape().as_list()[0], n_new_neuron],
            mean=transition_parameters_mean,
            stddev=transition_parameters_stddev)
        transition_with_new_cols = tf.concat(
            [old_transition_parameters, transition_new_cols], axis=1)
        transition_new_rows = tf.truncated_normal(
            [n_new_neuron,
             transition_with_new_cols.get_shape().as_list()[1]],
            mean=transition_parameters_mean,
            stddev=transition_parameters_stddev)
        new_transition_parameters = tf.concat(
            [transition_with_new_cols, transition_new_rows], axis=0)
        if parameters['use_adapter']:
            with tf.variable_scope("concat_before_adapter") as vs:
                token_embedded = graph.get_tensor_by_name(
                    'concatenate_token_and_character_vectors/token_lstm_input:0'
                )
                if parameters['include_pos']:
                    embed_label_concat = tf.concat([
                        token_embedded,
                        model.input_label_adapter_indices_vector,
                        model.input_label_pos_indices_vector
                    ],
                                                   axis=-1,
                                                   name='embed_label_concat')
                else:
                    embed_label_concat = tf.concat([
                        token_embedded,
                        model.input_label_adapter_indices_vector
                    ],
                                                   axis=-1,
                                                   name='embed_label_concat')
                embed_label_concat_expanded = tf.expand_dims(
                    embed_label_concat,
                    axis=0,
                    name='embed_label_concat_expanded')
            with tf.variable_scope("adapter") as vs:
                initializer = tf.contrib.layers.xavier_initializer()

                adapter_lstm_output = entity_lstm.bidirectional_LSTM(
                    embed_label_concat_expanded,
                    parameters['token_lstm_hidden_state_dimension'],
                    initializer=initializer,
                    output_sequence=True,
                    sum_fw_bw=True)

                adapter_lstm_output_squeezed = tf.squeeze(
                    adapter_lstm_output,
                    axis=0,
                    name='adapter_lstm_output_squeezed')
                old_outputs_before_drop = tf.concat(
                    [old_outputs, adapter_lstm_output_squeezed], axis=-1)

                old_outputs = tf.nn.dropout(old_outputs_before_drop,
                                            model.adapter_keep_prob,
                                            name='old_outputs_drop')
        for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope='adapter'):
            sess.run(tf.variables_initializer([var]))

        with tf.variable_scope("feedforward_before_crf") as vs:
            model.b_before_crf = tf.get_variable("b_new",
                                                 initializer=last_layer_b_new)
            model.W_before_crf = tf.get_variable("W_new",
                                                 initializer=last_layer_W_new)

            print("W after renewal:")
            print(
                graph.get_tensor_by_name(
                    'feedforward_before_crf/W_new:0').get_shape().as_list())
            print("b after renewal:")
            print(
                graph.get_tensor_by_name(
                    'feedforward_before_crf/b_new:0').get_shape().as_list())

            new_scores = tf.nn.xw_plus_b(old_outputs,
                                         model.W_before_crf,
                                         model.b_before_crf,
                                         name="scores_new")
            model.unary_scores = new_scores
            print("new scores shape")
            print(model.unary_scores.get_shape().as_list())
            model.predictions = tf.argmax(model.unary_scores,
                                          1,
                                          name="predictions_new")
            print("new prediction shape")
            print(model.predictions.get_shape().as_list())

        if parameters['use_crf']:
            with tf.variable_scope("crf") as vs:

                print("new number of classes: {:d}".format(number_of_classes_new))
                small_score = -1000.0
                large_score = 0.0
                sequence_length = tf.shape(model.unary_scores)[0]
                unary_scores_with_start_and_end = tf.concat([
                    model.unary_scores,
                    tf.tile(tf.constant(small_score, shape=[1, 2]),
                            [sequence_length, 1])
                ], 1)
                start_unary_scores = [[small_score] * number_of_classes_new +
                                      [large_score, small_score]]
                end_unary_scores = [[small_score] * number_of_classes_new +
                                    [small_score, large_score]]
                model.unary_scores = tf.concat([
                    start_unary_scores, unary_scores_with_start_and_end,
                    end_unary_scores
                ], 0)
                start_index = number_of_classes_new
                end_index = number_of_classes_new + 1
                input_label_indices_flat_with_start_and_end = tf.concat([
                    tf.constant(start_index, shape=[1]),
                    model.input_label_indices_flat,
                    tf.constant(end_index, shape=[1])
                ], 0)
                # Apply CRF layer
                sequence_length = tf.shape(model.unary_scores)[0]
                sequence_lengths = tf.expand_dims(sequence_length,
                                                  axis=0,
                                                  name='sequence_lengths_new')
                new_unary_scores_expanded = tf.expand_dims(
                    model.unary_scores,
                    axis=0,
                    name='unary_scores_expanded_new')
                new_input_label_indices_flat_batch = tf.expand_dims(
                    input_label_indices_flat_with_start_and_end,
                    axis=0,
                    name='input_label_indices_flat_batch_new')

                print('new unary_scores_expanded: {0}'.format(
                    new_unary_scores_expanded))
                print('new input_label_indices_flat_batch: {0}'.format(
                    new_input_label_indices_flat_batch))
                print("new sequence_lengths: {0}".format(sequence_lengths))

                log_likelihood, model.transition_parameters = tf.contrib.crf.crf_log_likelihood(
                    new_unary_scores_expanded,
                    new_input_label_indices_flat_batch,
                    sequence_lengths,
                    transition_params=new_transition_parameters)
                model.loss = tf.reduce_mean(-log_likelihood,
                                            name='cross_entropy_mean_loss_new')
                model.accuracy = tf.constant(1)

        else:  # not using crf
            with tf.variable_scope("crf") as vs:
                print("new number of classes: {:d}".format(number_of_classes_new))
                model.transition_parameters = tf.get_variable(
                    "transitions_new", initializer=new_transition_parameters)
            with tf.variable_scope("loss"):
                losses = tf.nn.softmax_cross_entropy_with_logits(
                    logits=model.unary_scores,
                    labels=model.input_label_indices_vector,
                    name='softmax_new')
                model.loss = tf.reduce_mean(losses,
                                            name='cross_entropy_mean_loss_new')
            with tf.variable_scope("accuracy"):
                correct_predictions = tf.equal(
                    model.predictions,
                    tf.argmax(model.input_label_indices_vector, 1))
                model.accuracy = tf.reduce_mean(tf.cast(
                    correct_predictions, 'float'),
                                                name='accuracy_new')

        if not parameters['use_crf']:
            sess.run(tf.variables_initializer([model.transition_parameters]))
        sess.run(tf.variables_initializer([model.b_before_crf]))
        sess.run(tf.variables_initializer([model.W_before_crf]))

        model.optimizer = tf.train.MomentumOptimizer(
            parameters['learning_rate'], 0.8)
        if parameters['hard_freeze']:
            glo_step = model.define_training_procedure(
                parameters,
                dataset=dataset,
                last_layer_W_orig=last_layer_W_orig,
                last_layer_b_orig=last_layer_b_orig)
        else:
            glo_step = model.define_training_procedure(parameters)

        model.summary_op = tf.summary.merge_all()

        # Get pretrained embeddings
        character_embedding_weights, token_embedding_weights = sess.run(
            [model.character_embedding_weights, model.token_embedding_weights])

        # Restore the sizes of token and character embedding weights
        utils_tf.resize_tensor_variable(
            sess, model.character_embedding_weights, [
                dataset.alphabet_size,
                parameters['character_embedding_dimension']
            ])
        utils_tf.resize_tensor_variable(
            sess, model.token_embedding_weights,
            [dataset.vocabulary_size, parameters['token_embedding_dimension']])

        # Re-initialize the token and character embedding weights
        sess.run(
            tf.variables_initializer([
                model.character_embedding_weights,
                model.token_embedding_weights
            ]))

        # Load embedding weights from pretrained token embeddings first
        model.load_pretrained_token_embeddings(sess, dataset, parameters)

        # Load embedding weights from pretrained model
        model.load_embeddings_from_pretrained_model(sess,
                                                    dataset,
                                                    pretraining_dataset,
                                                    token_embedding_weights,
                                                    embedding_type='token')
        model.load_embeddings_from_pretrained_model(
            sess,
            dataset,
            pretraining_dataset,
            character_embedding_weights,
            embedding_type='character')

        del pretraining_dataset
        del character_embedding_weights
        del token_embedding_weights

    # Get transition parameters
    transition_params_trained = sess.run(model.transition_parameters)

    if not parameters['reload_character_embeddings']:
        sess.run(tf.variables_initializer([model.character_embedding_weights]))
    if not parameters['reload_character_lstm']:
        sess.run(tf.variables_initializer(model.character_lstm_variables))
    if not parameters['reload_token_embeddings']:
        sess.run(tf.variables_initializer([model.token_embedding_weights]))
    if not parameters['reload_token_lstm']:
        sess.run(tf.variables_initializer(model.token_lstm_variables))
    if not parameters['reload_feedforward']:
        sess.run(tf.variables_initializer(model.feedforward_variables))
    if not parameters['reload_crf']:
        sess.run(tf.variables_initializer(model.crf_variables))

    return transition_params_trained, model, glo_step
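The transition-matrix surgery in Example No. 5 (new truncated-normal rows and columns concatenated onto crf/transitions) is easier to see in plain NumPy. The sketch below mirrors that logic with an ordinary normal distribution for brevity; it is an illustration, not the code above:

import numpy as np

def expand_transition_matrix(old_transitions, n_new, rng=np.random):
    # Pad an old [C x C] CRF transition matrix to [C+n_new x C+n_new], drawing
    # the new entries from a distribution matching the old matrix's statistics.
    mean, stddev = old_transitions.mean(), old_transitions.std()
    c = old_transitions.shape[0]
    new_cols = rng.normal(mean, stddev, size=(c, n_new))
    with_new_cols = np.concatenate([old_transitions, new_cols], axis=1)
    new_rows = rng.normal(mean, stddev, size=(n_new, c + n_new))
    return np.concatenate([with_new_cols, new_rows], axis=0)

# e.g. expand_transition_matrix(np.zeros((9, 9)), 2).shape == (11, 11)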
Example No. 6
def restore_model_parameters_from_pretrained_model(parameters, dataset, sess, model, model_saver):
    print("Restoring parameters from pretrained model")
    pretrained_model_folder = os.path.dirname(parameters['pretrained_model_checkpoint_filepath'])
    pretraining_dataset = pickle.load(open(os.path.join(pretrained_model_folder, 'dataset.pickle'), 'rb')) 
    
    # Assert that the label sets are the same
    # Test set should have the same label set as the pretrained dataset
    assert pretraining_dataset.index_to_label == dataset.index_to_label
    
    # Assert that the model hyperparameters are the same
    pretraining_parameters = load_parameters(parameters_filepath=os.path.join(pretrained_model_folder, 'parameters.ini'), verbose=False)[0]
    for name in ['use_character_lstm', 'character_embedding_dimension', 'character_lstm_hidden_state_dimension', 'embedding_dimension', 'token_lstm_hidden_state_dimension', 'use_crf']:
        if parameters[name] != pretraining_parameters[name]:
            print("Parameters of the pretrained model:")
            pprint(pretraining_parameters)
            raise AssertionError("The parameter {0} ({1}) is different from the pretrained model ({2}).".format(name, parameters[name], pretraining_parameters[name]))
    
    #print_tensors_in_checkpoint_file(parameters['pretrained_model_checkpoint_filepath'], tensor_name='', all_tensors=True)
    
    # If the token and character mappings are exactly the same
    if pretraining_dataset.index_to_token == dataset.index_to_token and pretraining_dataset.index_to_character == dataset.index_to_character:
        
        # Restore the pretrained model
        model_saver.restore(sess, parameters['pretrained_model_checkpoint_filepath']) # Works only when the dimensions of tensor variables are matched.
    
    # If the token and character mappings are different between the pretrained model and the current model
    else:
        
        # Resize the token and character embedding weights to match them with the pretrained model (required in order to restore the pretrained model)
        utils_tf.resize_tensor_variable(sess, model.character_embedding_weights, [pretraining_dataset.alphabet_size, parameters['character_embedding_dimension']])
        utils_tf.resize_tensor_variable(sess, model.token_embedding_weights, [pretraining_dataset.vocabulary_size, parameters['embedding_dimension']])
    
        # Restore the pretrained model
        model_saver.restore(sess, parameters['pretrained_model_checkpoint_filepath']) # Works only when the dimensions of tensor variables are matched.
        
        # Get pretrained embeddings
        character_embedding_weights, token_embedding_weights = sess.run([model.character_embedding_weights, model.token_embedding_weights]) 
        
        # Restore the sizes of token and character embedding weights
        utils_tf.resize_tensor_variable(sess, model.character_embedding_weights, [dataset.alphabet_size, parameters['character_embedding_dimension']])
        utils_tf.resize_tensor_variable(sess, model.token_embedding_weights, [dataset.vocabulary_size, parameters['embedding_dimension']])
        
        # Re-initialize the token and character embedding weights
        sess.run(tf.variables_initializer([model.character_embedding_weights, model.token_embedding_weights]))
        
        # Load embedding weights from pretrained token embeddings first
        model.load_pretrained_token_embeddings(sess, dataset, parameters) 
        
        # Load embedding weights from pretrained model
        model.load_embeddings_from_pretrained_model(sess, dataset, pretraining_dataset, token_embedding_weights, embedding_type='token')
        model.load_embeddings_from_pretrained_model(sess, dataset, pretraining_dataset, character_embedding_weights, embedding_type='character') 
        
        del pretraining_dataset
        del character_embedding_weights
        del token_embedding_weights
    
    # Get transition parameters
    transition_params_trained = sess.run(model.transition_parameters)
    
    if not parameters['reload_character_embeddings']:
        sess.run(tf.variables_initializer([model.character_embedding_weights]))
    if not parameters['reload_character_lstm']:
        sess.run(tf.variables_initializer(model.character_lstm_variables))
    if not parameters['reload_token_embeddings']:
        sess.run(tf.variables_initializer([model.token_embedding_weights]))
    if not parameters['reload_token_lstm']:
        sess.run(tf.variables_initializer(model.token_lstm_variables))
    if not parameters['reload_feedforward']:
        sess.run(tf.variables_initializer(model.feedforward_variables))
    if not parameters['reload_crf']:
        sess.run(tf.variables_initializer(model.crf_variables))

    return transition_params_trained
Example No. 7
 def load_model(self, sess, pathfile):
     utils_tf.resize_tensor_variable(sess, self.token_embedding_weights,
                                     [None, None])
     utils_tf.resize_tensor_variable(sess, self.character_embedding_weights,
                                     [None, None])
     self.saver.restore(sess, pathfile)
Example No. 8
    def restore_from_pretrained_model(self,
                                      parameters,
                                      dataset,
                                      sess,
                                      token_to_vector=None,
                                      pretrained_dataset=None):

        temp_pretrained_dataset_address = parameters[
            'model_folder'] + os.sep + "dataset.pickle"
        temp_pretrained_model_address = parameters[
            'model_folder'] + os.sep + parameters['model_name']

        print(temp_pretrained_model_address)

        if pretrained_dataset is None:
            pretraining_dataset = pickle.load(
                open(temp_pretrained_dataset_address, 'rb'))
        else:
            print("PRETRAINING HERE")
            pretraining_dataset = pretrained_dataset

        pretrained_model_checkpoint_filepath = temp_pretrained_model_address

        assert pretraining_dataset.index_to_label == dataset.index_to_label  # DEBUG from F&J

        # If the token and character mappings are exactly the same
        if pretraining_dataset.index_to_token == dataset.index_to_token and pretraining_dataset.index_to_character == dataset.index_to_character:

            # Restore the pretrained model
            self.saver.restore(
                sess, pretrained_model_checkpoint_filepath
            )  # Works only when the dimensions of tensor variables are matched.
            del pretraining_dataset

        # If the token and character mappings are different between the pretrained model and the current model
        else:
            print("INDEX TO TOKEN DO NOT MATCH")

            # Resize the token and character embedding weights to match them with the pretrained model (required in order to restore the pretrained model)
            utils_tf.resize_tensor_variable(
                sess, self.character_embedding_weights, [
                    pretraining_dataset.alphabet_size,
                    parameters['character_embedding_dimension']
                ])
            utils_tf.resize_tensor_variable(
                sess, self.token_embedding_weights, [
                    pretraining_dataset.vocabulary_size,
                    parameters['token_embedding_dimension']
                ])

            # Restore the pretrained model
            self.saver.restore(
                sess, pretrained_model_checkpoint_filepath
            )  # Works only when the dimensions of tensor variables are matched.

            # Get pretrained embeddings
            character_embedding_weights, token_embedding_weights = sess.run([
                self.character_embedding_weights, self.token_embedding_weights
            ])

            # Restore the sizes of token and character embedding weights
            utils_tf.resize_tensor_variable(
                sess, self.character_embedding_weights, [
                    dataset.alphabet_size,
                    parameters['character_embedding_dimension']
                ])
            utils_tf.resize_tensor_variable(
                sess, self.token_embedding_weights, [
                    dataset.vocabulary_size,
                    parameters['token_embedding_dimension']
                ])

            # Re-initialize the token and character embedding weights
            sess.run(
                tf.variables_initializer([
                    self.character_embedding_weights,
                    self.token_embedding_weights
                ]))

            # Load embedding weights from pretrained token embeddings first
            self.load_pretrained_token_embeddings(
                sess, dataset, parameters, token_to_vector=token_to_vector)
            self.load_embeddings_from_pretrained_model(sess,
                                                       dataset,
                                                       pretraining_dataset,
                                                       token_embedding_weights,
                                                       embedding_type='token')
            self.load_embeddings_from_pretrained_model(
                sess,
                dataset,
                pretraining_dataset,
                character_embedding_weights,
                embedding_type='character')

            del pretraining_dataset
            del character_embedding_weights
            del token_embedding_weights

        # Get transition parameters
        transition_params_trained = sess.run(self.transition_parameters)

        # Note: all reload flags are hardcoded to True here, so none of the
        # re-initialization branches below will actually run.
        parameters = {
            'reload_character_embeddings': True,
            'reload_character_lstm': True,
            'reload_token_embeddings': True,
            'reload_token_lstm': True,
            'reload_feedforward': True,
            'reload_crf': True
        }
        if not parameters['reload_character_embeddings']:
            sess.run(
                tf.variables_initializer([self.character_embedding_weights]))
        if not parameters['reload_character_lstm']:
            sess.run(tf.variables_initializer(self.character_lstm_variables))
        if not parameters['reload_token_embeddings']:
            sess.run(tf.variables_initializer([self.token_embedding_weights]))
        if not parameters['reload_token_lstm']:
            sess.run(tf.variables_initializer(self.token_lstm_variables))
        if not parameters['reload_feedforward']:
            sess.run(tf.variables_initializer(self.feedforward_variables))
        if not parameters['reload_crf']:
            sess.run(tf.variables_initializer(self.crf_variables))

        return transition_params_trained
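The block of reload_* checks at the end of every example is identical; it could be factored into a small helper such as the sketch below (it reuses the attribute names from the examples but is not part of the original code):

import tensorflow as tf  # TensorFlow 1.x

def reinitialize_unreloaded_variables(sess, model, parameters):
    # Re-initialize every variable group whose 'reload_*' flag is turned off,
    # so that only the selected parts of the pretrained model are kept.
    groups = {
        'reload_character_embeddings': [model.character_embedding_weights],
        'reload_character_lstm': model.character_lstm_variables,
        'reload_token_embeddings': [model.token_embedding_weights],
        'reload_token_lstm': model.token_lstm_variables,
        'reload_feedforward': model.feedforward_variables,
        'reload_crf': model.crf_variables,
    }
    for flag, variables in groups.items():
        if not parameters[flag]:
            sess.run(tf.variables_initializer(variables))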
Example No. 9
 def resize_without_redoing_model(self, parameters, new_dataset_vocab_size,
                                  sess):
     ""
     utils_tf.resize_tensor_variable(
         sess, self.token_embedding_weights,
         [new_dataset_vocab_size, parameters['token_embedding_dimension']])