Example #1

# Assumed imports (not part of the original snippet); ELUGRU is a custom
# recurrent layer defined elsewhere in this project.
from keras.layers import (Input, Embedding, Dropout, Bidirectional,
                          TimeDistributed, Dense)
from keras.models import Model
from keras.optimizers import Adam
from keras.regularizers import L1L2

def bielugru(word_input_size, word_embedding_size, sequence_embedding_size,
             n_tags, word_dropout, rnn_dropout_W, rnn_dropout_U, l2,
             word_embedding_weights, **kwargs):
    # define network inputs: words only
    text_input = Input(shape=(None, ), dtype='int32', name='text_input')

    # map word indices to vector representation
    word_embeddings = Embedding(input_dim=word_input_size,
                                output_dim=word_embedding_size,
                                weights=word_embedding_weights,
                                name="word_embeddings")(text_input)
    # drop a small portion of the input vectors
    word_embeddings = Dropout(word_dropout)(word_embeddings)
    sequence_embedding = word_embeddings

    # apply text-level BiGRU; each direction gets half the units so that the
    # concatenated output has size sequence_embedding_size
    bidirectional_tag_sequence_output = Bidirectional(
        ELUGRU(sequence_embedding_size // 2,
               return_sequences=True,
               dropout=rnn_dropout_W,
               recurrent_dropout=rnn_dropout_U),
        merge_mode="concat")(sequence_embedding)

    # project hidden states to IOB tags
    tag_sequence_output = TimeDistributed(
        Dense(n_tags, activation='softmax', kernel_regularizer=L1L2(l2=l2)),
        name="aspect_output")(bidirectional_tag_sequence_output)

    # construct Model object and compile
    model = Model(inputs=[text_input], outputs=[tag_sequence_output])
    adam = Adam()
    model.compile(optimizer=adam,
                  loss={'aspect_output': "categorical_crossentropy"},
                  sample_weight_mode="temporal")
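    # "temporal" sample weighting accepts 2D (sample, timestep) weight arrays,
    # so padded timesteps can be masked out of the loss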
    model._make_train_function()
    model._make_predict_function()
    # note the trailing comma: a 1-tuple is returned, mirroring the tuple
    # return of char_bielugru in Example #3
    return model,
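
A minimal usage sketch for the function above, assuming the imports and the
custom ELUGRU layer are in scope; every value below is an illustrative
placeholder, not taken from the original code:

# Hypothetical call; note the 1-tuple unpacking.
model, = bielugru(word_input_size=10000,
                  word_embedding_size=100,
                  sequence_embedding_size=200,
                  n_tags=3,
                  word_dropout=0.5,
                  rnn_dropout_W=0.25,
                  rnn_dropout_U=0.25,
                  l2=1e-6,
                  word_embedding_weights=None)
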
Example #2

# Assumed imports (not part of the original snippet). The private Keras
# helpers (_standardize_input_data, _make_batches, _slice_arrays) and the
# TF1-era calls (tf.log, tf.to_int32) date this code to Keras 2.1 /
# TensorFlow 1.x; tf_edit_distance and Kreshape_To1D are project-specific
# helpers defined elsewhere.
import os
import pickle
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import ctc_ops as ctc
from keras import backend as K
from keras.layers import Input, Lambda
from keras.models import Model, model_from_json
from keras.preprocessing import sequence
from keras.engine.training import (_standardize_input_data, _make_batches,
                                   _slice_arrays)


class CTCModel:
    def __init__(self,
                 inputs,
                 outputs,
                 greedy=True,
                 beam_width=100,
                 top_paths=1,
                 padding=-1,
                 charset=None):
        """
        To be overridden or rewritten. This is where self.inputs and
        self.outputs must be assigned the lists of input and output
        layers of the network.
        """
        self.model_train = None
        self.model_pred = None
        self.model_eval = None
        self.inputs = inputs
        self.outputs = outputs

        self.greedy = greedy
        self.beam_width = beam_width
        self.top_paths = top_paths
        self.padding = padding
        self.charset = charset

    def compile(self, optimizer):
        """
        To be called once the model has been created. Compiles the model,
        adding the CTC loss.

        :param optimizer: The optimizer to use during training
        """
        # CTC loss computation
        labels = Input(name='labels', shape=[None])
        input_length = Input(name='input_length', shape=[1])
        label_length = Input(name='label_length', shape=[1])

        # Lambda layer for computing the loss function
        loss_out = Lambda(self.ctc_loss_lambda_func,
                          output_shape=(1, ),
                          name='CTCloss')(self.outputs +
                                          [labels, input_length, label_length])

        # Lambda layer for the decoding function; K.ctc_decode pads the
        # decoded sequences with -1, which is why predictions are filtered
        # on -1 downstream
        out_decoded_dense = Lambda(self.ctc_complete_decoding_lambda_func,
                                   output_shape=(None, None),
                                   name='CTCdecode',
                                   arguments={
                                       'greedy': self.greedy,
                                       'beam_width': self.beam_width,
                                       'top_paths': self.top_paths
                                   },
                                   dtype="float32")(self.outputs +
                                                    [input_length])

        # Lambda layer for computing the label error rate
        out_analysis = Lambda(
            self.ctc_complete_analysis_lambda_func,
            output_shape=(None, ),
            name='CTCanalysis',
            arguments={
                'greedy': self.greedy,
                'beam_width': self.beam_width,
                'top_paths': self.top_paths
            },
            dtype="float32")(self.outputs +
                             [labels, input_length, label_length])

        # create Keras models
        self.model_init = Model(inputs=self.inputs, outputs=self.outputs)
        self.model_train = Model(inputs=self.inputs +
                                 [labels, input_length, label_length],
                                 outputs=loss_out)
        self.model_pred = Model(inputs=self.inputs + [input_length],
                                outputs=out_decoded_dense)
        self.model_eval = Model(inputs=self.inputs +
                                [labels, input_length, label_length],
                                outputs=out_analysis)

        # Compile models
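        # Each Lambda layer above already computes its value inside the graph,
        # so each model is compiled with a dummy pass-through loss that simply
        # returns the corresponding layer output.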
        self.model_train.compile(loss={
            'CTCloss': lambda yt, yp: yp
        },
                                 optimizer=optimizer)
        self.model_pred.compile(loss={
            'CTCdecode': lambda yt, yp: yp
        },
                                optimizer=optimizer)
        self.model_eval.compile(loss={
            'CTCanalysis': lambda yt, yp: yp
        },
                                optimizer=optimizer)

    def get_model_train(self):
        """
        :return: Model used internally for training
        """
        return self.model_train

    def get_model_pred(self):
        """
        :return: Model used internally for prediction
        """
        return self.model_pred

    def get_model_eval(self):
        """
        :return: Model used to evaluate a data set
        """
        return self.model_eval

    def fit(self,
            x=None,
            y=None,
            batch_size=None,
            epochs=1,
            verbose=1,
            callbacks=None,
            validation_split=0.0,
            validation_data=None,
            shuffle=True,
            class_weight=None,
            sample_weight=None,
            initial_epoch=0,
            steps_per_epoch=None,
            validation_steps=None):
        """

        Allows reading the data on one device (CPU) while training in
        parallel on another device (GPU).

        The input data must have the form:
          [input_sequences, label_sequences, input_lengths, label_lengths]

        :param: Same parameters as keras.engine.Model.fit()
        :return: The History object produced by training
        """

        out = self.model_train.fit(x=x,
                                   y=y,
                                   batch_size=batch_size,
                                   epochs=epochs,
                                   verbose=verbose,
                                   callbacks=callbacks,
                                   validation_split=validation_split,
                                   validation_data=validation_data,
                                   shuffle=shuffle,
                                   class_weight=class_weight,
                                   sample_weight=sample_weight,
                                   initial_epoch=initial_epoch,
                                   steps_per_epoch=steps_per_epoch,
                                   validation_steps=validation_steps)

        # Copy the trained weights into the prediction and evaluation models
        # so that all three models stay in sync.
        self.model_pred.set_weights(self.model_train.get_weights())
        self.model_eval.set_weights(self.model_train.get_weights())
        return out

    def predict(self, x, batch_size=None, verbose=0):
        """ CTC prediction

        Inputs:
            x = list containing the network inputs followed by the input
                lengths, e.g. [input_data, input_lengths] where input_data
                is a 3D tensor (batch_size, max_input_len, dim_features)
                and input_lengths is a 1D array of sequence lengths
            batch_size = number of samples per prediction batch
            verbose = verbosity mode, 0 or 1

        Outputs:
            model_out = decoded label sequences returned by model_pred
        """

        model_out = self.model_pred.predict(x,
                                            batch_size=batch_size,
                                            verbose=verbose)

        return model_out

    def predict2(self, x, batch_size=None, verbose=0, steps=None):
        """
        Same as Keras Model.predict, but uses a custom prediction loop
        (_predict_loop) to deal with variable-length predictions.

        Generates output predictions for the input samples.

        Computation is done in batches.

        # Arguments
            x: The input data, as a Numpy array
                (or list of Numpy arrays if the model has multiple inputs).
            batch_size: Integer. If unspecified, it will default to 32.
            verbose: Verbosity mode, 0 or 1.
            steps: Total number of steps (batches of samples)
                before declaring the prediction round finished.
                Ignored with the default value of `None`.

        # Returns
            Numpy array(s) of predictions.

        # Raises
            ValueError: In case of mismatch between the provided
                input data and the model's expectations,
                or in case a stateful model receives a number of samples
                that is not a multiple of the batch size.
        """
        # Backwards compatibility.
        if batch_size is None and steps is None:
            batch_size = 32
        if x is None and steps is None:
            raise ValueError('If predicting from data tensors, '
                             'you should specify the `steps` '
                             'argument.')
        # Validate user data.
        x = _standardize_input_data(x,
                                    self.model_pred._feed_input_names,
                                    self.model_pred._feed_input_shapes,
                                    check_batch_axis=False)
        if self.model_pred.stateful:
            if x[0].shape[0] > batch_size and x[0].shape[0] % batch_size != 0:
                raise ValueError('In a stateful network, '
                                 'you should only pass inputs with '
                                 'a number of samples that can be '
                                 'divided by the batch size. Found: ' +
                                 str(x[0].shape[0]) + ' samples. '
                                 'Batch size: ' + str(batch_size) + '.')

        # Prepare inputs, delegate logic to `_predict_loop`.
        if self.model_pred.uses_learning_phase and not isinstance(
                K.learning_phase(), int):
            # append the learning-phase flag (0 = test mode)
            ins = x + [0.]
        else:
            ins = x
        self.model_pred._make_predict_function()
        f = self.model_pred.predict_function
        out_pred = self._predict_loop(f,
                                      ins,
                                      batch_size=batch_size,
                                      verbose=verbose,
                                      steps=steps)

        # strip the -1 padding added by the decoding step from each sequence
        list_pred = []
        for elmt in out_pred:
            pred = [val for val in elmt if val != -1]
            list_pred.append(pred)

        return list_pred

    @staticmethod
    def ctc_loss_lambda_func(args):
        """
        Function for computing the ctc loss (can be put in a Lambda layer)
        :param args:
            y_pred, labels, input_length, label_length
        :return: CTC loss
        """

        y_pred, labels, input_length, label_length = args
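        # K.ctc_batch_cost expects labels of shape (batch, max_label_len),
        # y_pred of shape (batch, time_steps, num_classes), and the two
        # length tensors of shape (batch, 1)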
        return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

    @staticmethod
    def ctc_complete_decoding_lambda_func(args, **arguments):
        """
        Complete CTC decoding using Keras (function K.ctc_decode)
        :param args:
            y_pred, input_length
        :param arguments:
            greedy, beam_width, top_paths
        :return:
            K.ctc_decode with dtype='float32'
        """

        y_pred, input_length = args
        my_params = arguments

        assert (K.backend() == 'tensorflow')

        # K.ctc_decode returns (decoded_list, log_probs); keep the best path
        # and cast it to float32 so it can serve as a Keras layer output
        return K.cast(K.ctc_decode(y_pred,
                                   tf.squeeze(input_length),
                                   greedy=my_params['greedy'],
                                   beam_width=my_params['beam_width'],
                                   top_paths=my_params['top_paths'])[0][0],
                      dtype='float32')

    @staticmethod
    def ctc_complete_analysis_lambda_func(args, **arguments):
        """
        Complete CTC analysis using Keras and tensorflow
        WARNING: TensorFlow (tf) is required
        :param args:
            y_pred, labels, input_length, label_len
        :param arguments:
            greedy, beam_width, top_paths
        :return:
            ler = label error rate
        """

        y_pred, labels, input_length, label_len = args
        my_params = arguments

        assert (K.backend() == 'tensorflow')

        # TF's CTC decoders expect time-major log-probabilities, hence the
        # transpose to (time, batch, classes) and the log (epsilon added for
        # numerical stability)
        batch = tf.log(tf.transpose(y_pred, perm=[1, 0, 2]) + 1e-8)
        input_length = tf.to_int32(tf.squeeze(input_length))

        greedy = my_params['greedy']
        beam_width = my_params['beam_width']
        top_paths = my_params['top_paths']

        if greedy:
            (decoded,
             log_prob) = ctc.ctc_greedy_decoder(inputs=batch,
                                                sequence_length=input_length)
        else:
            (decoded, log_prob) = ctc.ctc_beam_search_decoder(
                inputs=batch,
                sequence_length=input_length,
                beam_width=beam_width,
                top_paths=top_paths)

        cast_decoded = tf.cast(decoded[0], tf.float32)

        sparse_y = K.ctc_label_dense_to_sparse(
            labels, tf.cast(tf.squeeze(label_len), tf.int32))
        ed_tensor = tf_edit_distance(cast_decoded, sparse_y, norm=True)
        ler_per_seq = Kreshape_To1D(ed_tensor)

        return K.cast(ler_per_seq, dtype='float32')

    def _predict_loop(self,
                      f,
                      ins,
                      max_len=20,
                      max_value=-1,
                      batch_size=32,
                      verbose=0,
                      steps=None):
        """Loop over some data in batches and pad variable-length predictions.

        # Arguments
            f: Keras function returning a list of tensors.
            ins: list of tensors to be fed to `f`.
            max_len: length to which every predicted sequence is padded.
            max_value: padding value appended to shorter predictions.
            batch_size: integer batch size.
            verbose: verbosity mode.
            steps: Total number of steps (batches of samples)
                before declaring `_predict_loop` finished.
                Ignored with the default value of `None`.

        # Returns
            Array of predictions (if the model has a single output)
            or list of arrays of predictions
            (if the model has multiple outputs).
        """
        num_samples = self.model_pred._check_num_samples(
            ins, batch_size, steps, 'steps')

        if steps is not None:
            # Step-based predictions.
            # Since we do not know how many samples
            # we will see, we cannot pre-allocate
            # the returned Numpy arrays.
            # Instead, we store one array per batch seen
            # and concatenate them upon returning.
            unconcatenated_outs = []
            for step in range(steps):
                batch_outs = f(ins)
                if not isinstance(batch_outs, list):
                    batch_outs = [batch_outs]
                if step == 0:
                    for batch_out in batch_outs:
                        unconcatenated_outs.append([])
                for i, batch_out in enumerate(batch_outs):
                    unconcatenated_outs[i].append(batch_out)

            if len(unconcatenated_outs) == 1:
                return np.concatenate(unconcatenated_outs[0], axis=0)
            return [
                np.concatenate(unconcatenated_outs[i], axis=0)
                for i in range(len(unconcatenated_outs))
            ]
        else:
            # Sample-based predictions.
            outs = []
            batches = _make_batches(num_samples, batch_size)
            index_array = np.arange(num_samples)
            for batch_index, (batch_start, batch_end) in enumerate(batches):
                batch_ids = index_array[batch_start:batch_end]
                if ins and isinstance(ins[-1], float):
                    # Do not slice the training phase flag.
                    ins_batch = _slice_arrays(ins[:-1], batch_ids) + [ins[-1]]
                else:
                    ins_batch = _slice_arrays(ins, batch_ids)
                batch_outs = f(ins_batch)
                if not isinstance(batch_outs, list):
                    batch_outs = [batch_outs]
                if batch_index == 0:
                    # Pre-allocate the result arrays with a fixed maximum
                    # length, since predictions have variable lengths.
                    for batch_out in batch_outs:
                        shape = (num_samples, max_len)
                        outs.append(np.zeros(shape, dtype=batch_out.dtype))
                for i, batch_out in enumerate(batch_outs):
                    # Pad each prediction batch to max_len so it fits into
                    # the pre-allocated array.
                    outs[i][batch_start:batch_end] = sequence.pad_sequences(
                        batch_out,
                        value=float(max_value),
                        maxlen=max_len,
                        dtype=batch_out.dtype,
                        padding="post")

            if len(outs) == 1:
                return outs[0]
            return outs

    def save_model(self, path_dir, charset=None):
        """ Save the model in path_dir:
        save model_train, model_pred, model_eval and model_init as json,
        save the CTC model parameters (greedy, beam_width, top_paths,
        charset) in a pickle file """

        model_json = self.model_train.to_json()
        with open(path_dir + "/model_train.json", "w") as json_file:
            json_file.write(model_json)

        model_json = self.model_pred.to_json()
        with open(path_dir + "/model_pred.json", "w") as json_file:
            json_file.write(model_json)

        model_json = self.model_eval.to_json()
        with open(path_dir + "/model_eval.json", "w") as json_file:
            json_file.write(model_json)

        model_json = self.model_init.to_json()
        with open(path_dir + "/model_init.json", "w") as json_file:
            json_file.write(model_json)

        param = {
            'greedy': self.greedy,
            'beam_width': self.beam_width,
            'top_paths': self.top_paths,
            'charset': self.charset
        }

        output = open(path_dir + "/model_param.pkl", 'wb')
        p = pickle.Pickler(output)
        p.dump(param)
        output.close()

    def load_model(self, path_dir, optimizer, initial_epoch=None):
        """ Load the model from path_dir:
        load model_train, model_pred, model_eval and model_init from json,
        load the CTC model parameters from a pickle file,
        then recompile the models and optionally reload the weights saved
        at initial_epoch """

        json_file = open(path_dir + '/model_train.json', 'r')
        loaded_model_json = json_file.read()
        json_file.close()
        self.model_train = model_from_json(loaded_model_json)

        json_file = open(path_dir + '/model_pred.json', 'r')
        loaded_model_json = json_file.read()
        json_file.close()
        self.model_pred = model_from_json(loaded_model_json,
                                          custom_objects={"tf": tf})

        json_file = open(path_dir + '/model_eval.json', 'r')
        loaded_model_json = json_file.read()
        json_file.close()
        self.model_eval = model_from_json(loaded_model_json,
                                          custom_objects={
                                              "tf": tf,
                                              "ctc": ctc,
                                              "tf_edit_distance":
                                              tf_edit_distance,
                                              "Kreshape_To1D": Kreshape_To1D
                                          })

        json_file = open(path_dir + '/model_init.json', 'r')
        loaded_model_json = json_file.read()
        json_file.close()
        self.model_init = model_from_json(loaded_model_json,
                                          custom_objects={"tf": tf})

        self.inputs = self.model_init.inputs
        self.outputs = self.model_init.outputs

        param_file = open(path_dir + "/model_param.pkl", 'rb')
        p = pickle.Unpickler(param_file)
        param = p.load()
        param_file.close()

        self.greedy = param.get('greedy', self.greedy)
        self.beam_width = param.get('beam_width', self.beam_width)
        self.top_paths = param.get('top_paths', self.top_paths)
        self.charset = param.get('charset', self.charset)

        self.compile(optimizer)

        if initial_epoch:
            file_weight = path_dir + '/weights.%02d.hdf5' % initial_epoch
            print(file_weight)
            if os.path.exists(file_weight):
                self.model_train.load_weights(file_weight)
                self.model_pred.set_weights(self.model_train.get_weights())
                self.model_eval.set_weights(self.model_train.get_weights())
            else:
                print("Weights for epoch", initial_epoch, "cannot be loaded.")
        else:
            print("Training will start from the beginning.")
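
A minimal usage sketch for CTCModel, assuming the usual Keras layers (Input,
Bidirectional, LSTM, TimeDistributed, Dense), the Adam optimizer and numpy are
in scope. The architecture and every size are illustrative assumptions, and
x_seq, y_lab, x_len, y_len stand for hypothetical Numpy arrays laid out as
described in fit():

# Hypothetical network wrapped by CTCModel: one BLSTM over 26-dim features,
# 30 output classes including the CTC blank.
features = Input(shape=(None, 26), name='features')
blstm = Bidirectional(LSTM(128, return_sequences=True))(features)
softmax_out = TimeDistributed(Dense(30, activation='softmax'))(blstm)

ctc_model = CTCModel(inputs=[features], outputs=[softmax_out])
ctc_model.compile(optimizer=Adam())

# fit() expects x = [input_sequences, label_sequences, input_lengths,
# label_lengths]; y is a dummy target ignored by the pass-through CTC loss.
history = ctc_model.fit(x=[x_seq, y_lab, x_len, y_len],
                        y=np.zeros(len(x_seq)),
                        batch_size=32,
                        epochs=10)

# predict() takes the network inputs plus the input lengths; the decoded
# sequences are padded with -1.
decoded = ctc_model.predict([x_seq, x_len], batch_size=32)
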
Example #3

# Assumed imports (not part of the original snippet); ELUGRU and
# BetterTimeDistributed are custom layers defined elsewhere in this project.
from keras.layers import (Input, Embedding, Dropout, Bidirectional,
                          TimeDistributed, Dense, concatenate)
from keras.models import Model
from keras.optimizers import Adam
from keras.regularizers import L1L2

def char_bielugru(word_input_size, word_embedding_size, char_input_size,
                  char_embedding_size, sequence_embedding_size, n_tags,
                  word_dropout, rnn_dropout_W, rnn_dropout_U, dropout, l2,
                  word_embedding_weights, **kwargs):
    # define network inputs: words and character indices
    text_input = Input(shape=(None, ), dtype='int32', name='text_input')
    char_input = Input(shape=(None, None), dtype='int32', name='char_input')

    # map word indices to vector representations
    word_embeddings = Embedding(input_dim=word_input_size,
                                output_dim=word_embedding_size,
                                weights=word_embedding_weights,
                                name="word_embeddings")(text_input)
    word_embeddings = Dropout(word_dropout)(word_embeddings)

    # map each character for each word to its vector representation
    char_embedding_layer = Embedding(input_dim=char_input_size,
                                     output_dim=char_embedding_size,
                                     name="char_embeddings")
    char_embeddings = BetterTimeDistributed(char_embedding_layer)(char_input)
    char_embeddings = Dropout(word_dropout)(char_embeddings)

    ##################
    # apply char-level BiGRU to every word
    char_word_model = Bidirectional(ELUGRU(char_embedding_size,
                                           return_sequences=False),
                                    merge_mode="concat")
    char_word_embeddings = BetterTimeDistributed(char_word_model)(
        char_embeddings)
    char_word_embeddings = Dropout(dropout)(char_word_embeddings)

    # project final states to fixed size representation
    char_word_embeddings = BetterTimeDistributed(
        Dense(char_embedding_size,
              kernel_regularizer=L1L2(l2=l2)))(char_word_embeddings)
    ##################

    # combine word and character embeddings
    sequence_embedding = concatenate([word_embeddings, char_word_embeddings])

    # apply text-level BiGRU; each direction gets half the units so that the
    # concatenated output has size sequence_embedding_size
    bidirectional_tag_sequence_output = Bidirectional(
        ELUGRU(sequence_embedding_size // 2,
               return_sequences=True,
               dropout=rnn_dropout_W,
               recurrent_dropout=rnn_dropout_U),
        merge_mode="concat")(sequence_embedding)

    # project hidden states to IOB tags
    tag_sequence_output = TimeDistributed(
        Dense(n_tags, activation='softmax', kernel_regularizer=L1L2(l2=l2)),
        name="aspect_output")(bidirectional_tag_sequence_output)

    # construct Model object and compile
    model = Model(inputs=[text_input, char_input],
                  outputs=[tag_sequence_output])
    adam = Adam()
    model.compile(optimizer=adam,
                  loss={'aspect_output': "categorical_crossentropy"},
                  sample_weight_mode="temporal")
    model._make_train_function()
    model._make_predict_function()

    # construct Model object to obtain the character-level vector representation of a single word
    char_word_input = Input(shape=(None, ),
                            dtype='int32',
                            name='char_word_input')
    char_word_embedding = char_embedding_layer(char_word_input)
    char_word_embedding = char_word_model(char_word_embedding)

    char_word_model = Model(inputs=[char_word_input],
                            outputs=[char_word_embedding])
    char_word_model._make_predict_function()
    return model, char_word_model
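
A minimal usage sketch for char_bielugru; all sizes are illustrative
placeholders and pretrained word embeddings are omitted:

# Hypothetical call: returns the tagging model and the char-level word encoder.
model, char_word_model = char_bielugru(word_input_size=10000,
                                       word_embedding_size=100,
                                       char_input_size=100,
                                       char_embedding_size=25,
                                       sequence_embedding_size=200,
                                       n_tags=3,
                                       word_dropout=0.5,
                                       rnn_dropout_W=0.25,
                                       rnn_dropout_U=0.25,
                                       dropout=0.25,
                                       l2=1e-6,
                                       word_embedding_weights=None)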