Example No. 1
def fit_lstm_model(X_train, y_train, n_words, n_tags, seq_len, class_weights,
                   epochs):
    '''Set up LSTM model with one input - equal length sequences of encoded text'''
    input_seq = Input(shape=(seq_len, ))
    # Pass the GloVe pretrained weights (the module-level embedding_matrix) into the embedding layer
    embedding = Embedding(input_dim=n_words,
                          output_dim=300,
                          weights=[embedding_matrix],
                          trainable=True)(input_seq)
    embedding = Dropout(0.1)(embedding)
    # Add a bidirectional LSTM layer, a time-distributed dense hidden layer, and the final output layer
    model = Bidirectional(
        LSTM(units=64, return_sequences=True,
             recurrent_dropout=0.1))(embedding)
    model = TimeDistributed(Dense(64, activation='relu'))(model)
    output = Dense(n_tags, activation="softmax")(model)
    # Compile and fit the deep neural network
    model = Model(inputs=input_seq, outputs=output)
    model.compile(optimizer="adam",
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    history = model.fit(X_train,
                        y_train,
                        epochs=epochs,
                        batch_size=32,
                        validation_split=0.1,
                        verbose=1,
                        class_weight=class_weights)  # Keras expects a dict {class_index: weight}, not a list
    # Create a simple performance report (X_test and y_test are expected as module-level globals)
    test_loss, test_acc = model.evaluate(X_test, y_test)
    print(f'Model test loss was {test_loss}')
    print(f'Model test accuracy was {test_acc}')
    return model, history
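The class_weight argument passed to model.fit above expects a dict mapping class indices to weights; a brief sketch with assumed values:

# Hypothetical weights for a 3-tag problem: upweight the rarer entity tags
class_weights = {0: 1.0, 1: 5.0, 2: 5.0}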
Example No. 2
def build_qt_inference_model(model_settings):
    # architecture
    _input = Input(shape=(model_settings['max_len'], model_settings['emb_dim']), name='input')
    model = Bidirectional(LSTM(units=100, return_sequences=True, dropout=0.5,
                               recurrent_dropout=0.5), name='bilstm1')(_input)  # biLSTM
    model = Bidirectional(LSTM(units=100, return_sequences=False, dropout=0.5,
                               recurrent_dropout=0.5), name='bilstm2')(model)  # 2nd biLSTM
    _output = Dense(model_settings['n_tags'], activation='softmax', name='output')(model)  # a dense layer
    model = Model(_input, _output)
    model.compile(optimizer=Nadam(clipnorm=1), loss='categorical_crossentropy', metrics=['accuracy']) 
    model.summary()
    return model
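The function above only reads three keys from model_settings; a minimal call sketch with assumed values:

# Assumed settings; only max_len, emb_dim and n_tags are read by build_qt_inference_model
model_settings = {'max_len': 100, 'emb_dim': 300, 'n_tags': 7}
qt_model = build_qt_inference_model(model_settings)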
Example No. 3
def build_ep_inference_model(model_settings):
    # architecture
    input = Input(shape=(model_settings['max_len'], model_settings['emb_dim']), name='input')
    model = Bidirectional(LSTM(units=100, return_sequences=True), name='bilstm1')(input)  # biLSTM
    model = Bidirectional(LSTM(units=100, return_sequences=True), name='bilstm2')(model)  # 2nd biLSTM
    model = TimeDistributed(Dense(model_settings['n_tags'], activation=None), name='td')(model)  # a dense layer
    crf = CRF(model_settings['n_tags'], name='crf')  # CRF layer
    out = crf(model)  # output
    model = Model(input, out)
    model.compile(optimizer=Nadam(lr=0.01, clipnorm=1), loss=losses.crf_loss, metrics=[metrics.crf_accuracy]) 
    model.summary()
    return model
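This variant relies on the CRF layer, loss, and metric from keras_contrib; a sketch of the imports the snippet presumably assumes:

# Assumed imports for build_ep_inference_model (not shown in the original snippet)
from keras.layers import Input, Dense, TimeDistributed, Bidirectional, LSTM
from keras.models import Model
from keras.optimizers import Nadam
from keras_contrib.layers import CRF
from keras_contrib import losses, metrics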
Example No. 4
    def get_model():
        """ Create and return the model. """

        # INPUTS
        inputs = []
        models = []
        for embed_mod_id, _ in enumerate(embedding_models):
            inputs.append(
                Input(shape=(
                    seq_max_length,
                    feature_vec_lengths[embed_mod_id],
                )))

            models.append(
                Masking(mask_value=[0] * feature_vec_lengths[embed_mod_id])(
                    inputs[-1]))

        if args.pos:
            inputs.append(Input(shape=(
                seq_max_length,
                len(Utils.poss) + 2,
            )))
            models.append(
                Masking(mask_value=[0] * (len(Utils.poss) + 2))(inputs[-1]))

        # Combine INPUTS (including masks)
        if len(models) > 1:
            model = concatenate(models)
        else:
            model = models[0]

        # CORE MODEL
        model = Bidirectional(
            LSTM(
                50,
                return_sequences=True,
                dropout=0,  # !
                # dropout=0.1,
                # dropout=0.25,
                recurrent_dropout=recurrent_dropout))(model)

        # Per-timestep dense layer: softmax over the two one-hot encoded binary labels
        outputs = TimeDistributed(Dense(2, activation="softmax"))(model)

        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
        return model
Example No. 5
    def initialize(self, ner_corpus, parameters):
        '''
        A method to initialize the NER model.

        Parameters
        ----------
        ner_corpus : NER_Corpus
            | Fully developed NER corpus.
        parameters : dict
            | Hyperparameters for Bi-LSTM layers.
        '''

        self.word2vector = ner_corpus.word2vector
        self.max_sent_len = ner_corpus.max_sent_len
        self.feature_size = ner_corpus.feature_size
        self.ner_labels = ner_corpus.ner_labels
        self.word2id = ner_corpus.word2id
        self.id2word = ner_corpus.id2word
        self.X = ner_corpus.X_embedded
        self.Y = ner_corpus.Y_embedded
        del ner_corpus

        self.lstm_units = parameters.get('lstm_units')
        self.lstm_return_sequences = parameters.get('lstm_return_sequences')
        self.lstm_recurrent_dropout = parameters.get('lstm_recurrent_dropout')
        self.dense_units = parameters.get('dense_units')
        self.dense_activation = parameters.get('dense_activation')

        _input = Input(shape=(self.max_sent_len, self.feature_size))
        model = Bidirectional(
            LSTM(units=self.lstm_units,
                 return_sequences=self.lstm_return_sequences,
                 recurrent_dropout=self.lstm_recurrent_dropout))(_input)
        model = TimeDistributed(
            Dense(units=self.dense_units,
                  activation=self.dense_activation))(model)
        crf = CRF(len(self.ner_labels))
        _output = crf(model)

        model = Model(inputs=_input, outputs=_output)
        model.compile(optimizer='rmsprop',
                      loss=crf.loss_function,
                      metrics=[crf.accuracy])

        self.model = model
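A sketch of the parameters dict this method reads via .get(); the values are assumptions, but lstm_return_sequences should be True so the TimeDistributed and CRF layers receive every timestep:

# Hypothetical hyperparameters matching the keys read above
parameters = {
    'lstm_units': 64,
    'lstm_return_sequences': True,   # required so the CRF sees the full sequence
    'lstm_recurrent_dropout': 0.1,
    'dense_units': 64,
    'dense_activation': 'relu',
}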
Example No. 6
def boundary_sensitive_TCN(k):

    # shot feature extraction

    # NOTE: shape=() appears to be a placeholder; the Bidirectional LSTM below
    # needs a (timesteps, features) input shape, e.g. shape=(None, n_feat).
    inputs = Input(shape=())
    model = inputs

    model = Bidirectional(
        LSTM(k, dropout=0.25, recurrent_dropout=0.25,
             return_sequences=True))(model)

    model = TimeDistributed(Dense(2, activation='softmax'))(model)
    model = Model(inputs=inputs, outputs=model)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    return model
Example No. 7
	def train_model(self,
					X,
					y,
					labels,
					word_index,
					MAX_SEQUENCE_LENGTH,
					model_save_directory='./models/'):
		"""
			Train deep learning model
		"""
		
		embedding_matrix, nb_words = get_embedding('glove',word_index)
		
		input1 = Input(shape=(MAX_SEQUENCE_LENGTH,))
		embedding = Embedding(input_dim=len(embedding_matrix), 
								output_dim=self.embedding_dim, 
								weights=[embedding_matrix], 
								input_length=MAX_SEQUENCE_LENGTH, 
								trainable=False)(input1)
#         embedding = Dropout(self.drop_rate_embedding)(embedding)
		model = Bidirectional(LSTM(units=self.num_lstm_units,
								  return_sequences=True,
								  recurrent_dropout=self.drop_rate_lstm))(embedding)
		
		model = TimeDistributed(Dense(units=self.num_lstm_units,
									 activation=self.activation_function))(model)
		crf = CRF(units=len(labels))
		output1 = crf(model)
		
		model = Model(input1,output1)        
		model.compile(optimizer='rmsprop', loss=crf.loss_function, metrics=[crf.accuracy])
				
		print(model.summary())

		early_stopping = EarlyStopping(monitor='val_loss', patience=3)
		STAMP = 'lstm_%d_%.2f' % (self.num_lstm_units, self.drop_rate_lstm)
		checkpoint_dir = model_save_directory + 'checkpoints/' + str(int(time())) + '/'

		if not os.path.exists(checkpoint_dir):
			os.makedirs(checkpoint_dir)

		# bst_model_path is not defined in the original snippet; a reasonable assumption:
		bst_model_path = checkpoint_dir + STAMP
		with open(bst_model_path + ".json", "w") as json_file:
			json_file.write(model.to_json())
Example No. 8
def BidirLSTM(
        n_nodes,
        n_classes,
        n_feat,
        max_len=None,
        causal=True,
        loss=loss_fct,
        optimizer="adam",  #adam
        return_param_str=False):
    inputs = Input(shape=(None, n_feat))

    model = Bidirectional(
        LSTM(n_nodes,
             return_sequences=True,
             kernel_initializer=glorot_normal()))(inputs)
    # model = LSTM(n_nodes, return_sequences=True)(inputs)
    #
    # # Birdirectional LSTM
    # if not causal:
    #     print("---------------------  Bi-directional  ------------------------------")
    #     model_backwards = LSTM(n_nodes, return_sequences=True, go_backwards=True)(inputs)
    #     # model = Merge(mode="concat")([model, model_backwards]) # deprecated
    #     model = Concatenate(axis=2)([model, model_backwards])
    #     #model = concatenate([model, model_backwards], axis=2)
    model = TimeDistributed(Dense(n_classes, activation="softmax"))(model)

    model = Model(inputs=inputs, outputs=model)
    model.compile(optimizer=optimizer,
                  loss=loss,  # use the loss argument rather than the module-level loss_fct
                  sample_weight_mode="temporal",
                  metrics=['accuracy'])

    if return_param_str:
        param_str = "LSTM_N{}".format(n_nodes)
        if causal:
            param_str += "_causal"

        return model, param_str
    else:
        return model
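A hypothetical call sketch for BidirLSTM; the sizes are assumptions, and the loss is passed explicitly since loss_fct is defined elsewhere in the original code:

# Assumed sizes: 64 LSTM units, 10 classes, 128-dimensional frame features
model, param_str = BidirLSTM(n_nodes=64, n_classes=10, n_feat=128,
                             loss="categorical_crossentropy",
                             return_param_str=True)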
Example No. 9
def define_model():
    wordseq = Input(shape=(max_sent_length, ))
    charSeq = Input(shape=(max_sent_length, max_wrd_len))
    wm = wordmodel(max_sent_length)(wordseq)
    cm = TimeDistributed(charmodel(max_wrd_len))(charSeq)
    cm = Reshape((max_sent_length, -1))(cm)
    combined_input = concatenate([wm, cm])
    model = Bidirectional(LSTM(units=100,
                               recurrent_dropout=0.1))(combined_input)
    out = Dense(10, activation="softmax")(model)  # softmax output layer

    model = Model([wordseq, charSeq], out)

    # load existing weights if they exist
    if os.path.isfile(outFileName + "-best.hdf5"):
        model.load_weights(outFileName + "-best.hdf5")
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    print(model.summary())
    #plot_model(model, show_shapes=True, to_file=outFileName+'-plot.png')
    return model
Example No. 10
    def draw(self):
        with open(self.conf.train_dict, "rb") as fp:
            vocabulary = pickle.load(fp)  # the with-block closes the file automatically

        # Model Configuration
        input_character = Input(shape=(None, ), name="character")
        feature_character = Embedding(len(vocabulary.keys()) + 1, self.embedding_dim, mask_zero=True)(input_character)
        feature_character = Dropout(0.1)(feature_character)
        feature_character = Bidirectional(LSTM(self.bi_rnn_units // 2, return_sequences=True, recurrent_dropout=0.1))(feature_character)

        input_construction = Input(shape=(None, 4), name="cxn")

        model = concatenate([feature_character, input_construction])
        model = Bidirectional(LSTM(self.bi_rnn_units // 2, return_sequences=True, recurrent_dropout=0.6))(model)
        output = TimeDistributed(Dense(5, activation="softmax"))(model)

        model = Model(inputs=[input_character, input_construction], outputs=output)
        plot_model(model, self.conf.model_image.format("multi_input_and_output_model.png"), show_shapes=True)

        model.compile("rmsprop", loss="categorical_crossentropy", metrics=["accuracy"])
        model.summary()

        return model
Example No. 11
    def train(self, epochs, embedding=None):
        # Embedded Words
        txt_input = Input(shape=(None, ), name='txt_input')
        txt_embed = Embedding(input_dim=self.num_words,
                              output_dim=MAX_LEN,
                              input_length=None,
                              name='txt_embedding',
                              trainable=False,
                              weights=([embedding]))(txt_input)
        txt_drpot = Dropout(0.1, name='txt_dropout')(txt_embed)

        # Embedded Part of Speech
        pos_input = Input(shape=(None, ), name='pos_input')
        pos_embed = Embedding(input_dim=self.num_pos,
                              output_dim=MAX_LEN,
                              input_length=None,
                              name='pos_embedding')(pos_input)
        pos_drpot = Dropout(0.1, name='pos_dropout')(pos_embed)

        # Embedded Characters
        char_in = Input(shape=(
            None,
            MAX_LEN_CHAR,
        ), name="char_input")
        emb_char = TimeDistributed(
            Embedding(input_dim=self.num_chars,
                      output_dim=MAX_LEN_CHAR,
                      input_length=None))(char_in)
        char_enc = TimeDistributed(
            LSTM(units=20, return_sequences=False,
                 recurrent_dropout=0.5))(emb_char)

        # Concatenate inputs
        x = concatenate([txt_drpot, pos_drpot, char_enc], axis=2)
        x = SpatialDropout1D(0.3)(x)

        # Deep Layers
        model = Bidirectional(
            LSTM(units=100, return_sequences=True, recurrent_dropout=0.1))(x)
        model = Bidirectional(
            LSTM(units=100, return_sequences=True,
                 recurrent_dropout=0.1))(model)

        # Output
        out = TimeDistributed(Dense(self.num_entities,
                                    activation="softmax"))(model)
        model = Model(inputs=[txt_input, pos_input, char_in], outputs=[out])

        model.compile(optimizer="rmsprop",
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        plot_model(model, to_file=self.save_path + 'model_structure.png')
        print(model.summary())

        history = model.fit(
            [self.X_train, self.train_pos, self.train_characters],
            np.array(self.Y_train),
            batch_size=32,
            epochs=epochs,
            validation_data=([
                self.X_validation, self.valid_pos, self.valid_characters
            ], np.array(self.Y_validation)),
            verbose=1)

        model.save(self.save_path + 'model_ner')

        test_eval = model.evaluate(
            [self.X_test, self.test_pos, self.test_characters],
            np.array(self.Y_test))

        print('Test loss:', test_eval[0])
        print('Test accuracy:', test_eval[1])

        return model, history
Example No. 12
crf = CRF(n_tags)  # CRF layer
# crf = CRF(n_tags,sparse_target=True)  # CRF layer
out = crf(dense)  # output
# out = CRF(n_tags)(dense)

### 3. Build Model
model = Model(inputs=[word_input,char_input ],outputs=out)
batch_size = 32
epochs = 15
# learning rate decay: Keras applies lr_t = lr / (1 + decay * iterations),
# so this decay value shrinks the learning rate by a factor of 32 over the first epoch
dataset_size = train_data.shape[0]
batches_per_epoch = dataset_size / batch_size
lr_decay = (1. / (1 / 32) - 1) / batches_per_epoch
model.compile(
    optimizer=Adam(lr=0.012, decay=lr_decay),
    loss=crf.loss_function,
    metrics=[crf.accuracy]
)
model.summary()
from keras.utils.vis_utils import plot_model


history = model.fit([X_w_tr,np.array(X_c_tr).reshape((len(X_c_tr), max_len, max_len_char))], np.array(y_tr),
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_data = ([X_w_v,np.array(X_c_v).reshape((len(X_c_v), max_len, max_len_char))], np.array(y_v)),
                    verbose=1,
                    )


# history.history is a dictionary whose keys include val_loss, val_acc, loss, and acc
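A minimal sketch (assuming matplotlib is available) of how those history curves could be plotted after training:

import matplotlib.pyplot as plt

# history.history holds one list per metric, indexed by epoch
plt.plot(history.history['loss'], label='training loss')
plt.plot(history.history['val_loss'], label='validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()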
Example No. 13
X_train, X_test = data.split_train_test(features)
y_train, _ = data.split_train_test(y)

input = Input(shape=(
    features.shape[1],
    features.shape[2],
))
model = Bidirectional(
    LSTM(units=50, return_sequences=True, recurrent_dropout=0.1))(input)
model = TimeDistributed(Dense(50, activation="relu"))(model)
crf = CRF_2nd(len(data.tag_to_index))
out_layer = crf(model)

model = Model(input, out_layer)
model.compile(optimizer="rmsprop",
              loss=crf.loss_function,
              metrics=[crf.accuracy])

model.summary()
BATCH_SIZE = 64
EPOCHS = 10
history = model.fit(X_train,
                    np.array(y_train),
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_split=0.1,
                    verbose=2)


def pred2label(pred):
    out = []
Example No. 14
def bilstm(X_train, X_test, Y_train, Y_test, wordembeddings):
    np.random.seed(1234)
    tf.random.set_seed(1234)
    random.seed(1234)

    max_length_sentence = X_train.str.split().str.len().max()
    tokenizer = Tokenizer(filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n\'',
                          lower=True)
    tokenizer.fit_on_texts(X_train)
    word_index = tokenizer.word_index
    EMBEDDING_DIM = 300
    vocabulary_size = len(word_index) + 1
    print('Found %s unique tokens.' % len(word_index))

    sequences_train = tokenizer.texts_to_sequences(X_train)
    sequences_valid = tokenizer.texts_to_sequences(X_test)
    X_train = pad_sequences(sequences_train, maxlen=max_length_sentence)
    X_val = pad_sequences(sequences_valid, maxlen=X_train.shape[1])
    y_train = np.asarray(Y_train)
    y_val = np.asarray(Y_test)
    #print(word_index)
    '''
    print('Shape of data tensor:', X_train.shape)
    print('Shape of data tensor:', X_val.shape)
    print('Shape of data tensor:', y_train.shape)
    print('Shape of data tensor:', y_val.shape)
    
    print(X_train)
    print("*"*100)
    print(X_val)
    print("*"*100)
    print(y_train)
    print("*"*100)
    print(y_val)
    '''

    embedding_matrix = np.zeros((vocabulary_size, EMBEDDING_DIM))
    for word, i in word_index.items():
        if (word in wordembeddings.keys()):
            embedding_vector = wordembeddings[word]
            if len(embedding_vector) == 0:  #if array is empty
                embedding_vector = wordembeddings[word.title()]
                if len(embedding_vector) == 0:
                    embedding_vector = wordembeddings[word.upper()]
                    if len(embedding_vector) == 0:
                        embedding_vector = np.array([
                            round(np.random.rand(), 8) for i in range(0, 300)
                        ])

        else:
            #print("WORD NOT IN DICT",word)
            embedding_vector = np.array(
                [round(np.random.rand(), 8) for i in range(0, 300)])

        if len(embedding_vector) != 0:
            embedding_matrix[i] = embedding_vector

    # NOTE: this embedding_layer is never used below; the model builds its own Embedding layer
    embedding_layer = Embedding(vocabulary_size,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                trainable=False)  #Try with True

    inputs = Input(shape=(X_train.shape[1], ))
    model = (Embedding(vocabulary_size,
                       EMBEDDING_DIM,
                       input_length=max_length_sentence,
                       weights=[embedding_matrix]))(inputs)

    model = Bidirectional(GRU(64))(
        model)  # !!!!!!! CHANGE THIS FOR OTHER MODELS
    model = (Dense(900, activation='relu'))(model)
    model = (Dense(400, activation='relu'))(model)
    model = (Dense(250, activation='relu'))(model)
    model = (Dense(204, activation='softmax'))(model)
    model = Model(inputs=inputs, outputs=model)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()

    callbacks = [EarlyStopping(monitor='val_loss')]
    hist_adam = model.fit(
        X_train,
        y_train,
        batch_size=1000,
        epochs=200,
        verbose=1,
        validation_data=(X_val, y_val),
        callbacks=callbacks
    )  #!!!!!!!!!!!!!!!!!!!!!!!CHANGE BATCH SIZE TO 1000 #change epochs to 200

    model.save(config.bigru_prepocessed_dataset1_chai
               )  # !!!!!!! CHANGE THIS FOR OTHER MODELS

    y_pred = model.predict(X_val)
    print(y_pred)

    y_val_class = pd.DataFrame(y_val).idxmax(axis=1)
    print(y_val_class)

    y_val_class_argmax = np.argmax(y_val, axis=1)
    y_pred_class_argmax = np.argmax(y_pred, axis=1)

    y_pred_class = pd.DataFrame(y_pred).idxmax(axis=1)
    print(y_pred_class)

    print(classification_report(y_val_class, y_pred_class))

    plt.suptitle('Optimizer : Adam', fontsize=10)
    plt.ylabel('Loss', fontsize=16)
    plt.xlabel('Epoch', fontsize=14)
    plt.plot(hist_adam.history['loss'], color='b', label='Training Loss')
    plt.plot(hist_adam.history['val_loss'], color='r', label='Validation Loss')
    plt.legend(loc='upper right')

    plt.savefig(
        '/home/ubuntu/asset_classification/results/bigru_model_dataset1_preprocessed_chai.png'
    )  # !!!!!!! CHANGE THIS FOR OTHER MODELS

    tf.keras.utils.plot_model(
        model, to_file=config.bigru_architecture,
        show_shapes=True)  # !!!!!!! CHANGE THIS FOR OTHER MODELS

    return (y_pred, y_val_class, y_pred_class, y_val_class_argmax,
            y_pred_class_argmax)
Example No. 15
#              input_shape=(batch_size, None, n_feat),
#              dropout=0.5,
#              name='bilstm',
#              recurrent_dropout=0.25)(inputs)

# attention layer
# model = attention_3d_block(model)

# Output FC layer
model = TimeDistributed(Dense(nb_classes, activation="softmax"))(model)

model = Model(inputs=inputs, outputs=model)
# model = multi_gpu_model(model, gpus=2)

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              sample_weight_mode="temporal",
              metrics=['accuracy'])
model.summary()

# train on videos with sample weighting
# model.fit(x=X_train_m,
#           y=Y_train_,
#           validation_data=(X_vali_m, Y_vali_, M_vali[:, :, 0]),
#           epochs=nb_epoch,
#           batch_size=batch_size,
#           verbose=1,
#           # sample_weight=M_train[:, :, 0],
#           sample_weight=sample_weights,
#           callbacks=[lr_reducer, early_stopper, tensor_board, checkpointer])

Example No. 16
    def main(self, glove):
        # get word embeddings
        utils = wordUtils.Utils()

        if glove:
            # use glove
            self.words_list, self.embedding_matrix = utils.load_glove()
            unword_n = len(self.words_list)

        else:
            self.words_list, self.embedding_matrix = utils.load_word2vec()
            unword_n = len(self.words_list)

        # get the training corpus
        cr = corpusreader.CorpusReader(self.textfile, self.annotfile)
        corpus = cr.trainseqs
        print(len(corpus))
        train = []
        print("Processing training data", datetime.now())
        for doc in corpus:
            tmp_dic = {}

            tmp_dic['tokens'] = doc['tokens']

            # convert SOBIE tags to numbers
            tags = doc['bio']
            tags = [self.lablist[i] for i in tags]
            tmp_dic['bion'] = tags
            train.append(tmp_dic)


        n_emb = 0
        n_unk = 0

        # get the number of the embedding
        for idx in range(len(train)):
            words = train[idx]['tokens']
            words_id = []
            for i in words:
                # get the number of the embedding
                try:
                    # the index of the word in the embedding matrix
                    index = self.words_list.index(i)
                    n_emb = n_emb + 1
                except ValueError:
                    # use the embedding full of zeros to identify an unknown word
                    n_unk = n_unk + 1
                    index = unword_n

                # the index of the word in the embedding matrix
                words_id.append(index)

            train[idx]['tokens'] = words_id


        # get all sizes from the sequences with training data
        train_l_d = {}
        train_l_labels = {}
        for seq in train:
            # corpus
            l = len(seq['tokens'])
            if l not in train_l_d: train_l_d[l] = []
            train_l_d[l].append(seq['tokens'])

            # labels
            l1 = len(seq['bion'])
            if l1 not in train_l_labels: train_l_labels[l1] = []
            train_l_labels[l1].append(seq['bion'])

        sizes = list(train_l_d.keys())
        # sanity check: every length bucket must hold matching token and label sequences
        for i in sizes:
            if len(train_l_d[i]) != len(train_l_labels[i]):
                print("Mismatched number of sequences for length", i)

            for m in range(len(train_l_d[i])):
                if len(train_l_d[i][m]) != len(train_l_labels[i][m]):
                    print("Token/label length mismatch for length", i, "sequence", m)

        input = Input(shape=(None,))
        el = Embedding(len(self.words_list) + 1, 200, weights=[self.embedding_matrix], trainable=False)(input)
        model = Bidirectional(LSTM(units=50, return_sequences=True, recurrent_dropout = 0.1))(el)  # variational biLSTM
        model = TimeDistributed(Dense(50, activation="relu"))(model)  # a dense layer as suggested by neuralNer
        crf = CRF(self.lab_len)  # CRF layer
        out = crf(model)  # output

        model = Model(input, out)
        model.compile(optimizer="rmsprop", loss=crf.loss_function, metrics=[crf.accuracy])
        model.summary()

        f_best = -1
        f_index = -1
        # OK, start actually training
        for epoch in range(self.epochsN):
            print("Epoch", epoch, "start at", datetime.now())
            # Train in batches of different sizes - randomize the order of sizes
            # Except for the first few epochs
            if epoch > 2:
                random.shuffle(sizes)
            for size in sizes:
                batch = train_l_d[size]
                labs = train_l_labels[size]

                tx = np.array([seq for seq in batch])
                y = [seq for seq in labs]

                ty = [to_categorical(i, num_classes=self.lab_len) for i in y]

                # This trains in mini-batches
                model.fit(tx, np.array(ty), verbose=0, epochs=1)
            print("Trained at", datetime.now())

            # save all epochs
            save_load_utils.save_all_weights(model, 'words-results/epoch_%s.h5' % epoch)
            # test the results
            test_data = 'corpus_char/tmVarCorpus/treated/test_data.txt'
            test_labels = 'corpus_char/tmVarCorpus/treated/test_labels.tsv'
            self.test_model(test_data, test_labels, model, glove)
            f = self.eval()

            if f > f_best:
                f_best = f
                f_index = epoch


        # Pick the best model, and save it with a useful name
        print("Choosing the best epoch")
        shutil.copyfile("words-results/epoch_%s.h5" % f_index, "words_glove_%s.h5" % f_index)
Example No. 17
# pu.db
# Y = list(df("X"))

input = Input(shape=(110, 100))
model = Bidirectional(
    LSTM(units=50, return_sequences=True,
         recurrent_dropout=0.1))(input)  # variational biLSTM
model = TimeDistributed(Dense(50, activation="relu"))(
    model)  # a dense layer as suggested by neuralNer
crf = CRF(1)  # CRF layer
out = crf(model)  # output

model = Model(input, out)

model.compile(optimizer="rmsprop", loss=crf.loss_function)
model.summary()
# pu.db

Y = keras.utils.to_categorical(Y, num_classes=110)
Y = Y.reshape((Y.shape[0], Y.shape[1], 1))

all_train = all[:int(0.8 * all.shape[0]), ...]
Y_train = Y[:int(0.8 * all.shape[0]), ...]
Y_train_dense = np.reshape(Y_train, (Y_train.shape[0], Y_train.shape[1]))
Y_train_dense = np.argmax(Y_train_dense, axis=-1)

all_test = all[int(0.8 * all.shape[0]):, ...]
Y_test = Y[int(0.8 * all.shape[0]):, ...]
# pu.db
Y_test_dense = np.reshape(Y_test, (Y_test.shape[0], Y_test.shape[1]))