Example #1
    def train_charcnn(self, excel_dir, checkpoint_path=None):
        # split the Excel data into train / test dictionaries
        train_dict, test_dict = data_generator(excel_dir)
        optimizer = Adam()
        self.charcnn_model.compile(loss='binary_crossentropy',
                                   optimizer=optimizer,
                                   metrics=['accuracy'])

        # resume from an existing checkpoint if one is available
        if checkpoint_path and os.path.exists(checkpoint_path):
            self.charcnn_model.load_weights(checkpoint_path)
            print("Loading model weights...")

        # keep only the weights that achieve the best validation accuracy
        # (the metric is named 'val_accuracy' on TF2 / Keras >= 2.3)
        checkPointer = ModelCheckpoint(filepath=self.charcnnCheckpointPath,
                                       monitor='val_acc',
                                       mode='max',
                                       verbose=1,
                                       save_best_only=True)
        tensorboard = TensorBoard(log_dir=self.logDir)

        x_train = train_dict['char_idx_inputs']
        y_train = train_dict['char_labels']
        x_test = test_dict['char_idx_inputs']
        y_test = test_dict['char_labels']

        self.charcnn_model.fit(x_train,
                               y_train,
                               batch_size=Config.trainingConfig.batchSize,
                               validation_data=(x_test, y_test),
                               epochs=Config.trainingConfig.epoches,
                               callbacks=[checkPointer, tensorboard])
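
For reference, here is the same ModelCheckpoint-plus-TensorBoard training pattern in a self-contained form; the toy model, data, and file names below are illustrative and not part of the project above (on TF2 / Keras >= 2.3 the monitored metric is 'val_accuracy'):

import numpy as np
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.layers import Dense
from keras.models import Sequential

x = np.random.rand(200, 16)
y = (x.sum(axis=1) > 8).astype('float32')

model = Sequential([Dense(8, activation='relu', input_shape=(16,)),
                    Dense(1, activation='sigmoid')])
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])

callbacks = [ModelCheckpoint('best_charcnn.h5', monitor='val_acc',
                             mode='max', save_best_only=True, verbose=1),
             TensorBoard(log_dir='./logs')]
model.fit(x, y, validation_split=0.2, epochs=5, batch_size=32,
          callbacks=callbacks)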
Example #2
    def predict_lstm(self, excel_dir, checkpoint_path):
        if checkpoint_path and os.path.exists(checkpoint_path):
            self.bilstm_model.load_weights(checkpoint_path)
            print("Loading model weights...")
        train_dict, test_dict = data_generator(excel_dir)
        # run the BiLSTM over the whole test split in a single batch
        res = self.bilstm_model.predict_on_batch(test_dict['char_idx_inputs'])
        # print each input text next to its prediction and true label
        for i in range(len(res)):
            print(test_dict['char_inputs'][i], res[i],
                  test_dict['char_labels'][i])
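
predict_on_batch returns raw sigmoid probabilities; a minimal sketch of turning the res array above into hard labels and an accuracy figure, assuming a 0.5 decision threshold:

import numpy as np

probs = np.asarray(res).reshape(-1)   # raw sigmoid outputs
preds = (probs >= 0.5).astype(int)    # hard 0/1 labels
labels = np.asarray(test_dict['char_labels'], dtype=int).reshape(-1)
print("accuracy: %.4f" % (preds == labels).mean())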
Example #3
    def train(self, excel_dir, is_enhance_pos=True):
        # hold out 5% of the Excel data for validation
        train_dict, test_dict = data_generator(excel_dir, train_rate=0.95)

        # optionally oversample the positive class with SMOTE
        if is_enhance_pos:
            train_dict = my_smote(train_dict)

        # both heads use binary cross-entropy; 'res_pred' is weighted
        # 10x more heavily than 'class_pred' in the total loss
        self.merge_model.compile(optimizer='adam',
                                 loss={
                                     'class_pred': 'binary_crossentropy',
                                     'res_pred': 'binary_crossentropy'
                                 },
                                 metrics=['accuracy'],
                                 loss_weights={
                                     'class_pred': 0.5,
                                     'res_pred': 5.0
                                 })

        # save the full model (not just the weights) whenever the
        # validation accuracy of the 'res_pred' head improves
        checkPointer = ModelCheckpoint(filepath=self.checkpointPath,
                                       monitor='val_res_pred_acc',
                                       mode='max',
                                       verbose=1,
                                       save_best_only=True,
                                       save_weights_only=False
                                       # period=5
                                       )
        tensorboard = TensorBoard(log_dir=self.logDir)

        self.merge_model.fit(
            {
                'char_input': train_dict['char_idx_inputs'],
                'loc_input': train_dict['loc_inputs']
            }, {
                'class_pred': train_dict['char_labels'],
                'res_pred': train_dict['res_labels']
            },
            shuffle=True,
            epochs=Config.trainingConfig.epoches,
            batch_size=Config.trainingConfig.batchSize,
            validation_data=({
                'char_input': test_dict['char_idx_inputs'],
                'loc_input': test_dict['loc_inputs']
            }, {
                'class_pred': test_dict['char_labels'],
                'res_pred': test_dict['res_labels']
            }),
            callbacks=[checkPointer, tensorboard])
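
With the loss_weights above, the total training loss is a weighted sum of the two head losses; a one-line illustration with made-up per-head values:

# total = 0.5 * BCE(class_pred) + 5.0 * BCE(res_pred)
class_bce, res_bce = 0.40, 0.10          # hypothetical per-head losses
print(0.5 * class_bce + 5.0 * res_bce)   # 0.7 -> 'res_pred' dominates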
Example #4
    def predict_merge(self, excel_dir, checkpoint_path):
        if checkpoint_path and os.path.exists(checkpoint_path):
            self.merge_model.load_weights(checkpoint_path)
            print("Loading model weights...")
        train_dict, test_dict = data_generator(excel_dir)
        # the merged model returns two outputs: [class_pred, res_pred]
        res = self.merge_model.predict_on_batch({
            'char_input': test_dict['char_idx_inputs'],
            'loc_input': test_dict['loc_inputs']
        })
        print(len(res), len(res[0]))
        # count how many rounded 'res_pred' outputs match the true labels
        count = 0
        for i in range(len(res[1])):
            if round(res[1][i][0]) == int(test_dict['res_labels'][i]):
                count += 1
            print(test_dict['char_inputs'][i], res[0][i], round(res[1][i][0]),
                  test_dict['res_labels'][i])
        print(count)
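
The counting loop above can be collapsed into a vectorized check; res and test_dict are the same objects used in the loop, nothing else is assumed:

import numpy as np

res_pred = np.round(np.asarray(res[1]).reshape(-1)).astype(int)
res_true = np.asarray(test_dict['res_labels'], dtype=int).reshape(-1)
print("%d / %d correct" % ((res_pred == res_true).sum(), len(res_true)))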
Example #5
import keras
import tensorflow as tf
import torch
from torch.nn import CrossEntropyLoss
from torch.optim import SGD


def train_test(epochs, batch_size):
    file_dir = "F:\\data\\machine_learning\\THUCNews\\THUCNews"
    paths = get_all_apths(file_dir)  # collect every corpus file path
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    words, corpus, id2word, word2id = get_corpus(paths)

    x, y, z = data_generator(corpus, word2id, id2word)
    print(x.shape, y.shape, z.shape)
    train_dataloader, test_dataloader = get_train_test_dataloader(
        x, y, z, batch_size=batch_size)
    loss_fun = CrossEntropyLoss()
    # CBOW model; `window`, `nb_negative` and `word_size` are globals
    cbow = Word2VecCBOW(window, id2word, nb_negative, word_size)
    cbow.to(device)
    optimizer = SGD(cbow.parameters(), lr=0.01)

    print("------开始训练------:", device)
    for epoch in range(1, epochs + 1):
        train_loss = train(cbow, train_dataloader, device, optimizer, loss_fun)
        test_loss = test(cbow, test_dataloader, device, loss_fun)
        print("epoch %d, train loss: %.2f, test loss:%.2f" %
              (epoch, train_loss, test_loss))

    # save the entire trained model object
    torch.save(cbow, "../models/cbow_w2v.pkl")
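
# Illustrative reload of the model saved above: torch.save stored the whole
# object, so torch.load returns a ready-to-use Word2VecCBOW (the class must
# be importable at load time), e.g.:
#   reloaded = torch.load("../models/cbow_w2v.pkl")
#   reloaded.eval()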


# Tail of a separate `build_model` function (invoked in __main__ below);
# its input and embedding layers are elided here.
def build_model():
    # ... input_words, samples, softmax_weights, softmax_biases and
    # input_vecs_sum are defined in the elided part of the function ...

    # batch dot product: score each sampled word vector against the
    # summed context vector
    input_vecs_sum_dot = keras.layers.Lambda(
        lambda x: tf.matmul(x[0], tf.expand_dims(x[1], 2)))(
            [softmax_weights, input_vecs_sum])  # shape=(-1, nb_negative + 1, 1)

    # add the per-word biases, flatten to (batch, nb_negative + 1), and
    # normalise the scores into a probability distribution
    add_biases = keras.layers.Lambda(
        lambda x: tf.reshape(x[0] + x[1], shape=(-1, nb_negative + 1)))(
            [input_vecs_sum_dot, softmax_biases])
    softmax = keras.layers.Lambda(lambda x: tf.nn.softmax(x))(add_biases)

    # build and compile the model
    model = keras.models.Model(inputs=[input_words, samples], outputs=softmax)
    model.compile(loss="categorical_crossentropy",
                  optimizer="adam",
                  metrics=["accuracy"])
    model.summary()
    return model
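
For context, a sketch of the tensor shapes this model expects, assuming the common negative-sampling layout in which the true word occupies column 0 of each sample row (the window, nb_negative, and batch values below are illustrative):

import numpy as np

window, nb_negative, batch = 5, 15, 4
x = np.random.randint(1, 1000, size=(batch, 2 * window))       # context ids
y = np.random.randint(1, 1000, size=(batch, nb_negative + 1))  # 1 true + negatives
z = np.zeros((batch, nb_negative + 1))
z[:, 0] = 1  # one-hot target: the true word sits in column 0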


if __name__ == '__main__':
    file_dir = "F:\\data\\machine_learning\\THUCNews\\THUCNews"
    paths = get_all_apths(file_dir)
    print(len(paths), paths[0:10])

    words, corpus, id2word, word2id = get_corpus(paths)

    # print(words)
    # print(id2word)
    x, y, z = data_generator(corpus, word2id, id2word)
    print(x.shape, y.shape, z.shape)

    model = build_model()
    # x: context windows, y: sampled candidate ids, z: one-hot targets
    model.fit([x, y], z, epochs=nb_epoch, batch_size=512)
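
After training, the learned word vectors can be read back from the model; a hedged sketch assuming the first Embedding layer holds the input-word embeddings:

from keras.layers import Embedding

emb = next(l for l in model.layers if isinstance(l, Embedding))
vectors = emb.get_weights()[0]  # shape: (vocab_size, word_size)
print(vectors.shape)            # one row per word; index with word2id[word]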