def main():
    """Run the full multi-class TextCNN pipeline: load data, pre-process,
    build GloVe-initialised embeddings, train, and report evaluation metrics."""
    # Locations of the data files, checkpoint directory, and GloVe vectors.
    train_dir = "./data/multi_train.csv"
    test_dir = "./data/multi_test.csv"
    model_dir = "./model_save"
    embedding_dir = "./glove.6B.50d.txt"

    # Hyper-parameters for this run.
    num_epochs = 1
    batch_size = 256
    embedding_dim = 50
    target_names = ['0', '1', '2', '3']

    print("0. Setting Environment")
    set_env()

    print("1. load data")
    train_x, train_y, test_x, test_y, val_x, val_y = load_data(
        train_dir, test_dir, len(target_names))

    print("2. pre processing")
    train_x, test_x, val_x, tokenizer = pre_processing(train_x, test_x, val_x)

    print("3. text to vector")
    # Build the embedding matrix from the pre-trained GloVe file.
    embedding_matrix = text_to_vector(
        tokenizer.word_index, embedding_dir, word_dimension=embedding_dim)

    print("4. build model")
    model = TextCNN(
        sequence_len=train_x.shape[1],
        embedding_matrix=embedding_matrix,
        embedding_dim=embedding_dim,
        filter_sizes=[3, 4, 5],
        flag="pre_training",
        data_type="multi",
        category_num=len(target_names))
    model.compile(
        optimizer="adam",
        loss='categorical_crossentropy',
        metrics=['accuracy'])

    callbacks = create_callbacks(model_dir)
    model.fit(
        x=train_x,
        y=train_y,
        epochs=num_epochs,
        batch_size=batch_size,
        validation_data=(val_x, val_y),
        callbacks=callbacks)

    print("5. evaluation")
    evaluation = Evaluation(model, test_x, test_y)
    accuracy, cf_matrix, report = evaluation.eval_classification(
        data_type="multi")
    print("## Target Names : ", target_names)
    print("## Classification Report \n", report)
    print("## Confusion Matrix \n", cf_matrix)
    print("## Accuracy \n", accuracy)
def main():
    """Run the binary-classification TextCNN pipeline with self-trained
    embeddings: load data, pre-process, train, and report metrics."""
    # Data locations and checkpoint directory.
    train_dir = "../data/binary_train.csv"
    test_dir = "../data/binary_test.csv"
    model_dir = "./model_save"

    # Hyper-parameters for this run.
    num_epochs = 2
    batch_size = 256

    print("0. Setting Environment")
    set_env()

    print("1. load data")
    train_x, train_y, test_x, test_y, val_x, val_y = load_data(
        train_dir, test_dir)

    print("2. pre processing")
    train_x, test_x, val_x, tokenizer = pre_processing(train_x, test_x, val_x)

    print("3. build model")
    # NOTE(review): in self-training mode the `embedding_matrix` argument is
    # given the vocabulary size (embeddings learned from scratch) — confirm
    # against TextCNN's constructor.
    model = TextCNN(
        sequence_len=train_x.shape[1],
        embedding_matrix=len(tokenizer.word_index) + 1,
        embedding_dim=300,
        filter_sizes=[3, 4, 5],
        flag="self_training",
        data_type="binary")
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'])

    callbacks = create_callbacks(model_dir)
    model.fit(
        x=train_x,
        y=train_y,
        epochs=num_epochs,
        batch_size=batch_size,
        validation_data=(val_x, val_y),
        callbacks=callbacks)

    print("4. evaluation")
    evaluation = Evaluation(model, test_x, test_y)
    accuracy, cf_matrix, report = evaluation.eval_classification(
        data_type="binary")
    print("## Classification Report \n", report)
    print("## Confusion Matrix \n", cf_matrix)
    print("## Accuracy \n", accuracy)
def __init__(self, rnn_type, **kwargs):
    """Initialise the RNN-flavoured variant of TextCNN.

    Args:
        rnn_type: Identifier of the recurrent cell to use; stored on the
            instance for the model-building code to read.
        **kwargs: Remaining keyword arguments, forwarded unchanged to the
            TextCNN base initialiser.
    """
    # Assign rnn_type before running the base initialiser, in case the base
    # class builds the network during __init__ and reads this attribute.
    self.rnn_type = rnn_type
    # Cooperative super() call instead of the hard-coded
    # `TextCNN.__init__(self, **kwargs)` so the MRO is respected and the
    # base class can change without touching this method.
    super().__init__(**kwargs)
# Hyper-parameters for the IMDB sentiment-classification run.
maxlen = 40
batch_size = 16
embedding_dims = 20
epochs = 1

print('Loading data...')
# NOTE(review): `max_features` is not defined in this chunk — presumably set
# earlier in the file; confirm before relying on this script in isolation.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Pad sequences (samples x time)...')
# Truncate/pad every review to exactly `maxlen` token ids.
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

print('Build model...')
model = TextCNN(maxlen, max_features, embedding_dims)
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

print('Train...')
# Stop training once validation accuracy has not improved for 3 epochs.
early_stopping = EarlyStopping(monitor='val_accuracy', patience=3, mode='max')
# NOTE(review): the test split doubles as validation data here — metrics on
# it are not an unbiased test estimate.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          callbacks=[early_stopping],
          validation_data=(x_test, y_test))

print('Test...')
result = model.predict(x_test)
# NOTE(review): this chunk starts mid-statement — the opening of the
# DataLoader(...) call (its receiver and first argument) is outside the
# visible range of this file/chunk.
batch_size=BATCH_SIZE, collate_fn=generate_batch)

# Build the validation loader with the same batching/collation as training.
valid_data = _create_dataset(VALID_PATH)
valid_loader = DataLoader(valid_data,
                          batch_size=BATCH_SIZE, collate_fn=generate_batch)
# test_data = _create_dataset(TEST_PATH)
# test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, collate_fn=generate_batch)

# Static input/label specs (int64 token ids / int64 labels) for paddle.Model.
model_input = InputSpec([None, MAX_SEQUENCE_LENGTH], 'int64', 'input')
model_label = InputSpec([None, 1], 'int64', 'label')

network = TextCNN(vocab_size=VOCAB_SIZE, embed_dim=EMBED_DIM,
                  num_class=NUM_CLASS, kernel_num=KERNEL_NUM,
                  kernel_sizes=KERNEL_SIZES, dropout=DROPOUT,
                  embeddings=embeddings)

# Training via the paddle.Model high-level API (currently disabled).
# model = paddle.Model(network, inputs=model_input, labels=model_label)
# loss_fn = paddle.nn.CrossEntropyLoss()
# optimizer = paddle.optimizer.SGD(learning_rate=LR, parameters=model.parameters())
# metrics = [
#     paddle.metric.Accuracy()
# ]
# model.prepare(optimizer=optimizer, loss=loss_fn, metrics=metrics)
# model.fit(train_data=train_loader, eval_data=valid_loader, epochs=NUM_EPOCHS, verbose=1)
# model.save('saved/checkpoint')
# NOTE(review): this chunk starts mid-statement — the leading `}` closes a
# dict literal (presumably `data_params`) whose opening is outside the
# visible range.
}
feature_loader = FeatureLoader(**data_params)

# Model/training configuration forwarded to TextCNN as keyword arguments.
param = {
    'kernel_size': [3, 5, 7],
    'batch_size': 32,
    'epochs': 100,
    'loss': 'categorical_crossentropy',
    'embedding_dim': 100,
    'user_num': len(user2idx),
    'max_ngram_len': max_ngram_len,
    'feature_num': 300,
    'vocab_size': len(ngram2idx)
}
# NOTE(review): the statement that produces `x` and `y` is commented out
# below, so `x` is undefined at the `training_split` line (NameError) as
# written — confirm where x/y are meant to come from (`reviews` is also not
# visible in this chunk).
#
# # x, y = feature_loader.load_n_gram_idx_feature_label(reviews)

# 80/20 sequential split into training and testing portions.
training_split = int(0.8 * x.shape[0])
training_x, training_y = x[:training_split, :], y[:training_split]
testing_x, testing_y = x[training_split:, ], y[training_split:]

model = TextCNN(**param)
model.fit(training_x, training_y)
# Persist then immediately reload the weights before evaluating.
model.save_weight(ku.CNN_AST_model)
model.load_weight(ku.CNN_AST_model)

res = model.evaluate(testing_x, testing_y)
testing_loss = res[0]
testing_acc = res[1]
print('testing_loss: {}, testing_acc: {}'.format(testing_loss, testing_acc))