Example #1
def main():
    # Directory Setting
    train_dir = "./data/multi_train.csv"
    test_dir = "./data/multi_test.csv"
    model_dir = "./model_save"
    embedding_dir = "./glove.6B.50d.txt"

    # Hyperparameters
    epoch = 1
    batch = 256
    embedding_dim = 50
    target_names = ['0', '1', '2', '3']

    # Flow
    print("0. Setting Environment")
    set_env()

    print("1. load data")
    train_x, train_y, test_x, test_y, val_x, val_y = load_data(
        train_dir, test_dir, len(target_names))

    print("2. pre processing")
    train_x, test_x, val_x, tokenizer = pre_processing(train_x, test_x, val_x)

    print("3. text to vector")
    embedding_matrix = text_to_vector(tokenizer.word_index,
                                      embedding_dir,
                                      word_dimension=embedding_dim)

    print("4. build model")
    model = TextCNN(sequence_len=train_x.shape[1],
                    embedding_matrix=embedding_matrix,
                    embedding_dim=embedding_dim,
                    filter_sizes=[3, 4, 5],
                    flag="pre_training",
                    data_type="multi",
                    category_num=len(target_names))
    model.compile(optimizer="adam",
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    callbacks = create_callbacks(model_dir)

    model.fit(x=train_x,
              y=train_y,
              epochs=epoch,
              batch_size=batch,
              validation_data=(val_x, val_y),
              callbacks=callbacks)

    print("5. evaluation")
    evaluation = Evaluation(model, test_x, test_y)
    accuracy, cf_matrix, report = evaluation.eval_classification(
        data_type="multi")
    print("## Target Names : ", target_names)
    print("## Classification Report \n", report)
    print("## Confusion Matrix \n", cf_matrix)
    print("## Accuracy \n", accuracy)
Example #2
def main():
    # Directory Setting
    train_dir = "../data/binary_train.csv"
    test_dir = "../data/binary_test.csv"
    model_dir = "./model_save"

    # Hyperparameters
    epoch = 2
    batch = 256

    # Flow
    print("0. Setting Environment")
    set_env()

    print("1. load data")
    train_x, train_y, test_x, test_y, val_x, val_y = load_data(
        train_dir, test_dir)

    print("2. pre processing")
    train_x, test_x, val_x, tokenizer = pre_processing(train_x, test_x, val_x)

    print("3. build model")
    model = TextCNN(sequence_len=train_x.shape[1],
                    # with flag="self_training" no pretrained matrix is supplied;
                    # the vocabulary size (+1 for the padding index) is passed instead
                    embedding_matrix=len(tokenizer.word_index) + 1,
                    embedding_dim=300,
                    filter_sizes=[3, 4, 5],
                    flag="self_training",
                    data_type="binary")
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    callbacks = create_callbacks(model_dir)
    model.fit(x=train_x,
              y=train_y,
              epochs=epoch,
              batch_size=batch,
              validation_data=(val_x, val_y),
              callbacks=callbacks)

    print("4. evaluation")
    evaluation = Evaluation(model, test_x, test_y)
    accuracy, cf_matrix, report = evaluation.eval_classification(
        data_type="binary")
    print("## Classification Report \n", report)
    print("## Confusion Matrix \n", cf_matrix)
    print("## Accuracy \n", accuracy)
Example #3
def __init__(self, rnn_type, **kwargs):
    self.rnn_type = rnn_type
    TextCNN.__init__(self, **kwargs)
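This fragment is the constructor of a TextCNN subclass that stores an rnn_type field before delegating to the parent. In Python 3 the delegation is usually written with super(); a minimal equivalent sketch, with the subclass name invented for illustration:

class TextRNN(TextCNN):  # class name assumed; the fragment above does not show it
    def __init__(self, rnn_type, **kwargs):
        self.rnn_type = rnn_type
        super().__init__(**kwargs)  # same effect as TextCNN.__init__(self, **kwargs) here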
Example #4
File: core.py Project: wroscoe/gradest
maxlen = 40
batch_size = 16
embedding_dims = 20
epochs = 1

print('Loading data...')
# max_features (the vocabulary size cap) is defined earlier in the original core.py
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Pad sequences (samples x time)...')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

print('Build model...')
model = TextCNN(maxlen, max_features, embedding_dims)
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

print('Train...')
early_stopping = EarlyStopping(monitor='val_accuracy', patience=3, mode='max')
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          callbacks=[early_stopping],
          validation_data=(x_test, y_test))

print('Test...')
result = model.predict(x_test)
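model.predict only returns the raw sigmoid outputs. To get a test loss and accuracy matching the metrics reported during training, a follow-up evaluate call would look like this (same Keras API as above):

loss, accuracy = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test loss:', loss)
print('Test accuracy:', accuracy)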
Example #5
train_data = _create_dataset(TRAIN_PATH)
train_loader = DataLoader(train_data,
                          batch_size=BATCH_SIZE,
                          collate_fn=generate_batch)
valid_data = _create_dataset(VALID_PATH)
valid_loader = DataLoader(valid_data,
                          batch_size=BATCH_SIZE,
                          collate_fn=generate_batch)
# test_data = _create_dataset(TEST_PATH)
# test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, collate_fn=generate_batch)

model_input = InputSpec([None, MAX_SEQUENCE_LENGTH], 'int64', 'input')
model_label = InputSpec([None, 1], 'int64', 'label')

network = TextCNN(vocab_size=VOCAB_SIZE,
                  embed_dim=EMBED_DIM,
                  num_class=NUM_CLASS,
                  kernel_num=KERNEL_NUM,
                  kernel_sizes=KERNEL_SIZES,
                  dropout=DROPOUT,
                  embeddings=embeddings)

# Training with the high-level paddle.Model API
# model = paddle.Model(network, inputs=model_input, labels=model_label)
# loss_fn = paddle.nn.CrossEntropyLoss()
# optimizer = paddle.optimizer.SGD(learning_rate=LR, parameters=model.parameters())
# metrics = [
#     paddle.metric.Accuracy()
# ]
# model.prepare(optimizer=optimizer, loss=loss_fn, metrics=metrics)
# model.fit(train_data=train_loader, eval_data=valid_loader, epochs=NUM_EPOCHS, verbose=1)

# model.save('saved/checkpoint')
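generate_batch is defined elsewhere in this project. A minimal sketch of what such a collate_fn could look like, assuming each sample is a (token_ids, label) pair and reusing the MAX_SEQUENCE_LENGTH constant from above:

import numpy as np

def generate_batch(batch):
    # Hypothetical collate_fn: pad/truncate each sequence so the batch is rectangular.
    texts, labels = zip(*batch)
    padded = np.zeros((len(texts), MAX_SEQUENCE_LENGTH), dtype="int64")
    for i, ids in enumerate(texts):
        n = min(len(ids), MAX_SEQUENCE_LENGTH)
        padded[i, :n] = ids[:n]
    return padded, np.asarray(labels, dtype="int64").reshape(-1, 1)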
Example #6
}
feature_loader = FeatureLoader(**data_params)

param = {
    'kernel_size': [3, 5, 7],
    'batch_size': 32,
    'epochs': 100,
    'loss': 'categorical_crossentropy',
    'embedding_dim': 100,
    'user_num': len(user2idx),
    'max_ngram_len': max_ngram_len,
    'feature_num': 300,
    'vocab_size': len(ngram2idx)
}
x, y = feature_loader.load_n_gram_idx_feature_label(reviews)

training_split = int(0.8 * x.shape[0])
training_x, training_y = x[:training_split, :], y[:training_split]
testing_x, testing_y = x[training_split:, :], y[training_split:]

model = TextCNN(**param)
model.fit(training_x, training_y)
model.save_weight(ku.CNN_AST_model)
model.load_weight(ku.CNN_AST_model)  # reload the just-saved weights before evaluating
res = model.evaluate(testing_x, testing_y)
testing_loss = res[0]
testing_acc = res[1]
print('testing_loss: {}, testing_acc: {}'.format(testing_loss, testing_acc))
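One caveat in this example: the 80/20 split is taken in file order, so if reviews is sorted by author or label the held-out set is biased. A common fix is to shuffle before splitting; a sketch that would run just before the split above:

import numpy as np

rng = np.random.default_rng(seed=42)   # fixed seed so the split is reproducible
perm = rng.permutation(x.shape[0])
x, y = x[perm], y[perm]                # shuffle rows, then take the 80/20 split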