Esempio n. 1
0
# Load the already-split dataset together with the word -> index vocabulary.
x_train, y_train, x_test, y_test, word2index = data_helpers.preprocess()
max_features = len(word2index)

# The longest training sequence sets the length both splits are padded to.
max_len = max(map(len, x_train))
print(max_len)

print('Pad sequences...')
# Both splits get the same target length and the same padding value (0).
x_train, x_test = (
    sequence.pad_sequences(split, maxlen=max_len, value=0)
    for split in (x_train, x_test)
)

print('Build model...')
model = TextCNN(
    max_len,
    embedding_dim,
    batch_size=batch_size,
    class_num=2,
    max_features=max_features,
    epochs=epochs,
)

print('Train...')
# NOTE(review): arguments are passed as (x_train, x_test, y_train, y_test);
# confirm this matches the parameter order of the project's TextCNN.fit.
model.fit(x_train, x_test, y_train, y_test)

print('Test...')
# Reduce per-class rows to class indices by taking the argmax over axis 1,
# for both the predictions and the reference labels.
result = np.array(model.predict(x_test)).argmax(axis=1)
y_test = np.array(y_test).argmax(axis=1)

# Report the evaluation metrics on the test split.
print('f1:', f1_score(y_test, result, average='macro'))
print('accuracy:', accuracy_score(y_test, result))
print('classification report:\n', classification_report(y_test, result))
print('confusion matrix:\n', confusion_matrix(y_test, result))
Esempio n. 2
0
# Load the IMDB dataset, pad the reviews to a fixed length, then train a
# TextCNN model and log a handful of predictions next to their labels.
logger.info('loading data...')
try:
    (x_train, y_train), (x_test,
                         y_test) = imdb.load_data(num_words=max_features)
except Exception:
    # Catch Exception rather than using a bare ``except`` so that
    # KeyboardInterrupt / SystemExit still abort the script instead of
    # silently switching to the fallback loader.
    logger.info('np bug occur...')
    (x_train, y_train), (x_test, y_test) = load_data(num_words=max_features)
logger.info('train data length: {}'.format(len(x_train)))
logger.info('test data length: {}'.format(len(x_test)))

logger.info('padding data...')
# Bring every sequence in both splits to the same length ``maxlen``.
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

logger.info('build model...')
model = TextCNN(max_features=max_features, maxlen=maxlen,
                emb_dim=emb_dim).build_model()

logger.info('training...')
# Stop once 'val_acc' has not improved for 3 consecutive epochs.
earlystop = EarlyStopping(patience=3, mode='max', monitor='val_acc')
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
# NOTE(review): the test split doubles as validation data here, so early
# stopping is driven by the same data used for the final check below.
model.fit(x_train,
          y_train,
          batch_size=batch_size,
          epochs=epochs,
          callbacks=[earlystop],
          validation_data=(x_test, y_test))

logger.info('test...')
# Log predictions for the first 10 test samples paired with their labels.
pred = model.predict(x_test[:10])
logger.info(list(zip(pred, y_test[:10])))