# Preprocess the corpus, pad sequences to a uniform length, train a TextCNN
# classifier, and report test-set metrics (F1, accuracy, report, confusion matrix).
x_train, y_train, x_test, y_test, word2index = data_helpers.preprocess()

# Vocabulary size and the longest training sequence drive padding / embedding.
max_features = len(word2index)
max_len = max(map(len, x_train))
print(max_len)

print('Pad sequences...')
# Zero-pad every sequence to the length of the longest training example.
x_train = sequence.pad_sequences(x_train, maxlen=max_len, value=0)
x_test = sequence.pad_sequences(x_test, maxlen=max_len, value=0)

print('Build model...')
model = TextCNN(
    max_len,
    embedding_dim,
    batch_size=batch_size,
    class_num=2,
    max_features=max_features,
    epochs=epochs,
)

print('Train...')
# NOTE(review): argument order (train, test, train-labels, test-labels) follows
# the project's TextCNN.fit signature — confirm against its definition.
model.fit(x_train, x_test, y_train, y_test)

print('Test...')
# Collapse one-hot / probability vectors to class indices for sklearn metrics.
pred_labels = np.argmax(np.array(model.predict(x_test)), axis=1)
y_test = np.argmax(np.array(y_test), axis=1)

print('f1:', f1_score(y_test, pred_labels, average='macro'))
print('accuracy:', accuracy_score(y_test, pred_labels))
print('classification report:\n', classification_report(y_test, pred_labels))
print('confusion matrix:\n', confusion_matrix(y_test, pred_labels))
# Load the IMDB sentiment dataset, pad the reviews, train a binary TextCNN
# classifier with early stopping, and log predictions for a few test samples.
logger.info('loading data...')
try:
    (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
except Exception:
    # FIX: was a bare `except:`, which also swallows KeyboardInterrupt/SystemExit.
    # The intended failure is presumably the numpy `allow_pickle` load error in
    # keras' imdb loader — fall back to the project-local loader in that case.
    logger.info('np bug occur...')
    (x_train, y_train), (x_test, y_test) = load_data(num_words=max_features)

logger.info('train data length: {}'.format(len(x_train)))
logger.info('test data length: {}'.format(len(x_test)))

logger.info('padding data...')
# Truncate/zero-pad every review to a fixed length for the embedding layer.
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

logger.info('build model...')
model = TextCNN(max_features=max_features, maxlen=maxlen, emb_dim=emb_dim).build_model()

logger.info('training...')
# Stop once validation accuracy has not improved for 3 consecutive epochs.
earlystop = EarlyStopping(patience=3, mode='max', monitor='val_acc')
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[earlystop],
    validation_data=(x_test, y_test),
)

logger.info('test...')
# Spot-check: log (prediction, label) pairs for the first 10 test reviews.
pred = model.predict(x_test[:10])
logger.info(list(zip(pred, y_test[:10])))