def evaluateModel(
    model,
    trainPath,
    labelsPath,
    preprocesser=sparseBagOfWords,
):
    """
    Score a model on pickled training data (f1-score).

    Parameters:
    -----------
    model: Class with fit and predict methods.
    trainPath (str): Path of the pickle holding the training examples.
    labelsPath (str): Path of the pickle holding the training labels.
    preprocesser (func): Function used to transform the list of sequences
        into a matrix.

    Returns:
    --------
    The value returned by evaluateModel_ for the loaded examples and labels.
    """
    # Examples are wrapped in an array before being handed over; labels go
    # through toBoolList first.
    examples = np.array(openPickle(trainPath))
    boolLabels = toBoolList(openPickle(labelsPath))
    return evaluateModel_(
        model,
        examples,
        boolLabels,
        preprocesser=preprocesser,
    )
def getPredictions(
    model,
    trainPath,
    labelsPath,
    testPath,
    preprocesser=sparseBagOfWords,
):
    """
    Fit a model on the training pickles and predict the test pickle.

    Parameters:
    -----------
    model: Class with fit and predict methods.
    trainPath (str): Path of the pickle holding the training examples.
    labelsPath (str): Path of the pickle holding the training labels.
    testPath (str): Path of the pickle holding the testing examples.
    preprocesser (func): Function used to transform the list of sequences
        into a matrix. It is called with a ``shape`` keyword for the test
        set.

    Returns:
    --------
    The output of model.predict on the preprocessed test examples.
    """
    trainExamples = np.array(openPickle(trainPath))
    trainLabels = toBoolList(openPickle(labelsPath))

    trainMatrix = preprocesser(trainExamples)
    # Column count of the training matrix; the test matrix is requested with
    # the same number of columns so the fitted model can consume it.
    featureCount = trainMatrix.shape[1]
    model.fit(trainMatrix, trainLabels)

    testExamples = openPickle(testPath)
    testShape = (len(testExamples), featureCount)
    testMatrix = preprocesser(testExamples, shape=testShape)
    return model.predict(testMatrix)
# coding: utf-8
# Baseline script: fits a multinomial naive Bayes classifier on unigram and
# bigram counts of the pickled sentences, then prints the f1 score obtained
# on the train and test splits returned by getTrainTest.
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import f1_score

from utils import openPickle
from preprocessing import toBoolList, getTrainTest

# Count features over 1-grams and 2-grams.
cv = CountVectorizer(ngram_range=(1, 2))
X = cv.fit_transform(openPickle("./Data/Learn/sentences.pkl"))

labels = toBoolList(openPickle("./Data/Learn/labels.pkl"))
trainInd, testInd = getTrainTest(labels)

# The labels are indexed with the same indexers as the matrix X. A plain
# Python list only accepts integers and slices, so the labels are viewed as
# an array before indexing. np.asarray leaves an existing array untouched.
# NOTE(review): toBoolList's return type is not visible from this script.
labelArray = np.asarray(labels)

X_train, X_test = X[trainInd], X[testInd]
y_train, y_test = labelArray[trainInd], labelArray[testInd]

# alpha is the additive smoothing parameter of MultinomialNB.
model = MultinomialNB(alpha=0.01)
model.fit(X_train, y_train)

trainScore = f1_score(y_train, model.predict(X_train))
testScore = f1_score(y_test, model.predict(X_test))

print("Training f1 score: %.4f" % trainScore)
print("Testing f1 score: %.4f" % testScore)
import numpy as np
import os

import word2vec
from keras.models import Model, load_model
from keras.layers import Dense, LSTM, Input
from keras.layers.embeddings import Embedding
from keras.callbacks import ModelCheckpoint

from preprocessing import (
    embeddingMatrix,
    preprocessDeepModel,
    toBoolList,
    getTrainTest,
)
from utils import openPickle

# Input tensor for the deep model, built by preprocessDeepModel from the two
# pickle paths below.
# NOTE(review): the role of each path (input vs. cache) is not visible here.
paddedSeq = preprocessDeepModel(
    "./Data/Learn/correctedSequences.pkl",
    "./Data/Learn/kerasSequences.pkl",
)

# Labels are loaded as booleans and then cast to integers.
labels = np.array(toBoolList(openPickle("./Data/Learn/labels.pkl")))
labels = labels.astype(int)

print('Shape of data tensor:', paddedSeq.shape)
print('Shape of label tensor:', labels.shape)

# Split inputs and labels with the same train / validation indexers.
trainInd, testInd = getTrainTest(labels)
X_train = paddedSeq[trainInd]
X_val = paddedSeq[testInd]
y_train = labels[trainInd]
y_val = labels[testInd]

# Pretrained word2vec model loaded from the resources folder.
w2v = word2vec.load(
    "./Resources/frWac_non_lem_no_postag_no_phrase_200_cbow_cut100.bin")

# encoder is loaded from a pickle; decoder is its inverse (value -> key).
encoder = openPickle("./Data/newDict.pkl")
decoder = {value: key for key, value in encoder.items()}