예제 #1
0
def evaluateModel(model, trainPath, labelsPath, preprocesser=sparseBagOfWords):
	"""
	Load a pickled training set and score a model on it.

	The examples and labels are read from disk, then the actual scoring
	is delegated to evaluateModel_.

	Parameters:
	-----------
		model: Class with fit and predict methods.
		trainPath (str): The path of the pickle of the training examples.
		labelsPath (str): The path of the pickle of the training labels.
		preprocesser (func): Function used to transform the list of sequences into a matrix.

	Returns:
	--------
		Whatever evaluateModel_ returns (documented upstream as the
		f1-score of the model).
	"""
	# The examples are wrapped in an ndarray before being handed over.
	examples = np.array(openPickle(trainPath))
	# The raw labels are converted by toBoolList before scoring.
	boolLabels = toBoolList(openPickle(labelsPath))
	return evaluateModel_(
		model,
		examples,
		boolLabels,
		preprocesser=preprocesser,
	)
예제 #2
0
def getPredictions(model, trainPath, labelsPath, testPath, preprocesser=sparseBagOfWords):
	"""
	Fit a model on the pickled training set and predict the pickled test set.

	Parameters:
	-----------
		model: Class with fit and predict methods.
		trainPath (str): The path of the pickle of the training examples.
		labelsPath (str): The path of the pickle of the training labels.
		testPath (str): The path of the pickle of the testing examples.
		preprocesser (func): Function used to transform the list of sequences
			into a matrix. It is called once with the training sequences only,
			and once with the test sequences plus a ``shape`` keyword, so it
			must accept that keyword and return an object exposing ``.shape``.

	Returns:
	--------
		The output of model.predict on the preprocessed test examples.
	"""
	trainExamples = np.array(openPickle(trainPath))
	trainLabels = toBoolList(openPickle(labelsPath))

	trainMatrix = preprocesser(trainExamples)
	# Remember the training width so the test matrix is built with the
	# same number of columns.
	featureCount = trainMatrix.shape[1]

	model.fit(trainMatrix, trainLabels)

	# The test pickle is only read once the model has been fitted.
	testExamples = openPickle(testPath)
	testShape = (len(testExamples), featureCount)
	return model.predict(preprocesser(testExamples, shape=testShape))
예제 #3
0
# coding: utf-8

import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import f1_score
from sklearn.naive_bayes import MultinomialNB

from preprocessing import toBoolList, getTrainTest
from utils import openPickle

# Count-based features over unigrams and bigrams.
cv = CountVectorizer(ngram_range=(1, 2))

X = cv.fit_transform(openPickle("./Data/Learn/sentences.pkl"))

# Convert the labels to an ndarray so they can be indexed with trainInd /
# testInd the same way as X below. A plain Python list does not support
# indexing with an index array or mask; np.asarray leaves an existing
# ndarray untouched.
labels = np.asarray(toBoolList(openPickle("./Data/Learn/labels.pkl")))

# NOTE(review): trainInd / testInd are presumably index arrays (or masks)
# selecting the two splits -- confirm against preprocessing.getTrainTest.
trainInd, testInd = getTrainTest(labels)

X_train, X_test = X[trainInd], X[testInd]
y_train, y_test = labels[trainInd], labels[testInd]

model = MultinomialNB(alpha=0.01)

model.fit(X_train, y_train)

# Score on both splits so over-fitting shows up as a gap between the two.
trainScore = f1_score(y_train, model.predict(X_train))
testScore = f1_score(y_test, model.predict(X_test))

print("Training f1 score: %.4f" % trainScore)
print("Testing f1 score: %.4f" % testScore)
예제 #4
0
import numpy as np
import os
import word2vec

from keras.models import Model, load_model
from keras.layers import Dense, LSTM, Input
from keras.layers.embeddings import Embedding
from keras.callbacks import ModelCheckpoint

from preprocessing import embeddingMatrix, preprocessDeepModel, toBoolList, getTrainTest
from utils import openPickle

# Build the model input from the two pickles below.
# NOTE(review): presumably returns padded integer sequences -- confirm
# against preprocessing.preprocessDeepModel.
paddedSeq = preprocessDeepModel(
    "./Data/Learn/correctedSequences.pkl",
    "./Data/Learn/kerasSequences.pkl",
)

# Labels as an integer ndarray.
labels = np.array(
    toBoolList(openPickle("./Data/Learn/labels.pkl"))
).astype(int)

print('Shape of data tensor:', paddedSeq.shape)
print('Shape of label tensor:', labels.shape)

trainInd, testInd = getTrainTest(labels)

# Training split, then the held-out split used for validation.
X_train, y_train = paddedSeq[trainInd], labels[trainInd]
X_val, y_val = paddedSeq[testInd], labels[testInd]

# Word vectors loaded from the frWac word2vec binary.
w2v = word2vec.load(
    "./Resources/frWac_non_lem_no_postag_no_phrase_200_cbow_cut100.bin"
)

# decoder is the inverse lookup of encoder (value -> key).
encoder = openPickle("./Data/newDict.pkl")
decoder = {encoder[token]: token for token in encoder}