Exemple #1
0
def retrain_models(username):
	"""Refit and store the body and header classifiers for ``username``.

	Data is loaded through ``model_retriever.retrieve_data_db``, which yields
	three (x, y) pairs: a shared ``train`` set plus ``body`` and ``head`` sets.
	The shared set is appended to both of the other two, so each model is
	fitted on its own samples followed by the shared ones.

	Args:
		username: Identifier passed to the data retriever and to
			``store_models``; it is also concatenated into the progress
			message, so it is assumed to be a string.

	Returns:
		None. The fitted vectorizers and models are handed to ``store_models``.
	"""
	train_x, train_y, body_x, body_y, head_x, head_y = model_retriever.retrieve_data_db(username)

	# Body model: one feature document per message, TF-IDF weighted.
	b_train_x = [extract_body_features(msg) for msg in (body_x + train_x)]
	b_train_y = numpy.concatenate([body_y, train_y])

	body_vec = TfidfVectorizer(norm="l2")
	b_train_x = body_vec.fit_transform(b_train_x)

	# Header model: one feature mapping per message, vectorized by key.
	h_train_x = [extract_header_features(msg) for msg in (head_x + train_x)]
	h_train_y = numpy.concatenate([head_y, train_y])

	head_vec = DictVectorizer()
	h_train_x = head_vec.fit_transform(h_train_x)

	# ``loss`` is left at its default (squared hinge). The former spelling
	# loss='l2' named the same loss but is rejected by scikit-learn >= 1.0,
	# whereas the default works on old and new releases alike.
	body_model = LinearSVC(penalty="l2", dual=False, tol=1e-3)
	head_model = RidgeClassifier(tol=1e-2, solver="lsqr")

	body_model.fit(b_train_x, b_train_y)
	head_model.fit(h_train_x, h_train_y)

	print("Finished training models for "+username+"...")

	store_models(username, body_vec, body_model, head_vec, head_model)
Exemple #2
0
def classify(msg, username):
	"""Predict a label for ``msg`` using the models stored for ``username``.

	The body and header features of the message are vectorized separately
	and each is passed to its own model.

	Args:
		msg: The message to classify; it is passed unchanged to
			``extract_body_features`` and ``extract_header_features``.
		username: Identifier used by ``retrieve_models`` to load the two
			vectorizers and the two models.

	Returns:
		The predicted label for the message (first element of a model's
		prediction array).
	"""
	body_vec, body_model, head_vec, head_model = retrieve_models(username)

	# ``transform`` expects a collection of samples. Each extractor result is
	# a single sample (at training time they are gathered into a list), so it
	# is wrapped in a one-element list; passing a bare string to a text
	# vectorizer is rejected by scikit-learn.
	body_feat = body_vec.transform([extract_body_features(msg)])
	head_feat = head_vec.transform([extract_header_features(msg)])

	body_pred = body_model.predict(body_feat)
	head_pred = head_model.predict(head_feat)

	# NOTE(review): when the two models agree the labels are equal, and when
	# they disagree the header model's label is returned, so the body model
	# never changes the outcome. Confirm whether a different tie-break was
	# intended.
	if body_pred[0] == head_pred[0]:
		return body_pred[0]
	return head_pred[0]