def retrain_models(username): train_x, train_y, body_x, body_y, head_x, head_y = model_retriever.retrieve_data_db(username) b_train_x = [] b_train_y = numpy.concatenate([body_y, train_y]) for msg in (body_x + train_x): b_train_x.append(extract_body_features(msg)) body_vec = TfidfVectorizer(norm="l2") b_train_x = body_vec.fit_transform(b_train_x) h_train_x = [] h_train_y = numpy.concatenate([head_y, train_y]) for msg in (head_x + train_x): h_train_x.append(extract_header_features(msg)) head_vec = DictVectorizer() h_train_x = head_vec.fit_transform(h_train_x) body_model = LinearSVC(loss='l2', penalty="l2", dual=False, tol=1e-3) head_model = RidgeClassifier(tol=1e-2, solver="lsqr") body_model.fit(b_train_x, b_train_y) head_model.fit(h_train_x, h_train_y) print("Finished training models for "+username+"...") store_models(username, body_vec, body_model, head_vec, head_model)
def classify(msg, username): body_vec, body_model, head_vec, head_model = retrieve_models(username) body_feat = extract_body_features(msg) body_feat = body_vec.transform(body_feat) head_feat = extract_header_features(msg) head_feat = head_vec.transform(head_feat) body_pred = body_model.predict(body_feat) head_pred = head_model.predict(head_feat) if body_pred[0] == head_pred[0]: return body_pred[0] else: return head_pred[0]