Beispiel #1
0
def learn():
    """Train a TF-IDF + logistic-regression classifier and persist it.

    Features are read via ``extractFeaturesFromFile`` using the stoplist
    from ``makeStoplist``. Each feature is treated as a sequence whose
    first element is the label string (``"+1"`` marks the positive class,
    anything else is treated as class 0) and whose remaining elements are
    tokens, which are joined with single spaces to form one document.

    Artifacts written to the current working directory:

    * ``X_train`` -- the TF-IDF matrix, via ``io.savemat``
      (presumably ``scipy.io``, which appends ``.mat`` -- confirm import).
    * ``y_train`` -- the 0/1 label vector, via ``np.save``.
    * ``tfidf.vec`` -- the fitted vectorizer, via ``joblib.dump``.
    * ``logreg`` -- the fitted classifier, via ``joblib.dump``.

    Returns:
        None. The function is used for its side effects only.
    """
    stoplist = makeStoplist()
    features = extractFeaturesFromFile(stoplist=stoplist)

    # One whitespace-joined document per sample; element 0 is the label.
    vectorizer = TfidfVectorizer(encoding=ENCODING)
    documents = [" ".join(feature[1:]) for feature in features]
    X_train = vectorizer.fit_transform(documents)

    # Float 0/1 targets, same dtype the previous np.zeros-based code built.
    y_train = np.array(
        [1.0 if feature[0] == "+1" else 0.0 for feature in features],
        dtype=float,
    )

    clf = LogisticRegression()
    clf.fit(X_train, y_train)

    io.savemat("X_train", {"X_train": X_train})
    np.save("y_train", y_train)
    joblib.dump(vectorizer, "tfidf.vec")
    # scikit-learn estimators expose no ``save`` method, so the previous
    # ``clf.save("logreg")`` raised AttributeError after training; persist
    # the model the same way as the vectorizer instead.
    joblib.dump(clf, "logreg")