def test_BayesianClassifier():
    """
    Fit a BayesianClassifier on "train.csv" and print its score on "test.csv".

    The training features/labels come from process_train_data("train.csv").
    The test frame is read with pandas, the "id" and "Unnamed: 0" columns
    are discarded, and the remainder is split into:
      * test_X - everything except the "label" column
      * test_y - everything except the "tweet" column
    The value returned by classifier.score is multiplied by 100 and printed
    with a "%" suffix (presumably score returns a fraction - confirm against
    the BayesianClassifier implementation).
    """
    features, labels = process_train_data("train.csv")
    model = BayesianClassifier()
    model.fit(features, labels)

    # Load the held-out set and strip the bookkeeping columns.
    raw_frame = pd.read_csv("test.csv", encoding="utf8")
    test_data = raw_frame.drop(labels=["id", "Unnamed: 0"], axis=1)

    test_X = test_data.drop("label", axis=1)
    test_y = test_data.drop("tweet", axis=1)

    accuracy = model.score(test_X, test_y)
    print("model score: ", accuracy * 100, "%")
Example #2
0
    for index, i in data[['text']].iterrows():
        t = i['text'].lower().translate(
            str.maketrans('', '', string.punctuation)).strip().split()
        for j in t:
            if j not in stop_words:
                lst_for_rep.append(j)
        data.at[index, 'text'] = lst_for_rep
        lst_for_rep = []
    return data


def read_stop_words():
    """
    Read 'authors/stop_words.txt' as CSV and return all its fields.

    Every field of every row is appended, in file order, to a single flat
    list of strings. An empty file yields an empty list.

    Raises whatever open() raises if the file cannot be opened
    (e.g. FileNotFoundError).
    """
    # newline='' lets the csv module do its own newline handling, which is
    # required for quoted fields containing line breaks to be read intact.
    with open('authors/stop_words.txt', 'r', newline='') as file:
        return [word for row in csv.reader(file) for word in row]


if __name__ == "__main__":
    # Script entry point: load both data sets, fit the classifier on the
    # training set, then report its score on the test set.
    train = process_data("authors/train.csv")
    test = process_data("authors/test.csv")

    classifier = BayesianClassifier()
    classifier.fit(train)

    # The score is printed exactly as returned, followed by a "%" sign.
    score = classifier.score(test)
    print(f"Model score: {score}%")