Esempio n. 1
0
from sklearn.datasets import fetch_20newsgroups
import helper as hlp
import taskd as td
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
import sklearn.metrics as smet
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt

# Load the category list and the train/test splits through the project helper.
categories = hlp.fetch_categories()
twenty_train, twenty_test = hlp.fetch_data()

# Run the helper's two-class relabelling on each split, train first.
# NOTE(review): the return value is ignored, so this presumably rewrites
# each split's targets in place -- confirm against helper.py.
for _split in (twenty_train, twenty_test):
    hlp.classify_into_two_class(_split)

# Feature lists supplied by taskd (presumably SVD- and NMF-based, judging by
# the names). Calls are made in the order: SVD train, NMF train, SVD test,
# NMF test.
svdListTrain, nmfListTrain = td.getsvdListTrain(), td.getnmfListTrain()
svdListTest, nmfListTest = td.getsvdListTest(), td.getnmfListTest()

# One shared model instance, re-fitted by every classifyLR call. In
# scikit-learn, C is the inverse regularization strength, so a value this
# large leaves the fit close to unregularized.
classifier = LogisticRegression(C=10000)


def classifyLR(train, test):
    """Fit the shared logistic-regression model and report test results.

    The module-level ``classifier`` is (re)fitted on ``train`` against
    ``twenty_train.target``. Its hard predictions on ``test`` are handed to
    ``hlp.getStats`` and its scores for the class at index 1 are handed to
    ``hlp.plot_roc``, both compared against ``twenty_test.target``.

    Args:
        train: Feature matrix for the training documents.
        test: Feature matrix for the test documents.

    Returns:
        None. Results are reported only through the ``hlp`` helpers.
    """
    classifier.fit(train, twenty_train.target)

    hard_labels = classifier.predict(test)
    # Keep only the second probability column, i.e. the score of the class
    # stored at index 1 of the fitted model's classes.
    class_one_scores = classifier.predict_proba(test)[:, 1]

    expected = twenty_test.target
    hlp.getStats(expected, hard_labels)
    hlp.plot_roc(expected, class_one_scores, 'Logistic Regression')


for min_df in [2, 5]:
Esempio n. 2
0
import helper as hlp
import task1 as t1

# Cluster the corpus with k-means on TF-IDF features and report how the
# cluster assignments compare with the reference labels.

# Load the corpus through the project helper.
dataset = hlp.fetch_data()
# NOTE(review): the return value is ignored, so this presumably relabels
# `dataset` in place into two classes -- confirm against helper.py.
hlp.classify_into_two_class(dataset)
# Reference labels, read after the two-class relabelling above.
labels = hlp.fetch_labels(dataset)

# NOTE(review): the meaning of the second argument (3) is not visible here;
# presumably a minimum document frequency -- confirm in task1.
tfidf_matrix = t1.getTFIDF_matrix(dataset, 3)
# NOTE(review): 2 is presumably the number of clusters, matching the two
# classes above -- confirm in helper.getKmeans.
km = hlp.getKmeans(2)
km.fit(tfidf_matrix)
# Compare the reference labels with the fitted cluster assignments.
# NOTE(review): if `km` is a scikit-learn KMeans, cluster indices are
# arbitrary, so getStats needs to be robust to label permutation -- confirm.
hlp.getStats(labels, km.labels_)