Example #1
def test(clf):
    global labels, res

    y_pred = clf.predict(docs_test)
    p, r, f1, s = precision_recall_fscore_support(y_test, y_pred)

    labels.append(str(clf))

    res[0].append(np.mean(p))
    res[1].append(np.mean(r))
    res[2].append(np.mean(f1))
    res[3].append(np.mean(y_pred == y_test))

    conf_arr = stat.confusion_matrix(y_test, y_pred)

    norm_conf = []
    for i in conf_arr:
        tmp_arr = []
        a = sum(i, 0)
        for j in i:
            tmp_arr.append(float(j) / float(a))
        norm_conf.append(tmp_arr)

    return conf_arr, norm_conf
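
Note: these test() helpers rely on module-level state that the snippets do not show. Below is a minimal sketch of the assumed setup; every name and alias here (stat for sklearn.metrics, the res accumulator layout, docs_test/y_test) is inferred from how the examples use it, not taken from the original module.

# Assumed module-level setup for the test() examples (names inferred from usage).
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics as stat                      # stat.confusion_matrix(...)
from sklearn.metrics import precision_recall_fscore_support

labels = []               # one entry (str(clf)) per evaluated classifier
res = [[], [], [], []]    # rows: mean precision, mean recall, mean F1, accuracy
# docs_test and y_test are the held-out documents and gold labels,
# e.g. produced by an earlier train/test split of the corpus.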
Example #2
def test(clf):
    global labels, res

    y_pred = clf.predict(docs_test)
    p, r, f1, s = precision_recall_fscore_support(y_test, y_pred)

    labels.append(str(clf))

    res[0].append(np.mean(p))
    res[1].append(np.mean(r))
    res[2].append(np.mean(f1))
    res[3].append(np.mean(y_pred == y_test))

    conf_arr = stat.confusion_matrix(y_test, y_pred)

    norm_conf = []
    for z in conf_arr:
        a = sum(z, 0)
        norm_conf.append([float(k)/float(a) for k in z])

    plt.clf()
    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.title('Confusion Matrix for %s' % str(clf))
    ares = ax.imshow(np.array(norm_conf), cmap=plt.cm.jet, interpolation='nearest', vmin=0, vmax=1.0)
    for i, cas in enumerate(conf_arr):
        for j, c in enumerate(cas):
            if c>0:
                plt.text(j-.2, i+.2, c, fontsize=14)
    cb = fig.colorbar(ares)
    plt.savefig("plots/gridsearch/confusion_matrix_%s.png" % str(clf), format="png")
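
A plausible driver for this helper is a loop over several fitted classifiers; the model names below are purely illustrative, not from the original code.

# Hypothetical usage: evaluate a few already-fitted classifiers.
for clf in (nb_clf, svm_clf, logreg_clf):     # assumed, fitted elsewhere
    test(clf)

# res now holds one column per classifier.
for name, prec, rec, f1, acc in zip(labels, res[0], res[1], res[2], res[3]):
    print("%s: P=%.3f R=%.3f F1=%.3f acc=%.3f" % (name, prec, rec, f1, acc))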
Example #3
def test(clf):
    global labels, res

    y_pred = clf.predict(docs_test)
    p, r, f1, s = precision_recall_fscore_support(y_test, y_pred)

    labels.append(str(clf))

    res[0].append(np.mean(p))
    res[1].append(np.mean(r))
    res[2].append(np.mean(f1))
    res[3].append(np.mean(y_pred == y_test))

    conf_arr = stat.confusion_matrix(y_test, y_pred)

    norm_conf = []
    for i in conf_arr:
        tmp_arr = []
        a = sum(i, 0)
        for j in i:
            tmp_arr.append(float(j) / float(a))
        norm_conf.append(tmp_arr)

    return conf_arr, norm_conf
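
Since precision_recall_fscore_support is called without an average argument, it returns per-class arrays, so the np.mean(...) calls above compute unweighted (macro) averages. The same figures can be requested from sklearn directly, as in this sketch:

# Equivalent to np.mean(p), np.mean(r), np.mean(f1) above.
p_macro, r_macro, f1_macro, _ = precision_recall_fscore_support(
    y_test, y_pred, average='macro')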
Example #4
def test(clf):
    global labels, res

    y_pred = clf.predict(docs_test)
    p, r, f1, s = precision_recall_fscore_support(y_test, y_pred)

    labels.append(str(clf))

    res[0].append(np.mean(p))
    res[1].append(np.mean(r))
    res[2].append(np.mean(f1))
    res[3].append(np.mean(y_pred == y_test))

    conf_arr = stat.confusion_matrix(y_test, y_pred)

    norm_conf = []
    for z in conf_arr:
        a = sum(z, 0)
        norm_conf.append([float(k) / float(a) for k in z])

    plt.clf()
    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.title('Confusion Matrix for %s' % str(clf))
    ares = ax.imshow(np.array(norm_conf),
                     cmap=plt.cm.jet,
                     interpolation='nearest',
                     vmin=0,
                     vmax=1.0)
    for i, cas in enumerate(conf_arr):
        for j, c in enumerate(cas):
            if c > 0:
                plt.text(j - .2, i + .2, c, fontsize=14)
    cb = fig.colorbar(ares)
    plt.savefig("plots/gridsearch/confusion_matrix_%s.png" % str(clf),
                format="png")
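
The row-wise normalization done with the explicit loops can also be written as a single vectorized numpy expression; a sketch, assuming conf_arr is the integer matrix returned by confusion_matrix and no row is all zeros:

# Each row (true class) divided by its own total.
conf = np.asarray(conf_arr, dtype=float)
norm_conf = conf / conf.sum(axis=1, keepdims=True)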
Example #5
vect_options = {
  'ngram_range': (1,1),
  'sublinear_tf': True,
  'preprocessor': pr.remove_noise,
  'use_idf': False,
  'stop_words': None
}

default_options = {
  'C': 1.0
}

clf = SVM(docs_train, y_train, default_options=default_options, vect_options=vect_options)

y_predicted = clf.predict(docs_test)
conf_arr = s.confusion_matrix(y_test, y_predicted)

norm_conf = []
for i in conf_arr:
    tmp_arr = []
    a = sum(i, 0)
    for j in i:
        tmp_arr.append(float(j) / float(a))
    norm_conf.append(tmp_arr)

plt.clf()
fig = plt.figure()
ax = fig.add_subplot(111)
res = ax.imshow(array(norm_conf), cmap=cm.jet, interpolation='nearest')
for i, cas in enumerate(conf_arr):