Beispiel #1
0
 def test_svm_based3(self):
     '''
     Run the SVM-based comparison on the sentiment dataset.

     The dataset is loaded with sentiment_reader.toNumpy(), the test split
     is capped at its first 1000 examples, and the result is handed to
     self.compare_svm_based.
     '''
     print("Compare Sentiment with SVM")
     X_train, y_train, X_test, y_test = sentiment_reader.toNumpy()

     # The test split used to be sliced and then thrown away: the untruncated
     # dataset was what reached compare_svm_based. Pass the capped split so
     # the slicing actually takes effect.
     dataset = (X_train, y_train, X_test[:1000], y_test[:1000])

     # The three results are not inspected here; the unpacking only checks
     # that compare_svm_based hands back exactly three values.
     acc_matrix, f1_matrix, auc_matrix = self.compare_svm_based(dataset)
Beispiel #2
0
    def test_ratio(self):
        '''
        Evaluate MLA while changing the ratio of the positive class in the
        test set. A binary-class dataset is used for ease of interpretation.

        For every ratio r produced by np.arange(0.05, 1.0, 0.05), a new test
        set is drawn from the first 1000 test examples with
        SetGen.with_pos_ratio, MLA is refit on the training data together
        with the label distribution of that new test set, and the line
        "r acc" is printed, where acc is self.accuracy of the confusion
        matrix of the predictions.
        '''
        # Other readers were previously swapped in here (rcv1_binary_reader,
        # snippet_reader); presumably they expose the same toNumpy() layout.
        X_train, y_train, X_test, y_test = sentiment_reader.toNumpy()

        # Only the first 1000 test examples serve as the pool that the
        # ratio-controlled test sets are generated from.
        test_set_original = (X_test[:1000], y_test[:1000])

        # Base classifier wrapped by MLA; it is fit once up front.
        clf = SVMLight()
        clf.fit(X_train, y_train)

        mla = MLA(clf, verbose=1)

        for r in np.arange(0.05, 1.0, 0.05):
            # Generate a new test set with the desired positive proportion.
            X_test_new, y_test_new = SetGen.with_pos_ratio(
                test_set_original, r, pos_label=1)

            # MLA is given the label distribution of the generated test set.
            dist_dict = DE.arrayToDistDict(y_test_new)

            mla.fit(X_train, y_train, dist_dict)
            y_pred = mla.predict(X_test_new)
            cm = confusion_matrix(y_test_new, y_pred)
            acc = self.accuracy(cm)

            # %-formatting keeps the Python 2 output ("r acc") while also
            # being valid syntax on Python 3.
            print("%s %s" % (r, acc))
Beispiel #3
0

from data_io import sentiment_reader
from data_io import domain_reader
from data_io import snippet_reader
from sklearn.svm import LinearSVC


# Baseline run: fit a LinearSVC on the sentiment training split and print the
# value of clf.score on the test split.
X_train, y_train, X_test, y_test = sentiment_reader.toNumpy()

clf = LinearSVC()
clf.fit(X_train, y_train)

# Single-argument call form: prints the same text on Python 2 and is also
# valid syntax on Python 3 (the bare print statement is not).
print(clf.score(X_test, y_test))

Beispiel #4
0
 def _test_svm_based4(self):
     '''
     Run the SVM-based comparison on the domain dataset.

     The leading underscore in the name is kept as in the original;
     presumably it keeps the test runner from collecting this method.
     '''
     # Imported locally so this method does not depend on which readers the
     # surrounding module happens to import.
     from data_io import domain_reader

     print("Compare Domain with SVM")

     # The banner announces the domain data, but the sentiment reader was
     # being loaded here (apparently copied from test_svm_based3).
     # TODO confirm domain_reader.toNumpy() returns the same
     # (X_train, y_train, X_test, y_test) layout as the other readers.
     dataset = domain_reader.toNumpy()

     # The three results are not inspected here; the unpacking only checks
     # that compare_svm_based hands back exactly three values.
     acc_matrix, f1_matrix, auc_matrix = self.compare_svm_based(dataset)