예제 #1
0
파일: comp.py 프로젝트: pyongjoo/ende
    def _test_maxent_based(self):
        '''
        Run the ratio experiment on News20 with LogisticRegression.

        Loads the dataset through news_20_reader.toNumpy() and forwards it to
        self.run_ratio together with n_feature set to the string 'all'.
        '''
        dataset = news_20_reader.toNumpy()
        n_feature = 'all'

        # Single-argument parenthesised prints emit the same text on Python 2
        # and Python 3; the bare print statement is a SyntaxError on Python 3.
        print("Dataset: News20, Classifier: Maximum Entropy")
        print("")  # blank separator line (was a bare `print`)
        self.run_ratio(LogisticRegression, dataset, n_feature)
예제 #2
0
파일: mshi.py 프로젝트: pyongjoo/ende
    def test_class_ratio(self):
        '''
        Fit MSHI (built with LinearSVC) on a News20 training subset and print
        its estimated distribution for test sets generated at different
        positive-class ratios.

        The ratio is swept with numpy.arange(0.05, 1.0, 0.05). A binary class
        dataset is used for ease of interpretation.
        '''
        X_train_full, y_train_full, X_test, y_test = news_20_reader.toNumpy()
        set_size = 1000
        X_train, y_train = self.get_sub_set_with_size(
            [X_train_full, y_train_full], set_size)

        train_set = (X_train, y_train)
        test_set_original = (X_test, y_test)

        ms = MSHI(LinearSVC)
        ms.fit(train_set)

        # Parenthesised single-argument print works on both Python 2 and 3.
        print('Done training')

        for r in numpy.arange(0.05, 1.0, 0.05):
            X_test_new, y_test_new = SetGen.with_pos_ratio(
                test_set_original, r, pos_label=1)

            dist_true = DE.arrayToDist(y_test_new)
            dist_est = ms.predict(X_test_new)

            # NOTE(review): the error is computed but never reported; the call
            # is kept so that any failure inside rms still surfaces here.
            err = rms(dist_est, dist_true)

            print("r: %f, pos: %f" % (r, dist_est[1]))
예제 #3
0
파일: it2.py 프로젝트: pyongjoo/ende
    def test(self):
        '''
        Fit Itr2 on the News20 training data and print its prediction for a
        test set produced by SetGen.with_pos_ratio with ratio 0.50.
        '''
        X_train, y_train, X_test, y_test = news_20_reader.toNumpy()
        X_test, y_test = SetGen.with_pos_ratio([X_test, y_test], 0.50, pos_label=1)

        clf = LogisticRegression
        # NOTE(review): the meaning of the second argument (2) is defined by
        # Itr2 and is not visible here -- confirm against its constructor.
        it = Itr2(clf, 2)
        it.fit([X_train, y_train])
        dist = it.predict(X_test)

        # Parenthesised single-argument print works on both Python 2 and 3.
        print(dist)
예제 #4
0
파일: comp_news.py 프로젝트: pyongjoo/ende
 def test_ratio(self):
     '''
     Call self.run_ratio on the News20 dataset once per set size.

     Set sizes come from numpy.arange(100, 210, 10), i.e. 100, 110, ..., 200.
     A blank line is printed after each run.
     '''
     dataset = news_20_reader.toNumpy()
     for set_size in numpy.arange(100, 210, 10):
         self.run_ratio(dataset, set_size)
         # print("") emits one empty line on both Python 2 and Python 3; the
         # bare `print` statement does nothing useful on Python 3.
         print("")
예제 #5
0
파일: comp.py 프로젝트: pyongjoo/ende
    def _test_maxent_based(self):
        '''
        Run self.run_test_with on News20 using LogisticRegression.

        The dataset comes from news_20_reader.toNumpy() and n_feature is
        passed as the string 'all'.
        '''
        dataset = news_20_reader.toNumpy()
        n_feature = 'all'

        # Parenthesised single-argument print works on both Python 2 and 3.
        print("News20 with Maxent")
        self.run_test_with(dataset, LogisticRegression, n_feature)
예제 #6
0
파일: comp.py 프로젝트: pyongjoo/ende
    def _test_svm_based3(self):
        '''
        Run self.run_test_with on News20 using LinearSVC.

        The dataset comes from news_20_reader.toNumpy() and n_feature is
        passed as the string 'all'.
        '''
        dataset = news_20_reader.toNumpy()
        n_feature = 'all'

        # Parenthesised single-argument print works on both Python 2 and 3.
        print("News20 with SVM")
        self.run_test_with(dataset, LinearSVC, n_feature)
예제 #7
0
파일: comp.py 프로젝트: pyongjoo/ende
 def _test_maxent_based(self):
     '''
     Run self.run_test_with on News20 with self.compare_maxent_based.

     The dataset comes from news_20_reader.toNumpy() and n_feature is passed
     as the string 'all'.
     '''
     # Parenthesised single-argument print works on both Python 2 and 3.
     print("Compare News20 with Maxent")
     dataset = news_20_reader.toNumpy()
     n_feature = 'all'
     self.run_test_with(dataset, self.compare_maxent_based, n_feature)