def _test_maxent_based(self):
    '''
    Print the result of self.run_ratio on News20 with LogisticRegression.

    The dataset is whatever news_20_reader.toNumpy() returns; the feature
    setting passed along is the literal 'all'.

    NOTE(review): the leading underscore presumably keeps test discovery
    from collecting this method -- confirm.
    '''
    # Load first so a reader failure surfaces before any header is printed.
    news20 = news_20_reader.toNumpy()
    print("Dataset: News20, Classifier: Maximum Entropy")
    outcome = self.run_ratio(LogisticRegression, news20, 'all')
    print(outcome)
def test_class_ratio(self):
    '''
    Fit MSHI(LinearSVC) on a sub-sample of the News20 training data and
    print its estimate while the ratio of the positive class in the test
    set is varied.

    A binary-class dataset is used for ease of interpretation.
    '''
    X_train_full, y_train_full, X_test, y_test = news_20_reader.toNumpy()

    # Train on a sub-sample of the full training set.
    set_size = 1000
    X_train, y_train = self.get_sub_set_with_size(
        [X_train_full, y_train_full], set_size)

    ms = MSHI(LinearSVC)
    ms.fit((X_train, y_train))
    print('Done training')

    test_set_original = (X_test, y_test)
    # Ratios 0.05, 0.10, ... up to (but excluding) 1.0.
    for r in numpy.arange(0.05, 1.0, 0.05):
        X_test_new, y_test_new = SetGen.with_pos_ratio(
            test_set_original, r, pos_label=1)

        dist_true = DE.arrayToDist(y_test_new)
        dist_est = ms.predict(X_test_new)

        # NOTE(review): the RMS error between the estimated and true
        # distributions is computed but never reported or asserted.
        # The call is kept so it still runs; confirm whether `err`
        # should be printed or checked.
        err = rms(dist_est, dist_true)

        print("r: %f, pos: %f" % (r, dist_est[1]))
def test(self):
    '''
    Fit Itr2(LogisticRegression, 2) on the News20 training split and print
    its prediction for the test split returned by SetGen.with_pos_ratio.

    The test split is passed through SetGen.with_pos_ratio with ratio 0.50
    and pos_label=1 before prediction (presumably resampling it to a 50%
    positive ratio -- confirm against SetGen).
    '''
    train_X, train_y, test_X, test_y = news_20_reader.toNumpy()

    # Only the features of the resampled test set are used below.
    resampled_X, _ = SetGen.with_pos_ratio(
        [test_X, test_y], 0.50, pos_label=1)

    estimator = Itr2(LogisticRegression, 2)
    estimator.fit([train_X, train_y])
    print(estimator.predict(resampled_X))
def test_ratio(self):
    '''
    Call self.run_ratio on the News20 dataset once per training-set size.

    Sizes come from numpy.arange(100, 210, 10), i.e. 100, 110, ..., 200.
    '''
    news20 = news_20_reader.toNumpy()
    sizes = numpy.arange(100, 210, 10)
    for size in sizes:
        self.run_ratio(news20, size)
        # NOTE(review): the blank separator line is assumed to be emitted
        # once per size (inside the loop) -- confirm.
        # print("") writes a bare newline under both Python 2 and 3.
        print("")
def _test_maxent_based(self):
    '''
    Run self.run_test_with on News20 with LogisticRegression.

    The dataset is whatever news_20_reader.toNumpy() returns; the feature
    setting passed along is the literal 'all'.

    NOTE(review): the leading underscore presumably keeps test discovery
    from collecting this method -- confirm.
    '''
    # Load first so a reader failure surfaces before the label is printed.
    news20 = news_20_reader.toNumpy()
    print("News20 with Maxent")
    self.run_test_with(news20, LogisticRegression, 'all')
def _test_svm_based3(self):
    '''
    Run self.run_test_with on News20 with LinearSVC.

    The dataset is whatever news_20_reader.toNumpy() returns; the feature
    setting passed along is the literal 'all'.

    NOTE(review): the leading underscore presumably keeps test discovery
    from collecting this method -- confirm.
    '''
    # Load first so a reader failure surfaces before the label is printed.
    news20 = news_20_reader.toNumpy()
    print("News20 with SVM")
    self.run_test_with(news20, LinearSVC, 'all')
def _test_maxent_based(self):
    '''
    Run self.run_test_with on News20, handing it the bound method
    self.compare_maxent_based and the feature setting 'all'.

    NOTE(review): the leading underscore presumably keeps test discovery
    from collecting this method -- confirm.
    '''
    # The label is printed before the dataset is loaded.
    print("Compare News20 with Maxent")
    news20 = news_20_reader.toNumpy()
    self.run_test_with(news20, self.compare_maxent_based, 'all')