def test_RandomForestRegression(self):
    """Random forest regression should complete 10-fold CV on housing."""
    cv = CrossValidation(k=10)
    res = cv(self.housing, [RandomForestRegressionLearner()])
    _ = RMSE(res)
def test_KNN(self):
    """KNN accuracy on iris should be good, but not suspiciously perfect."""
    res = CrossValidation(self.iris, [self.learn], k=10)
    acc = CA(res)
    self.assertGreater(acc, 0.8)
    self.assertLess(acc, 0.99)
def test_folds(self):
    """5-fold CV on the random table must produce the expected fold layout."""
    results = CrossValidation(
        self.random_table, [NaiveBayesLearner()], k=5)
    self.check_folds(results, 5, self.nrows)
def test_classification(self):
    """A stacked tree+KNN fitter should classify iris well under 3-fold CV."""
    stack = StackedFitter([TreeLearner(), KNNLearner()])
    res = CrossValidation(self.iris, [stack], k=3)
    self.assertGreater(CA(res), 0.9)
def test_NaiveBayes(self):
    """Naive Bayes accuracy should land in the (0.7, 0.9) band."""
    res = CrossValidation(self.table, [self.learner], k=10)
    acc = CA(res)
    self.assertGreater(acc, 0.7)
    self.assertLess(acc, 0.9)
# Rebuild the domain so the last column acts as the class variable.
attrs = list(data_tab.domain[:-1])
target = data_tab.domain[len(data_tab.domain) - 1]
iris_domain = Domain(attrs, target)
data_tab = Table.from_table(domain=iris_domain, source=data_tab)
print("DOMAIN: %s \nVARIABLES: %s \nATTRIBUTES: %s \nCLASS_VAR: %s"
      % (data_tab.domain, data_tab.domain.variables,
         data_tab.domain.attributes, data_tab.domain.class_var))
print(len(data_tab))

# Single hidden layer of 10 units; variable name kept from earlier tree code.
tree_learner = NNClassificationLearner(hidden_layer_sizes=(10, ),
                                       max_iter=1750)
# Observed: accuracy of cross validation 0.953, AUC 0.991.
eval_results = CrossValidation(data_tab, [tree_learner], k=10)
print("Accuracy of cross validation: {:.3f}".format(
    scoring.CA(eval_results)[0]))
print("AUC: {:.3f}".format(scoring.AUC(eval_results)[0]))

# ---------------------------- TASK 3 ----------------------------
tree_learner2 = NNClassificationLearner(hidden_layer_sizes=(10, ),
                                        max_iter=2000,
                                        verbose=True,
                                        solver="sgd",
                                        learning_rate_init=3,
                                        learning_rate="invscaling",
                                        power_t=0.3)
# Needs ~1667 iterations; same accuracy and AUC as before.
# With learning_rate_init = 0.01: 445 iterations, better scores
# (accuracy 0.960, AUC 0.997).
# With learning_rate_init = 1: 81 iterations, accuracy 0.960, AUC 0.993.
def test_adaboost_reg(self):
    """AdaBoost regression should run 3-fold CV on housing without error."""
    res = CrossValidation(
        self.housing, [SklAdaBoostRegressionLearner()], k=3)
    _ = RMSE(res)
def test_KNN_mahalanobis(self):
    """KNN with the Mahalanobis metric should stay accurate on iris."""
    cv = CrossValidation(k=3)
    res = cv(self.iris, [KNNLearner(metric="mahalanobis")])
    self.assertGreater(CA(res), 0.8)
# Write the dataframe out, then read it back to verify the round trip.
with open(exportFilePath, "w") as output:
    datae1.to_csv(output, header=True, sep=",")
path1 = ("C:\\Users\\acer\\Desktop\\friends\\export.csv")
datae = p.read_csv(path1)
''' now fro b part'''
from Orange.classification import SklTreeLearner

td = Table.from_file("C:\\Users\\acer\\Desktop\\friends\\export.csv")
# NOTE(review): variables[1:] still contains variables[7], so the class
# column appears to also be listed among the features — confirm intended.
feature_vars = list(td.domain.variables[1:])
class_label_var = td.domain.variables[7]
print(class_label_var)
md = Domain(feature_vars, class_label_var)
td = Table.from_table(domain=md, source=td)

# Roughly 80/20 train/test split taken by row position.
n1 = td.approx_len()
print(n1 * 80 / 100)
train_data_set = td[:1360]
test_data_set = td[1360:]

tree_learner = SklTreeLearner()
decision_tree = tree_learner(train_data_set)
results = CrossValidation(td, [tree_learner], k=10)
print(decision_tree(test_data_set))
print("Accuracy", scoring.CA(results)[0])
print("AUC", scoring.AUC(results)[0])
def test_LinearSVM(self):
    """Linear SVM accuracy should fall in the (0.8, 0.9) band on this data."""
    res = CrossValidation(self.data, [LinearSVMLearner()], k=2)
    acc = CA(res)[0]
    self.assertGreater(acc, 0.8)
    self.assertLess(acc, 0.9)
def test_KNN(self):
    """Default KNN should score well, but not perfectly, on iris."""
    res = CrossValidation(k=3)(self.iris, [KNNLearner()])
    acc = CA(res)
    self.assertGreater(acc, 0.8)
    self.assertLess(acc, 0.99)
return grad d = Orange.data.Table('housing') d.X = np.hstack((d.X, np.ones((d.X.shape[0], 1)))) d.shuffle() # m = LinearRegressionLearner(lambda_=1.0) # print(m(d)(d)) # # gradient check # m = LinearRegressionLearner(lambda_=1.0) # theta = np.random.randn(d.X.shape[1]) # # ga = m.cost_grad(theta, d.X, d.Y.ravel())[1] # gm = numerical_grad(lambda t: m.cost_grad(t, d.X, d.Y.ravel())[0], theta) # # print(np.sum((ga - gm)**2)) for lambda_ in (0.01, 0.03, 0.1, 0.3, 1, 3): m = LinearRegressionLearner(lambda_=lambda_) scores = [] res = CrossValidation(d, [m], 3, False) for tr_ind, te_ind in res.indices: s = np.mean((m(d[tr_ind])(d[te_ind]) - d[te_ind].Y.ravel())**2) scores.append(s) print('{:5.2f} {}'.format(lambda_, np.mean(scores))) m = LinearRegressionLearner(lambda_=0) print('test data', np.mean((m(d)(d) - d.Y.ravel())**2)) print('majority', np.mean((np.mean(d.Y.ravel()) - d.Y.ravel())**2))
def test_KNN_regression(self):
    """Mahalanobis-metric KNN should beat the default metric on housing."""
    learners = [KNNRegressionLearner(),
                KNNRegressionLearner(metric="mahalanobis")]
    res = CrossValidation(self.housing, learners, k=3)
    errors = MSE(res)
    self.assertLess(errors[1], errors[0])
def test_SGDClassification(self):
    """SGD classification should reach decent AUC on iris with 3-fold CV."""
    res = CrossValidation(self.iris, [SGDClassificationLearner()], k=3)
    self.assertGreater(AUC(res)[0], 0.8)
def test_njobs(self):
    """Parallel CV (n_jobs=3) must yield the same fold layout as serial."""
    # Lower the parallelism threshold so tiny test data still goes parallel.
    with patch('Orange.evaluation.testing.CrossValidation._MIN_NJOBS_X_SIZE',
               1):
        res = CrossValidation(self.random_table, [NaiveBayesLearner()],
                              k=5, n_jobs=3)
        self.check_folds(res, 5, self.nrows)
def test_LogisticRegression(self):
    """Logistic regression should classify voting well, but not perfectly."""
    res = CrossValidation(self.voting, [LogisticRegressionLearner()], k=2)
    acc = CA(res)
    self.assertGreater(acc, 0.8)
    self.assertLess(acc, 1.0)
def __call__(self, data):
    """Fit and return a majority model on *data*.

    NOTE(review): the nested CrossValidation result is intentionally
    discarded — presumably this just exercises a CV run inside a learner
    call; confirm against the surrounding test.
    """
    inner = MajorityLearner()
    CrossValidation(data, [inner], k=2)
    return inner(data)
def test_multinomial(self):
    """LR is an Skl learner and gets mid-range AUC on the titanic data."""
    table = Table("titanic")
    lr = LogisticRegressionLearner()
    assert isinstance(lr, Orange.classification.SklLearner)
    res = CrossValidation(table, [lr], k=2)
    auc = Orange.evaluation.AUC(res)[0]
    self.assertTrue(0.7 < auc < 0.9)
def test_adaboost(self):
    """AdaBoost accuracy on iris should sit in the (0.9, 0.99) band."""
    res = CrossValidation(self.iris, [SklAdaBoostLearner()], k=3)
    acc = CA(res)
    self.assertGreater(acc, 0.9)
    self.assertLess(acc, 0.99)
def test_RandomForest(self):
    """Random forest accuracy on iris should sit in the (0.9, 0.99) band."""
    res = CrossValidation(self.iris, [RandomForestLearner()], k=10)
    acc = CA(res)
    self.assertGreater(acc, 0.9)
    self.assertLess(acc, 0.99)
def test_allnan_cv(self):
    """Regression test for GH 2740: CV must not fail on the voting data."""
    res = CrossValidation(Table('voting'), [self.learner])
    self.assertFalse(any(res.failed))
def test_RandomForestRegression(self):
    """Random forest regression should complete 10-fold CV on house data."""
    rf = RandomForestRegressionLearner()
    res = CrossValidation(self.house, [rf], k=10)
    _ = RMSE(res)
def test_NuSVM(self):
    """Nu-SVM with a very small nu should still reach high accuracy."""
    res = CrossValidation(k=2)(self.data, [NuSVMLearner(nu=0.01)])
    self.assertGreater(CA(res)[0], 0.9)
def test_allnan_cv(self):
    """Regression test for GH 2740: unstratified CV on lenses must succeed."""
    data = Table(test_filename('datasets/lenses.tab'))
    results = CrossValidation(stratified=False)(data, [self.learner])
    self.assertFalse(any(results.failed))
def test_n_jobs_fitting(self):
    """Parallel fitting (n_jobs=5) should work with a fitter-style learner."""
    # Lower the parallelism threshold so tiny test data still goes parallel.
    target = 'Orange.evaluation.testing.CrossValidation._MIN_NJOBS_X_SIZE'
    with patch(target, 1):
        CrossValidation(self.heart_disease, [DummyFitter()], k=5, n_jobs=5)
def test_tree(self):
    """Sklearn tree AUC on iris should be good, but below perfect."""
    res = CrossValidation(k=2)(self.iris, [SklTreeLearner()])
    auc = AUC(res)[0]
    self.assertGreater(auc, 0.8)
    self.assertLess(auc, 1.)
def test_continuous(self):
    """Linear regression RMSE on housing should stay below 5 (serial run)."""
    results = CrossValidation(self.housing, [LinearRegressionLearner()],
                              k=3, n_jobs=1)
    self.assertLess(RMSE(results), 5)
def test_multiclass_auc_multi_learners(self):
    """LR should beat chance on iris; majority AUC should hover near 0.5."""
    learners = [LogisticRegressionLearner(), MajorityLearner()]
    res = CrossValidation(k=10)(self.iris, learners)
    aucs = AUC(res)
    self.assertGreater(aucs[0], 0.6)
    self.assertLess(aucs[1], 0.6)
    self.assertGreater(aucs[1], 0.4)
def test_too_many_folds(self):
    """Requesting more folds than feasible should emit at least one warning."""
    warns = []
    res = CrossValidation(self.iris, [MajorityLearner()],
                          k=len(self.iris) / 2, warnings=warns)
    self.assertGreater(len(warns), 0)
def test_SoftmaxRegression(self):
    """Softmax regression should classify iris well under 3-fold CV."""
    res = CrossValidation(self.iris, [SoftmaxRegressionLearner()], k=3)
    acc = CA(res)
    self.assertGreater(acc, 0.9)
    self.assertLess(acc, 1.0)