def test_learner_on(self, dataset):
    """Run the default learner checks on ``dataset``.

    The data is split with ``_MakeRandomIndices2(p0=0.3)`` (stratified when
    the class variable is discrete), ``self.learner`` is trained on the
    part selected with ``1`` and checked on the part selected with ``0``.
    For every test example the classifier must return an
    ``Orange.core.Value``, an ``Orange.core.Distribution`` bound to the
    classifier's class variable, and probabilities that sum to 1 within
    ``1e-3``.  The trained classifier is stored on ``self.classifier``.
    """
    # Stratify the split only when the class variable is discrete.
    if isinstance(dataset.domain.class_var, Orange.feature.Discrete):
        make_indices = _MakeRandomIndices2(p0=0.3, stratified=True)
    else:
        make_indices = _MakeRandomIndices2(p0=0.3)
    indices = make_indices(dataset)

    learn = dataset.select(indices, 1)
    test = dataset.select(indices, 0)

    classifier = self.learner(learn)

    # The classifier must expose the very same class variable object
    # as the data it was trained on.
    self.assertTrue(hasattr(classifier, "class_var"))
    self.assertIs(classifier.class_var, dataset.domain.class_var)

    # Only checks that evaluation runs; the result is not inspected.
    _testing.test_on_data([classifier], test)

    value_type = Orange.core.Value
    dist_type = Orange.core.Distribution

    for example in test:
        # Each return mode is requested separately, then both together.
        self.assertIsInstance(
            classifier(example, Orange.core.GetValue), value_type)
        self.assertIsInstance(
            classifier(example, Orange.core.GetProbabilities), dist_type)

        value, dist = classifier(example, Orange.core.GetBoth)
        self.assertIsInstance(value, value_type)
        self.assertIsInstance(dist, dist_type)
        self.assertIs(dist.variable, classifier.class_var)

        # Continuous distributions are summed over their values().
        is_continuous = isinstance(dist, Orange.core.ContDistribution)
        dist_sum = sum(dist.values()) if is_continuous else sum(dist)

        self.assertGreater(dist_sum, 0.0)
        self.assertLess(abs(dist_sum - 1.0), 1e-3)
        # NOTE: comparing dist_sum against dist.abs
        # (abs(dist_sum - dist.abs) < 1e-3) was tried here and is left
        # disabled because that check fails.

    # Store classifier for possible use in subclasses
    self.classifier = classifier
def multi_target_test(self, learner, data):
    """Train ``learner`` on a split of ``data`` and check its predictions.

    The data is split with ``Orange.data.sample.SubsetIndices2(p0=0.3)``.
    The trained predictor must be an ``Orange.classification.Classifier``,
    pass ``self.multi_target_predictor_interface`` and, for every test
    example, return sequences of ``Orange.core.Value`` and
    ``Orange.core.Distribution`` objects whose distributions each sum to 1
    within ``1e-3``.

    Returns the trained predictor.
    """
    split = Orange.data.sample.SubsetIndices2(p0=0.3)(data)
    learn = data.select(split, 1)
    test = data.select(split, 0)

    predictor = learner(learn)
    self.assertIsInstance(predictor, Orange.classification.Classifier)
    self.multi_target_predictor_interface(predictor, learn.domain)

    from Orange.evaluation import testing as _testing

    # Only checks that evaluation runs; the result is not inspected.
    _testing.test_on_data([predictor], test)

    def assert_each_is(items, expected_type):
        # Every element of ``items`` must be an ``expected_type`` instance.
        for item in items:
            self.assertIsInstance(item, expected_type)

    value_type = Orange.core.Value
    dist_type = Orange.core.Distribution

    for example in test:
        # Each return mode is requested separately, then both together.
        values = predictor(example, Orange.core.GetValue)
        assert_each_is(values, value_type)

        distributions = predictor(example, Orange.core.GetProbabilities)
        assert_each_is(distributions, dist_type)

        values, distributions = predictor(example, Orange.core.GetBoth)
        assert_each_is(values, value_type)
        assert_each_is(distributions, dist_type)

        for target_dist in distributions:
            # Continuous distributions are summed over their values().
            is_continuous = isinstance(
                target_dist, Orange.core.ContDistribution)
            if is_continuous:
                total = sum(target_dist.values())
            else:
                total = sum(target_dist)

            self.assertGreater(total, 0.0)
            self.assertLess(abs(total - 1.0), 1e-3)

    return predictor
def benchmark_data_subsets_hellinger(data, rand, conv):
    """Benchmark classifiers on random subsets, keyed by Hellinger distance.

    For every entry ``sp`` of ``HDISTANCES`` the subset size is obtained
    from ``conv.subset_size(sp)``.  ``SAMPLE_SIZE`` random subsets of that
    size are then drawn; for each one the function

    * computes the distance between the combined distributions of the
      discretized subset and of the full discretized data, and
    * trains every learner in ``LEARNERS`` on the subset, adds the
      "vote", "wcs" and "current_best" ensemble classifiers, and scores
      all of them with classification accuracy on the full ``data``.

    :param data: data table to sample from; also used as the test set.
    :param rand: not used by this function (sampling goes through the
        module-level ``random``); kept for interface compatibility.
    :param conv: object providing ``subset_size(sp)``.
    :return: ``(levels, results)`` where ``levels`` is ``1`` and
        ``results`` maps the mean subset distance of each ``HDISTANCES``
        entry to the output of ``evaluate.dict_recur_mean_err`` over the
        per-sample ``{name: {'CA': score}}`` dictionaries.
    """
    # Levels: 1. Learn subset distance (2. Samples, 3. Learner)
    levels = 1
    results = {}

    n_examples = len(data)
    n_attributes = len(data.domain)
    # Joint distributions are combined over at most 5 attributes.
    level = min(5, n_attributes)

    # Number of attribute combinations of size 1..level (binomial sum).
    n_combinations = sum(
        factorial(n_attributes) / factorial(k) / factorial(n_attributes - k)
        for k in xrange(1, level + 1))

    discretized = Orange.data.discretization.DiscretizeTable(
        data,
        method=Orange.feature.discretization.EqualWidth(n=len(data) / 10))
    n_columns = len(discretized.domain)
    ddata = np.array([tuple(float(row[i]) for i in xrange(n_columns))
                      for row in discretized])

    # Square roots of the full-data distribution values, computed once
    # and reused for every sampled subset.
    ddata_distr = JointDistributions(ddata)
    dd_sq_vals = combined_distribution(ddata_distr, level, ddata)
    dd_sq_vals /= len(ddata)
    dd_sq_vals = np.sqrt(dd_sq_vals)

    # Mean distance of the previously finished subset size; None until
    # the first subset size has been evaluated.
    mean_dist = None

    def get_current_CA(classifier):
        # CA the classifier reached at the previous subset size, or 1.0
        # when there is no previous result yet.  The check is against
        # None so that a mean distance of exactly 0.0 is still looked up.
        if mean_dist is not None:
            return results[mean_dist][classifier.name]['CA'][0]
        return 1.0

    for sp in HDISTANCES:
        sn = conv.subset_size(sp)
        sample_results = {}
        dists = []
        for i in xrange(SAMPLE_SIZE):
            # The same row indices select the raw and discretized subsets.
            ind = random.sample(xrange(n_examples), int(sn))
            sn_data = data.get_items(ind)
            sn_ddata = ddata[ind]

            # Calculating Hellinger distance.
            sn_ddata_distr = JointDistributions(sn_ddata)
            sd_vals = combined_distribution(sn_ddata_distr, level, ddata)
            sd_vals /= sn
            r = np.sqrt(sd_vals) - dd_sq_vals
            dist = np.sqrt(np.sum(np.multiply(r, r)) / 2 / n_combinations)
            dists.append(dist)

            # Train the base learners and give each classifier the name
            # of the learner that produced it.
            classifiers = [learner(sn_data) for learner in LEARNERS]
            for learner, classifier in zip(LEARNERS, classifiers):
                classifier.name = learner.name

            # Each ensemble gets its own copy of the base classifier list.
            majority_vote = MajorityVoteClassifier(
                list(classifiers), name="vote")
            wcs = WeightedConfidenceSharingClassifier(
                list(classifiers), name="wcs")
            current_best = BestDecidesClassifier(
                list(classifiers), get_current_CA, name="current_best")
            classifiers.extend([majority_vote, wcs, current_best])

            CAs = Orange.evaluation.scoring.CA(
                test_on_data(classifiers, data))

            # Scores are matched to names by position in LEARNERS_NAMES.
            sample_results[i] = dict(
                (name, {'CA': CAs[idx]})
                for idx, name in enumerate(LEARNERS_NAMES))

        mean_results = evaluate.dict_recur_mean_err(sample_results.values())
        mean_dist = float(sum(dists)) / SAMPLE_SIZE
        results[mean_dist] = mean_results

    return (levels, results)