def test_construction(self):
    """Exercise construction and indexing of DomainDistributions on iris.

    Covers the default constructor and the ``skip_discrete`` /
    ``skip_continuous`` keyword flags, separately and combined. The test
    expects positions 0-3 to hold continuous distributions and position 4
    (also reachable as the class variable, ``"iris"`` and ``-1``) to hold a
    discrete one.
    """
    iris = orange.ExampleTable("iris")

    # Default construction: every variable gets a distribution.
    dists = orange.DomainDistributions(iris)
    for col in range(4):
        self.assertTrue(isinstance(dists[col], orange.ContDistribution))
        # The variable descriptor and its name must address the same
        # entry as the integer index does.
        for alias in (iris.domain[col], iris.domain[col].name):
            self.assertEqual(id(dists[col]), id(dists[alias]))
    self.assertTrue(isinstance(dists[4], orange.DiscDistribution))
    for alias in (iris.domain.classVar, "iris", -1):
        self.assertEqual(id(dists[4]), id(dists[alias]))
    materialized = list(dists)
    for col, entry in enumerate(materialized):
        self.assertEqual(id(entry), id(dists[col]))

    # Discrete variables skipped: the class slot is expected to be None.
    cont_only = orange.DomainDistributions(iris, skip_discrete=True)
    for col in range(4):
        self.assertTrue(isinstance(cont_only[col], orange.ContDistribution))
    self.assertEqual(cont_only[-1], None)

    # Continuous variables skipped: only the class slot is filled.
    disc_only = orange.DomainDistributions(iris, skip_continuous=True)
    for col in range(4):
        self.assertEqual(disc_only[col], None)
    self.assertTrue(isinstance(disc_only[-1], orange.DiscDistribution))
    self.assertEqual(list(disc_only[-1]), [50, 50, 50])

    # Both kinds skipped: all five slots are expected to be None.
    nothing = orange.DomainDistributions(
        iris, skip_continuous=True, skip_discrete=True)
    for col in range(5):
        self.assertEqual(nothing[col], None)
def test_pickle(self):
    """Round-trip DomainDistributions through pickle and re-check indexing.

    The same checks are applied to the unpickled object for the default
    constructor, for ``skip_discrete=True`` and for ``skip_continuous=True``.
    """
    import pickle

    iris = orange.ExampleTable("iris")

    # Default construction.
    original = orange.DomainDistributions(iris)
    restored = pickle.loads(pickle.dumps(original))
    for col in range(4):
        self.assertTrue(isinstance(restored[col], orange.ContDistribution))
        # Index, variable descriptor and name must address the same entry.
        for alias in (iris.domain[col], iris.domain[col].name):
            self.assertEqual(id(restored[col]), id(restored[alias]))
        # The restored distribution must compare equal to the source one.
        self.assertEqual(original[col], restored[col])
    self.assertTrue(isinstance(restored[4], orange.DiscDistribution))
    for alias in (iris.domain.classVar, "iris", -1):
        self.assertEqual(id(restored[4]), id(restored[alias]))

    # Discrete variables skipped.
    original = orange.DomainDistributions(iris, skip_discrete=True)
    restored = pickle.loads(pickle.dumps(original))
    for col in range(4):
        self.assertTrue(isinstance(restored[col], orange.ContDistribution))
    self.assertEqual(restored[-1], None)

    # Continuous variables skipped.
    original = orange.DomainDistributions(iris, skip_continuous=True)
    restored = pickle.loads(pickle.dumps(original))
    for col in range(4):
        self.assertEqual(restored[col], None)
    self.assertTrue(isinstance(restored[-1], orange.DiscDistribution))
    self.assertEqual(list(restored[-1]), [50, 50, 50])
def cforange_hierarchical_clustering_finished(postdata, input_dict, output_dict):
    """Split the clustered examples by the user's selection and build centroids.

    Parameters:
        postdata: mapping of posted form fields to lists of values; reads
            ``'widget_id'`` and ``'selected_nodes'`` (a JSON-encoded list of
            ``(item index, selected flag, cluster id)`` triples).
        input_dict: reads ``'dm'`` (the distance matrix, whose ``items`` may
            be an ``orange.ExampleTable``) and ``'linkage'``.
        output_dict: not used by this function.

    Returns:
        dict with keys ``'centroids'``, ``'selected_examples'`` and
        ``'unselected_examples'``; all three values are ``None`` when
        ``matrix.items`` is not an ``orange.ExampleTable``.

    Raises:
        Exception: when ``'selected_nodes'`` is missing or cannot be decoded.
    """
    import json
    import orange

    matrix = input_dict['dm']
    linkage = int(input_dict['linkage'])
    # Not used below; the lookup is kept so that a request without
    # 'widget_id' still fails at this point, as before.
    widget_pk = postdata['widget_id'][0]

    try:
        selected_nodes = json.loads(postdata.get('selected_nodes')[0])
    except Exception:
        # Was a bare ``except:``, which also trapped KeyboardInterrupt and
        # SystemExit. Ordinary failures still map to the same message.
        raise Exception('Please select a threshold for determining clusters.')

    if isinstance(matrix.items, orange.ExampleTable):
        # NOTE(review): the result is never read; the call is kept in case
        # the helper validates its input or has side effects -- confirm.
        root = Clustering.hierarchical_clustering(linkage, matrix)

        cluster_ids = set(cluster for _, _, cluster in selected_nodes)
        selected_clusters = set(
            cluster for _, selected, cluster in selected_nodes if selected)

        # Sorted so that the order of the enum values does not depend on
        # set iteration order.
        clustVar = orange.EnumVariable(
            str('Cluster'),
            values=["Cluster %d" % i for i in sorted(cluster_ids)] + ["Other"])

        origDomain = matrix.items.domain
        domain = orange.Domain(origDomain.attributes, origDomain.classVar)
        domain.addmeta(orange.newmetaid(), clustVar)
        domain.addmetas(origDomain.getmetas())

        # Build tables of selected / unselected examples, labelling each
        # example with its cluster (or "Other" when not selected).
        selected_table = orange.ExampleTable(domain)
        unselected_table = orange.ExampleTable(domain)
        for item_index, selected, cluster in selected_nodes:
            new_ex = orange.Example(domain, matrix.items[item_index])
            if selected:
                new_ex[clustVar] = clustVar("Cluster %d" % cluster)
                selected_table.append(new_ex)
            else:
                new_ex[clustVar] = clustVar("Other")
                unselected_table.append(new_ex)

        # Build table of centroids: one example per selected cluster.
        centroids = orange.ExampleTable(selected_table.domain)
        if len(selected_table) > 0:
            for cluster in sorted(selected_clusters):
                label = "Cluster %d" % cluster
                clusterEx = orange.ExampleTable(
                    [ex for ex in selected_table if ex[clustVar] == label])
                # Attribute statistics: the average where a basic statistic
                # exists, else the modus of the distribution, else unknown.
                contstat = orange.DomainBasicAttrStat(clusterEx)
                discstat = orange.DomainDistributions(clusterEx, 0, 0, 1)
                ex = [
                    cs.avg if cs else (ds.modus() if ds else "?")
                    for cs, ds in zip(contstat, discstat)
                ]
                example = orange.Example(centroids.domain, ex)
                example[clustVar] = clustVar(label)
                centroids.append(example)
    else:
        # Attribute distance
        centroids, selected_table, unselected_table = None, None, None

    return {
        'centroids': centroids,
        'selected_examples': selected_table,
        'unselected_examples': unselected_table
    }
def test_equalFreq(self):
    """Check equal-frequency discretization of iris into five intervals.

    The first attribute is overwritten with 0..149, so the test expects
    30 examples per interval and cut-off points halfway between each
    block of 30 consecutive integers.
    """
    expected_points = [29.5, 59.5, 89.5, 119.5]

    table = orange.ExampleTable("iris")
    for row in range(150):
        table[row, 0] = row

    # Discretize the whole domain and convert the data accordingly.
    discretized_domain = orange.DomainDiscretization(
        orange.EqualFreqDiscretization(n_intervals=5), table)
    discretized_table = orange.ExampleTable(discretized_domain, table)
    dist = orange.DomainDistributions(discretized_table)
    self.assertEqual(dist[0], [30, 30, 30, 30, 30])
    self.assertEqual(
        discretized_domain[0].get_value_from.transformer.points,
        expected_points)

    # Discretizing the single variable directly must give the same points.
    single = orange.EqualFreqDiscretization(n_intervals=5)(
        table.domain[0], table)
    self.assertEqual(
        single.get_value_from.transformer.points, expected_points)

    # The cut-off points must survive a pickle round trip.
    unpickled = pickle.loads(pickle.dumps(discretized_domain))
    self.assertEqual(
        unpickled[0].get_value_from.transformer.points, expected_points)
def commit_data(self):
    """Send the selected items, the remaining items and cluster centroids.

    Reads ``self.matrix.items``, ``self.selected_clusters`` and
    ``self.root_cluster.mapping``. Always stores the selected items in
    ``self.selection``. When the items are an ``ExampleTable`` it also sets
    ``self.selectedExamples``, ``self.unselectedExamples`` and
    ``self.centroids`` and sends them on the "Selected Data", "Other Data"
    and "Centroids" channels. Otherwise, when ``self.matrixSource`` is
    "Data Distance", it sends the selection grouped by ``strain`` on
    "Structured Data Files".

    Returns ``None``. Does nothing when the matrix has no items, and sends
    ``None`` on the three table channels when nothing is selected.
    """
    items = getattr(self.matrix, "items", None)
    if not items:
        return  # nothing to commit

    self.selectionChanged = False
    self.selectedExamples = None

    clusters = sorted(self.selected_clusters, key=lambda c: c.first)
    maps = [
        list(self.root_cluster.mapping[c.first:c.last]) for c in clusters
    ]

    # Flatten the per-cluster index lists. This replaces
    # ``reduce(operator.add, maps, [])``, which is quadratic and relies on
    # ``reduce`` being a builtin.
    selected_indices = [k for cluster_map in maps for k in cluster_map]
    unselected_indices = sorted(
        set(self.root_cluster.mapping) - set(selected_indices))

    self.selection = selected = [items[k] for k in selected_indices]
    unselected = [items[k] for k in unselected_indices]

    if not selected:
        self.send("Selected Data", None)
        self.send("Other Data", None)
        self.send("Centroids", None)
        return

    if isinstance(items, ExampleTable):
        # c[k] is the position in ``maps`` of the cluster that the k-th
        # selected item came from.
        c = [i for i, cluster_map in enumerate(maps) for _ in cluster_map]
        clustVar = None
        if self.AppendClusters:
            clustVar = orange.EnumVariable(
                str(self.ClassifyName),
                values=["Cluster " + str(i) for i in range(len(maps))]
                + ["Other"])
            origDomain = items.domain
            if self.addIdAs == 0:
                # Cluster becomes the class; the old class moves to a meta.
                domain = orange.Domain(origDomain.attributes, clustVar)
                if origDomain.classVar:
                    domain.addmeta(orange.newmetaid(), origDomain.classVar)
            elif self.addIdAs == 1:
                # Cluster is appended as an ordinary attribute.
                domain = orange.Domain(origDomain.attributes + [clustVar],
                                       origDomain.classVar)
            else:
                # Cluster is added as a meta attribute.
                domain = orange.Domain(origDomain.attributes,
                                       origDomain.classVar)
                domain.addmeta(orange.newmetaid(), clustVar)

            domain.addmetas(origDomain.getmetas())

            table1 = table2 = None
            if selected:
                table1 = orange.ExampleTable(domain, selected)
                for i, cluster in enumerate(c):
                    table1[i][clustVar] = clustVar("Cluster " + str(cluster))

            if unselected:
                table2 = orange.ExampleTable(domain, unselected)
                for ex in table2:
                    ex[clustVar] = clustVar("Other")

            self.selectedExamples = table1
            self.unselectedExamples = table2
        else:
            self.selectedExamples = (
                orange.ExampleTable(selected) if selected else None)
            self.unselectedExamples = (
                orange.ExampleTable(unselected) if unselected else None)

        self.send("Selected Data", self.selectedExamples)
        self.send("Other Data", self.unselectedExamples)

        self.centroids = None
        if self.selectedExamples:
            self.centroids = orange.ExampleTable(
                self.selectedExamples.domain)
            for i in range(len(maps)):
                clusterEx = orange.ExampleTable([
                    ex for cluster, ex in zip(c, self.selectedExamples)
                    if cluster == i
                ])
                # The average where a basic statistic exists, else the
                # modus of the distribution, else unknown.
                contstat = orange.DomainBasicAttrStat(clusterEx)
                discstat = orange.DomainDistributions(clusterEx, 0, 0, 1)
                ex = [
                    cs.avg if cs else (ds.modus() if ds else "?")
                    for cs, ds in zip(contstat, discstat)
                ]
                example = orange.Example(self.centroids.domain, ex)
                if clustVar is not None:
                    example[clustVar] = clustVar(i)
                # Append the labelled example. The raw value list ``ex``
                # was appended before, which discarded the cluster label
                # set just above.
                self.centroids.append(example)

        self.send("Centroids", self.centroids)

    elif self.matrixSource == "Data Distance":
        names = list(set([d.strain for d in self.selection]))
        data = [
            (name, [d for d in self.selection if d.strain == name])
            for name in names
        ]
        self.send("Structured Data Files", data)
# Description: Show frequencies for values of discrete attributes, count number of instances where attribute is not defined
# Category:    description
# Uses:        adult_sample.tab
# Referenced:  basic_exploration.htm

import orange

data = orange.ExampleTable("../datasets/adult_sample")
dist = orange.DomainDistributions(data)

# Each print below takes a single parenthesised argument, so the script
# produces the same output under the Python 2 print statement and the
# Python 3 print function.

# Continuous attributes: average and error reported by the distribution.
print("Average values and mean square errors:")
for i, a in enumerate(data.domain.attributes):
    if a.varType == orange.VarTypes.Continuous:
        print("%s, mean=%5.2f +- %5.2f" %
              (a.name, dist[i].average(), dist[i].error()))

# Discrete attributes: frequency of every value.
print("\nFrequencies for values of discrete attributes:")
for i, a in enumerate(data.domain.attributes):
    if a.varType == orange.VarTypes.Discrete:
        print("%s:" % a.name)
        for j, value in enumerate(a.values):
            print(" %s: %d" % (value, int(dist[i][j])))

# All attributes: number of instances with an undefined value.
print("\nNumber of instances where attribute is not defined:")
for i, a in enumerate(data.domain.attributes):
    print(" %2d %s" % (dist[i].unknowns, a.name))
def test_continuizer_zoo(self):
    """Exercise DomainContinuizer on the zoo data set.

    Walks through the class treatments and multinomial treatments in turn
    and, for each, checks the resulting domain and/or the first converted
    example. The multi-valued ``legs`` attribute (values 0, 2, 4, 5, 6, 8
    according to the checks below) is the main probe.
    """
    d = orange.ExampleTable("zoo")
    dd = orange.DomainDistributions(d)
    # NOTE(review): the loop body has no assertions; presumably a smoke
    # test that iteration can be abandoned early -- confirm.
    for i, e in enumerate(dd):
        if i == 2:
            break

    dc = orange.DomainContinuizer()
    dc.multinomial_treatment = dc.MultinomialTreatment.LowestIsBase

    # Class treatment: ErrorIfCannotHandle is expected to raise.
    dc.class_treatment = dc.ClassTreatment.ErrorIfCannotHandle
    self.assertRaises(ValueError, dc, d.domain)

    # Class treatment: LeaveUnlessTarget keeps the discrete class.
    dc.class_treatment = dc.ClassTreatment.LeaveUnlessTarget
    cdomain = dc(d.domain)
    dd = orange.ExampleTable(cdomain, d)
    self.assertEqual(list(map(int, d[0]))[:3], list(map(int, dd[0]))[:3])
    for legs in [2, 4, 5, 6, 8]:
        self.assertEqual(int(dd[0, "legs=%i" % legs]), legs == 4)
    self.assertFalse("legs=0" in cdomain)
    self.assertEqual(cdomain.classVar.name, "type")
    self.assertFalse(cdomain.has_discrete_attributes())
    self.assertFalse(cdomain.has_discrete_attributes(False))
    self.assertTrue(cdomain.has_discrete_attributes(True))

    # Class treatment: AsOrdinal, continuizer called with the domain.
    dc.class_treatment = dc.ClassTreatment.AsOrdinal
    cdomain = dc(d.domain)
    dd = orange.ExampleTable(cdomain, d)
    self.assertEqual(list(map(int, d[0]))[:3], list(map(int, dd[0]))[:3])
    for legs in [2, 4, 5, 6, 8]:
        self.assertEqual(int(dd[0, "legs=%i" % legs]), legs == 4)
    self.assertFalse("legs=0" in cdomain)
    self.assertEqual(cdomain.classVar.name, "C_type")
    self.assertEqual(dd[0, -1], d.domain.class_var.values.index("mammal"))
    self.assertFalse(cdomain.has_discrete_attributes())

    # Class treatment: AsOrdinal, continuizer called with the data.
    dc.class_treatment = dc.ClassTreatment.AsOrdinal
    cdomain = dc(d)
    dd = orange.ExampleTable(cdomain, d)
    self.assertEqual(list(map(int, d[0]))[:3], list(map(int, dd[0]))[:3])
    for legs in [2, 4, 5, 6, 8]:
        self.assertEqual(int(dd[0, "legs=%i" % legs]), legs == 4)
    self.assertFalse("legs=0" in cdomain)
    self.assertEqual(cdomain.classVar.name, "C_type")
    self.assertEqual(dd[0, -1], d.domain.class_var.values.index("mammal"))
    self.assertFalse(cdomain.has_discrete_attributes())

    # FrequentIsBase: the test expects an error for a bare domain and
    # success when the data is passed.
    dc.multinomial_treatment = dc.MultinomialTreatment.FrequentIsBase
    self.assertRaises(ValueError, dc, d.domain)
    cdomain = dc(d)
    dd = orange.ExampleTable(cdomain, d)
    self.assertEqual(dd[0, 0], 1)
    self.assertEqual(dd[0, 1], 0)
    self.assertEqual(dd[0, 2], 1)

    # FrequentIsBase with zero_based switched off: 0 becomes -1.
    dc.multinomial_treatment = dc.MultinomialTreatment.FrequentIsBase
    dc.zero_based = False
    self.assertRaises(ValueError, dc, d.domain)
    cdomain = dc(d)
    dd = orange.ExampleTable(cdomain, d)
    self.assertEqual(dd[0, 0], 1)
    self.assertEqual(dd[0, 1], -1)
    self.assertEqual(dd[0, 2], 1)
    dc.zero_based = True

    # NValues: one indicator per value, including the lowest one.
    dc.multinomial_treatment = dc.MultinomialTreatment.NValues
    cdomain = dc(d.domain)
    dd = orange.ExampleTable(cdomain, d)
    for legs in [0, 2, 4, 5, 6, 8]:
        self.assertEqual(int(dd[0, "legs=%i" % legs]), legs == 4)

    # Ignore: no indicator for any value of ``legs`` may be present.
    dc.multinomial_treatment = dc.MultinomialTreatment.Ignore
    cdomain = dc(d.domain)
    for legs in [0, 2, 4, 5, 6, 8]:
        # The format string was previously never interpolated, so the
        # literal "legs=%i" was checked and the assertion was vacuous.
        self.assertFalse("legs=%i" % legs in cdomain)

    # IgnoreAllDiscrete: only the class variable is expected to remain.
    dc.multinomial_treatment = dc.MultinomialTreatment.IgnoreAllDiscrete
    cdomain = dc(d.domain)
    self.assertEqual(cdomain.variables, [cdomain.class_var])

    # ReportError: expected to raise.
    dc.multinomial_treatment = dc.MultinomialTreatment.ReportError
    self.assertRaises(ValueError, dc, d.domain)

    # AsOrdinal: ``C_legs`` equals the integer form of ``legs``.
    dc.multinomial_treatment = dc.MultinomialTreatment.AsOrdinal
    cdomain = dc(d.domain)
    dd = orange.ExampleTable(cdomain, d)
    for e, ec in zip(d[:10], dd):
        self.assertEqual(int(e["legs"]), ec["C_legs"])

    # AsNormalizedOrdinal: the same value divided by 5.
    dc.multinomial_treatment = dc.MultinomialTreatment.AsNormalizedOrdinal
    cdomain = dc(d.domain)
    dd = orange.ExampleTable(cdomain, d)
    for e, ec in zip(d[:10], dd):
        self.assertEqual(int(e["legs"]) / 5, ec["C_legs"])