# Description: Demonstrates the use of discretization
# Category: discretization
# Classes: entropyDiscretization, DiscretizedLearner
# Uses: iris.tab

import orange
import orngDisc
import orngTest
import orngStat

data = orange.ExampleTable("iris.tab")

# Shows a direct call to entropyDiscretization; the resulting table is not
# used by the evaluation below.
disc_data = orngDisc.entropyDiscretization(data)

# Two learners to compare: a BayesLearner passed through DiscretizedLearner,
# and a plain BayesLearner.
disc_learner = orngDisc.DiscretizedLearner(orange.BayesLearner(), name="disc-bayes")
learner = orange.BayesLearner(name="bayes")
learners = [learner, disc_learner]

results = orngTest.crossValidation(learners, data)

# Score the results once instead of re-scoring on every loop iteration;
# the scores are indexed in the same order as `learners`.
accuracies = orngStat.CA(results)

# The whole "%"-formatted string is the single print argument, so the output
# is the same whether print is a statement or a function.
print("Classification Accuracy:")
for lrn, accuracy in zip(learners, accuracies):
    print("%15s: %5.3f" % (lrn.name, accuracy))
# Description: Demonstrates the use of discretization
# Category: discretization
# Classes: entropyDiscretization, DiscretizedLearner
# Uses: iris.tab

import orange
import orngDisc
import orngTest
import orngStat

data = orange.ExampleTable("iris.tab")

# Shows a direct call to entropyDiscretization; the resulting table is not
# used by the evaluation below.
disc_data = orngDisc.entropyDiscretization(data)

# Two learners to compare: a BayesLearner passed through DiscretizedLearner,
# and a plain BayesLearner.
disc_learner = orngDisc.DiscretizedLearner(orange.BayesLearner(), name="disc-bayes")
learner = orange.BayesLearner(name="bayes")
learners = [learner, disc_learner]

results = orngTest.crossValidation(learners, data)

# Score the results once instead of re-scoring on every loop iteration;
# the scores are indexed in the same order as `learners`.
accuracies = orngStat.CA(results)

# The whole "%"-formatted string is the single print argument, so the output
# is the same whether print is a statement or a function.
print("Classification Accuracy:")
for lrn, accuracy in zip(learners, accuracies):
    print("%15s: %5.3f" % (lrn.name, accuracy))
# Description: Entropy based discretization compared to discretization with equal-frequency
# of instances in intervals
# Category: preprocessing
# Uses: wdbc.tab
# Classes: Preprocessor_discretize, EntropyDiscretization
# Referenced: o_categorization.htm
#
# NOTE(review): the code below only applies entropy-based discretization; no
# equal-frequency discretization is performed in this script.

import orange
import orngDisc


def show_values(data, heading):
    """Print one line per attribute of `data`: "name/value-count: v1, v2, ...".

    `heading` is accepted but is not printed or otherwise used.
    """
    for a in data.domain.attributes:
        # str.join also copes with an attribute that has no values, where the
        # earlier reduce()-based concatenation would raise TypeError.
        print("%s/%d: %s" % (a.name, len(a.values), ', '.join(a.values)))


data = orange.ExampleTable("wdbc.tab")
print('%d features in original data set, discretized:' % len(data.domain.attributes))

data_ent = orange.Preprocessor_discretize(data, method=orange.EntropyDiscretization())
show_values(data_ent, "Entropy based discretization")

# List the attributes that were left with exactly one value.
print('\nFeatures with sole value after discretization:')
for a in data_ent.domain.attributes:
    if len(a.values) == 1:
        print(a.name)

# Same data through orngDisc.entropyDiscretization; the message printed below
# reports the number of attributes in its result.
data_ent2 = orngDisc.entropyDiscretization(data)
print('%d features after removing features discretized to a constant value' % len(data_ent2.domain.attributes))
# Description: Entropy based discretization compared to discretization with equal-frequency
# of instances in intervals
# Category: preprocessing
# Uses: wdbc.tab
# Classes: Preprocessor_discretize, EntropyDiscretization
# Referenced: o_categorization.htm
#
# NOTE(review): the code below only applies entropy-based discretization; no
# equal-frequency discretization is performed in this script.

import orange
import orngDisc


def show_values(data, heading):
    """Print one line per attribute of `data`: "name/value-count: v1, v2, ...".

    `heading` is accepted but is not printed or otherwise used.
    """
    for a in data.domain.attributes:
        # str.join also copes with an attribute that has no values, where the
        # earlier reduce()-based concatenation would raise TypeError.
        print("%s/%d: %s" % (a.name, len(a.values), ', '.join(a.values)))


data = orange.ExampleTable("../datasets/wdbc")
print('%d features in original data set, discretized:' % len(data.domain.attributes))

data_ent = orange.Preprocessor_discretize(data, method=orange.EntropyDiscretization())
show_values(data_ent, "Entropy based discretization")

# List the attributes that were left with exactly one value.
print('\nFeatures with sole value after discretization:')
for a in data_ent.domain.attributes:
    if len(a.values) == 1:
        print(a.name)

# Same data through orngDisc.entropyDiscretization; the message printed below
# reports the number of attributes in its result. The module was imported
# above, so it is not reloaded here.
data_ent2 = orngDisc.entropyDiscretization(data)
print('%d features after removing features discretized to a constant value' % len(data_ent2.domain.attributes))