Ejemplo n.º 1
0
def main():
    # Load noun annotations, split them roughly 50/50 into train/test,
    # build pairwise features, then compare a majority-class baseline
    # against a random forest, printing a confusion matrix and accuracy
    # for each learner.  (Python 2 script; depends on annotation_reader,
    # PairwiseEngine, TKLIB_HOME and Orange 2.x modules defined elsewhere.)
    print "loading"
    annotations = annotation_reader.from_file("%s/data/directions/breadbox/nouns_stefie10.txt" % TKLIB_HOME)
    table = annotations.as_orange_table()
    # p0=0.5: about half of the examples receive index 0 (the training half).
    cv_indices = orange.MakeRandomIndices2(table, p0=0.5)
    print "indices", set(cv_indices)
    print "splitting"
    training, testing = annotation_reader.split(annotations, cv_indices)
    print "features"

    engine = PairwiseEngine(training)

    training_table = engine.training_table
    testing_table = engine.makeTable(testing)
    print len(training_table), "training"
    print len(testing_table), "testing"

    learners = [orange.MajorityLearner(),
                orngEnsemble.RandomForestLearner(),
                ]
    results = orngTest.learnAndTestOnTestData(learners, 
                                              training_table, testing_table)

    for accuracy, cm in zip(orngStat.CA(results),
                            orngStat.confusionMatrices(results)):
        print orangeUtils.confusion_matrix_to_string(table.domain, cm)
        print "accuracy: %.2f%%" % (accuracy*100)
Ejemplo n.º 2
0
 def test_mammal(self):
     """The majority classifier on 'zoo' must always predict 'mammal',
     and its probability estimate must equal the normalized class
     distribution of the data."""
     data = orange.ExampleTable("zoo")
     classifier = orange.MajorityLearner(data)
     distribution = orange.get_class_distribution(data)
     distribution.normalize()

     for example in data:
         self.assertEqual(classifier(example), "mammal")
         self.assertEqual(
             classifier(example, orange.Classifier.GetProbabilities),
             distribution)
Ejemplo n.º 3
0
    def __init__(self, data):
        """Validate the learning data and set up shared state.

        Args:
            data: an orange.ExampleTable with a discrete class variable.

        Raises:
            TypeError: if *data* is not an ExampleTable, or its target
                variable is not discrete.
        """
        # isinstance (rather than an exact `type(...) is` comparison) also
        # accepts ExampleTable subclasses; strictly backward compatible.
        if not isinstance(data, orange.ExampleTable):
            raise TypeError('Data is not an orange.ExampleTable')
        if data.domain.classVar.varType != orange.VarTypes.Discrete:
            raise TypeError('Data should have a discrete target variable.')

        self.data = data
        self.majorityClassifier = orange.MajorityLearner(self.data)
        self.rulesClass = []             # SDRules instances found so far
        self.algorithm = "Subgroup discovery algorithm"
Ejemplo n.º 4
0
    def __call__(self, data, weight=0):
        """Induce a structure (feature hierarchy) from *data*.

        The induction criterion comes from ``self.type`` ("complexity",
        "error", or "auto"); with "auto" it is chosen from the data:
        noisy (fuzzy) data uses minimal-error induction, otherwise
        minimal-complexity induction is used.  Returns the classifier
        produced by the inducer.
        """
        import orngWrap

        # BUG FIX: the original bound this value to the name `type`,
        # shadowing the builtin, so the `type(...)` call below raised
        # TypeError ("str object is not callable").  It also inspected
        # type(self) where self.boundsize was clearly intended.
        induction_type = getattr(self, "type", "auto")

        if hasattr(self, "boundsize"):
            # An int means a fixed subset size; a pair means (min, max).
            if isinstance(self.boundsize, int):
                subgen = orange.SubsetsGenerator_constSize(B=self.boundsize)
            else:
                subgen = orange.SubsetsGenerator_minMaxSize(
                    min=self.boundsize[0], max=self.boundsize[1])
        else:
            subgen = orange.SubsetsGenerator_constSize(B=2)

        if induction_type == "auto":
            # Choose the criterion from the data: fuzzy (noisy) incompatible
            # examples call for error-based induction.
            im = orange.IMBySorting(data, [])
            if im.fuzzy():
                induction_type = "error"
            else:
                induction_type = "complexity"

        inducer = StructureInducer(
            removeDuplicates=1,
            redundancyRemover=AttributeRedundanciesRemover(),
            learnerForUnknown=orange.MajorityLearner())

        if induction_type == "complexity":
            inducer.featureInducer = FeatureByMinComplexity()
            return inducer(data, weight)

        elif induction_type == "error":
            # Candidate m values for m-estimate tuning (user-supplied via
            # self.m, otherwise a default grid).
            ms = getattr(
                self, "m",
                orange.frange(0.1) + orange.frange(1.2, 3.0, 0.2) +
                orange.frange(4.0, 10.0, 1.0))

            inducer.redundancyRemover.inducer = inducer.featureInducer = FeatureByMinError(
            )

            # it's the same object for redundancy remover and the real inducer, so we can tune just one
            # (The original had an unreachable print/return after this
            # return; removed as dead code.)
            return orngWrap.Tune1Parameter(
                parameter="featureInducer.m",
                values=ms,
                object=inducer,
                returnWhat=orngWrap.Tune1Parameter.returnClassifier)(data,
                                                                     weight)
Ejemplo n.º 5
0
    def __init__(self, map=[], examples=[]):
        """Attach reference examples and per-node example tables to the SOM
        *map*, bucket each example under its best-matching node, and fit a
        per-node majority classifier when a class variable is present."""
        self.map = map
        self.examples = examples
        for node in map:
            # Continuous values are used as-is; discrete ones must be
            # converted through int() before wrapping in the variable.
            values = []
            for var, value in zip(examples.domain.attributes, node.vector):
                if var.varType == orange.VarTypes.Continuous:
                    values.append(var(value))
                else:
                    values.append(var(int(value)))
            node.referenceExample = orange.Example(
                orange.Domain(examples.domain.attributes, False), values)
            node.examples = orange.ExampleTable(examples.domain)

        # Route every example to the node it matches best.
        for ex in examples:
            self.getBestMatchingNode(ex).examples.append(ex)

        if examples and examples.domain.classVar:
            for node in self.map:
                node.classifier = orange.MajorityLearner(node.examples)
Ejemplo n.º 6
0
def main():
    # Load two annotators' noun annotations, split the first 50/50 into
    # train/test, extract WordNet-parent features, and compare a majority
    # baseline, a random forest, a WordNet kNN classifier and a
    # wizard-of-oz (second annotator) upper bound.  (Python 2 script;
    # depends on annotation_reader, WordnetParentsEngine, agreement,
    # TKLIB_HOME and Orange 2.x modules defined elsewhere.)
    print "loading"
    annotations = annotation_reader.from_file(
        "%s/data/directions/breadbox/nouns_stefie10.txt" % TKLIB_HOME)
    annotator2 = annotation_reader.from_file(
        "%s/data/directions/breadbox/nouns_dlaude.partial.txt" % TKLIB_HOME)
    #histogram(annotations)
    print "table"
    table = annotations.as_orange_table()
    # p0=0.5: about half of the examples receive index 0 (training half).
    cv_indices = orange.MakeRandomIndices2(table, p0=0.5)
    print "indices", set(cv_indices)
    print "splitting"
    training, testing = annotation_reader.split(annotations, cv_indices)
    print "features"

    engine = WordnetParentsEngine(training)
    training_table = engine.makeTable(training)
    testing_table = engine.makeTable(testing)

    #training_table, testing_table = wordnet_parents(training, testing)
    #training_table, testing_table = wordnet_glosses(training, testing)
    #training_table, testing_table = flickr_parents(training, testing)

    print len(training_table), "training examples"
    print len(testing_table), "testing examples"

    #training_table = annotation_reader.to_big_small(training_table)
    #testing_table = annotation_reader.to_big_small(testing_table)

    #information_gain = orange.MeasureAttribute_info()
    #for x in training_table.domain.attributes:
    #    print "x", information_gain(x, training_table)

    learners = [
        orange.MajorityLearner(),
        orngEnsemble.RandomForestLearner(), WordnetKnnClassifier,
        agreement.WizardOfOzLearner(annotator2.as_orange_table())
    ]
    results = orngTest.learnAndTestOnTestData(learners, training_table,
                                              testing_table)
    for accuracy, cm in zip(orngStat.CA(results),
                            orngStat.confusionMatrices(results)):
        print orangeUtils.confusion_matrix_to_string(table.domain, cm)
        print "accuracy: %.2f%%" % (accuracy * 100)
Ejemplo n.º 7
0
    def __init__(self, map=[], data=[]):
        """Populate the SOM *map*: give every node a reference instance and
        an instance table, route each data instance to its best-matching
        node, and fit per-node majority classifiers when the data carries a
        class variable."""
        self.map = map
        self.data = data
        for node in map:
            # Continuous values are used directly; discrete values go
            # through int() before being wrapped by the variable.
            values = []
            for var, value in zip(self.data.domain.attributes, node.vector):
                if var.varType == orange.VarTypes.Continuous:
                    values.append(var(value))
                else:
                    values.append(var(int(value)))
            node.reference_instance = orange.Example(
                orange.Domain(self.data.domain.attributes, False), values)
            node.instances = orange.ExampleTable(self.data.domain)

        # Assign every instance to the node it matches best.
        for inst in self.data:
            self.get_best_matching_node(inst).instances.append(inst)

        if self.data and self.data.domain.class_var:
            for node in self.map:
                # Nodes that attracted no instances fall back to a
                # classifier trained on the whole data set.
                node.classifier = orange.MajorityLearner(
                    node.instances if node.instances else self.data)

            self.class_var = self.data.domain.class_var
        else:
            self.class_var = None
Ejemplo n.º 8
0
    def test_equal(self):
        """A majority classifier must be deterministic per example, its
        probability estimates on the balanced iris data must be uniform
        (1/3 each), and it must survive a pickle round-trip."""
        data = orange.ExampleTable("iris")
        classifier = orange.MajorityLearner(data)

        # Repeated calls on the same example must always agree.
        for example in data[0:150:20]:
            answers = set()
            for _ in range(5):
                answers.add(classifier(example))
            self.assertEqual(len(answers), 1)

        # Collect predictions over the whole table.
        answers = set()
        for example in data:
            answers.add(classifier(example))
        self.assertEqual(len(answers), 3)

        # Iris is perfectly balanced, so every class probability is 1/3.
        for example in data[0:150:20]:
            probabilities = classifier(
                example, orange.Classifier.GetProbabilities)
            self.assertTrue(all(p == 1/3 for p in probabilities))

        # Pickling must preserve the classifier's predictions.
        import pickle
        clone = pickle.loads(pickle.dumps(classifier))
        for example in data:
            self.assertEqual(classifier(example), clone(example))
Ejemplo n.º 9
0
def testTrecentoSimpler():
    """Train a majority baseline and a kNN classifier on one trecento
    table, count their mistakes on the other, and print both error
    rates as fractions."""
    import orange, orngTree # @UnusedImport @UnresolvedImport

    trainData = orange.ExampleTable('d:/desktop/trecento2.tab')
    testData  = orange.ExampleTable('d:/desktop/trecento1.tab')

    majClassifier = orange.MajorityLearner(trainData)
    knnClassifier = orange.kNNLearner(trainData)

    # Tally misclassifications for each model over the test rows.
    majWrong = sum(1 for row in testData
                   if majClassifier(row) != row.getclass())
    knnWrong = sum(1 for row in testData
                   if knnClassifier(row) != row.getclass())

    total = float(len(testData))
    print (majWrong/total, knnWrong/total)
Ejemplo n.º 10
0
def xtestChinaEuropeSimpler():
    """Train a majority baseline and a kNN classifier on the folk-tune
    training table, count their mistakes on the test table, and print
    both error rates as fractions."""
    import orange, orngTree # @UnusedImport @UnresolvedImport

    trainData = orange.ExampleTable('ismir2011_fb_folkTrain.tab')
    testData  = orange.ExampleTable('ismir2011_fb_folkTest.tab')

    majClassifier = orange.MajorityLearner(trainData)
    knnClassifier = orange.kNNLearner(trainData)

    # Tally misclassifications for each model over the test rows.
    majWrong = sum(1 for row in testData
                   if majClassifier(row) != row.getclass())
    knnWrong = sum(1 for row in testData
                   if knnClassifier(row) != row.getclass())

    total = float(len(testData))
    print (majWrong/total, knnWrong/total)
Ejemplo n.º 11
0
# Classes:     orngTest.crossValidation, orngTree.TreeLearner, orange.kNNLearner, orngRegression.LinearRegressionLearner
# Referenced:  regression.htm

import orange
import orngRegression
import orngTree
import orngStat, orngTest

# 10-fold cross-validation of four regressors on the housing data,
# reporting a table of standard regression scores.  (Python 2 script.)
data = orange.ExampleTable("housing")

# definition of learners (regressors)
lr = orngRegression.LinearRegressionLearner(name="lr")
rt = orngTree.TreeLearner(measure="retis",
                          mForPruning=2,
                          minExamples=20,
                          name="rt")
# MajorityLearner on a continuous class predicts the mean -- the baseline.
maj = orange.MajorityLearner(name="maj")
knn = orange.kNNLearner(k=10, name="knn")
learners = [maj, lr, rt, knn]

# evaluation and reporting of scores
results = orngTest.crossValidation(learners, data, folds=10)
scores = [("MSE", orngStat.MSE), ("RMSE", orngStat.RMSE),
          ("MAE", orngStat.MAE), ("RSE", orngStat.RSE),
          ("RRSE", orngStat.RRSE), ("RAE", orngStat.RAE), ("R2", orngStat.R2)]

# One row per learner, one column per score.
print "Learner  " + "".join(["%-7s" % s[0] for s in scores])
for i in range(len(learners)):
    print "%-8s " % learners[i].name + "".join(
        ["%6.3f " % s[1](results)[i] for s in scores])
Ejemplo n.º 12
0
# Description: Builds regression models from data and outputs predictions for first five instances
# Category:    modelling
# Uses:        housing
# Classes:     MakeRandomIndices2, MajorityLearner, orngTree.TreeLearner, orange.kNNLearner
# Referenced:  regression.htm

import orange, orngTree, orngTest, orngStat

# Split the housing data in half, train three regressors on one half,
# and print a header row for the per-instance predictions that follow.
# (Python 2 script; the prediction loop body continues below.)
data = orange.ExampleTable("housing.tab")
selection = orange.MakeRandomIndices2(data, 0.5)
train_data = data.select(selection, 0)
test_data = data.select(selection, 1)

# Mean-of-class baseline (MajorityLearner on a continuous target).
maj = orange.MajorityLearner(train_data)
maj.name = "default"

rt = orngTree.TreeLearner(train_data, measure="retis", mForPruning=2, minExamples=20)
rt.name = "reg. tree"

k = 5
knn = orange.kNNLearner(train_data, k=k)
knn.name = "k-NN (k=%i)" % k

regressors = [maj, rt, knn]

# Header: the original value followed by each regressor's name.
print "\n%10s " % "original",
for r in regressors:
  print "%10s " % r.name,
print
for i in range(10):
Ejemplo n.º 13
0
import orange, orngTest, orngStat
import random

# Demonstrates orngTest.proportionTest: with the default random generator
# repeated runs give identical splits; with an explicit RandomGenerator the
# splits differ between iterations but are reproducible across script runs.
# (Python 2 script.)
data = orange.ExampleTable("voting")

bayes = orange.BayesLearner(name="bayes")
tree = orange.TreeLearner(name="tree")
majority = orange.MajorityLearner(name="default")
learners = [bayes, tree, majority]
names = [x.name for x in learners]


def printResults(res):
    # Print each learner's classification accuracy with a 95% confidence
    # half-width (1.96 * standard error) on a single line.
    CAs = orngStat.CA(res, reportSE=1)
    for i in range(len(names)):
        print "%s: %5.3f+-%5.3f   " % (names[i], CAs[i][0], 1.96 * CAs[i][1]),
    print


print "\nproportionsTest that will always give the same results"
for i in range(3):
    res = orngTest.proportionTest(learners, data, 0.7)
    printResults(res)

print "\nproportionsTest that will give different results, but the same each time the script is run"
myRandom = orange.RandomGenerator()
for i in range(3):
    res = orngTest.proportionTest(learners,
                                  data,
                                  0.7,
                                  randomGenerator=myRandom)
Ejemplo n.º 14
0
def majority_learner(input_dict):
    """Workflow-widget entry point wrapping Orange's majority learner.

    *input_dict* is accepted for interface uniformity and ignored.
    Returns a dict with the learner under the 'majorout' key.
    """
    import orange
    learner = orange.MajorityLearner(name="Majority Classifier (Orange)")
    return {'majorout': learner}
Ejemplo n.º 15
0
    p = max(maxp)  # max class probability
    classifier_index = maxp.index(p)
    c = pmatrix[classifier_index].modus()
    
    if resultType == orange.GetValue:
      return c
    elif resultType == orange.getClassDistribution:
      return pmatrix[classifier_index]
    else:
      return (c, pmatrix[classifier_index])


# Cross-validate a tree, naive Bayes, a WinnerLearner ensemble over both,
# and a majority baseline on the promoters data.  (Python 2 script;
# WinnerLearner is defined earlier in the original file.)
tree = orngTree.TreeLearner(mForPruning=5.0)
tree.name = 'class. tree'
bayes = orange.BayesLearner()
bayes.name = 'naive bayes'
winner = WinnerLearner(learners=[tree, bayes])
winner.name = 'winner'

majority = orange.MajorityLearner()
majority.name = 'default'
learners = [majority, tree, bayes, winner]

data = orange.ExampleTable("promoters")

results = orngTest.crossValidation(learners, data)
print "Classification Accuracy:"
for i in range(len(learners)):
    print ("%15s: %5.3f") % (learners[i].name, orngStat.CA(results)[i])
if __name__=="__main__":
    # Manual test harness for the OWPredictions Qt widget: split iris in
    # half, train three classifiers, and feed the widget one of several
    # scenarios toggled via the if 1:/if 0: flags below.
    a = QApplication(sys.argv)
    ow = OWPredictions()
    ow.show()

    import orngTree

    dataset = orange.ExampleTable('../../doc/datasets/iris.tab')
#    dataset = orange.ExampleTable('../../doc/datasets/auto-mpg.tab')
    ind = orange.MakeRandomIndices2(p0=0.5)(dataset)
    data = dataset.select(ind, 0)
    test = dataset.select(ind, 1)
    # Same test examples but with the class variable stripped from the domain.
    testnoclass = orange.ExampleTable(orange.Domain(test.domain.attributes, False), test)        
    tree = orngTree.TreeLearner(data)
    tree.name = "tree"
    maj = orange.MajorityLearner(data)
    maj.name = "maj"
    knn = orange.kNNLearner(data, k = 10)
    knn.name = "knn"
    
#    ow.setData(test)
#    
#    ow.setPredictor(maj, 1)
    
    

    if 1: # data set only
        ow.setData(test)
    if 0: # two predictors, test data with class
        ow.setPredictor(maj, 1)
        ow.setPredictor(tree, 2)
Ejemplo n.º 17
0
# Continuation of an imputation demo: `data` and an earlier `imputer` are
# defined above this excerpt.  (Python 2 script.)
print imputer(data[19])
print


# Impute the whole table and compare originals against imputed copies.
impdata = imputer(data)
for i in range(20, 25):
    print data[i]
    print impdata[i]
    print



print "\n*** BAYES and AVERAGE IMPUTATION ***\n"

# Model-based imputation: MajorityLearner on a continuous attribute
# predicts its average, BayesLearner handles the discrete attributes.
imputer = orange.ImputerConstructor_model()
imputer.learnerContinuous = orange.MajorityLearner()
imputer.learnerDiscrete = orange.BayesLearner()
imputer = imputer(data)

print "Example w/ missing values"
print data[19]
print "Imputed:"
print imputer(data[19])
print


impdata = imputer(data)
for i in range(20, 25):
    print data[i]
    print impdata[i]
    print
Ejemplo n.º 18
0
# Description: Demonstrates the use of classification scores
# Category:    evaluation
# Uses:        voting.tab
# Referenced:  orngStat.htm

import orange, orngTest, orngTree

learners = [orange.BayesLearner(name = "bayes"),
            orngTree.TreeLearner(name="tree"),
            orange.MajorityLearner(name="majrty")]

# Cross-validate the same learners on two data sets.
voting = orange.ExampleTable("voting")
res = orngTest.crossValidation(learners, voting)

vehicle = orange.ExampleTable("vehicle")
resVeh = orngTest.crossValidation(learners, vehicle)

import orngStat

# Per-learner scores on the voting results: classification accuracy,
# average probability of the correct class, Brier score, information score.
CAs = orngStat.CA(res)
APs = orngStat.AP(res)
Briers = orngStat.BrierScore(res)
ISs = orngStat.IS(res)

print
print "method\tCA\tAP\tBrier\tIS"
for l in range(len(learners)):
    print "%s\t%5.3f\t%5.3f\t%5.3f\t%6.3f" % (learners[l].name, CAs[l], APs[l], Briers[l], ISs[l])


# reportSE=True returns (score, standard error) pairs instead of scores.
CAs = orngStat.CA(res, reportSE=True)
Ejemplo n.º 19
0
    def __call__(self, data, weight=None):
        """Fit a (weighted) least-squares linear regression on *data*.

        Pipeline: optional attribute subset / stepwise selection,
        continuization of discrete attributes, model-based imputation,
        then a closed-form pseudo-inverse solution with descriptive and
        inferential statistics.  Returns a LinearRegression classifier.

        *weight*, when given, names the meta attribute holding per-example
        weights.
        """
        if not self.use_attributes is None:
            # Restrict the data to an explicit attribute subset.
            new_domain = orange.Domain(self.use_attributes,
                                       data.domain.classVar)
            new_domain.addmetas(data.domain.getmetas())
            data = orange.ExampleTable(new_domain, data)

        if self.stepwise and self.stepwise_before:
            # Stepwise selection before continuization.
            use_attributes = stepwise(data,
                                      add_sig=self.add_sig,
                                      remove_sig=self.remove_sig)
            new_domain = orange.Domain(use_attributes, data.domain.classVar)
            new_domain.addmetas(data.domain.getmetas())
            data = orange.ExampleTable(new_domain, data)

        # continuization (replaces discrete with continuous attributes)
        continuizer = orange.DomainContinuizer()
        continuizer.multinomialTreatment = continuizer.FrequentIsBase
        continuizer.zeroBased = True
        domain0 = continuizer(data)
        data = data.translate(domain0)

        if self.stepwise and not self.stepwise_before:
            # Stepwise selection after continuization (weight-aware).
            use_attributes = stepwise(data,
                                      weight,
                                      add_sig=self.add_sig,
                                      remove_sig=self.remove_sig)
            new_domain = orange.Domain(use_attributes, data.domain.classVar)
            new_domain.addmetas(data.domain.getmetas())
            data = orange.ExampleTable(new_domain, data)

        # missing values handling (impute missing)
        # MajorityLearner on a continuous attribute imputes its average.
        imputer = orange.ImputerConstructor_model()
        imputer.learnerContinuous = orange.MajorityLearner()
        imputer.learnerDiscrete = orange.MajorityLearner()
        imputer = imputer(data)
        data = imputer(data)

        # conversion to numpy: A = attribute matrix, y = class vector
        A, y, w = data.toNumpy()  # weights ??
        if A is None:
            # No attributes at all: intercept-only model.
            n = len(data)
            m = 0
        else:
            n, m = numpy.shape(A)

        if self.beta0 == True:
            if A is None:
                X = numpy.ones([len(data), 1])
            else:
                X = numpy.insert(A, 0, 1, axis=1)  # adds a column of ones
        else:
            X = A

        # set weights (diagonal weight matrix; identity when unweighted)
        W = numpy.identity(len(data))
        if weight:
            for di, d in enumerate(data):
                W[di, di] = float(d[weight])

        D = dot(
            dot(numpy.linalg.pinv(dot(dot(X.T, W), X)), X.T), W
        )  # adds some robustness by computing the pseudo inverse; normal inverse could fail due to singularity of the X.T*W*X
        beta = dot(D, y)

        yEstimated = dot(X, beta)  # fitted values
        # some descriptive statistics
        muY, sigmaY = numpy.mean(y), numpy.std(y)
        muX, covX = numpy.mean(X, axis=0), numpy.cov(X, rowvar=0)

        # model statistics: SST = total, SSR = regression (explained)
        SST, SSR = numpy.sum((y - muY)**2), numpy.sum((yEstimated - muY)**2)
        SSE, RSquare = SST - SSR, SSR / SST
        R = numpy.sqrt(RSquare)  # coefficient of determination
        RAdjusted = 1 - (1 - RSquare) * (n - 1) / (n - m - 1)
        # NOTE(review): the textbook F statistic is (SSR/m)/(SSE/(n-m-1));
        # as written the denominator is SST - SSR/(n-m-1), which looks like
        # a missing-parentheses bug -- confirm against orngRegression.
        F = (SSR / m) / (SST - SSR / (n - m - 1))  # F statistic
        df = m - 1

        sigmaSquare = SSE / (n - m - 1)

        # standard error of estimated coefficients
        errCoeff = sqrt(sigmaSquare * inv(dot(X.T, X)).diagonal())

        # t statistic and two-sided significance via the incomplete beta
        t = beta / errCoeff
        df = n - 2
        significance = []
        for tt in t:
            try:
                significance.append(
                    statc.betai(df * 0.5, 0.5, df / (df + tt * tt)))
            except:
                significance.append(1.0)

        # standardized coefficients
        if m > 0:
            stdCoeff = (sqrt(covX.diagonal()) / sigmaY) * beta
        else:
            stdCoeff = (sqrt(covX) / sigmaY) * beta

        # NOTE(review): 'ExplVar' is assigned SSE (residual sum) and
        # 'ResVar' SSR (explained sum) -- the labels appear swapped;
        # verify against consumers of this dict before relying on them.
        model = {
            'descriptives': {
                'meanX': muX,
                'covX': covX,
                'meanY': muY,
                'sigmaY': sigmaY
            },
            'model': {
                'estCoeff': beta,
                'stdErrorEstimation': errCoeff
            },
            'model summary': {
                'TotalVar': SST,
                'ExplVar': SSE,
                'ResVar': SSR,
                'R': R,
                'RAdjusted': RAdjusted,
                'F': F,
                't': t,
                'sig': significance
            }
        }
        return LinearRegression(statistics=model,
                                domain=data.domain,
                                name=self.name,
                                beta0=self.beta0,
                                imputer=imputer)
Ejemplo n.º 20
0
 def __init__(self, model=None, **kwargs):
     """Store *model*, defaulting to a fresh Orange majority learner.

     Extra keyword arguments are accepted for interface compatibility
     and ignored.
     """
     if model is None:
         self.model = orange.MajorityLearner()
     else:
         self.model = model
Ejemplo n.º 21
0
class TestImputeByLearner(testing.PreprocessorTestCase):
    # Runs the shared PreprocessorTestCase suite against the
    # impute-by-learner preprocessor, using a majority learner as the
    # imputation model.
    PREPROCESSOR = Preprocessor_imputeByLearner(
        learner=orange.MajorityLearner())
Ejemplo n.º 22
0
    correct = [0.0]*len(classifiers)
    for ex in test_data:
        for i in range(len(classifiers)):
            if classifiers[i](ex) == ex.getclass():
                correct[i] += 1
    for i in range(len(correct)):
        correct[i] = 100.0*correct[i] / len(test_data)
        
    return correct

# set up the classifiers
# Load the train/dev/test tables (paths are defined earlier in the
# original file) and train a majority baseline; the alternative tree
# learners below are kept as commented-out experiments.  (Python 2.)
trainD = orange.ExampleTable(trainData)
devD = orange.ExampleTable(devData)
testD = orange.ExampleTable(testData)

majority = orange.MajorityLearner(trainD)
majority.name   = 'majority         '
print majority.name

##tree = orngTree.TreeLearner(trainD, measure='gainRatio', binarization=0, minSubset=5, minExamples=5, sameMajorityPruning=1, mForPruning=5);
##tree.name       = "tree - gainRatio "
##f = file(trainData+'o.txt.tree', 'w')
##f.write(orngTree.dumpTree(tree, leafStr='%V (%^.2m% = %.0M out of %.0N)'))
##f.close()
##print tree.name

##treeC45 = orange.C45Learner(trainD, minObjs=5)
##treeC45.name    = "tree - C45       "
##f = file(trainData+'o.txt.C45tree', 'w')
##dumpC45Tree(treeC45,f)
##f.close()
Ejemplo n.º 23
0
import sys
sys.path.append('../')
import database
import warnings
warnings.simplefilter("ignore")
import math
from queryconstructor import QueryConstructor
from plotconstructor import *
import orange
warnings.filterwarnings("ignore", "", orange.AttributeWarning)

# Train a decision tree and a majority baseline on one table, then count
# how often each agrees with the true class on a held-out table.
# (Python 2 script; the evaluation loop continues past this excerpt.)
learner_data = orange.ExampleTable("learner_data")
print "starting learning"
treeLearner = orange.TreeLearner(learner_data)
majorityLearner = orange.MajorityLearner(learner_data)
print "done with learning"
eval_data = orange.ExampleTable("eval_data")
# Float accumulators so later ratios divide cleanly under Python 2.
total=0.0
total_lame=0.0
total_true=0.0
count=0.0

print "starting evaluation"    

for i in eval_data:
    
    actual = i.getclass()
    if (treeLearner(i)==actual):
        total+=1
    if (majorityLearner(i)==actual):
        total_lame+=1