Ejemplo n.º 1
0
 def makeLearner(self):
     """Return an ``orngBayes.BayesLearner`` constructed with no arguments."""
     learner = orngBayes.BayesLearner()
     return learner
Ejemplo n.º 2
0
import orange, orngTest, orngStat, orngBayes
data = orange.ExampleTable("lung-cancer")
bayes = orngBayes.BayesLearner()
bayes_m = orngBayes.BayesLearner(m=2)

res = orngTest.crossValidation([bayes, bayes_m], data)
CAs = orngStat.CA(res)
print
print "Without m: %5.3f" % CAs[0]
print "With m=2: %5.3f" % CAs[1]

data = orange.ExampleTable("voting")
model = orngBayes.BayesLearner(data)
orngBayes.printModel(model)
Ejemplo n.º 3
0
    def train(self, keyword, learner="svm"):
        training_docs, test_docs, train_label, test_label = self.get_training_test_sets(
            keyword, 0.8)

        if (len(train_label) == 0):
            return None

        F_train = []
        print "--------------TRAIN:", keyword, "-------------------"
        for i, doc_i in enumerate(training_docs):
            #print self.documents[i]
            myfeatures = self.get_features(keyword, self.documents[doc_i])
            if (not '1' in myfeatures):
                continue

            myfeatures.append(str(int(train_label[i])))
            F_train.append(myfeatures)

        if (len(F_train) == 0):
            return None

        #create the attributes and domain
        table = orange.ExampleTable(self.get_domain())

        #define the rest of the table by addign elements to it
        for i in range(len(F_train)):
            #print self.known_objects
            #print "i=", i
            #print "ftrain[i]", zip(self.known_objects, F_train[i]),
            #print " label[i]", train_label[i]
            #F_train[i].append(str(int(train_label[i])))
            table.append(F_train[i])

        #perform the learning

        if (learner == "bayes"):
            print "running bayes"
            classifier = orngBayes.BayesLearner(table)
            #classifier = orngBayes.BayesLearner(table, m=2)
        elif (learner == "tree"):
            print "running tree"
            classifier = orngTree.TreeLearner(table)
        elif (learner == "svm"):
            #can't load the svmlearner
            print "running svm"
            classifier = orngSVM.SVMLearner(table,
                                            svm_type=orange.SVMLearner.Nu_SVC,
                                            nu=0.3,
                                            probability=True)
        elif (learner == "boosting"):
            #problem here too
            #this is meant to be adaboost
            classifier = orngTree.BoostedLearner(table)
        elif (learner == "randomforest"):
            #problem here too
            classifier = orngEnsemble.RandomForestLearner(table,
                                                          trees=50,
                                                          name="forest")

        else:
            print "unknown learner"
            raise

        return classifier
    def train(self):
        F_train_obs = []
        F_train_trans = []
        print "--------------creating tables-------------------"
        for i, obs in enumerate(self.dataset.observations):
            print i, "of", len(self.dataset.observations)

            f_obs, f_trans = self.dataset.to_orange_entries(obs)

            for i, fs in enumerate(f_obs):
                print i, f_obs[0]
                F_train_obs.append(fs)

            for fs in f_trans:
                F_train_trans.append(fs)

        if (len(F_train_obs) == 0):
            return None

        print ">> adding elements to tables"
        #create the attributes and domain
        #define the rest of the table by addign elements to it
        table_obs = orange.ExampleTable(self.get_domain_obs())

        for i in range(len(F_train_obs)):
            table_obs.append(F_train_obs[i])

        table_trans = orange.ExampleTable(self.get_domain_trans())
        for i in range(len(F_train_trans)):
            table_trans.append(F_train_trans[i])

        #perform the learning
        print "training"
        if (self.learner == "bayes"):
            print "training bayes obs"
            self.classifier_obs = orngBayes.BayesLearner(table_obs)
            print "training bayes trans"
            self.classifier_trans = orngBayes.BayesLearner(table_trans)
        elif (self.learner == "tree"):
            print "running tree"
            self.classifier_obs = orngTree.TreeLearner(table_obs)
            self.classifier_trans = orngTree.TreeLearner(table_trans)
        elif (self.learner == "svm"):
            #can't load the svmlearner
            print "trianing observation svm"
            self.classifier_obs = orngSVM.SVMLearner(
                table_obs,
                svm_type=orange.SVMLearner.Nu_SVC,
                nu=0.3,
                probability=True)

            print "trianing transition svm"
            self.classifier_trans = orngSVM.SVMLearner(
                table_trans,
                svm_type=orange.SVMLearner.Nu_SVC,
                nu=0.3,
                probability=True)
        elif (self.learner == "boosting"):
            #problem here too
            #this is meant to be adaboost
            self.classifier_obs = orngTree.BoostedLearner(table_obs)
            self.classifier_trans = orngTree.BoostedLearner(table_trans)
        elif (self.learner == "randomforest"):
            #problem here too
            self.classifier_obs = orngEnsemble.RandomForestLearner(
                table_obs, trees=50, name="forest")
            self.classifier_trans = orngEnsemble.RandomForestLearner(
                table_trans, trees=50, name="forest")
        else:
            print "unknown learner"
            raise

        return self.classifier_obs, self.classifier_trans
Ejemplo n.º 5
0
 def setUp(self):
     """Store a no-argument ``orngBayes.BayesLearner`` on ``self.learner``."""
     # Imported locally, as in the original, rather than at module level.
     from orngBayes import BayesLearner
     self.learner = BayesLearner()
Ejemplo n.º 6
0
 def makeLearner(self):
     """Return an ``orngBayes.BayesLearner`` constructed with no arguments."""
     # The previously constructed-but-unused tree learner has been dropped;
     # only the Bayes learner was ever returned.
     return orngBayes.BayesLearner()
Ejemplo n.º 7
0
def ablateFeaturesForCls(engineCls):
    """Plot one ROC curve per single feature of an engine, plus one for all.

    For every name in ``engineCls().masterList`` (sorted) a fresh engine is
    created and restricted to that one feature with ``setFeatureList``. A
    final unrestricted engine is labelled ``"all"``. For each engine a table
    is obtained from ``Trainer.configureClassifier``, split with
    ``orange.MakeRandomIndices2``, a Bayes learner is run on one part and
    evaluated on the other, and the resulting ROC curve is drawn. The figure
    is saved as ``roc.ablate.<engine name>.png``.

    Args:
        engineCls: zero-argument callable producing engine objects that
            expose ``masterList``, ``setFeatureList`` and ``name()``.
    """
    mpl.figure()
    trainer = Trainer()
    engine = engineCls()
    # Return value is not used here; the call is kept because any side
    # effects it may have are not visible from this function.
    trainer.configureClassifier(engine)

    markers = [
        '.', ',', 'v', '^', '<', '>', '1', '2', '3', '4', 's', 'p', '*', 'h',
        'H',
    ]
    colors = ["b", "g", "r", "c", "m", "y"]

    # Each run is (label, engine, marker, color). Styles are fixed here so
    # that the "all" curve always gets the black "o" style, even when there
    # are more features than distinct markers or colors (per-feature styles
    # then simply cycle).
    runs = []
    for i, name in enumerate(sorted(engine.masterList)):
        sub_engine = engineCls()
        sub_engine.setFeatureList([name])
        runs.append((name,
                     sub_engine,
                     markers[i % len(markers)],
                     colors[i % len(colors)]))
    runs.append(("all", engineCls(), "o", "k"))

    for name, sub_engine, marker, color in runs:
        table = trainer.configureClassifier(sub_engine)
        cv_indices = orange.MakeRandomIndices2(table, p0=0.75)

        # NOTE(review): rows whose index is 0 go to `testing` and the rest to
        # `training`; with p0=0.75 that presumably makes the test part the
        # larger one -- confirm this is intended.
        training = table.select(cv_indices, 0, negate=True)
        testing = table.select(cv_indices, 0, negate=False)

        classifier = orngBayes.BayesLearner(training)
        results = orngTest.testOnData([classifier], testing)
        displayResults(results)

        line = rocCurve(
            results,
            "",
            stepSize=0.001,
            marker=marker,
            plotArgs=dict(linewidth=5, markersize=10, color=color),
        )

        line[0].set_label(name)

    mpl.title(engine.name(), size=30)
    mpl.xlabel("FP", fontsize=30)
    mpl.ylabel("TP", fontsize=30)
    mpl.xticks([0, 1], fontsize=17)
    mpl.yticks([0, 1], fontsize=17)
    mpl.subplots_adjust(bottom=0.14, top=0.91)
    mpl.legend(loc="lower right", prop=dict(size=17))
    mpl.savefig("roc.ablate.%s.png" % engine.name())