Example #1
0
    def one_fold_with_indices(self,
                              learners,
                              examples,
                              fold,
                              indices,
                              preprocessors=(),
                              weight=0):
        """Run a single cross-validation fold defined by ``indices``.

        Examples whose index value equals ``fold`` form the test split;
        all other examples form the training split.  Returns a pair
        ``(results, classifiers)``, or ``((), ())`` when either split is
        empty.
        """
        training = examples.selectref(indices, fold, negate=1)
        testing = examples.selectref(indices, fold, negate=0)
        if not (len(training) and len(testing)):
            return (), ()

        # Apply the configured preprocessors to both splits.
        training, testing = self._preprocess_data(training, testing,
                                                  preprocessors)
        # Optionally extend the training split with the stored bias data.
        if self.trainBias:
            training = dataUtilities.concatenate([training, self.trainBias],
                                                 True)[0]

        if not training:
            raise SystemError("no training examples after preprocessing")
        if not testing:
            raise SystemError("no test examples after preprocessing")

        # One trained classifier per learner, all on the same split.
        classifiers = [make(training, weight) for make in learners]

        # Ids of the examples that belong to the held-out fold.
        held_out_ids = (idx for idx, _ in enumerate(examples)
                        if indices[idx] == fold)
        results = self._test_on_data(classifiers, testing, held_out_ids)

        return results, classifiers
Example #2
0
    def one_fold_with_indices(self, learners, examples, fold, indices, preprocessors=(), weight=0):
        """Train and evaluate ``learners`` on a single fold.

        The fold is carved out of ``examples`` via ``indices``: entries equal
        to ``fold`` become the test partition, the rest the training
        partition.  Returns ``(results, classifiers)`` or two empty tuples
        when either partition is empty.
        """
        train_part = examples.selectref(indices, fold, negate=1)
        eval_part = examples.selectref(indices, fold, negate=0)
        if min(len(train_part), len(eval_part)) == 0:
            return (), ()

        # Run the configured preprocessors on both partitions.
        train_part, eval_part = self._preprocess_data(train_part, eval_part, preprocessors)
        # Append the train-bias examples, if any, to the training partition.
        if self.trainBias:
            train_part = dataUtilities.concatenate([train_part, self.trainBias], True)[0]

        if not train_part:
            raise SystemError("no training examples after preprocessing")
        if not eval_part:
            raise SystemError("no test examples after preprocessing")

        # Build one classifier per learner from the training partition.
        classifiers = []
        for learner in learners:
            classifiers.append(learner(train_part, weight))

        # Ids of the examples that ended up in the evaluation partition.
        eval_ids = (position for position, _ in enumerate(examples) if indices[position] == fold)
        results = self._test_on_data(classifiers, eval_part, eval_ids)

        return results, classifiers
Example #3
0
    def proportion_test(self, learners, data, learning_proportion, times=10,
                   stratification=Orange.core.MakeRandomIndices.StratifiedIfPossible, preprocessors=(), random_generator=0,
                   callback=None, store_classifiers=False, store_examples=False, testAttrFilter=None, testFilterVal=None):
        """
        Perform a test, where learners are trained and tested on different data sets. Training and test sets are
        generated by proportionally splitting data.

        :param learners: list of learners to be tested
        :param data: a dataset used for evaluation
        :param learning_proportion: proportion of examples to be used for training
        :param times: number of test repetitions
        :param stratification: use stratification when constructing train and test sets.
        :param preprocessors: a list of preprocessors to be used on data.
        :param random_generator: seed/generator forwarded to MakeRandomIndices2.
        :param callback: a function that is be called after each classifier is computed.
        :param store_classifiers: if True, classifiers will be accessible in test_results.
        :param store_examples: if True, examples will be accessible in test_results.
        :param testAttrFilter: attribute used to pre-filter examples -- presumably
            selects a fixed test subset; verify against getExamplesAndSetTrainBias.
        :param testFilterVal: value(s) of testAttrFilter used by the filter -- TODO confirm.
        :return: :obj:`ExperimentResults`
        """
        examples = self.getExamplesAndSetTrainBias(data, testAttrFilter, testFilterVal)

        # Index sampler: marks a proportion p0 of the examples as split 0
        # (the learning set); the remainder becomes split 1 (the test set).
        pick = Orange.core.MakeRandomIndices2(stratified = stratification, p0 = learning_proportion, randomGenerator = random_generator)

        examples, weight = demangle_examples(examples)

        test_type = self.check_test_type(examples, learners)
        
        test_results = orngTest.ExperimentResults(times,
                                        classifierNames = [getobjectname(l) for l in learners],
                                        domain=examples.domain,
                                        test_type = test_type,
                                        weights=weight)
        test_results.classifiers = []
        # offset = index of the first result row of the current repetition,
        # needed because results from all repetitions share one flat list.
        offset=0
        for time in xrange(times):
            indices = pick(examples)
            learn_set = examples.selectref(indices, 0)
            test_set = examples.selectref(indices, 1)
            # Add train bias to the learn_set
            if self.trainBias:
                learn_set = dataUtilities.concatenate([learn_set, self.trainBias], True)[0]
            classifiers, results = self._learn_and_test_on_test_data(learners, learn_set, weight, test_set, preprocessors)
            if store_classifiers:
                test_results.classifiers.append(classifiers)

            # Pre-create one result row per test example for this repetition,
            # then fill in each classifier's prediction by row index.
            test_results.results.extend(test_results.create_tested_example(time, example)
                                        for i, example in enumerate(test_set))
            for example, classifier, result in results:
                test_results.results[offset+example].set_result(classifier, *result)
            offset += len(test_set)

            if callback:
                callback()
        return test_results
Example #4
0
def getPvalue(train, predEx, label, method="avgNN", measure=None):
    """Compute the conformal-prediction p-value of ``predEx`` for ``label``.

    :param train: training example table; its attributes define the domain.
    :param predEx: the example whose p-value is being computed.
    :param label: candidate class value assigned to predEx.
    :param method: non-conformity score method; one of avgNN, scaledMinNN,
                   minNN, kNNratio.
    :param measure: optional distance measure forwarded to getScore.
    :return: fraction of training examples whose non-conformity score is
             strictly greater than that of predEx.

    Side effect: dumps the sorted non-conformity scores to "NonConf.txt"
    in the current directory (debug output).
    """
    # Set label to class of predEx.
    newPredEx = Orange.data.Table(predEx.domain, [predEx])
    newPredEx[0][newPredEx.domain.classVar] = label

    # Add predEx to train, but use only the attributes of train!
    extTrain = dataUtilities.concatenate([train, newPredEx], True)[0]

    # Calculate a non-conformity score for each example in train + predEx
    # with the given label.
    if method == "scaledMinNN":
        # Scaling statistics of minimum distances within the train set.
        maxDistRatio = getMinDistRatio(train)
    nonConfList = []
    for idx in range(len(extTrain)):
        if method == "scaledMinNN":
            alpha = getScore(idx, extTrain, method, maxDistRatio)
        else:
            alpha = getScore(idx, extTrain, method, None, measure)
        nonConfList.append(alpha)

    # Debug dump of the sorted scores.  'with' guarantees the handle is
    # closed even if a write raises (the original leaked it on error).
    with open("NonConf.txt", "w") as fid:
        for score in sorted(nonConfList):
            fid.write(str(score) + "\n")

    # The last score belongs to predEx; the p-value is the fraction of
    # training examples with a score greater than predEx's.
    alphaPredEx = nonConfList[-1]
    trainList = nonConfList[:-1]
    moreNonConf = sum(1 for score in trainList if score > alphaPredEx)
    pvalue = moreNonConf / float(len(trainList))

    return pvalue
Example #5
0
def getPvalue(train, predEx, label, method = "avgNN", measure = None):
    """Compute the conformal-prediction p-value of ``predEx`` for ``label``.

    :param train: training example table.
    :param predEx: the example whose p-value is being computed.
    :param label: candidate class value assigned to predEx.
    :param method: non-conformity score method; one of avgNN, scaledMinNN,
                   minNN, kNNratio.
    :param measure: optional distance measure forwarded to getScore.
    :return: fraction of training examples whose non-conformity score is
             strictly greater than that of predEx.

    Side effect: dumps the sorted non-conformity scores to "NonConf.txt"
    in the current directory (debug output).
    """
    # Set label to class of predEx.
    newPredEx = Orange.data.Table(predEx.domain, [predEx])
    newPredEx[0][newPredEx.domain.classVar] = label

    # Add predEx to train.
    extTrain = dataUtilities.concatenate([train, newPredEx])[0]

    # Calculate a non-conformity score for each example in train + predEx
    # with the given label.
    if method == "scaledMinNN":
        # Scaling statistics of minimum distances within the train set.
        maxDistRatio = getMinDistRatio(train)
    nonConfList = []
    for idx in range(len(extTrain)):
        if method == "scaledMinNN":
            alpha = getScore(idx, extTrain, method, maxDistRatio)
        else:
            alpha = getScore(idx, extTrain, method, None, measure)
        nonConfList.append(alpha)

    # Debug dump of the sorted scores.  'with' guarantees the handle is
    # closed even if a write raises (the original leaked it on error).
    with open("NonConf.txt", "w") as fid:
        for score in sorted(nonConfList):
            fid.write(str(score) + "\n")

    # The last score belongs to predEx; the p-value is the fraction of
    # training examples with a score greater than predEx's.
    alphaPredEx = nonConfList[-1]
    trainList = nonConfList[:-1]
    moreNonConf = sum(1 for score in trainList if score > alphaPredEx)
    pvalue = moreNonConf / float(len(trainList))

    return pvalue
Example #6
0
def getPvalue(train, predEx, label, SVMparam, method="avgNN", measure=None):
    """
    method; avgNN, scaledMinNN, minNN, kNNratio

    Returns (pvalue, pvalueMondrian, SVMparam): the plain and
    label-conditional (Mondrian) p-values plus the possibly updated
    SVM parameters.
    """
    # Copy predEx into a one-row table and force its class to ``label``.
    candidate = Orange.data.Table(predEx.domain, [predEx])
    candidate[0][candidate.domain.classVar] = label

    # Append the labeled candidate to the training data (train's domain leads).
    extended = dataUtilities.concatenate([train, candidate], True)[0]

    if method == "scaledMinNN":
        # Statistics of minimum distances within the train set, used for scaling.
        maxDistRatio = getMinDistRatio(train)

    # One non-conformity score per example; the Mondrian list keeps only
    # examples that share the candidate's label.
    scores = []
    mondrianScores = []
    for pos in range(len(extended)):
        if method == "scaledMinNN":
            score = getScore(pos, extended, method, maxDistRatio)
        else:
            score, SVMparam = getScore(pos, extended, SVMparam, method,
                                       None, measure)
        scores.append(score)
        if extended[pos].get_class().value == label:
            mondrianScores.append(score)

    # The candidate's score is the last one appended; the helper computes
    # the fraction of scores that are at least as non-conforming.
    pvalue = getPvalueFromList(scores)
    pvalueMondrian = getPvalueFromList(mondrianScores)

    return pvalue, pvalueMondrian, SVMparam
Example #7
0
def getPvalue(train, predEx, label, SVMparam, method="avgNN", measure=None):
    """
    method; avgNN, scaledMinNN, minNN, kNNratio

    Returns (pvalue, pvalueMondrian, SVMparam): the plain and
    label-conditional (Mondrian) p-values plus the possibly updated
    SVM parameters.
    """
    # Build a one-example table for predEx and assign it the tested label.
    labeled = Orange.data.Table(predEx.domain, [predEx])
    labeled[0][labeled.domain.classVar] = label

    # Extend the training table with the labeled candidate example.
    joined = dataUtilities.concatenate([train, labeled])
    joined = joined[0]

    if method == "scaledMinNN":
        # Scaling factor derived from min-distance statistics of the train set.
        maxDistRatio = getMinDistRatio(train)

    # Collect a non-conformity score per example; the Mondrian variant keeps
    # only examples whose class matches the candidate label.
    allScores, sameLabelScores = [], []
    for rowIdx in range(len(joined)):
        if method == "scaledMinNN":
            rowScore = getScore(rowIdx, joined, method, maxDistRatio)
        else:
            rowScore, SVMparam = getScore(rowIdx, joined, SVMparam, method, None, measure)
        allScores.append(rowScore)
        if joined[rowIdx].get_class().value == label:
            sameLabelScores.append(rowScore)

    # The candidate's score is last in each list; the helper computes the
    # fraction of scores that are at least as non-conforming.
    pvalue = getPvalueFromList(allScores)
    pvalueMondrian = getPvalueFromList(sameLabelScores)

    return pvalue, pvalueMondrian, SVMparam
Example #8
0
    def proportion_test(
            self,
            learners,
            data,
            learning_proportion,
            times=10,
            stratification=Orange.core.MakeRandomIndices.StratifiedIfPossible,
            preprocessors=(),
            random_generator=0,
            callback=None,
            store_classifiers=False,
            store_examples=False,
            testAttrFilter=None,
            testFilterVal=None):
        """
        Perform a test, where learners are trained and tested on different data sets. Training and test sets are
        generated by proportionally splitting data.

        :param learners: list of learners to be tested
        :param data: a dataset used for evaluation
        :param learning_proportion: proportion of examples to be used for training
        :param times: number of test repetitions
        :param stratification: use stratification when constructing train and test sets.
        :param preprocessors: a list of preprocessors to be used on data.
        :param random_generator: seed/generator forwarded to MakeRandomIndices2.
        :param callback: a function that is be called after each classifier is computed.
        :param store_classifiers: if True, classifiers will be accessible in test_results.
        :param store_examples: if True, examples will be accessible in test_results.
        :param testAttrFilter: attribute used to pre-filter examples -- presumably
            selects a fixed test subset; verify against getExamplesAndSetTrainBias.
        :param testFilterVal: value(s) of testAttrFilter used by the filter -- TODO confirm.
        :return: :obj:`ExperimentResults`
        """
        examples = self.getExamplesAndSetTrainBias(data, testAttrFilter,
                                                   testFilterVal)

        # Index sampler: marks a proportion p0 of the examples as split 0
        # (the learning set); the remainder becomes split 1 (the test set).
        pick = Orange.core.MakeRandomIndices2(stratified=stratification,
                                              p0=learning_proportion,
                                              randomGenerator=random_generator)

        examples, weight = demangle_examples(examples)

        test_type = self.check_test_type(examples, learners)

        test_results = orngTest.ExperimentResults(
            times,
            classifierNames=[getobjectname(l) for l in learners],
            domain=examples.domain,
            test_type=test_type,
            weights=weight)
        test_results.classifiers = []
        # offset = index of the first result row of the current repetition,
        # needed because results from all repetitions share one flat list.
        offset = 0
        for time in xrange(times):
            indices = pick(examples)
            learn_set = examples.selectref(indices, 0)
            test_set = examples.selectref(indices, 1)
            # Add train bias to the learn_set
            if self.trainBias:
                learn_set = dataUtilities.concatenate(
                    [learn_set, self.trainBias], True)[0]
            classifiers, results = self._learn_and_test_on_test_data(
                learners, learn_set, weight, test_set, preprocessors)
            if store_classifiers:
                test_results.classifiers.append(classifiers)

            # Pre-create one result row per test example for this repetition,
            # then fill in each classifier's prediction by row index.
            test_results.results.extend(
                test_results.create_tested_example(time, example)
                for i, example in enumerate(test_set))
            for example, classifier, result in results:
                test_results.results[offset + example].set_result(
                    classifier, *result)
            offset += len(test_set)

            if callback:
                callback()
        return test_results
Example #9
0
    def apply(self):
        """Concatenate the primary and additional input tables and send the
        result on the "Examples" channel, updating the widget status text.

        Requires dataUtilities to expose a version >= self.minReqVer;
        otherwise an error is reported and None is sent.  Attributes that
        had to be converted during concatenation are collected and shown
        as a warning.
        """
        incompatibleAttr = []
        # Guard against an outdated (or version-less) dataUtilities module.
        if not hasattr(dataUtilities,
                       "version") or (dataUtilities.version < self.minReqVer):
            QMessageBox.critical(
                None, "Concatenate widget",
                "This widget requires module dataUtilities version " +
                str(self.minReqVer) + "\nAborting Concatenation now!")
            self.error(
                0, "This widget requires module dataUtilities version " +
                str(self.minReqVer))
            self.infoStatus.setText(
                'This widget requires module dataUtilities version ' +
                str(self.minReqVer))
            self.send("Examples", None)
            return
        # Clear any stale status/warning/error from a previous run.
        self.infoStatus.setText('')
        self.warning(0)
        self.error(0)
        if self.primary:
            if not self.additional:
                newTable = self.primary
            else:
                # Primary table leads: its domain wins on conflicts.
                allDatasets = [self.primary] + [
                    table for table in self.additional.values()
                ]
                newTable, status = dataUtilities.concatenate(
                    allDatasets, useFirstAsLeader=True)
                for attr in status:
                    if attr not in incompatibleAttr:
                        incompatibleAttr.append(attr)
        else:
            if not self.additional:
                newTable = None
            else:
                # No leader table: optionally merge the domains instead.
                newTable, status = dataUtilities.concatenate(
                    [table for table in self.additional.values()],
                    useFirstAsLeader=False,
                    mergeDomains=(self.mergeAttributes == 0))
                for attr in status:
                    if attr not in incompatibleAttr:
                        incompatibleAttr.append(attr)
        if not newTable:
            status = "No data"
        else:
            # Build the human-readable status summary.
            status = "New concatenated domain attributes: " + str(
                len(newTable.domain.attributes)) + "\n"
            status = status + "New concatenated Data examples: " + str(
                len(newTable)) + "\n"
            status = status + "New concatenated class: " + str(
                newTable.domain.classVar) + "\n"
            if len(incompatibleAttr) > 0:
                # Fixed typo in user-facing message: "imcompatible".
                self.warning(0, "Some incompatible attributes were converted")
                self.attrsMsg = ""
                for attr in incompatibleAttr:
                    self.attrsMsg += "  " + attr + "      (used " + str(
                        newTable.domain[attr]).replace("'" + attr + "'",
                                                       "") + ")" + "\n"
                status = status + "Incompatible Attributes converted: " + str(
                    len(incompatibleAttr)) + "    (Check original datasets)\n"
                self.viewBT.setDisabled(False)
            else:
                self.viewBT.setDisabled(True)
                self.attrsMsg = ""
        self.infoStatus.setText(status)

        self.send("Examples", newTable)
Example #10
0
    def apply(self):
        """Concatenate the primary and additional input tables and send the
        result on the "Examples" channel, updating the widget status text.

        Requires dataUtilities to expose a version >= self.minReqVer;
        otherwise an error is reported and None is sent.  Attributes that
        had to be converted during concatenation are collected and shown
        as a warning.
        """
        incompatibleAttr = []
        # Guard against an outdated (or version-less) dataUtilities module.
        if not hasattr(dataUtilities, "version") or (dataUtilities.version < self.minReqVer):
            QMessageBox.critical(
                None,
                "Concatenate widget",
                "This widget requires module dataUtilities version "
                + str(self.minReqVer)
                + "\nAborting Concatenation now!",
            )
            self.error(0, "This widget requires module dataUtilities version " + str(self.minReqVer))
            self.infoStatus.setText("This widget requires module dataUtilities version " + str(self.minReqVer))
            self.send("Examples", None)
            return
        # Clear any stale status/warning/error from a previous run.
        self.infoStatus.setText("")
        self.warning(0)
        self.error(0)
        if self.primary:
            if not self.additional:
                newTable = self.primary
            else:
                # Primary table leads: its domain wins on conflicts.
                allDatasets = [self.primary] + [table for table in self.additional.values()]
                newTable, status = dataUtilities.concatenate(allDatasets, useFirstAsLeader=True)
                for attr in status:
                    if attr not in incompatibleAttr:
                        incompatibleAttr.append(attr)
        else:
            if not self.additional:
                newTable = None
            else:
                # No leader table: optionally merge the domains instead.
                newTable, status = dataUtilities.concatenate(
                    [table for table in self.additional.values()],
                    useFirstAsLeader=False,
                    mergeDomains=(self.mergeAttributes == 0),
                )
                for attr in status:
                    if attr not in incompatibleAttr:
                        incompatibleAttr.append(attr)
        if not newTable:
            status = "No data"
        else:
            # Build the human-readable status summary.
            status = "New concatenated domain attributes: " + str(len(newTable.domain.attributes)) + "\n"
            status = status + "New concatenated Data examples: " + str(len(newTable)) + "\n"
            status = status + "New concatenated class: " + str(newTable.domain.classVar) + "\n"
            if len(incompatibleAttr) > 0:
                # Fixed typo in user-facing message: "imcompatible".
                self.warning(0, "Some incompatible attributes were converted")
                self.attrsMsg = ""
                for attr in incompatibleAttr:
                    self.attrsMsg += (
                        "  "
                        + attr
                        + "      (used "
                        + str(newTable.domain[attr]).replace("'" + attr + "'", "")
                        + ")"
                        + "\n"
                    )
                status = (
                    status
                    + "Incompatible Attributes converted: "
                    + str(len(incompatibleAttr))
                    + "    (Check original datasets)\n"
                )
                self.viewBT.setDisabled(False)
            else:
                self.viewBT.setDisabled(True)
                self.attrsMsg = ""
        self.infoStatus.setText(status)

        self.send("Examples", newTable)