def one_fold_with_indices(self, learners, examples, fold, indices, preprocessors=(), weight=0):
    """Run a single fold of a cross-validation-style experiment.

    Examples whose entry in `indices` equals `fold` form the test set; all
    remaining examples form the training set.

    :param learners: learners to train on this fold's training portion.
    :param examples: the full example table being partitioned.
    :param fold: value in `indices` that marks this fold's test examples.
    :param indices: per-example fold assignment.
    :param preprocessors: preprocessors applied to the train/test split.
    :param weight: weight id handed to each learner.
    :return: (results, classifiers); a pair of empty tuples when either
        split is empty before preprocessing.
    :raises SystemError: when preprocessing leaves either split empty.
    """
    train_part = examples.selectref(indices, fold, negate=1)
    eval_part = examples.selectref(indices, fold, negate=0)
    if not (len(train_part) and len(eval_part)):
        return (), ()
    # learning
    train_part, eval_part = self._preprocess_data(train_part, eval_part, preprocessors)
    # Prepend the configured training-bias examples, if any.
    if self.trainBias:
        train_part = dataUtilities.concatenate([train_part, self.trainBias], True)[0]
    if not train_part:
        raise SystemError("no training examples after preprocessing")
    if not eval_part:
        raise SystemError("no test examples after preprocessing")
    classifiers = []
    for learner in learners:
        classifiers.append(learner(train_part, weight))
    # testing: positions (within `examples`) of this fold's test examples
    fold_positions = (pos for pos in range(len(examples)) if indices[pos] == fold)
    return self._test_on_data(classifiers, eval_part, fold_positions), classifiers
def one_fold_with_indices(self, learners, examples, fold, indices, preprocessors=(), weight=0):
    """Perform one fold of a cross-validation-like procedure using provided indices.

    :param learners: learners to train on the fold's training portion.
    :param examples: the full example table being partitioned.
    :param fold: value in `indices` that marks the test examples of this fold.
    :param indices: per-example fold assignment for `examples`.
    :param preprocessors: preprocessors applied to the train/test sets.
    :param weight: weight id passed through to each learner.
    :return: (results, classifiers), or a pair of empty tuples when either
        split is empty before preprocessing.
    :raises SystemError: when preprocessing leaves either set empty.
    """
    # Examples whose index != fold train; those == fold test.
    learn_set = examples.selectref(indices, fold, negate=1)
    test_set = examples.selectref(indices, fold, negate=0)
    if len(learn_set)==0 or len(test_set)==0:
        return (), ()
    # learning
    learn_set, test_set = self._preprocess_data(learn_set, test_set, preprocessors)
    # Add train bias to the learn_set
    if self.trainBias:
        learn_set = dataUtilities.concatenate([learn_set, self.trainBias], True)[0]
    if not learn_set:
        raise SystemError("no training examples after preprocessing")
    if not test_set:
        raise SystemError("no test examples after preprocessing")
    classifiers = [learner(learn_set, weight) for learner in learners]
    # testing
    # Positions (into the original `examples`) of this fold's test examples.
    testset_ids = (i for i, _ in enumerate(examples) if indices[i] == fold)
    results = self._test_on_data(classifiers, test_set, testset_ids)
    return results, classifiers
def proportion_test(self, learners, data, learning_proportion, times=10, stratification=Orange.core.MakeRandomIndices.StratifiedIfPossible, preprocessors=(), random_generator=0, callback=None, store_classifiers=False, store_examples=False, testAttrFilter=None, testFilterVal=None):
    """ Perform a test, where learners are trained and tested on different data sets.
    Training and test sets are generated by proportionally splitting data.

    :param learners: list of learners to be tested
    :param data: a dataset used for evaluation
    :param learning_proportion: proportion of examples to be used for training
    :param times: number of test repetitions
    :param stratification: use stratification when constructing train and test sets.
    :param preprocessors: a list of preprocessors to be used on data.
    :param callback: a function that is called after each classifier is computed.
    :param store_classifiers: if True, classifiers will be accessible in test_results.
    :param store_examples: if True, examples will be accessible in test_results.
        NOTE(review): accepted for API compatibility but not referenced in this body.
    :param testAttrFilter: attribute used to pre-select test examples (see getExamplesAndSetTrainBias).
    :param testFilterVal: value of testAttrFilter that selects test examples.
    :return: :obj:`ExperimentResults`
    """
    # May split `data` into a fixed test portion and a train bias (stored on self).
    examples = self.getExamplesAndSetTrainBias(data, testAttrFilter, testFilterVal)
    # p0 = learning_proportion: index 0 marks the training part of each split.
    pick = Orange.core.MakeRandomIndices2(stratified = stratification, p0 = learning_proportion, randomGenerator = random_generator)
    examples, weight = demangle_examples(examples)
    test_type = self.check_test_type(examples, learners)
    test_results = orngTest.ExperimentResults(times, classifierNames = [getobjectname(l) for l in learners], domain=examples.domain, test_type = test_type, weights=weight)
    test_results.classifiers = []
    # `offset` tracks where the current repetition's results start within the
    # flat test_results.results list.
    offset=0
    for time in xrange(times):
        indices = pick(examples)
        learn_set = examples.selectref(indices, 0)
        test_set = examples.selectref(indices, 1)
        # Add train bias to the learn_set
        if self.trainBias:
            learn_set = dataUtilities.concatenate([learn_set, self.trainBias], True)[0]
        classifiers, results = self._learn_and_test_on_test_data(learners, learn_set, weight, test_set, preprocessors)
        if store_classifiers:
            test_results.classifiers.append(classifiers)
        # One tested-example record per test example, tagged with the repetition.
        test_results.results.extend(test_results.create_tested_example(time, example) for i, example in enumerate(test_set))
        # `example` here is the index within this repetition's test set.
        for example, classifier, result in results:
            test_results.results[offset+example].set_result(classifier, *result)
        offset += len(test_set)
        if callback:
            callback()
    return test_results
def getPvalue(train, predEx, label, method="avgNN", measure=None):
    """Return the conformal p-value of `predEx` under the tentative class `label`.

    method: one of "avgNN", "scaledMinNN", "minNN", "kNNratio".

    The example is relabeled, appended to `train` (keeping only train's
    attributes), a non-conformity score is computed for every example, and the
    p-value is the fraction of training scores strictly greater than the score
    of the relabeled example.  Sorted scores are dumped to "NonConf.txt" as a
    debugging aid.
    """
    # Set label to class of predEx.
    newPredEx = Orange.data.Table(predEx.domain, [predEx])
    newPredEx[0][newPredEx.domain.classVar] = label
    # Add predEx to train, but use only the attributes of train!!
    extTrain = dataUtilities.concatenate([train, newPredEx], True)[0]
    if method == "scaledMinNN":
        # Calculate average and std of min distances in train set.
        maxDistRatio = getMinDistRatio(train)
    # Non-conformity score for each example in train + predEx-with-label.
    nonConfList = []
    for idx in range(len(extTrain)):
        if method == "scaledMinNN":
            alpha = getScore(idx, extTrain, method, maxDistRatio)
        else:
            alpha = getScore(idx, extTrain, method, None, measure)
        nonConfList.append(alpha)
    # Debug dump of the sorted scores; `with` guarantees the handle is closed
    # even if a write fails (the original leaked the handle on error).
    with open("NonConf.txt", "w") as fid:
        for score in sorted(nonConfList):
            fid.write(str(score) + "\n")
    # The last score belongs to predEx; the p-value is the fraction of
    # training scores strictly greater than it.
    alphaPredEx = nonConfList[-1]
    trainList = nonConfList[:-1]
    # NOTE(review): raises ZeroDivisionError when `train` is empty — confirm
    # callers never pass an empty training set.
    moreNonConf = sum(1 for score in trainList if score > alphaPredEx)
    return moreNonConf / float(len(trainList))
def getPvalue(train, predEx, label, method = "avgNN", measure = None):
    """Return the conformal p-value of `predEx` under the tentative class `label`.

    method: one of "avgNN", "scaledMinNN", "minNN", "kNNratio".

    The example is relabeled, appended to `train`, a non-conformity score is
    computed for every example, and the p-value is the fraction of training
    scores strictly greater than the score of the relabeled example.  Sorted
    scores are dumped to "NonConf.txt" as a debugging aid.
    """
    # Set label to class of predEx.
    newPredEx = Orange.data.Table(predEx.domain, [predEx])
    newPredEx[0][newPredEx.domain.classVar] = label
    # Add predEx to train.
    extTrain = dataUtilities.concatenate([train, newPredEx])[0]
    if method == "scaledMinNN":
        # Calculate average and std of min distances in train set.
        maxDistRatio = getMinDistRatio(train)
    # Non-conformity score for each example in train + predEx-with-label.
    nonConfList = []
    for idx in range(len(extTrain)):
        if method == "scaledMinNN":
            alpha = getScore(idx, extTrain, method, maxDistRatio)
        else:
            alpha = getScore(idx, extTrain, method, None, measure)
        nonConfList.append(alpha)
    # Debug dump of the sorted scores; `with` guarantees the handle is closed
    # even if a write fails (the original leaked the handle on error).
    with open("NonConf.txt", "w") as fid:
        for score in sorted(nonConfList):
            fid.write(str(score) + "\n")
    # The last score belongs to predEx; the p-value is the fraction of
    # training scores strictly greater than it.
    alphaPredEx = nonConfList[-1]
    trainList = nonConfList[:-1]
    # NOTE(review): raises ZeroDivisionError when `train` is empty — confirm
    # callers never pass an empty training set.
    moreNonConf = sum(1 for score in trainList if score > alphaPredEx)
    return moreNonConf / float(len(trainList))
def getPvalue(train, predEx, label, SVMparam, method="avgNN", measure=None):
    """Compute plain and Mondrian conformal p-values for `predEx` under the
    tentative class `label`.

    method: one of "avgNN", "scaledMinNN", "minNN", "kNNratio".

    Returns (pvalue, pvalueMondrian, SVMparam); `SVMparam` may be updated by
    getScore and is threaded back to the caller.
    """
    # Copy predEx into its own table and force the tentative class label.
    labeledEx = Orange.data.Table(predEx.domain, [predEx])
    labeledEx[0][labeledEx.domain.classVar] = label
    # Append the relabeled example to the training data (train leads the domain).
    extTrain = dataUtilities.concatenate([train, labeledEx], True)[0]
    maxDistRatio = None
    if method == "scaledMinNN":
        # Average and std of minimum distances over the training set.
        maxDistRatio = getMinDistRatio(train)
    # One non-conformity score per example; the Mondrian list keeps only the
    # scores of examples sharing the tentative label.
    scores = []
    mondrianScores = []
    for pos in range(len(extTrain)):
        if method == "scaledMinNN":
            score = getScore(pos, extTrain, method, maxDistRatio)
        else:
            score, SVMparam = getScore(pos, extTrain, SVMparam, method, None, measure)
        scores.append(score)
        if extTrain[pos].get_class().value == label:
            mondrianScores.append(score)
    # The relabeled example's score is last in each list; the p-value is the
    # fraction of examples with a score greater than that of predEx.
    pvalue = getPvalueFromList(scores)
    pvalueMondrian = getPvalueFromList(mondrianScores)
    return pvalue, pvalueMondrian, SVMparam
def getPvalue(train, predEx, label, SVMparam, method="avgNN", measure=None): """ method; avgNN, scaledMinNN, minNN, kNNratio """ # Set label to class of predEx newPredEx = Orange.data.Table(predEx.domain, [predEx]) newPredEx[0][newPredEx.domain.classVar] = label # Add predEx to train extTrain = dataUtilities.concatenate([train, newPredEx]) extTrain = extTrain[0] # Calculate a non-conf score for each ex in train + predEx with given label if method == "scaledMinNN": # Calculate average and std of min distanses in train set maxDistRatio = getMinDistRatio(train) nonConfList = [] nonConfListMondrian = [] for idx in range(len(extTrain)): if method == "scaledMinNN": alpha = getScore(idx, extTrain, method, maxDistRatio) else: alpha, SVMparam = getScore(idx, extTrain, SVMparam, method, None, measure) nonConfList.append(alpha) if extTrain[idx].get_class().value == label: nonConfListMondrian.append(alpha) # nonConfListSorted = copy.deepcopy(nonConfList) # nonConfListSorted.sort() # nonConfListMondrianSorted = copy.deepcopy(nonConfListMondrian) # nonConfListMondrianSorted.sort() # fid = open("NonConf.txt", "w") # for ex in nonConfListSorted: # fid.write(str(ex)+"\n") # fid.close() # The last non-conf score is that of predEx # The p-value is the fraction of ex with alpha gt that of predEx pvalue = getPvalueFromList(nonConfList) pvalueMondrian = getPvalueFromList(nonConfListMondrian) return pvalue, pvalueMondrian, SVMparam
def proportion_test(
        self, learners, data, learning_proportion, times=10,
        stratification=Orange.core.MakeRandomIndices.StratifiedIfPossible,
        preprocessors=(), random_generator=0, callback=None,
        store_classifiers=False, store_examples=False,
        testAttrFilter=None, testFilterVal=None):
    """Train and test learners on repeated proportional splits of `data`.

    :param learners: list of learners to be tested.
    :param data: a dataset used for evaluation.
    :param learning_proportion: proportion of examples used for training.
    :param times: number of test repetitions.
    :param stratification: stratification mode for building the splits.
    :param preprocessors: a list of preprocessors to be used on data.
    :param callback: called after each repetition's classifiers are computed.
    :param store_classifiers: if True, classifiers are kept in test_results.
    :param store_examples: if True, examples are kept in test_results.
    :param testAttrFilter: attribute used for test-example pre-selection.
    :param testFilterVal: value of testAttrFilter selecting test examples.
    :return: :obj:`ExperimentResults`
    """
    examples = self.getExamplesAndSetTrainBias(data, testAttrFilter, testFilterVal)
    splitter = Orange.core.MakeRandomIndices2(
        stratified=stratification, p0=learning_proportion,
        randomGenerator=random_generator)
    examples, weight = demangle_examples(examples)
    test_type = self.check_test_type(examples, learners)
    test_results = orngTest.ExperimentResults(
        times,
        classifierNames=[getobjectname(l) for l in learners],
        domain=examples.domain,
        test_type=test_type,
        weights=weight)
    test_results.classifiers = []
    # Number of result slots already filled by previous repetitions.
    filled = 0
    for rep in xrange(times):
        split = splitter(examples)
        train_part = examples.selectref(split, 0)
        eval_part = examples.selectref(split, 1)
        # Prepend the configured training-bias examples, if any.
        if self.trainBias:
            train_part = dataUtilities.concatenate(
                [train_part, self.trainBias], True)[0]
        classifiers, rep_results = self._learn_and_test_on_test_data(
            learners, train_part, weight, eval_part, preprocessors)
        if store_classifiers:
            test_results.classifiers.append(classifiers)
        # One tested-example record per test example, tagged with the repetition.
        for example in eval_part:
            test_results.results.append(
                test_results.create_tested_example(rep, example))
        # `ex_idx` indexes into this repetition's test set.
        for ex_idx, cls_idx, outcome in rep_results:
            test_results.results[filled + ex_idx].set_result(cls_idx, *outcome)
        filled += len(eval_part)
        if callback:
            callback()
    return test_results
def apply(self):
    """Concatenate the connected example tables and emit the result.

    Checks the required dataUtilities version, concatenates the primary
    table (domain leader, when present) with the additional tables, reports
    converted/incompatible attributes in the widget status, and sends the
    resulting table on the "Examples" channel (None on version failure).
    """
    incompatibleAttr = []
    # Refuse to run against an older dataUtilities than the widget requires.
    if not hasattr(dataUtilities, "version") or (dataUtilities.version < self.minReqVer):
        reqMsg = "This widget requires module dataUtilities version " + str(self.minReqVer)
        QMessageBox.critical(None, "Concatenate widget",
                             reqMsg + "\nAborting Concatenation now!")
        self.error(0, reqMsg)
        self.infoStatus.setText(reqMsg)
        self.send("Examples", None)
        return
    self.infoStatus.setText('')
    self.warning(0)
    self.error(0)
    if self.primary:
        if not self.additional:
            newTable = self.primary
        else:
            # Primary table leads: its domain drives the concatenation.
            allDatasets = [self.primary] + list(self.additional.values())
            newTable, converted = dataUtilities.concatenate(allDatasets, useFirstAsLeader=True)
            for attr in converted:
                if attr not in incompatibleAttr:
                    incompatibleAttr.append(attr)
    else:
        if not self.additional:
            newTable = None
        else:
            newTable, converted = dataUtilities.concatenate(
                list(self.additional.values()),
                useFirstAsLeader=False,
                mergeDomains=(self.mergeAttributes == 0))
            for attr in converted:
                if attr not in incompatibleAttr:
                    incompatibleAttr.append(attr)
    if not newTable:
        status = "No data"
    else:
        # Update Status
        status = "New concatenated domain attributes: " + str(len(newTable.domain.attributes)) + "\n"
        status += "New concatenated Data examples: " + str(len(newTable)) + "\n"
        status += "New concatenated class: " + str(newTable.domain.classVar) + "\n"
        if len(incompatibleAttr) > 0:
            # Fixed typo in the user-facing message ("imcompatible").
            self.warning(0, "Some incompatible attributes were converted")
            self.attrsMsg = ""
            for attr in incompatibleAttr:
                self.attrsMsg += " " + attr + " (used " + str(newTable.domain[attr]).replace("'" + attr + "'", "") + ")" + "\n"
            status += "Incompatible Attributes converted: " + str(len(incompatibleAttr)) + " (Check original datasets)\n"
            self.viewBT.setDisabled(False)
        else:
            self.viewBT.setDisabled(True)
            self.attrsMsg = ""
    self.infoStatus.setText(status)
    self.send("Examples", newTable)
def apply(self):
    """Concatenate the connected example tables and emit the result.

    Checks the required dataUtilities version, concatenates the primary
    table (domain leader, when present) with the additional tables, reports
    converted/incompatible attributes in the widget status, and sends the
    resulting table on the "Examples" channel (None on version failure).
    """
    incompatibleAttr = []
    # Refuse to run against an older dataUtilities than the widget requires.
    if not hasattr(dataUtilities, "version") or (dataUtilities.version < self.minReqVer):
        reqMsg = "This widget requires module dataUtilities version " + str(self.minReqVer)
        QMessageBox.critical(
            None,
            "Concatenate widget",
            reqMsg + "\nAborting Concatenation now!",
        )
        self.error(0, reqMsg)
        self.infoStatus.setText(reqMsg)
        self.send("Examples", None)
        return
    self.infoStatus.setText("")
    self.warning(0)
    self.error(0)
    if self.primary:
        if not self.additional:
            newTable = self.primary
        else:
            # Primary table leads: its domain drives the concatenation.
            allDatasets = [self.primary] + list(self.additional.values())
            newTable, converted = dataUtilities.concatenate(allDatasets, useFirstAsLeader=True)
            for attr in converted:
                if attr not in incompatibleAttr:
                    incompatibleAttr.append(attr)
    else:
        if not self.additional:
            newTable = None
        else:
            newTable, converted = dataUtilities.concatenate(
                list(self.additional.values()),
                useFirstAsLeader=False,
                mergeDomains=(self.mergeAttributes == 0),
            )
            for attr in converted:
                if attr not in incompatibleAttr:
                    incompatibleAttr.append(attr)
    if not newTable:
        status = "No data"
    else:
        # Build the widget status summary.
        status = "New concatenated domain attributes: " + str(len(newTable.domain.attributes)) + "\n"
        status = status + "New concatenated Data examples: " + str(len(newTable)) + "\n"
        status = status + "New concatenated class: " + str(newTable.domain.classVar) + "\n"
        if len(incompatibleAttr) > 0:
            # Fixed typo in the user-facing message ("imcompatible").
            self.warning(0, "Some incompatible attributes were converted")
            self.attrsMsg = ""
            for attr in incompatibleAttr:
                self.attrsMsg += (
                    " " + attr + " (used "
                    + str(newTable.domain[attr]).replace("'" + attr + "'", "")
                    + ")" + "\n"
                )
            status = (
                status
                + "Incompatible Attributes converted: "
                + str(len(incompatibleAttr))
                + " (Check original datasets)\n"
            )
            self.viewBT.setDisabled(False)
        else:
            self.viewBT.setDisabled(True)
            self.attrsMsg = ""
    self.infoStatus.setText(status)
    self.send("Examples", newTable)