def cf_run(learner, data, k_folds, name, widget=None):
    """Run a classification filter.

    Performs k-fold cross-validation and flags every instance whose
    predicted class disagrees with its true class as potentially noisy.

    :param learner: classifier wrapper exposing ``build_classifier`` and
        ``apply_classifier``
    :param data: Orange dataset
    :param k_folds: number of cross-validation folds
    :param name: label passed to ``get_weka_name`` for the result
    :param widget: optional widget whose ``progress`` is updated per fold
    :return: dict with 'inds' (sorted noisy instance indices) and 'name'
    """
    noisy_indices = []
    selection = orange.MakeRandomIndicesCV(data, folds=k_folds)
    count_noisy = [0] * k_folds
    for test_fold in range(k_folds):
        # Train on every fold except the current one; test on the held-out fold.
        train_data = convert_dataset_from_orange_to_scikit(
            data.select(selection, test_fold, negate=1))
        # Original-dataset indices of the instances in this test fold.
        test_inds = [i for i in range(len(selection))
                     if selection[i] == test_fold]
        test_data = convert_dataset_from_orange_to_scikit(
            data.select(selection, test_fold))
        learner.build_classifier(train_data)
        predicted = learner.apply_classifier(test_data)
        # A mismatch between the true and predicted class marks the
        # instance as potentially noisy.
        for i in range(len(predicted.target)):
            if predicted.target[i] != predicted.targetPredicted[i]:
                noisy_indices.append(test_inds[i])
                count_noisy[test_fold] += 1
        if widget is not None:
            widget.progress = int((test_fold + 1) * 1.0 / k_folds * 100)
            widget.save()
    return {'inds': sorted(noisy_indices), 'name': get_weka_name(name)}
def cf_run(learner, data, k_folds, name, widget=None):
    """Run a classification filter.

    Performs k-fold cross-validation and flags every instance whose
    predicted class disagrees with its true class as potentially noisy.

    :param learner: classifier wrapper exposing ``build_classifier`` and
        ``apply_classifier``
    :param data: Orange dataset
    :param k_folds: number of cross-validation folds
    :param name: label passed to ``get_weka_name`` for the result
    :param widget: optional widget whose ``progress`` is updated per fold
    :return: dict with 'inds' (sorted noisy instance indices) and 'name'
    """
    noisy_indices = []
    selection = orange.MakeRandomIndicesCV(data, folds=k_folds)
    count_noisy = [0] * k_folds
    for test_fold in range(k_folds):
        # Train on every fold except the current one; test on the held-out fold.
        train_data = convert_dataset_from_orange_to_scikit(
            data.select(selection, test_fold, negate=1))
        # Original-dataset indices of the instances in this test fold.
        test_inds = [i for i in range(len(selection))
                     if selection[i] == test_fold]
        test_data = convert_dataset_from_orange_to_scikit(
            data.select(selection, test_fold))
        learner.build_classifier(train_data)
        predicted = learner.apply_classifier(test_data)
        # A mismatch between the true and predicted class marks the
        # instance as potentially noisy.
        for i in range(len(predicted.target)):
            if predicted.target[i] != predicted.targetPredicted[i]:
                noisy_indices.append(test_inds[i])
                count_noisy[test_fold] += 1
        if widget is not None:
            widget.progress = int((test_fold + 1) * 1.0 / k_folds * 100)
            widget.save()
    return {'inds': sorted(noisy_indices), 'name': get_weka_name(name)}
def add_class_noise(input_dict):
    """Widget Add Class Noise.

    Injects class noise into a nominal-class dataset.

    :param input_dict: dict with keys 'data' (scikit dataset),
        'noise_level' and 'rnd_seed'
    :return: dict with 'noise_inds' (indices of perturbed instances)
        and 'noisy_data' (the perturbed scikit dataset)
    :raises ValueError: if the dataset's target is not nominal
    """
    data_scikit = input_dict['data']
    if not d.is_target_nominal(data_scikit):
        # ValueError is narrower than the bare Exception raised before;
        # callers catching Exception still work.
        raise ValueError(
            "Widget Add Class Noise accepts only datasets with nominal class!")
    data = u.convert_dataset_from_scikit_to_orange(data_scikit)
    # Imported lazily: the noise algorithms are only needed by this widget.
    import cf_noise_detection.noiseAlgorithms4lib as nalg
    noise_indices, orange_data = nalg.add_class_noise(
        data, input_dict['noise_level'], input_dict['rnd_seed'])
    noisy_data = u.convert_dataset_from_orange_to_scikit(orange_data)
    return {'noise_inds': noise_indices, 'noisy_data': noisy_data}
def noiserank_select(postdata, input_dict, output_dict):
    """Handle a NoiseRank selection post: reduce the dataset to the
    user-selected instance indices.

    :param postdata: request data; key 'selected' holds the chosen indices
    :param input_dict: dict with key 'data' (scikit dataset)
    :param output_dict: dict updated in place with 'indices' and 'selection'
    :return: the (mutated) output_dict
    """
    try:
        # Only this lookup is expected to fail; keeping the try body
        # minimal stops unrelated KeyErrors from being silently swallowed.
        selected = postdata['selected']
    except KeyError:
        output_dict['selection'] = None
        return output_dict
    outselection = [int(i) for i in selected]
    output_dict['indices'] = outselection
    data = u.convert_dataset_from_scikit_to_orange(input_dict['data'])
    # Build a 0/1 mask and keep only the instances flagged with 1.
    selection = [0] * len(data)
    for i in outselection:
        selection[i] = 1
    outdata = data.select(selection, 1)
    output_dict['selection'] = u.convert_dataset_from_orange_to_scikit(outdata)
    return output_dict
def add_class_noise(input_dict):
    """Widget Add Class Noise.

    Injects class noise into a nominal-class dataset.

    :param input_dict: dict with keys 'data' (scikit dataset),
        'noise_level' and 'rnd_seed'
    :return: dict with 'noise_inds' (indices of perturbed instances)
        and 'noisy_data' (the perturbed scikit dataset)
    :raises ValueError: if the dataset's target is not nominal
    """
    data_scikit = input_dict['data']
    if not d.is_target_nominal(data_scikit):
        # ValueError is narrower than the bare Exception raised before;
        # callers catching Exception still work.
        raise ValueError(
            "Widget Add Class Noise accepts only datasets with nominal class!")
    data = u.convert_dataset_from_scikit_to_orange(data_scikit)
    # Imported lazily: the noise algorithms are only needed by this widget.
    import cf_noise_detection.noiseAlgorithms4lib as nalg
    noise_indices, orange_data = nalg.add_class_noise(
        data, input_dict['noise_level'], input_dict['rnd_seed'])
    noisy_data = u.convert_dataset_from_orange_to_scikit(orange_data)
    return {'noise_inds': noise_indices, 'noisy_data': noisy_data}
def noiserank_select(postdata, input_dict, output_dict):
    """Handle a NoiseRank selection post: reduce the dataset to the
    user-selected instance indices.

    :param postdata: request data; key 'selected' holds the chosen indices
    :param input_dict: dict with key 'data' (scikit dataset)
    :param output_dict: dict updated in place with 'indices' and 'selection'
    :return: the (mutated) output_dict
    """
    try:
        # Only this lookup is expected to fail; keeping the try body
        # minimal stops unrelated KeyErrors from being silently swallowed.
        selected = postdata['selected']
    except KeyError:
        output_dict['selection'] = None
        return output_dict
    outselection = [int(i) for i in selected]
    output_dict['indices'] = outselection
    data = u.convert_dataset_from_scikit_to_orange(input_dict['data'])
    # Build a 0/1 mask and keep only the instances flagged with 1.
    selection = [0] * len(data)
    for i in outselection:
        selection[i] = 1
    outdata = data.select(selection, 1)
    output_dict['selection'] = u.convert_dataset_from_orange_to_scikit(outdata)
    return output_dict