def cf_run(learner, data, k_folds, name, widget=None):
    """Run a classification filter using k-fold cross-validation.

    The data is split into ``k_folds`` folds.  For every fold the learner is
    built on the remaining folds and applied to the held-out fold; every
    held-out example whose predicted class differs from its recorded class
    is reported as noisy.

    :param learner: classifier wrapper providing ``build_classifier(train)``
        and ``apply_classifier(test)``; the object returned by
        ``apply_classifier`` must expose ``target`` and ``targetPredicted``
    :param data: Orange dataset
    :param k_folds: number of cross-validation folds
    :param name: learner name, passed through ``get_weka_name`` for the result
    :param widget: optional object; after each fold its ``progress``
        attribute is set to the completed percentage and ``save()`` is called
    :return: dict with ``'inds'`` (sorted indices into ``data`` of the
        examples flagged as noisy) and ``'name'``
    """
    # Single-argument parenthesised prints produce the same output under the
    # Python 2 print statement and the Python 3 print function.
    print(learner)

    noisy_indices = []
    selection = orange.MakeRandomIndicesCV(data, folds=k_folds)
    for test_fold in range(k_folds):
        train_data = convert_dataset_from_orange_to_scikit(
            data.select(selection, test_fold, negate=1))

        # Positions in `data` of the examples assigned to the held-out fold.
        test_inds = [
            i for i in range(len(selection)) if selection[i] == test_fold
        ]
        test_data = convert_dataset_from_orange_to_scikit(
            data.select(selection, test_fold))

        print("before cl build")
        learner.build_classifier(train_data)
        print("after cl build")

        predicted_dataset = learner.apply_classifier(test_data)
        print("after apply")

        # NOTE(review): assumes the i-th prediction corresponds to
        # test_inds[i], i.e. select() keeps the examples in dataset order.
        actual = predicted_dataset.target
        predicted = predicted_dataset.targetPredicted
        noisy_indices.extend(
            test_inds[pos] for pos in range(len(actual))
            if actual[pos] != predicted[pos])

        if widget is not None:
            widget.progress = int((test_fold + 1) * 1.0 / k_folds * 100)
            widget.save()

    return {'inds': sorted(noisy_indices), 'name': get_weka_name(name)}
def cf_run(learner, data, k_folds, name, widget=None):
    """Run a classification filter using k-fold cross-validation.

    The data is split into ``k_folds`` folds.  For every fold the learner is
    built on the remaining folds and applied to the held-out fold; every
    held-out example whose predicted class differs from its recorded class
    is reported as noisy.

    :param learner: classifier wrapper providing ``build_classifier(train)``
        and ``apply_classifier(test)``; the object returned by
        ``apply_classifier`` must expose ``target`` and ``targetPredicted``
    :param data: Orange dataset
    :param k_folds: number of cross-validation folds
    :param name: learner name, passed through ``get_weka_name`` for the result
    :param widget: optional object; after each fold its ``progress``
        attribute is set to the completed percentage and ``save()`` is called
    :return: dict with ``'inds'`` (sorted indices into ``data`` of the
        examples flagged as noisy) and ``'name'``
    """
    # Single-argument parenthesised prints produce the same output under the
    # Python 2 print statement and the Python 3 print function.
    print(learner)

    noisy_indices = []
    selection = orange.MakeRandomIndicesCV(data, folds=k_folds)
    for test_fold in range(k_folds):
        train_data = convert_dataset_from_orange_to_scikit(
            data.select(selection, test_fold, negate=1))

        # Positions in `data` of the examples assigned to the held-out fold.
        test_inds = [
            i for i in range(len(selection)) if selection[i] == test_fold
        ]
        test_data = convert_dataset_from_orange_to_scikit(
            data.select(selection, test_fold))

        print("before cl build")
        learner.build_classifier(train_data)
        print("after cl build")

        predicted_dataset = learner.apply_classifier(test_data)
        print("after apply")

        # NOTE(review): assumes the i-th prediction corresponds to
        # test_inds[i], i.e. select() keeps the examples in dataset order.
        actual = predicted_dataset.target
        predicted = predicted_dataset.targetPredicted
        noisy_indices.extend(
            test_inds[pos] for pos in range(len(actual))
            if actual[pos] != predicted[pos])

        if widget is not None:
            widget.progress = int((test_fold + 1) * 1.0 / k_folds * 100)
            widget.save()

    return {'inds': sorted(noisy_indices), 'name': get_weka_name(name)}
Example #3
0
def add_class_noise(input_dict):
    """Widget Add Class Noise.

    Reads ``'data'``, ``'noise_level'`` and ``'rnd_seed'`` from
    ``input_dict``, converts the dataset to Orange format, hands it to
    ``noiseAlgorithms4lib.add_class_noise`` and converts the result back.

    :param input_dict: widget inputs (keys listed above)
    :return: dict with ``'noise_inds'`` (first value returned by the noise
        routine) and ``'noisy_data'`` (its second value, converted back)
    :raises Exception: if ``d.is_target_nominal`` rejects the dataset
    """
    scikit_in = input_dict['data']

    target_is_nominal = d.is_target_nominal(scikit_in)
    if not target_is_nominal:
        raise Exception("Widget Add Class Noise accepts only datasets with nominal class!")

    orange_in = u.convert_dataset_from_scikit_to_orange(scikit_in)

    # Imported lazily, only once the input has been validated and converted.
    import cf_noise_detection.noiseAlgorithms4lib as nalg
    noise_result = nalg.add_class_noise(
        orange_in, input_dict['noise_level'], input_dict['rnd_seed'])
    noise_indices, noisy_orange = noise_result

    return {
        'noise_inds': noise_indices,
        'noisy_data': u.convert_dataset_from_orange_to_scikit(noisy_orange),
    }
Example #4
0
def noiserank_select(postdata,input_dict, output_dict):
    """Store the examples picked in ``postdata`` into ``output_dict``.

    ``postdata['selected']`` is converted to a list of ints and stored under
    ``'indices'``; the examples of ``input_dict['data']`` at those positions
    are stored (in scikit format) under ``'selection'``.

    Any ``KeyError`` raised along the way (for instance when ``postdata`` has
    no ``'selected'`` entry) results in ``output_dict['selection']`` being set
    to ``None``; ``'indices'`` is only set if ``'selected'`` could be read.

    :param postdata: mapping holding the submitted ``'selected'`` values
    :param input_dict: widget inputs; ``'data'`` holds the dataset
    :param output_dict: dict updated in place
    :return: the same ``output_dict`` object
    """
    try:
        chosen = [int(raw) for raw in postdata['selected']]
        output_dict['indices'] = chosen

        orange_data = u.convert_dataset_from_scikit_to_orange(input_dict['data'])

        # 0/1 mask over the examples; 1 marks a chosen example.
        mask = [0] * len(orange_data)
        for idx in chosen:
            mask[idx] = 1

        picked = orange_data.select(mask, 1)
        output_dict['selection'] = u.convert_dataset_from_orange_to_scikit(picked)
    except KeyError:
        output_dict['selection'] = None

    return output_dict
Example #5
0
def add_class_noise(input_dict):
    """Widget Add Class Noise.

    Reads ``'data'``, ``'noise_level'`` and ``'rnd_seed'`` from
    ``input_dict``, converts the dataset to Orange format, hands it to
    ``noiseAlgorithms4lib.add_class_noise`` and converts the result back.

    :param input_dict: widget inputs (keys listed above)
    :return: dict with ``'noise_inds'`` (first value returned by the noise
        routine) and ``'noisy_data'`` (its second value, converted back)
    :raises Exception: if ``d.is_target_nominal`` rejects the dataset
    """
    scikit_in = input_dict['data']

    target_is_nominal = d.is_target_nominal(scikit_in)
    if not target_is_nominal:
        raise Exception(
            "Widget Add Class Noise accepts only datasets with nominal class!")

    orange_in = u.convert_dataset_from_scikit_to_orange(scikit_in)

    # Imported lazily, only once the input has been validated and converted.
    import cf_noise_detection.noiseAlgorithms4lib as nalg
    noise_result = nalg.add_class_noise(
        orange_in, input_dict['noise_level'], input_dict['rnd_seed'])
    noise_indices, noisy_orange = noise_result

    return {
        'noise_inds': noise_indices,
        'noisy_data': u.convert_dataset_from_orange_to_scikit(noisy_orange),
    }
Example #6
0
def noiserank_select(postdata, input_dict, output_dict):
    """Store the examples picked in ``postdata`` into ``output_dict``.

    ``postdata['selected']`` is converted to a list of ints and stored under
    ``'indices'``; the examples of ``input_dict['data']`` at those positions
    are stored (in scikit format) under ``'selection'``.

    Any ``KeyError`` raised along the way (for instance when ``postdata`` has
    no ``'selected'`` entry) results in ``output_dict['selection']`` being set
    to ``None``; ``'indices'`` is only set if ``'selected'`` could be read.

    :param postdata: mapping holding the submitted ``'selected'`` values
    :param input_dict: widget inputs; ``'data'`` holds the dataset
    :param output_dict: dict updated in place
    :return: the same ``output_dict`` object
    """
    try:
        chosen = [int(raw) for raw in postdata['selected']]
        output_dict['indices'] = chosen

        orange_data = u.convert_dataset_from_scikit_to_orange(
            input_dict['data'])

        # 0/1 mask over the examples; 1 marks a chosen example.
        mask = [0] * len(orange_data)
        for idx in chosen:
            mask[idx] = 1

        picked = orange_data.select(mask, 1)
        output_dict['selection'] = u.convert_dataset_from_orange_to_scikit(
            picked)
    except KeyError:
        output_dict['selection'] = None

    return output_dict