Example #1
def import_dataset_from_arff(arff, class_index=None):
    '''Imports a dataset from an ARFF textual format.

    :param arff: the data in ARFF textual format
    :param class_index: the index of the class attribute
    :return: a dataset (Bunch)
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    tmp = common.TemporaryFile(suffix='.arff')
    tmp.writeString(arff)

    source = jp.JClass('weka.core.converters.ConverterUtils$DataSource')(
        tmp.name)
    instances = source.getDataSet()

    if class_index is None:
        print('Warning: class is set to the last attribute!')
        class_index = instances.numAttributes() - 1
    elif class_index == -1:
        class_index = instances.numAttributes() - 1

    instances.setClassIndex(class_index)
    return convert_weka_instances_to_bunch(instances)
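
A minimal usage sketch for the function above. The JVM bootstrap follows the older jpype API already used in the example; the path to weka.jar and the toy ARFF document are assumptions, not part of the original module.

# Usage sketch -- the weka.jar classpath and the toy ARFF string are assumptions.
import jpype as jp

if not jp.isJVMStarted():
    jp.startJVM(jp.getDefaultJVMPath(), '-Djava.class.path=weka.jar')

arff_text = '''@RELATION weather
@ATTRIBUTE outlook {sunny, rainy}
@ATTRIBUTE play {yes, no}
@DATA
sunny,yes
rainy,no
'''

# class_index=1 marks the 'play' attribute as the class;
# passing None or -1 falls back to the last attribute.
dataset = import_dataset_from_arff(arff_text, class_index=1)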
Example #2
def weka_local_arff_to_weka_instances(input_dict):
    '''
    Reads a dataset into a format suitable for WEKA methods
    '''

    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    tmp = common.TemporaryFile(suffix='.arff')
    tmp.writeString(input_dict['arff'])

    try:
        class_index = int(input_dict['class_index'])
    except (KeyError, TypeError, ValueError):
        class_index = None

    source = jp.JClass('weka.core.converters.ConverterUtils$DataSource')(
        tmp.name)
    instances = source.getDataSet()

    if class_index is None:
        print('Warning: class is set to the last attribute!')
        class_index = instances.numAttributes() - 1
    elif class_index == -1:
        class_index = instances.numAttributes() - 1

    instances.setClassIndex(class_index)

    return {'instances': common.serialize_weka_object(instances)}
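
A sketch of the input_dict contract this widget-style variant expects; the keys mirror the function body above, and arff_text is assumed to be the toy ARFF string from the sketch after Example #1.

# Usage sketch -- input_dict keys follow the function body above; arff_text is
# the toy ARFF string from the sketch after Example #1.
input_dict = {
    'arff': arff_text,    # ARFF document as a plain string
    'class_index': '-1',  # missing or non-numeric values fall back to the last attribute
}
output_dict = weka_local_arff_to_weka_instances(input_dict)
serialized = output_dict['instances']  # WEKA Instances serialized via common.serialize_weka_object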
Example #3
def apply_mapped_classifier_get_instances(weka_classifier, original_data,
                                          data):
    '''An advanced version of the Apply Classifier method.
    Addresses incompatible training and test data, and returns a dataset with predictions.

    :param weka_classifier: WekaClassifier object
    :param original_data: original training instances, bunch
    :param data: test instances, bunch
    :return: Dataset (Bunch) object with predictions and a textual report from the InputMappedClassifier class
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    try:
        classifier = common.deserialize_weka_object(
            weka_classifier.sclassifier)
    except Exception:
        raise Exception(
            "Only WEKA classifiers/models supported. Please provide a valid WEKA learner."
        )

    original_training_instances = ut.convert_bunch_to_weka_instances(
        original_data)
    instances = ut.convert_bunch_to_weka_instances(data)

    # serialize classifier with original instances to a file once again for the Mapped classifier
    tfile = common.TemporaryFile(flags='wb+')
    s = jp.JClass('weka.core.SerializationHelper')
    s.writeAll(tfile.name, [classifier, original_training_instances])

    # construct a MappedClassifier
    mapped_classifier = jp.JClass(
        'weka.classifiers.misc.InputMappedClassifier')()
    mapped_classifier.setIgnoreCaseForNames(True)
    mapped_classifier.setTrim(True)
    # mapped_classifier.setSuppressMappingReport(True)
    # mapped_classifier.setModelHeader(original_training_instances)
    mapped_classifier.setModelPath(tfile.name)

    predictions = []
    try:
        for instance in instances:
            label = int(mapped_classifier.classifyInstance(instance))
            predictions.append(label)

        data["targetPredicted"] = predictions
    except Exception:
        raise Exception(
            "Classifier not built. Please use the Build Classifier widget first."
        )

    report = mapped_classifier.toString()
    if MAPPING_REPORT_START in report:
        report = report[report.index(MAPPING_REPORT_START):]

    return data, report
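
A usage sketch under stated assumptions: MAPPING_REPORT_START is the module-level constant 'Attribute mappings:' (defined locally in Example #4), FakeWekaClassifier is a hypothetical stand-in for the WekaClassifier object a Build Classifier step would normally produce, and train_arff/test_arff are ARFF strings like the one in the sketch after Example #1.

# Usage sketch -- FakeWekaClassifier is a hypothetical stand-in; it only needs
# a .sclassifier attribute holding a classifier serialized with
# common.serialize_weka_object.
class FakeWekaClassifier(object):
    def __init__(self, sclassifier):
        self.sclassifier = sclassifier

train_bunch = import_dataset_from_arff(train_arff)  # class defaults to the last attribute
test_bunch = import_dataset_from_arff(test_arff)

j48 = jp.JClass('weka.classifiers.trees.J48')()     # any WEKA classifier works here
j48.buildClassifier(ut.convert_bunch_to_weka_instances(train_bunch))
weka_classifier = FakeWekaClassifier(common.serialize_weka_object(j48))

predicted, mapping_report = apply_mapped_classifier_get_instances(
    weka_classifier, train_bunch, test_bunch)
print(predicted['targetPredicted'])  # predicted class indices for the test instances
print(mapping_report)                # attribute-mapping report from InputMappedClassifier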
Example #4
def weka_local_apply_mapped_classifier_get_instances(input_dict):
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    MAPPING_REPORT_START = 'Attribute mappings:'

    classifier = common.deserialize_weka_object(input_dict['classifier'])
    original_training_instances = common.deserialize_weka_object(
        input_dict['original_training_instances'])
    instances = common.deserialize_weka_object(input_dict['instances'])

    # serialize classifier with original instances to a file once again for the Mapped classifier
    tfile = common.TemporaryFile(flags='wb+')
    s = jp.JClass('weka.core.SerializationHelper')
    s.writeAll(tfile.name, [classifier, original_training_instances])

    # construct a MappedClassifier
    mappedClassifier = jp.JClass(
        'weka.classifiers.misc.InputMappedClassifier')()
    mappedClassifier.setIgnoreCaseForNames(True)
    mappedClassifier.setTrim(True)
    # mappedClassifier.setSuppressMappingReport(True)
    # mappedClassifier.setModelHeader(original_training_instances)
    mappedClassifier.setModelPath(tfile.name)

    # use the mapped classifier on new data
    classIndex = instances.classIndex()
    if classIndex == -1:
        raise ValueError('Class not set!')
    classAttribute = instances.classAttribute()
    for instance in instances:
        label = int(mappedClassifier.classifyInstance(instance))
        instance.setClassValue(classAttribute.value(label))

    report = mappedClassifier.toString()
    if MAPPING_REPORT_START in report:
        report = report[report.index(MAPPING_REPORT_START):]

    return {
        'mapping_report': report,
        'instances': common.serialize_weka_object(instances)
    }
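
A sketch of the dictionary contract for this fully serialized variant. The serialized_* names are placeholders; each value is assumed to come from an upstream widget that stored its WEKA objects with common.serialize_weka_object, and the test instances must already have their class index set (otherwise the function raises ValueError).

# Usage sketch -- the serialized_* inputs are hypothetical placeholders for
# objects produced by upstream widgets via common.serialize_weka_object.
input_dict = {
    'classifier': serialized_classifier,                           # a built weka.classifiers.Classifier
    'original_training_instances': serialized_training_instances,  # weka.core.Instances used for training
    'instances': serialized_test_instances,                        # weka.core.Instances with the class index set
}
result = weka_local_apply_mapped_classifier_get_instances(input_dict)
print(result['mapping_report'])  # attribute-mapping report from InputMappedClassifier
labelled = result['instances']   # serialized Instances with predicted class values filled in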