def import_dataset_from_arff(arff, class_index=None):
    '''Imports a dataset from an ARFF textual format.

    The text is written to a temporary ``.arff`` file, loaded through WEKA's
    ``ConverterUtils$DataSource`` and then converted with
    ``convert_weka_instances_to_bunch``.

    :param arff: the data in ARFF textual format
    :param class_index: the index of the class attribute; ``None`` or ``-1``
        selects the last attribute (``None`` additionally prints a warning)
    :return: a dataset (Bunch)
    '''
    # Attach the calling thread to the JVM if it is not attached yet.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    # The data source is constructed from a file name, so the ARFF text
    # is first written to a temporary file.
    tmp = common.TemporaryFile(suffix='.arff')
    tmp.writeString(arff)

    data_source_class = jp.JClass(
        'weka.core.converters.ConverterUtils$DataSource')
    instances = data_source_class(tmp.name).getDataSet()

    if class_index is None:
        # Single-argument call form prints the same text on Python 2 and 3.
        print('Warning: class is set to the last attribute!')
    if class_index is None or class_index == -1:
        class_index = instances.numAttributes() - 1

    instances.setClassIndex(class_index)
    return convert_weka_instances_to_bunch(instances)
def weka_local_arff_to_weka_instances(input_dict):
    '''Reads a dataset into a format suitable for WEKA methods.

    :param input_dict: dictionary with the ARFF text under ``'arff'`` and an
        optional ``'class_index'`` entry; a missing or non-integer
        ``'class_index'``, as well as ``-1``, selects the last attribute
    :return: dictionary with the serialized WEKA instances under
        ``'instances'``
    '''
    # Attach the calling thread to the JVM if it is not attached yet.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    # The data source is constructed from a file name, so the ARFF text
    # is first written to a temporary file.
    tmp = common.TemporaryFile(suffix='.arff')
    tmp.writeString(input_dict['arff'])

    # Only the failures that mean "no usable class index was supplied" are
    # treated as such: missing key, None/unsupported type, non-numeric text.
    try:
        class_index = int(input_dict['class_index'])
    except (KeyError, TypeError, ValueError):
        class_index = None

    data_source_class = jp.JClass(
        'weka.core.converters.ConverterUtils$DataSource')
    instances = data_source_class(tmp.name).getDataSet()

    if class_index is None:
        # Single-argument call form prints the same text on Python 2 and 3.
        print('Warning: class is set to the last attribute!')
    if class_index is None or class_index == -1:
        class_index = instances.numAttributes() - 1

    instances.setClassIndex(class_index)
    return {'instances': common.serialize_weka_object(instances)}
def apply_mapped_classifier_get_instances(weka_classifier, original_data, data):
    '''An advanced version of the Apply Classifier method.

    Addresses incompatible training and test data, and returns a dataset with
    predictions. The predicted labels are stored in ``data`` under the
    ``"targetPredicted"`` key, so the passed-in object is modified.

    :param weka_classifier: WekaClassifier object
    :param original_data: original training instances, bunch
    :param data: test instances, bunch
    :return: Dataset (Bunch) object with predictions and a textual report
        from the InputMappedClassifier class
    :raises Exception: if the classifier cannot be deserialized or the
        instances cannot be classified
    '''
    # Defined locally (as in weka_local_apply_mapped_classifier_get_instances)
    # so this function does not depend on a module-level constant.
    MAPPING_REPORT_START = 'Attribute mappings:'

    # Attach the calling thread to the JVM if it is not attached yet.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    # `except Exception` keeps KeyboardInterrupt/SystemExit from being
    # converted into the generic error below.
    try:
        classifier = common.deserialize_weka_object(
            weka_classifier.sclassifier)
    except Exception:
        raise Exception(
            "Only WEKA classifiers/models supported. Please provide a valid WEKA learner."
        )

    original_training_instances = ut.convert_bunch_to_weka_instances(
        original_data)
    instances = ut.convert_bunch_to_weka_instances(data)

    # Serialize the classifier together with the original instances to a
    # file once again; the mapped classifier is pointed at it by path.
    tfile = common.TemporaryFile(flags='wb+')
    serialization_helper = jp.JClass('weka.core.SerializationHelper')
    serialization_helper.writeAll(
        tfile.name, [classifier, original_training_instances])

    # Construct a MappedClassifier.
    mapped_classifier = jp.JClass(
        'weka.classifiers.misc.InputMappedClassifier')()
    mapped_classifier.setIgnoreCaseForNames(True)
    mapped_classifier.setTrim(True)
    mapped_classifier.setModelPath(tfile.name)

    try:
        predictions = [
            int(mapped_classifier.classifyInstance(instance))
            for instance in instances
        ]
    except Exception:
        raise Exception(
            "Classifier not built. Please use the Build Classifier widget first."
        )
    data["targetPredicted"] = predictions

    # Keep only the part of the textual description that starts at the
    # attribute-mapping section, when that section is present.
    report = mapped_classifier.toString()
    if MAPPING_REPORT_START in report:
        report = report[report.index(MAPPING_REPORT_START):]

    return data, report
def weka_local_apply_mapped_classifier_get_instances(input_dict):
    '''Labels instances using a classifier wrapped in WEKA's InputMappedClassifier.

    :param input_dict: dictionary holding serialized WEKA objects under the
        keys ``'classifier'``, ``'original_training_instances'`` and
        ``'instances'``
    :return: dictionary with the mapping report text under
        ``'mapping_report'`` and the serialized, labelled instances under
        ``'instances'``
    :raises ValueError: if the class attribute of the instances is not set
    '''
    # Attach the calling thread to the JVM if it is not attached yet.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    report_marker = 'Attribute mappings:'

    model = common.deserialize_weka_object(input_dict['classifier'])
    training_set = common.deserialize_weka_object(
        input_dict['original_training_instances'])
    instances = common.deserialize_weka_object(input_dict['instances'])

    # Write the classifier together with its training instances to a file;
    # the mapped classifier is pointed at that file by path.
    model_file = common.TemporaryFile(flags='wb+')
    jp.JClass('weka.core.SerializationHelper').writeAll(
        model_file.name, [model, training_set])

    # Build and configure the mapped classifier.
    mapped_classifier_class = jp.JClass(
        'weka.classifiers.misc.InputMappedClassifier')
    mapper = mapped_classifier_class()
    mapper.setIgnoreCaseForNames(True)
    mapper.setTrim(True)
    mapper.setModelPath(model_file.name)

    # Apply the mapped classifier to the new data.
    if instances.classIndex() == -1:
        raise ValueError('Class not set!')
    class_attribute = instances.classAttribute()
    for row in instances:
        predicted = int(mapper.classifyInstance(row))
        row.setClassValue(class_attribute.value(predicted))

    # Keep only the part of the textual description that starts at the
    # attribute-mapping section, when that section is present.
    report = mapper.toString()
    if report_marker in report:
        marker_position = report.index(report_marker)
        report = report[marker_position:]

    serialized_instances = common.serialize_weka_object(instances)
    return {
        'mapping_report': report,
        'instances': serialized_instances,
    }