def weka_local_generic_learner(input_dict):
    '''Creates an arbitrary Weka learner from its class name

    :param input_dict: dictionary with the keys 'weka_class' (class name handed
        to jp.JClass) and 'params' (passed through common.parse_options and set
        as the learner's options)
    :return: dictionary with the serialized learner under 'Generic_Weka_learner'
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass(input_dict['weka_class'])
    learner = learner_class()
    options = common.parse_options(input_dict['params'])
    learner.setOptions(options)
    return {'Generic_Weka_learner': common.serialize_weka_object(learner)}
def weka_local_libsvm(input_dict):
    '''Creates a Weka LibSVM learner (weka.classifiers.functions.LibSVM)

    :param input_dict: dictionary whose 'params' entry is passed through
        common.parse_options and set as the learner's options
    :return: dictionary with the serialized learner under 'LibSVM_learner'
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.functions.LibSVM')
    learner = learner_class()
    options = common.parse_options(input_dict['params'])
    learner.setOptions(options)
    return {'LibSVM_learner': common.serialize_weka_object(learner)}
def weka_local_arff_to_weka_instances(input_dict):
    '''Reads a dataset into a format suitable for WEKA methods

    :param input_dict: dictionary with the key 'arff' (text written to a
        temporary .arff file and loaded through Weka's DataSource) and,
        optionally, 'class_index' (index of the class attribute; -1 selects
        the last attribute). A missing or non-integer 'class_index' also
        selects the last attribute, after printing a warning.
    :return: dictionary with the serialized instances under 'instances'
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    tmp = common.TemporaryFile(suffix='.arff')
    tmp.writeString(input_dict['arff'])

    # Tolerate only a missing or unparsable class index; a bare except would
    # also hide KeyboardInterrupt and SystemExit.
    try:
        class_index = int(input_dict['class_index'])
    except (KeyError, TypeError, ValueError):
        class_index = None

    source = jp.JClass('weka.core.converters.ConverterUtils$DataSource')(tmp.name)
    instances = source.getDataSet()

    if class_index is None:
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print('Warning: class is set to the last attribute!')
    if class_index is None or class_index == -1:
        class_index = instances.numAttributes() - 1

    instances.setClassIndex(class_index)
    return {'instances': common.serialize_weka_object(instances)}
def weka_local_arff_to_weka_instances(input_dict):
    '''Reads a dataset into a format suitable for WEKA methods

    :param input_dict: dictionary with the key 'arff' (text written to a
        temporary .arff file and loaded through Weka's DataSource) and,
        optionally, 'class_index' (index of the class attribute; -1 selects
        the last attribute). A missing or non-integer 'class_index' also
        selects the last attribute, after printing a warning.
    :return: dictionary with the serialized instances under 'instances'
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    tmp = common.TemporaryFile(suffix='.arff')
    tmp.writeString(input_dict['arff'])

    # Tolerate only a missing or unparsable class index; a bare except would
    # also hide KeyboardInterrupt and SystemExit.
    try:
        class_index = int(input_dict['class_index'])
    except (KeyError, TypeError, ValueError):
        class_index = None

    source = jp.JClass('weka.core.converters.ConverterUtils$DataSource')(tmp.name)
    instances = source.getDataSet()

    if class_index is None:
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print('Warning: class is set to the last attribute!')
    if class_index is None or class_index == -1:
        class_index = instances.numAttributes() - 1

    instances.setClassIndex(class_index)
    return {'instances': common.serialize_weka_object(instances)}
def weka_local_multilayer_perceptron(input_dict):
    '''Multilayer perceptron learner by Weka: a feedforward artificial neural
    network that uses backpropagation to classify instances

    :param input_dict: dictionary whose 'params' entry is passed through
        common.parse_options and set as the learner's options
    :return: dictionary with the serialized learner under
        'Multilayer_Perceptron_learner'
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.functions.MultilayerPerceptron')
    learner = learner_class()
    options = common.parse_options(input_dict['params'])
    learner.setOptions(options)
    return {'Multilayer_Perceptron_learner': common.serialize_weka_object(learner)}
def weka_local_smo(input_dict):
    '''Support vector classifier by Weka, trained with the Sequential Minimal
    Optimization (SMO) algorithm

    :param input_dict: dictionary whose 'params' entry is passed through
        common.parse_options and set as the learner's options
    :return: dictionary with the serialized learner under 'SMO_learner'
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.functions.SMO')
    learner = learner_class()
    options = common.parse_options(input_dict['params'])
    learner.setOptions(options)
    return {'SMO_learner': common.serialize_weka_object(learner)}
def weka_local_random_forest(input_dict):
    '''Weka's Random Forest learner

    :param input_dict: dictionary whose 'params' entry is passed through
        common.parse_options and set as the learner's options
    :return: dictionary with the serialized learner under 'RandomForest_learner'
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.trees.RandomForest')
    learner = learner_class()
    options = common.parse_options(input_dict['params'])
    learner.setOptions(options)
    return {'RandomForest_learner': common.serialize_weka_object(learner)}
def weka_local_k_star(input_dict):
    '''Weka's instance-based learner K*

    :param input_dict: dictionary whose 'params' entry is passed through
        common.parse_options and set as the learner's options
    :return: dictionary with the serialized learner under 'KStar_learner'
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.lazy.KStar')
    learner = learner_class()
    options = common.parse_options(input_dict['params'])
    learner.setOptions(options)
    return {'KStar_learner': common.serialize_weka_object(learner)}
def weka_local_naive_bayes(input_dict):
    '''Weka's Naive Bayes classifier, a simple probabilistic classifier built
    on Bayes' theorem

    :param input_dict: dictionary whose 'params' entry is passed through
        common.parse_options and set as the learner's options
    :return: dictionary with the serialized learner under 'Naive_Bayes_learner'
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.bayes.NaiveBayes')
    learner = learner_class()
    options = common.parse_options(input_dict['params'])
    learner.setOptions(options)
    return {'Naive_Bayes_learner': common.serialize_weka_object(learner)}
def weka_local_zeror(input_dict):
    '''Weka's ZeroR rule classifier, which predicts the mean for a numeric
    class or the mode for a nominal class

    :param input_dict: dictionary whose 'params' entry is passed through
        common.parse_options and set as the learner's options
    :return: dictionary with the serialized learner under 'classifier'
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.rules.ZeroR')
    learner = learner_class()
    options = common.parse_options(input_dict['params'])
    learner.setOptions(options)
    return {'classifier': common.serialize_weka_object(learner)}
def weka_local_jrip(input_dict):
    '''Weka's RIPPER rule learner (JRip)

    :param input_dict: dictionary whose 'params' entry is passed through
        common.parse_options and set as the learner's options
    :return: dictionary with the serialized learner under 'JRip_learner'
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.rules.JRip')
    learner = learner_class()
    options = common.parse_options(input_dict['params'])
    learner.setOptions(options)
    return {'JRip_learner': common.serialize_weka_object(learner)}
def weka_local_rep_tree(input_dict):
    '''Weka's REP Tree, a fast decision tree learner. It builds a
    decision/regression tree using information gain/variance and prunes it
    using reduced-error pruning

    :param input_dict: dictionary whose 'params' entry is passed through
        common.parse_options and set as the learner's options
    :return: dictionary with the serialized learner under 'REPTree_learner'
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.trees.REPTree')
    learner = learner_class()
    options = common.parse_options(input_dict['params'])
    learner.setOptions(options)
    return {'REPTree_learner': common.serialize_weka_object(learner)}
def weka_local_j48(input_dict):
    '''Weka's J48 decision tree learner

    :param input_dict: dictionary whose 'params' entry is passed through
        common.parse_options and set as the learner's options
    :return: dictionary with the serialized learner under 'J48_learner'
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.trees.J48')
    learner = learner_class()
    options = common.parse_options(input_dict['params'])
    learner.setOptions(options)
    return {'J48_learner': common.serialize_weka_object(learner)}
def weka_local_ibk(input_dict):
    '''Weka's K-nearest neighbours classifier (IBk)

    :param input_dict: dictionary whose 'params' entry is passed through
        common.parse_options and set as the learner's options
    :return: dictionary with the serialized learner under 'IBk_learner'
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.lazy.IBk')
    learner = learner_class()
    options = common.parse_options(input_dict['params'])
    learner.setOptions(options)
    return {'IBk_learner': common.serialize_weka_object(learner)}
def weka_local_random_tree(input_dict):
    '''Weka's RandomTree: a tree that considers K randomly chosen attributes
    at each node and performs no pruning

    :param input_dict: dictionary whose 'params' entry is passed through
        common.parse_options and set as the learner's options
    :return: dictionary with the serialized learner under 'RandomTree_learner'
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.trees.RandomTree')
    learner = learner_class()
    options = common.parse_options(input_dict['params'])
    learner.setOptions(options)
    return {'RandomTree_learner': common.serialize_weka_object(learner)}
def k_star(params=None):
    '''Weka's instance-based learner K*

    :param params: textual parameters forwarded to the KStar Weka class
    :return: a WekaClassifier object
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.lazy.KStar')
    learner = learner_class()
    options = common.parse_options(params)
    learner.setOptions(options)
    return WekaClassifier(common.serialize_weka_object(learner))
def random_forest(params=None):
    '''Weka's Random Forest learner

    :param params: textual parameters forwarded to the RandomForest Weka class
    :return: a WekaClassifier object
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.trees.RandomForest')
    learner = learner_class()
    options = common.parse_options(params)
    learner.setOptions(options)
    return WekaClassifier(common.serialize_weka_object(learner))
def rep_tree(params=None):
    '''Weka's REP Tree, a fast decision tree learner. It builds a
    decision/regression tree using information gain/variance and prunes it
    using reduced-error pruning

    :param params: textual parameters forwarded to the REPTree Weka class
    :return: a WekaClassifier object
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.trees.REPTree')
    learner = learner_class()
    options = common.parse_options(params)
    learner.setOptions(options)
    return WekaClassifier(common.serialize_weka_object(learner))
def naive_bayes(params=None):
    '''Weka's Naive Bayes classifier, a simple probabilistic classifier built
    on Bayes' theorem

    :param params: textual parameters forwarded to the NaiveBayes Weka class
    :return: a WekaClassifier object
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.bayes.NaiveBayes')
    learner = learner_class()
    options = common.parse_options(params)
    learner.setOptions(options)
    return WekaClassifier(common.serialize_weka_object(learner))
def rules_jrip(params=None):
    '''Weka's RIPPER rule learner (JRip)

    :param params: textual parameters forwarded to the JRip Weka class
    :return: a WekaClassifier object
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.rules.JRip')
    learner = learner_class()
    options = common.parse_options(params)
    learner.setOptions(options)
    return WekaClassifier(common.serialize_weka_object(learner))
def rules_zeror(params=None):
    '''Weka's ZeroR rule classifier, which predicts the mean for a numeric
    class or the mode for a nominal class

    :param params: textual parameters forwarded to the ZeroR Weka class
    :return: a WekaClassifier object
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.rules.ZeroR')
    learner = learner_class()
    options = common.parse_options(params)
    learner.setOptions(options)
    return WekaClassifier(common.serialize_weka_object(learner))
def ibk(params=None):
    '''Weka's K-nearest neighbours classifier (IBk)

    :param params: textual parameters forwarded to the IBk Weka class
    :return: a WekaClassifier object
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.lazy.IBk')
    learner = learner_class()
    options = common.parse_options(params)
    learner.setOptions(options)
    return WekaClassifier(common.serialize_weka_object(learner))
def logistic(params=None):
    '''Weka's logistic regression learner

    :param params: textual parameters forwarded to the Logistic Weka class
    :return: a WekaClassifier object
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.functions.Logistic')
    learner = learner_class()
    options = common.parse_options(params)
    learner.setOptions(options)
    return WekaClassifier(common.serialize_weka_object(learner))
def smo(params=None):
    '''Support vector classifier by Weka, trained with the Sequential Minimal
    Optimization (SMO) algorithm

    :param params: textual parameters forwarded to the SMO Weka class
    :return: a WekaClassifier object
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.functions.SMO')
    learner = learner_class()
    options = common.parse_options(params)
    learner.setOptions(options)
    return WekaClassifier(common.serialize_weka_object(learner))
def multilayer_perceptron(params=None):
    '''Multilayer perceptron learner by Weka: a feedforward artificial neural
    network that uses backpropagation to classify instances

    :param params: textual parameters forwarded to the MultilayerPerceptron
        Weka class
    :return: a WekaClassifier object
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.functions.MultilayerPerceptron')
    learner = learner_class()
    options = common.parse_options(params)
    learner.setOptions(options)
    return WekaClassifier(common.serialize_weka_object(learner))
def random_tree(params=None):
    '''Weka's RandomTree: a tree that considers K randomly chosen attributes
    at each node and performs no pruning

    :param params: textual parameters forwarded to the RandomTree Weka class
    :return: a WekaClassifier object
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.trees.RandomTree')
    learner = learner_class()
    options = common.parse_options(params)
    learner.setOptions(options)
    return WekaClassifier(common.serialize_weka_object(learner))
def j48(params=None):
    '''Weka's J48 decision tree learner

    :param params: textual parameters forwarded to the J48 Weka class
        (e.g. "-C 0.25 -M 2")
    :return: a WekaClassifier object
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner_class = jp.JClass('weka.classifiers.trees.J48')
    learner = learner_class()
    options = common.parse_options(params)
    learner.setOptions(options)
    return WekaClassifier(common.serialize_weka_object(learner))
def weka_local_build_classifier(input_dict):
    '''Trains a serialized Weka learner on serialized instances

    :param input_dict: dictionary with the keys 'instances' and 'learner', both
        restored with common.deserialize_weka_object
    :return: dictionary with the serialized, built classifier under 'classifier'
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    training_set = common.deserialize_weka_object(input_dict['instances'])
    learner = common.deserialize_weka_object(input_dict['learner'])

    # An unset class index (-1) falls back to the last attribute instead of
    # raising an error.
    class_is_unset = training_set.classIndex() == -1
    if class_is_unset:
        training_set.setClassIndex(training_set.numAttributes() - 1)

    learner.buildClassifier(training_set)
    return {'classifier': common.serialize_weka_object(learner)}
def build_classifier(self, data):
    """Trains the classifier stored in self.sclassifier on the given data

    The built classifier is serialized again and written back to
    self.sclassifier.

    :param data: bunch
    """
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    training_set = ut.convert_bunch_to_weka_instances(data)
    learner = common.deserialize_weka_object(self.sclassifier)

    # An unset class index (-1) falls back to the last attribute instead of
    # raising an error.
    class_is_unset = training_set.classIndex() == -1
    if class_is_unset:
        training_set.setClassIndex(training_set.numAttributes() - 1)

    learner.buildClassifier(training_set)
    self.sclassifier = common.serialize_weka_object(learner)
def weka_local_apply_mapped_classifier_get_instances(input_dict):
    '''Labels instances through Weka's InputMappedClassifier

    :param input_dict: dictionary with the keys 'classifier',
        'original_training_instances' and 'instances', each restored with
        common.deserialize_weka_object
    :return: dictionary with 'mapping_report' (the mapped classifier's textual
        description, cut to start at 'Attribute mappings:' when that marker is
        present) and 'instances' (the serialized, relabelled instances)
    :raises ValueError: if the instances have no class index set
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    MAPPING_REPORT_START = 'Attribute mappings:'

    trained_model = common.deserialize_weka_object(input_dict['classifier'])
    training_header = common.deserialize_weka_object(
        input_dict['original_training_instances'])
    dataset = common.deserialize_weka_object(input_dict['instances'])

    # The mapped classifier loads its model from a path, so the classifier and
    # its original training instances are written to a temporary file first.
    model_file = common.TemporaryFile(flags='wb+')
    serialization_helper = jp.JClass('weka.core.SerializationHelper')
    serialization_helper.writeAll(model_file.name, [trained_model, training_header])

    mapped_class = jp.JClass('weka.classifiers.misc.InputMappedClassifier')
    mapped = mapped_class()
    mapped.setIgnoreCaseForNames(True)
    mapped.setTrim(True)
    mapped.setModelPath(model_file.name)

    # Labelling needs a class attribute on the new data.
    if dataset.classIndex() == -1:
        raise ValueError('Class not set!')

    class_attribute = dataset.classAttribute()
    for row in dataset:
        predicted = int(mapped.classifyInstance(row))
        row.setClassValue(class_attribute.value(predicted))

    # Keep only the attribute-mapping part of the description, when present.
    description = mapped.toString()
    if MAPPING_REPORT_START in description:
        description = description[description.index(MAPPING_REPORT_START):]

    return {
        'mapping_report': description,
        'instances': common.serialize_weka_object(dataset),
    }
def weka_local_apply_classifier_and_get_instances(input_dict):
    '''Labels every instance with the prediction of a serialized classifier

    :param input_dict: dictionary with the keys 'instances' and 'classifier',
        both restored with common.deserialize_weka_object
    :return: dictionary with the serialized, relabelled instances under
        'instances'
    :raises Exception: if deserializing the classifier, classifying or
        serializing the result fails
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    instances = common.deserialize_weka_object(input_dict['instances'])
    if instances.classIndex() == -1:
        # No class attribute chosen: the last attribute is used as the class.
        instances.setClassIndex(instances.numAttributes() - 1)

    classifier_serialized = input_dict['classifier']
    try:
        classifier = common.deserialize_weka_object(classifier_serialized)
        class_attribute = instances.classAttribute()
        for instance in instances:
            label_ind = int(classifier.classifyInstance(instance))
            instance.setClassValue(class_attribute.value(label_ind))
        return {'instances': common.serialize_weka_object(instances)}
    except Exception:
        # "except Exception" rather than a bare except, so KeyboardInterrupt
        # and SystemExit are not reported as an unbuilt classifier.
        raise Exception("Classifier not built. Please use the Build Classifier widget first.")
def weka_local_apply_mapped_classifier_get_instances(input_dict):
    '''Labels instances through Weka's InputMappedClassifier

    :param input_dict: dictionary with the keys 'classifier',
        'original_training_instances' and 'instances', each restored with
        common.deserialize_weka_object
    :return: dictionary with 'mapping_report' (the mapped classifier's textual
        description, cut to start at 'Attribute mappings:' when that marker is
        present) and 'instances' (the serialized, relabelled instances)
    :raises ValueError: if the instances have no class index set
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    MAPPING_REPORT_START = 'Attribute mappings:'

    trained_model = common.deserialize_weka_object(input_dict['classifier'])
    training_header = common.deserialize_weka_object(
        input_dict['original_training_instances'])
    dataset = common.deserialize_weka_object(input_dict['instances'])

    # The mapped classifier loads its model from a path, so the classifier and
    # its original training instances are written to a temporary file first.
    model_file = common.TemporaryFile(flags='wb+')
    serialization_helper = jp.JClass('weka.core.SerializationHelper')
    serialization_helper.writeAll(model_file.name, [trained_model, training_header])

    mapped_class = jp.JClass('weka.classifiers.misc.InputMappedClassifier')
    mapped = mapped_class()
    mapped.setIgnoreCaseForNames(True)
    mapped.setTrim(True)
    mapped.setModelPath(model_file.name)

    # Labelling needs a class attribute on the new data.
    if dataset.classIndex() == -1:
        raise ValueError('Class not set!')

    class_attribute = dataset.classAttribute()
    for row in dataset:
        predicted = int(mapped.classifyInstance(row))
        row.setClassValue(class_attribute.value(predicted))

    # Keep only the attribute-mapping part of the description, when present.
    description = mapped.toString()
    if MAPPING_REPORT_START in description:
        description = description[description.index(MAPPING_REPORT_START):]

    return {
        'mapping_report': description,
        'instances': common.serialize_weka_object(dataset),
    }
def weka_local_apply_classifier_and_get_instances(input_dict):
    '''Labels every instance with the prediction of a serialized classifier

    :param input_dict: dictionary with the keys 'instances' and 'classifier',
        both restored with common.deserialize_weka_object
    :return: dictionary with the serialized, relabelled instances under
        'instances'
    :raises Exception: if deserializing the classifier, classifying or
        serializing the result fails
    '''
    # Attach the calling thread to the JVM unless it is attached already.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    instances = common.deserialize_weka_object(input_dict['instances'])
    if instances.classIndex() == -1:
        # No class attribute chosen: the last attribute is used as the class.
        instances.setClassIndex(instances.numAttributes() - 1)

    classifier_serialized = input_dict['classifier']
    try:
        classifier = common.deserialize_weka_object(classifier_serialized)
        class_attribute = instances.classAttribute()
        for instance in instances:
            label_ind = int(classifier.classifyInstance(instance))
            instance.setClassValue(class_attribute.value(label_ind))
        return {'instances': common.serialize_weka_object(instances)}
    except Exception:
        # "except Exception" rather than a bare except, so KeyboardInterrupt
        # and SystemExit are not reported as an unbuilt classifier.
        raise Exception(
            "Classifier not built. Please use the Build Classifier widget first."
        )