def main(): """ Runs a associator from the command-line. Calls JVM start/stop automatically. Use -h to see all options. """ parser = argparse.ArgumentParser( description='Executes an associator from the command-line. Calls JVM start/stop automatically.') parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories") parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m") parser.add_argument("-t", metavar="train", dest="train", required=True, help="training set file") parser.add_argument("associator", help="associator classname, e.g., weka.associations.Apriori") parser.add_argument("option", nargs=argparse.REMAINDER, help="additional associator options") parsed = parser.parse_args() jars = [] if parsed.classpath is not None: jars = parsed.classpath.split(os.pathsep) jvm.start(jars, max_heap_size=parsed.heap, packages=True) logger.debug("Commandline: " + join_options(sys.argv[1:])) try: associator = Associator(classname=parsed.associator) if len(parsed.option) > 0: associator.options = parsed.option loader = converters.loader_for_file(parsed.train) data = loader.load_file(parsed.train) associator.build_associations(data) print(str(associator)) except Exception as e: print(e) finally: jvm.stop()
def main(): """ Runs a datagenerator from the command-line. Calls JVM start/stop automatically. Use -h to see all options. """ parser = argparse.ArgumentParser( description='Executes a data generator from the command-line. Calls JVM start/stop automatically.') parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories") parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m") parser.add_argument("datagenerator", help="data generator classname, e.g., " + "weka.datagenerators.classifiers.classification.LED24") parser.add_argument("option", nargs=argparse.REMAINDER, help="additional data generator options") parsed = parser.parse_args() jars = [] if parsed.classpath is not None: jars = parsed.classpath.split(os.pathsep) jvm.start(jars, max_heap_size=parsed.heap, packages=True) logger.debug("Commandline: " + join_options(sys.argv[1:])) try: generator = DataGenerator(classname=parsed.datagenerator) if len(parsed.option) > 0: generator.options = parsed.option DataGenerator.make_data(generator, parsed.option) except Exception as e: print(e) finally: jvm.stop()
def affective_vectorizer(tweets, filename):
    '''
    Vectorizes the tweets and saves the vectors as csv.

    :param tweets: list of tweets
    :param filename: name of the saved file
    '''
    jvm.start(packages=True)
    install_package('AffectiveTweets')

    data = dataset.create_instances_from_lists([[t] for t in tweets])

    filter = Filter(
        classname='weka.filters.unsupervised.attribute.TweetToLexiconFeatureVector',
        options=[
            '-F', '-D', '-R', '-A', '-T', '-L', '-N', '-P', '-J', '-H', '-Q',
            '-stemmer', 'weka.core.stemmers.NullStemmer',
            '-stopwords-handler', 'weka.core.tokenizers.TweetNLPTokenizer',
            '-I', '1', '-U',
            '-tokenizer', 'weka.core.tokenizers.TweetNLPTokenizer'
        ])
    filter.inputformat(data)
    filtered_data = filter.filter(data)

    converters.save_any_file(filtered_data, 'data/affect-vectors/' + filename)
    jvm.stop()
def start_search(file_name, type):
    start = time.clock()  # time Preprocessing

    tile_set, characteristic, nmrClass = read_file(file_name)
    calculate_char_heuristic(tile_set, characteristic)  # do before you add the place holder tiles
    tile_set = generate_placeholders(tile_set, characteristic, nmrClass)  # gets place holder tiles

    # kill jvm that was started after calling read file
    # the jvm is used for the machine learning filtering
    # so that weka can be run
    jvm.stop()

    calculate_order_heuristic(tile_set)

    # add up heuristic from all tiles and make starting node
    heuristic_val = 0
    for tile in tile_set:
        heuristic_val += tile.heuristic_cost
        # print tile.heuristic_order_cost,
        # print tile.get_tile()

    root = Node(tile_set, [], heuristic_val, characteristic, 0, 0,
                heuristic=heuristic_val)  # makes start state for search

    end = time.clock()  # time Preprocessing
    print "Preprocessing Time: " + str(end - start)

    # picks algorithm
    if (int(type) == 0):  # uniform cost search
        best_solution, node_count = aStar([root])
        output_soultion(best_solution, node_count)
    elif (int(type) == 1):  # puzzle building
        best_solution = puzzle_building_search([root])
def PredecirUnaTemporada(path):
    jvm.start()
    insta = CrearInstanciaParaPredecir(path)

    atributos = ""
    file = open('ModelData/wekaHeader.arff', 'r')
    atributos = file.readlines()
    file.close()

    file = open('ModelData/predictionFiles/inst.arff', 'w')
    file.writelines(atributos)
    file.write("\n" + insta + '\n')
    file.close()

    objects = serialization.read_all("ModelData/77PercentModelPaisajes.model")
    classifier = Classifier(jobject=objects[0])

    loader = Loader()
    data = loader.load_file("ModelData/predictionFiles/inst.arff")
    data.class_is_last()

    clases = ["invierno", "verano", "otono", "primavera"]
    prediccion = ""
    for index, inst in enumerate(data):
        pred = classifier.classify_instance(inst)
        dist = classifier.distribution_for_instance(inst)
        prediccion = clases[int(pred)]

    jvm.stop()
    return prediccion
def run():
    jvm.start()

    load_csv = Loader("weka.core.converters.CSVLoader")
    data_csv = load_csv.load_file(
        "/Users/imeiliasantoso/web_graduate_project4/predict_page/predict_data.csv")

    saver = Saver("weka.core.converters.ArffSaver")
    saver.save_file(
        data_csv,
        "/Users/imeiliasantoso/web_graduate_project4/predict_page/predict_data.arff")

    load_arff = Loader("weka.core.converters.ArffLoader")
    data_arff = load_arff.load_file(
        "/Users/imeiliasantoso/web_graduate_project4/predict_page/predict_data.arff")
    data_arff.class_is_last()

    global j48
    J48_class = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.25", "-M", "2"])
    J48_class.build_classifier(data_arff)

    evaluationj48 = Evaluation(data_arff)
    evaluationj48.crossvalidate_model(J48_class, data_arff, 10, Random(100))
    j48 = str(evaluationj48.percent_correct)

    jvm.stop()
    return j48
def main():
    data_provider = DataProvider(test_size=0.2)
    jvm.start()
    trainig_data = data_provider.get_weka_training_data()  # for weka
    try:
        weka_bayes_network = WekaBayesNetwork(trainig_data['labels'], ALL_ALGORITHMS['k2'])
        weka_bayes_network.train_bayes(trainig_data['train_set'])
        weka_bayes_network.draw_graph()
        results = weka_bayes_network.predict_and_compare(trainig_data['test_set'])

        # trainig_data = data_provider.get_training_data()  # for simple
        # chow_liu_bayes_network = SimpleBayesNetwork(trainig_data['feature_names'], ALL_ALGORITHMS['chowLiu'])
        # chow_liu_bayes_network.train_bayes(trainig_data['x_train'], trainig_data['y_train'])
        # chow_liu_bayes_network.draw_graph()
        # results = chow_liu_bayes_network.predict_and_compare(trainig_data['x_test'], trainig_data['y_test'])

        print('correct_recurrences:', results['correct_recurrences'])
        print('correct_no_recurrences:', results['correct_no_recurrences'])
        print('incorrect_recurrences:', results['incorrect_recurrences'])
        print('incorrect_no_recurrences:', results['incorrect_no_recurrences'])
    except:
        print('Exception was caught!')
        tb = traceback.format_exc()
        print(tb)

    jvm.stop()
def main():
    try:
        jvm.start()
        loader = Loader(classname="weka.core.converters.CSVLoader")
        data = loader.load_file("./data/adult.csv")
        data.class_is_last()  # set class attribute

        # randomize data
        folds = k
        seed = 1
        rnd = Random(seed)
        rand_data = Instances.copy_instances(data)
        rand_data.randomize(rnd)
        if rand_data.class_attribute.is_nominal:
            rand_data.stratify(folds)

        NaiveBayes(rand_data, folds, seed, data)
        DecisionTree(rand_data, folds, seed, data)
    except Exception as e:
        raise e
    finally:
        jvm.stop()
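# The NaiveBayes(...) and DecisionTree(...) calls above are project-specific helpers that are
# not shown here. A minimal sketch of what such a helper might look like, reusing only the
# Classifier/Evaluation API that appears in the other examples (the classname argument and
# the summary printing are assumptions, not the original implementation):
from weka.classifiers import Classifier, Evaluation
from weka.core.classes import Random

def cross_validate(classname, rand_data, folds, seed):
    # build and cross-validate a classifier of the given class on the randomized data
    cls = Classifier(classname=classname)
    evaluation = Evaluation(rand_data)
    evaluation.crossvalidate_model(cls, rand_data, folds, Random(seed))
    print(classname)
    print(evaluation.summary())
    print(evaluation.class_details())
    return evaluation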
def Feature_Selection(infile):
    directory = os.getcwd() + '/'
    csvpath = directory + infile

    jvm.start(packages=True, max_heap_size="4g")
    print "\n\n"
    print "Loaded file: ", infile

    csvloader = Loader(classname="weka.core.converters.CSVLoader")
    csvdata = csvloader.load_file(csvpath)

    remover = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", " 1"])
    remover.inputformat(csvdata)
    filtered_data = remover.filter(csvdata)
    filtered_data.class_is_last()

    search = ASSearch(classname="weka.attributeSelection.BestFirst", options=["-D", "1", "-N", "5"])
    evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval", options=["-P", "1", "-E", "1"])
    attribs = AttributeSelection()
    attribs.search(search)
    attribs.evaluator(evaluator)
    attribs.select_attributes(filtered_data)

    print "Summary of Attribute Selection: "
    print attribs.results_string

    jvm.stop()
    return
def remove_package(pkg):
    if packages.is_installed(pkg):
        print("Removing %s..." % pkg)
        packages.uninstall_package(pkg)
        print("Removed %s, please re-run script!" % pkg)
        jvm.stop()
        sys.exit(0)
    print('No such package is installed')
def start_up_weka(path):
    try:
        jvm.start()
        uninstall_unofficial_packages(path)
        install_packages(path)
    finally:
        jvm.stop()
def main(): """ Runs a clusterer from the command-line. Calls JVM start/stop automatically. Use -h to see all options. """ parser = argparse.ArgumentParser( description='Performs clustering from the command-line. Calls JVM start/stop automatically.') parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories") parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m") parser.add_argument("-t", metavar="train", dest="train", required=True, help="training set file") parser.add_argument("-T", metavar="test", dest="test", help="test set file") parser.add_argument("-d", metavar="outmodel", dest="outmodel", help="model output file name") parser.add_argument("-l", metavar="inmodel", dest="inmodel", help="model input file name") parser.add_argument("-p", metavar="attributes", dest="attributes", help="attribute range") parser.add_argument("-x", metavar="num folds", dest="numfolds", help="number of folds") parser.add_argument("-s", metavar="seed", dest="seed", help="seed value for randomization") parser.add_argument("-c", metavar="class index", dest="classindex", help="1-based class attribute index") parser.add_argument("-g", metavar="graph", dest="graph", help="graph output file (if supported)") parser.add_argument("clusterer", help="clusterer classname, e.g., weka.clusterers.SimpleKMeans") parser.add_argument("option", nargs=argparse.REMAINDER, help="additional clusterer options") parsed = parser.parse_args() jars = [] if parsed.classpath is not None: jars = parsed.classpath.split(os.pathsep) params = [] if parsed.train is not None: params.extend(["-t", parsed.train]) if parsed.test is not None: params.extend(["-T", parsed.test]) if parsed.outmodel is not None: params.extend(["-d", parsed.outmodel]) if parsed.inmodel is not None: params.extend(["-l", parsed.inmodel]) if parsed.attributes is not None: params.extend(["-p", parsed.attributes]) if parsed.numfolds is not None: params.extend(["-x", parsed.numfolds]) if parsed.seed is not None: params.extend(["-s", parsed.seed]) if parsed.classindex is not None: params.extend(["-c", parsed.classindex]) if parsed.graph is not None: params.extend(["-g", parsed.graph]) jvm.start(jars, max_heap_size=parsed.heap, packages=True) logger.debug("Commandline: " + join_options(sys.argv[1:])) try: clusterer = Clusterer(classname=parsed.clusterer) if len(parsed.option) > 0: clusterer.options = parsed.option print(ClusterEvaluation.evaluate_clusterer(clusterer, params)) except Exception as e: print(e) finally: jvm.stop()
def stop():
    """
    Stop a weka connection. May be called multiple times, but note that a new
    connection cannot be started after calling this.
    """
    if MODULE_SUPPORTED:
        jvm.stop()
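# A start() counterpart guarded by the same MODULE_SUPPORTED flag might look like this
# (a sketch only; MODULE_SUPPORTED is assumed to be set at import time when the weka
# wrapper module could be imported, and the keyword arguments shown are optional):
def start(packages=False, max_heap_size=None):
    """
    Start a weka connection, if the weka wrapper module is available.
    """
    if MODULE_SUPPORTED:
        jvm.start(packages=packages, max_heap_size=max_heap_size)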
def install_package(pkg):
    # install weka package if necessary
    if not packages.is_installed(pkg):
        print("Installing %s..." % pkg)
        packages.install_package(pkg)
        print("Installed %s, please re-run script!" % pkg)
        jvm.stop()
        sys.exit(0)
    print('Package already installed.')
def metadata_path_start(now, args, datasets_names, queue=None): """ Creates a metadata path for this run. :param now: current time, as a datetime.datetime object. :param args: args object, as generated by argparse library :param datasets_names: names of datasets, as a list :param queue: queue of processes """ jvm.start() str_time = now.strftime('%d-%m-%Y-%H-%M-%S') joined = os.getcwd() if not os.path.isabs(args.metadata_path) else '' to_process = [args.metadata_path, str_time] for path in to_process: joined = os.path.join(joined, path) if not os.path.exists(joined): os.mkdir(joined) with open(os.path.join(joined, 'parameters.json'), 'w') as f: json.dump({k: getattr(args, k) for k in args.__dict__}, f, indent=2) these_paths = [] for dataset_name in datasets_names: local_joined = os.path.join(joined, dataset_name) these_paths += [local_joined] if not os.path.exists(local_joined): os.mkdir(local_joined) os.mkdir(os.path.join(local_joined, 'overall')) y_tests = [] class_name = None for n_fold in range(1, 11): train_data, test_data = read_datasets( os.path.join(args.datasets_path, dataset_name), n_fold) y_tests += [test_data.values(test_data.class_attribute.index)] class_name = train_data.class_attribute.name # concatenates array of y's pd.DataFrame(np.concatenate(y_tests), columns=[class_name ]).to_csv(os.path.join(local_joined, 'overall', 'y_test.txt'), index=False) jvm.stop() if queue is not None: queue.put(these_paths) return joined
def run_multi_subset_experiment(alg_names, ds_names):
    try:
        jvm.start()
        for alg_name in alg_names:
            run_subset_experiment(alg_name, ds_names)
    except Exception as e:
        print(traceback.format_exc())
    finally:
        jvm.stop()
def quit(self):
    """Ask if the user wants to quit. If yes, close the GUI."""
    if tkMessageBox.askokcancel("Quit", "Do you want to quit?"):
        try:
            import weka.core.jvm as jvm
            jvm.stop()
        except:
            pass
        self._root.quit()
        self._root.destroy()
def main(argv):
    global input
    global outputModel
    global inputModel
    global outputPrediction
    global isTest
    global my_list

    input = " "
    outputModel = " "
    inputModel = " "
    outputPrediction = " "
    counter = 0
    isTest = 0
    my_list = []

    try:
        opts, args = getopt.getopt(argv, "hi:o:m:", ["ifile=", "ofile=", "mfile="])
        print len(opts)
    except getopt.GetoptError:
        sys.exit(2)

    if len(opts) == 3:
        isTest = 1
    else:
        isTest = 0

    for opt, arg in opts:
        print arg
        if len(opts) == 3:
            if opt == "-i":
                input = arg
            elif opt == "-o":
                outputPrediction = arg
            elif opt == "-m":
                inputModel = arg
        else:
            if opt == "-i":
                input = arg
            elif opt == "-o":
                outputModel = arg

    if isTest == 0:
        import weka.core.jvm as jvm
        jvm.start()
        excractFeatures()
    if isTest == 1:
        import weka.core.jvm as jvm
        jvm.start()
        excractFeatures()
    jvm.stop()
    sys.exit()
def generate_folds(dataset_path, output_folder, n_folds=10, random_state=None): """ Given a dataset df, generate n_folds for it and store them in <output_folder>/<dataset_name>. :type dataset_path: str :param dataset_path: Path to dataset with .arff file extension (i.e my_dataset.arff) :type output_folder: str :param output_folder: Path to store both index file with folds and fold files. :type n_folds: int :param n_folds: Optional - Number of folds to split the dataset into. Defaults to 10. :type random_state: int :param random_state: Optional - Seed to use in the splitting process. Defaults to None (no seed). """ import warnings warnings.filterwarnings('error') dataset_name = dataset_path.split('/')[-1].split('.')[0] af = load_arff(dataset_path) df = load_dataframe(af) skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_state) fold_iter = skf.split(df[df.columns[:-1]], df[df.columns[-1]]) fold_index = dict() jvm.start() csv_loader = Loader(classname="weka.core.converters.CSVLoader") arff_saver = Saver(classname='weka.core.converters.ArffSaver') for i, (arg_rest, arg_test) in enumerate(fold_iter): fold_index[i] = list(arg_test) _temp_path = 'temp_%s_%d.csv' % (dataset_name, i) fold_data = df.loc[arg_test] # type: pd.DataFrame fold_data.to_csv(_temp_path, sep=',', index=False) java_arff_dataset = csv_loader.load_file(_temp_path) java_arff_dataset.relationname = af['relation'] java_arff_dataset.class_is_last() arff_saver.save_file(java_arff_dataset, os.path.join(output_folder, '%s_fold_%d.arff' % (dataset_name, i))) os.remove(_temp_path) json.dump( fold_index, open(os.path.join(output_folder, dataset_name + '.json'), 'w'), indent=2 ) jvm.stop() warnings.filterwarnings('default')
def start_search(file_name, type):
    tile_set, characteristic, nmrClass = read_file(file_name)  # gets data from file
    tile_set = generate_placeholders(tile_set, characteristic, nmrClass)  # gets place holder tiles
    jvm.stop()
    root = Node(tile_set, [], 0.0, characteristic, 0, 0)  # makes start state for search

    # picks algorithm
    if (int(type) == 0):  # uniform cost search
        best_solution, node_count = uniform_cost([root])
        output_soultion(best_solution, node_count)
    elif (int(type) == 1):  # puzzle building
        best_solution = puzzle_building_search([root])
def evaluate(request, data): decoded_data = unquote(data) if data else [] parsed_keys = parse_qs(decoded_data) parsed_string = parsed_keys['keywords'][0] filename = parsed_keys['filename'][0] quantity = int(parsed_keys['quantity'][0]) startdate = parsed_keys['startdate'][0] enddate = parsed_keys['enddate'][0] language = parsed_keys['language'][0] file_location = get_file_destinantion(filename) if not path.exists(file_location): return render(request, 'model_not_found.html') keywords_parsed = parsed_string.split(',') query = ' OR '.join(keywords_parsed) tweets = [] jvm.start() model = load_classification_model(filename) tweetCriteria = manager\ .TweetCriteria()\ .setQuerySearch(query)\ .setSince(startdate)\ .setUntil(enddate)\ .setLang(language)\ .setMaxTweets(quantity) tweets = manager.TweetManager.getTweets(tweetCriteria) cleaned_tweets = get_cleaned_tweets(tweets) dataset = create_dataset(cleaned_tweets) predictions = evaluate_model_and_testset(model, dataset) jvm.stop() labels = model['classes'] values = get_value_of_classes(model['classes'], predictions) fill_color = generate_random_colors(model['classes']) predictions_colors = get_predictions_colors(predictions, model['classes'], fill_color) data = { "keywords": keywords_parsed if keywords_parsed else [], "tweets": zip(tweets, cleaned_tweets, predictions, predictions_colors), "size": len(tweets), "labels": labels, "values": values, "fill_color": fill_color, } return render(request, 'evaluator.html', { "data": data })
def assign_classify(file_location, output="classified.out", model="naivebayes.model"):
    data = read_csv_file(file_location)
    jvm.start()
    # load the serialized classifier
    obj = serialization.read(model)
    classifier = Classifier(jobject=obj)
    # create file with cluster group
    with open(output, 'w') as cluster_file:
        for index, attrs in enumerate(data):
            inst = Instance.create_instance(attrs[1:])
            # note: depending on the classifier, the instance may also need its dataset
            # (header information) set before classify_instance() is called
            pred = classifier.classify_instance(inst)
            print(str(index + 1) + ": label index=" + str(pred))
    jvm.stop()
def predict(attributes):
    jvm.start()
    file_path = print_to_file(attributes)

    # load the saved model
    objects = serialization.read_all("/Users/hosyvietanh/Desktop/data_mining/trained_model.model")
    classifier = Classifier(jobject=objects[0])

    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(file_path)
    data.class_is_last()

    prediction = -1
    for index, inst in enumerate(data):
        pred = classifier.classify_instance(inst)
        dist = classifier.distribution_for_instance(inst)
        prediction = int(pred)
        break  # only the first instance is needed

    jvm.stop()
    return prediction
def playback_speed_checker(inputFile, dirRef):
    TRAINING_ARFF = 'dataset_playback.arff'
    inputRef = ""

    # Start JVM (a single call with the combined options is sufficient)
    jvm.start(system_cp=True, packages=True, max_heap_size="512m")

    # Find reference file
    for file in os.listdir(dirRef):
        if str(file).find(str(os.path.basename(inputFile))) != -1:
            inputRef = os.path.join(dirRef, file)
            break

    # Calculation distance
    (result, distance) = dtw_checker(inputFile, inputRef)

    # Loading data
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(TRAINING_ARFF)
    data.class_is_last()  # set class attribute

    # Train the classifier
    #cls = Classifier(classname="weka.classifiers.functions.SMO")
    cls = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3", "-M", "10"])
    cls.build_classifier(data)

    # Create the instance to classify
    speed_instance = Instance.create_instance(numpy.ndarray(distance), classname='weka.core.DenseInstance', weight=1.0)
    speed_instance.dataset = data

    # Classify instance
    speed_flag = cls.classify_instance(speed_instance)

    if (distance == 0):
        speed_class = 'nominal'
    else:
        if speed_flag == 0:
            speed_class = 'down_speed'
        else:
            speed_class = 'up_speed'

    # print os.path.basename(inputFile) + ' --- ' + speed_class

    # Stop JVM
    jvm.stop()

    print "SPEED IS: " + speed_class

    return speed_class
def query_instance(attributes, model="out.model"):
    """
    get the cluster for defined attributes
    :param attributes: array or list
    :returns: cluster id
    """
    jvm.start()
    # create instance
    inst = Instance.create_instance(attributes)
    # load model
    obj = serialization.read(model)
    # load cluster and get the cluster_id
    cluster = Clusterer(jobject=obj)
    cluster_id = cluster.cluster_instance(inst)
    jvm.stop()
    return cluster_id
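# Example call for the helper above (the attribute values are placeholders, and "out.model"
# is assumed to hold a clusterer saved with weka.core.serialization):
if __name__ == "__main__":
    print(query_instance([1.0, 2.5, 0.7], model="out.model"))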
def main(): """ Runs attribute selection from the command-line. Calls JVM start/stop automatically. Use -h to see all options. """ parser = argparse.ArgumentParser( description='Performs attribute selection from the command-line. Calls JVM start/stop automatically.') parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories") parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m") parser.add_argument("-i", metavar="input", dest="input", required=True, help="input file") parser.add_argument("-c", metavar="class index", dest="classindex", help="1-based class attribute index") parser.add_argument("-s", metavar="search", dest="search", help="search method, classname and options") parser.add_argument("-x", metavar="num folds", dest="numfolds", help="number of folds") parser.add_argument("-n", metavar="seed", dest="seed", help="the seed value for randomization") parser.add_argument("evaluator", help="evaluator classname, e.g., weka.attributeSelection.CfsSubsetEval") parser.add_argument("option", nargs=argparse.REMAINDER, help="additional evaluator options") parsed = parser.parse_args() jars = [] if parsed.classpath is not None: jars = parsed.classpath.split(os.pathsep) params = [] if parsed.input is not None: params.extend(["-i", parsed.input]) if parsed.classindex is not None: params.extend(["-c", parsed.classindex]) if parsed.search is not None: params.extend(["-s", parsed.search]) if parsed.numfolds is not None: params.extend(["-x", parsed.numfolds]) if parsed.seed is not None: params.extend(["-n", parsed.seed]) jvm.start(jars, max_heap_size=parsed.heap, packages=True) logger.debug("Commandline: " + join_options(sys.argv[1:])) try: evaluation = ASEvaluation(classname=parsed.evaluator) if len(parsed.option) > 0: evaluation.options = parsed.option print(AttributeSelection.attribute_selection(evaluation, params)) except Exception as e: print(e) finally: jvm.stop()
def dict2arff(self, fileIn, fileOut):
    '''
    :param fileIn: name of csv file
    :param fileOut: name of new arff file
    :return:
    '''
    dataIn = os.path.join(self.dataDir, fileIn)
    dataOut = os.path.join(self.dataDir, fileOut)
    logger.info('[%s] : [INFO] Starting conversion of %s to %s',
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), dataIn, dataOut)
    try:
        jvm.start()
        convertCsvtoArff(dataIn, dataOut)
    except Exception as inst:
        logger.error('[%s] : [ERROR] Exception occurred while converting to arff with %s and %s',
                     datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), type(inst), inst.args)
    finally:
        jvm.stop()
    logger.info('[%s] : [INFO] Finished conversion of %s to %s',
                datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), dataIn, dataOut)
def riaa_checker(inputFile):
    TRAINING_ARFF = 'C:\Users\ASUS\Desktop\IGNASI\SMC\Workspace\dataset_riaa.arff'

    # Start JVM (a single call with the combined options is sufficient)
    jvm.start(system_cp=True, packages=True, max_heap_size="512m")

    # Calculation of bark bands information
    (absolute_bark, relative_bark, bark_ratios) = compute_bark_spectrum(inputFile)

    # Loading data
    loader = Loader(classname="weka.core.converters.ArffLoader")
    data = loader.load_file(TRAINING_ARFF)
    data.class_is_last()  # set class attribute

    # Train the classifier
    cls = Classifier(classname="weka.classifiers.functions.SMO")
    #cls = Classifier(classname="weka.classifiers.trees.J48", options = ["-C", "0.3", "-M", "10"])
    cls.build_classifier(data)

    # Create the instance to classify
    bark_instance = Instance.create_instance(bark_ratios, classname='weka.core.DenseInstance', weight=1.0)
    bark_instance.dataset = data

    # Classify instance
    riaa_flag = cls.classify_instance(bark_instance)

    if riaa_flag == 0:
        riaa_class = 'riaa_ok'
    else:
        riaa_class = 'riaa_ko'

    # print os.path.basename(inputFile) + ' --- ' + riaa_class

    # Stop JVM
    jvm.stop()

    print "RIAA FILTERING?: " + riaa_class

    return riaa_class
def batch_riaa_checking(inputDir):
    # Start JVM (a single call with the combined options is sufficient)
    jvm.start(system_cp=True, packages=True, max_heap_size="512m")

    riaa_ok = 0
    riaa_ko = 0
    for file in os.listdir(inputDir):
        if file.endswith(".wav"):
            # note: riaa_checker() also starts and stops the JVM itself
            riaa_flag = riaa_checker(os.path.join(inputDir, file))
            if (riaa_flag == 'riaa_ko'):
                riaa_ko += 1
            if (riaa_flag == 'riaa_ok'):
                riaa_ok += 1

    # Stop JVM
    jvm.stop()

    return (riaa_ko, riaa_ok)
def classify(train, test, name="RF", tuning=False): jvm.start() if isinstance(train, list) and isinstance(test, list): train = weka_instance(train) trn_data = converters.load_any_file(train) test = weka_instance(test) tst_data = converters.load_any_file(test) elif os.path.isfile(train) and os.path.isfile(test): trn_data = converters.load_any_file(train) tst_data = converters.load_any_file(test) else: trn = csv_as_ndarray(train) tst = csv_as_ndarray(test) trn_data = converters.ndarray_to_instances(trn, relation="Train") tst_data = converters.ndarray_to_instances(tst, relation="Test") trn_data.class_is_last() tst_data.class_is_last() # t = time() if tuning: opt = tune(train) else: opt = default_opt # print("Time to tune: {} seconds".format(time() - t)) cls = Classifier(classname=classifiers[name.lower()], options=opt) cls.build_classifier(trn_data) distr = [cls.distribution_for_instance(inst)[1] for inst in tst_data] preds = [cls.classify_instance(inst) for inst in tst_data] jvm.stop() return preds, distr
import weka.core.jvm as jvm

jvm.start()
jvm.start(system_cp=True, packages=True)
jvm.start(packages="/usr/local/lib/python2.7/dist-packages/weka")
jvm.start(max_heap_size="512m")

data_dir = "CSDMC2010_SPAM/CSDMC2010_SPAM/TRAINING"

from weka.classifiers import Classifier
cls = Classifier(classname="weka.classifiers.trees.J48")
cls.options = ["-C", "0.3"]
print(cls.options)

jvm.stop()
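# The repeated jvm.start() calls in the previous snippet mirror the alternative forms from the
# python-weka-wrapper documentation; only the first call in a process actually starts the JVM,
# the later ones just log a warning. A single consolidated call might look like this (the
# packages directory is the example path from above, not a required value):
import weka.core.jvm as jvm

jvm.start(packages="/usr/local/lib/python2.7/dist-packages/weka",
          system_cp=True, max_heap_size="512m")
# ... work with classifiers, filters, etc. ...
jvm.stop()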
def evaluate_j48(datasets_path, intermediary_path): # for examples on how to use this function, refer to # http://pythonhosted.org/python-weka-wrapper/examples.html#build-classifier-on-dataset-output-predictions import weka.core.jvm as jvm from weka.core.converters import Loader from weka.classifiers import Classifier from sklearn.metrics import precision_score, accuracy_score, f1_score from networkx.drawing.nx_agraph import graphviz_layout jvm.start() json_results = { 'runs': { '1': dict() } } try: for dataset in os.listdir(datasets_path): dataset_name = dataset.split('.')[0] json_results['runs']['1'][dataset_name] = dict() loader = Loader(classname="weka.core.converters.ArffLoader") y_pred_all = [] y_true_all = [] heights = [] n_nodes = [] for n_fold in it.count(): try: train_s = loader.load_file( os.path.join(intermediary_path, '%s_fold_%d_train.arff' % (dataset_name, n_fold))) val_s = loader.load_file( os.path.join(intermediary_path, '%s_fold_%d_val.arff' % (dataset_name, n_fold))) test_s = loader.load_file( os.path.join(intermediary_path, '%s_fold_%d_test.arff' % (dataset_name, n_fold))) train_s.relationname = dataset_name val_s.relationname = dataset_name test_s.relationname = dataset_name train_s.class_is_last() val_s.class_is_last() test_s.class_is_last() warnings.warn('WARNING: appending validation set in training set.') for inst in val_s: train_s.add_instance(inst) cls = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.25", "-M", "2"]) # cls = Classifier(classname="weka.classifiers.trees.REPTree", # options=["-M", "2", "-V", "0.001", "-N", "3", "-S", "1", "-L", "-1", "-I", "0.0"]) cls.build_classifier(train_s) warnings.warn('WARNING: will only work for binary splits!') graph = cls.graph.encode('ascii') out = StringIO.StringIO(graph) G = nx.Graph(nx.nx_pydot.read_dot(out)) # TODO plotting! # fig = plt.figure(figsize=(40, 30)) # pos = graphviz_layout(G, root='N0', prog='dot') # # edgelist = G.edges(data=True) # nodelist = G.nodes(data=True) # # edge_labels = {(x1, x2): v['label'] for x1, x2, v in edgelist} # node_colors = {node_id: ('#98FB98' if 'shape' in _dict else '#0099FF') for node_id, _dict in nodelist} # node_colors['N0'] = '#FFFFFF' # node_colors = node_colors.values() # # nx.draw_networkx_nodes(G, pos, node_color=node_colors) # nx.draw_networkx_edges(G, pos, style='dashed', arrows=False) # nx.draw_networkx_labels(G, pos, {k: v['label'] for k, v in G.node.iteritems()}) # nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels) # plt.axis('off') # plt.show() # exit(0) # TODO plotting! 
heights += [max(map(len, nx.shortest_path(G, source='N0').itervalues()))] n_nodes += [len(G.node)] y_test_true = [] y_test_pred = [] # y_train_true = [] # y_train_pred = [] # for index, inst in enumerate(train_s): # y_train_true += [inst.get_value(inst.class_index)] # y_train_pred += [cls.classify_instance(inst)] for index, inst in enumerate(test_s): y_test_true += [inst.get_value(inst.class_index)] y_test_pred += [cls.classify_instance(inst)] y_true_all += y_test_true y_pred_all += y_test_pred except Exception as e: break json_results['runs']['1'][dataset_name] = { 'confusion_matrix': confusion_matrix(y_true_all, y_pred_all).tolist(), 'height': heights, 'n_nodes': n_nodes, } # interprets json_results = json.load(open('/home/henry/Desktop/j48/j48_results.json', 'r')) n_runs = len(json_results['runs'].keys()) some_run = json_results['runs'].keys()[0] n_datasets = len(json_results['runs'][some_run].keys()) df = pd.DataFrame( columns=['run', 'dataset', 'test_acc', 'height mean', 'height std', 'n_nodes mean', 'n_nodes std'], index=np.arange(n_runs * n_datasets), dtype=np.float32 ) df['dataset'] = df['dataset'].astype(np.object) count_row = 0 for n_run, run in json_results['runs'].iteritems(): for dataset_name, dataset in run.iteritems(): conf_matrix = np.array(dataset['confusion_matrix'], dtype=np.float32) test_acc = np.diag(conf_matrix).sum() / conf_matrix.sum() height_mean = np.mean(dataset['height']) height_std = np.std(dataset['height']) n_nodes_mean = np.mean(dataset['n_nodes']) n_nodes_std = np.std(dataset['n_nodes']) df.loc[count_row] = [ int(n_run), str(dataset_name), float(test_acc), float(height_mean), float(height_std), float(n_nodes_mean), float(n_nodes_std) ] count_row += 1 print df json.dump(json_results, open('j48_results.json', 'w'), indent=2) df.to_csv('j48_results.csv', sep=',', quotechar='\"', index=False) finally: jvm.stop()
def runner(self, cdat, heap_size = 16384, seed = None, verbose = True): self.set_status(Pipeline.RUNNING) self.logs.append('Initializing Pipeline') para = self.config self.logs.append('Reading Pipeline Configuration') head = '' name = get_rand_uuid_str() self.logs.append('Reading Input File') for i, stage in enumerate(self.stages): if stage.code in ('dat.fle', 'prp.bgc', 'prp.nrm', 'prp.pmc', 'prp.sum'): self.stages[i].status = Pipeline.RUNNING if stage.code == 'dat.fle': head = os.path.abspath(stage.value.path) name, _ = os.path.splitext(stage.value.name) self.logs.append('Parsing to ARFF') path = os.path.join(head, '{name}.arff'.format(name = name)) # This bug, I don't know why, using Config.schema instead. # cdat.toARFF(path, express_config = para.Preprocess.schema, verbose = verbose) for i, stage in enumerate(self.stages): if stage.code in ('dat.fle', 'prp.bgc', 'prp.nrm', 'prp.pmc', 'prp.sum'): self.stages[i].status = Pipeline.COMPLETE self.logs.append('Saved ARFF at {path}'.format(path = path)) self.logs.append('Splitting to Training and Testing Sets') JVM.start(max_heap_size = '{size}m'.format(size = heap_size)) load = Loader(classname = 'weka.core.converters.ArffLoader') # data = load.load_file(path) # save = Saver(classname = 'weka.core.converters.ArffSaver') data = load.load_file(os.path.join(head, 'iris.arff')) # For Debugging Purposes Only data.class_is_last() # For Debugging Purposes Only # data.class_index = cdat.iclss for i, stage in enumerate(self.stages): if stage.code == 'prp.kcv': self.stages[i].status = Pipeline.RUNNING self.logs.append('Splitting Training Set') # TODO - Check if this seed is worth it. seed = assign_if_none(seed, random.randint(0, 1000)) opts = ['-S', str(seed), '-N', str(para.Preprocess.FOLDS)] wobj = Filter(classname = 'weka.filters.supervised.instance.StratifiedRemoveFolds', options = opts + ['-V']) wobj.inputformat(data) tran = wobj.filter(data) self.logs.append('Splitting Testing Set') wobj.options = opts test = wobj.filter(data) for i, stage in enumerate(self.stages): if stage.code == 'prp.kcv': self.stages[i].status = Pipeline.COMPLETE self.logs.append('Performing Feature Selection') feat = [ ] for comb in para.FEATURE_SELECTION: if comb.USE: for i, stage in enumerate(self.stages): if stage.code == 'ats': search = stage.value.search.name evaluator = stage.value.evaluator.name if search == comb.Search.NAME and evaluator == comb.Evaluator.NAME: self.stages[i].status = Pipeline.RUNNING srch = ASSearch(classname = 'weka.attributeSelection.{classname}'.format( classname = comb.Search.NAME, options = assign_if_none(comb.Search.OPTIONS, [ ]) )) ewal = ASEvaluation(classname = 'weka.attributeSelection.{classname}'.format( classname = comb.Evaluator.NAME, options = assign_if_none(comb.Evaluator.OPTIONS, [ ]) )) attr = AttributeSelection() attr.search(srch) attr.evaluator(ewal) attr.select_attributes(tran) meta = addict.Dict() meta.search = comb.Search.NAME meta.evaluator = comb.Evaluator.NAME meta.features = [tran.attribute(index).name for index in attr.selected_attributes] feat.append(meta) for i, stage in enumerate(self.stages): if stage.code == 'ats': search = stage.value.search.name evaluator = stage.value.evaluator.name if search == comb.Search.NAME and evaluator == comb.Evaluator.NAME: self.stages[i].status = Pipeline.COMPLETE models = [ ] for model in para.MODEL: if model.USE: summary = addict.Dict() self.logs.append('Modelling {model}'.format(model = model.LABEL)) summary.label = model.LABEL summary.name = model.NAME summary.options = 
assign_if_none(model.OPTIONS, [ ]) for i, stage in enumerate(self.stages): if stage.code == 'lrn' and stage.value.name == model.NAME: self.stages[i].status = Pipeline.RUNNING for i, instance in enumerate(data): iclass = list(range(instance.num_classes)) options = assign_if_none(model.OPTIONS, [ ]) classifier = Classifier(classname = 'weka.classifiers.{classname}'.format(classname = model.NAME), options = options) classifier.build_classifier(tran) serializer.write(os.path.join(head, '{name}.{classname}.model'.format( name = name, classname = model.NAME )), classifier) self.logs.append('Testing model {model}'.format(model = model.LABEL)) evaluation = Evaluation(tran) evaluation.test_model(classifier, test) summary.summary = evaluation.summary() frame = pd.DataFrame(data = evaluation.confusion_matrix) axes = sns.heatmap(frame, cbar = False, annot = True) b64str = get_b64_plot(axes) summary.confusion_matrix = addict.Dict({ 'value': evaluation.confusion_matrix.tolist(), 'plot': b64str }) self.logs.append('Plotting Learning Curve for {model}'.format(model = model.LABEL)) buffer = io.BytesIO() plot_classifier_errors(evaluation.predictions, tran, test, outfile = buffer, wait = False) b64str = buffer_to_b64(buffer) summary.learning_curve = b64str buffer = io.BytesIO() plot_roc(evaluation, class_index = iclass, outfile = buffer, wait = False) b64str = buffer_to_b64(buffer) summary.roc_curve = b64str buffer = io.BytesIO() plot_prc(evaluation, class_index = iclass, outfile = buffer, wait = False) b64str = buffer_to_b64(buffer) summary.prc_curve = b64str if classifier.graph: summary.graph = classifier.graph for i, instance in enumerate(test): prediction = classifier.classify_instance(instance) for i, stage in enumerate(self.stages): if stage.code == 'lrn' and stage.value.name == model.NAME: self.stages[i].status = Pipeline.COMPLETE models.append(summary) self.gist.models = models JVM.stop() JSON.write(os.path.join(head, '{name}.cgist'.format(name = name)), self.gist) self.logs.append('Pipeline Complete') self.set_status(Pipeline.COMPLETE)
def stop(self):
    jvm.stop()
def __del__(self):
    # stop JVM
    print "jvm stop"
    jvm.stop()
def main(): """ Specify list of files to multi_file_curve, classify, and export results as csv. """ try: # start up a JVM to run weka on jvm.start(max_heap_size='512m') # classifiers naive_bayes = Classifier(classname='weka.classifiers.bayes.NaiveBayes') zero_r = Classifier(classname='weka.classifiers.rules.ZeroR') bayes_net = Classifier(classname='weka.classifiers.bayes.BayesNet', options=['-D', '-Q', 'weka.classifiers.bayes.net.search.local.K2', '--', '-P', '1', '-S', 'BAYES', '-E', 'weka.classifiers.bayes.net.estimate.SimpleEstimator', '--', '-A', '0.5']) d_tree = Classifier(classname='weka.classifiers.trees.J48', options=['-C', '0.25', '-M', '2']) file_list = [ 'data/aggregated_data.csv' ] name_list = [ 'multi-class' ] # classify and export percent_range = range(1, 101, 1) zero_r_curves = multi_file_curve(classifier=zero_r, classifier_name='zero_r', name_list=name_list, in_file_list=file_list, percentages=percent_range) naive_bayes_curves = multi_file_curve(classifier=naive_bayes, classifier_name='naive_bayes', name_list=name_list, in_file_list=file_list, percentages=percent_range) bayes_net_curves = multi_file_curve(classifier=bayes_net, classifier_name='bayes_net', name_list=name_list, in_file_list=file_list, percentages=percent_range) d_tree_curves = multi_file_curve(classifier=d_tree, classifier_name='d_tree', name_list=name_list, in_file_list=file_list, percentages=percent_range) # export csv_header = [ 'approach', 'classifier', 'percentage_dataset_training', 'accuracy', 'f_measure' ] with open('analysis/learning_curves.csv', 'wb') as f: csv_writer = csv.writer(f, delimiter=',') csv_writer.writerow(csv_header) for r in zero_r_curves: csv_writer.writerow(r) for r in naive_bayes_curves: csv_writer.writerow(r) for r in bayes_net_curves: csv_writer.writerow(r) for r in d_tree_curves: csv_writer.writerow(r) except RuntimeError: typ, value, tb = sys.exc_info() print typ print value print tb traceback.print_exc() pdb.post_mortem(tb) finally: jvm.stop()
def run_classifier(path, prot, sel, cols, prot_vals, beta): DIs = dict() jvm.start() for i in range(len(cols)-1): loader = Loader(classname="weka.core.converters.CSVLoader") data = loader.load_file(path) # remove selected attribute from the data # NOTE: options are ONE indexed, not ZERO indexed remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", \ options=["-R", str(sel[2]+1)]) remove.inputformat(data) data = remove.filter(data) # if running for only one attribue, remove all others (except protected) if i > 0: for j in range(1, prot[2]+1): if i != j: remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", \ options=["-R", ("1" if i>j else "2")]) remove.inputformat(data) data = remove.filter(data) # set prot attribute as Class attribute data.class_is_last() # run classifier cls = Classifier(classname="weka.classifiers.bayes.NaiveBayes") cls.build_classifier(data) # count the number of each combination pos_and_pred = float(0.0) pos_and_not_pred = float(0.0) neg_and_pred = float(0.0) neg_and_not_pred = float(0.0) for ind, inst in enumerate(data): if cls.classify_instance(inst): if prot_vals[ind] == prot[1]: pos_and_pred += 1 else: neg_and_pred += 1 else: if prot_vals[ind] == prot[1]: pos_and_not_pred += 1 else: neg_and_not_pred += 1 # calculate DI BER = ((pos_and_not_pred / (pos_and_pred + pos_and_not_pred)) + \ (neg_and_pred / (neg_and_pred + neg_and_not_pred))) * 0.5 if BER > 0.5: BER = 1 - BER DI = 1 - ((1 - 2 * BER) / (beta + 1 - 2 * BER)) if i == 0: # consider changing this to a 'code word' instead of 'all' DIs["all"] = DI else: DIs[cols[i-1]] = DI jvm.stop() return DIs
def main(): """ Runs a filter from the command-line. Calls JVM start/stop automatically. Use -h to see all options. """ parser = argparse.ArgumentParser( description='Executes a filter from the command-line. Calls JVM start/stop automatically.') parser.add_argument("-j", metavar="classpath", dest="classpath", help="additional classpath, jars/directories") parser.add_argument("-X", metavar="heap", dest="heap", help="max heap size for jvm, e.g., 512m") parser.add_argument("-i", metavar="input1", dest="input1", required=True, help="input file 1") parser.add_argument("-o", metavar="output1", dest="output1", required=True, help="output file 1") parser.add_argument("-r", metavar="input2", dest="input2", help="input file 2") parser.add_argument("-s", metavar="output2", dest="output2", help="output file 2") parser.add_argument("-c", metavar="classindex", default="-1", dest="classindex", help="1-based class attribute index") parser.add_argument("filter", help="filter classname, e.g., weka.filters.AllFilter") parser.add_argument("option", nargs=argparse.REMAINDER, help="additional filter options") parsed = parser.parse_args() if parsed.input2 is None and parsed.output2 is not None: raise Exception("No second input file provided ('-r ...')!") jars = [] if parsed.classpath is not None: jars = parsed.classpath.split(os.pathsep) params = [] if parsed.input1 is not None: params.extend(["-i", parsed.input1]) if parsed.output1 is not None: params.extend(["-o", parsed.output1]) if parsed.input2 is not None: params.extend(["-r", parsed.input2]) if parsed.output2 is not None: params.extend(["-s", parsed.output2]) if parsed.classindex is not None: params.extend(["-c", parsed.classindex]) jvm.start(jars, max_heap_size=parsed.heap, packages=True) logger.debug("Commandline: " + join_options(sys.argv[1:])) try: flter = Filter(parsed.filter) if len(parsed.option) > 0: flter.options = parsed.option loader = Loader(classname="weka.core.converters.ArffLoader") in1 = loader.load_file(parsed.input1) cls = parsed.classindex if str(parsed.classindex) == "first": cls = "0" if str(parsed.classindex) == "last": cls = str(in1.num_attributes - 1) in1.class_index = int(cls) flter.inputformat(in1) out1 = flter.filter(in1) saver = Saver(classname="weka.core.converters.ArffSaver") saver.save_file(out1, parsed.output1) if parsed.input2 is not None: in2 = loader.load_file(parsed.input2) in2.class_index = int(cls) out2 = flter.filter(in2) saver.save_file(out2, parsed.output2) except Exception as e: print(e) finally: jvm.stop()
def testing(): logging.disable("weka") print "PROSES KLASIFIKASI\n------------------" jvm.start() pruning = 0 while pruning < 2: persen_train = 0 while persen_train < 4: fitur_hapus = 15 while fitur_hapus >= 0: list_akurasi = [] list_recall = [] list_presisi = [] list_fmeasure = [] list_roc = [] count = 0 nama = "hasilTest/" if(pruning == 0): nama += "unpruning" if(persen_train == 0): nama += "40" elif(persen_train == 1): nama += "50" elif(persen_train == 2): nama += "60" else: nama += "70" else: nama += "pruning" if(persen_train == 0): nama += "40" elif(persen_train == 1): nama += "50" elif(persen_train == 2): nama += "60" else: nama += "70" if(fitur_hapus > 0): nama += "removeF" + str(fitur_hapus) + ".txt" else: nama += "normal.txt" f = open(nama, "w") if(pruning == 0): nama = "unpruning" print "Tanpa Pruning" f.write("Hasil Decision Tree C4.5 tanpa Pruning (unpruning)\n") if(persen_train == 0): nama += "40" f.write("Dengan Training Set sebesar 40%\n") elif(persen_train == 1): nama += "50" f.write("Dengan Training Set sebesar 50%\n") elif(persen_train == 2): nama += "60" f.write("Dengan Training Set sebesar 60%\n") else: nama += "70" f.write("Dengan Training Set sebesar 70%\n") else: nama = "pruning" print "Dengan Pruning" f.write("Hasil Decision Tree C4.5 Pruning\n") if(persen_train == 0): nama += "40" f.write("Dengan Training Set sebesar 40%\n") elif(persen_train == 1): nama += "50" f.write("Dengan Training Set sebesar 50%\n") elif(persen_train == 2): nama += "60" f.write("Dengan Training Set sebesar 60%\n") else: nama += "70" f.write("Dengan Training Set sebesar 70%\n") if(fitur_hapus > 0): f.write("Menggunakan remove pada fitur " + str(fitur_hapus) + "\n\n") else: f.write("\n") f.write("No. Akurasi Recall Presisi F-Measure ROC\n") if persen_train == 0: print "40% Data Training" elif persen_train == 1: print "50% Data Training" elif persen_train == 2: print "60% Data Training" else: print "70% Data Training" print "Fitur yang dihapus:", fitur_hapus print "\nNo.\tAkurasi\tRecall\tPresisi\tF-Measure\tROC" while count < 100: loader = Loader(classname = "weka.core.converters.ArffLoader") data = loader.load_file("hasil.arff") data.class_is_last() if(fitur_hapus > 0): remove = Filter(classname = "weka.filters.unsupervised.attribute.Remove", options = ["-R", str(fitur_hapus)]) remove.inputformat(data) data_baru = remove.filter(data) data_baru.class_is_last() else: data_baru = loader.load_file("hasil.arff") data_baru.class_is_last() filter = Filter(classname = "weka.filters.unsupervised.instance.Randomize", options = ["-S", str(int(time.time()))]) filter.inputformat(data_baru) data_random = filter.filter(data_baru) data_random.class_is_last() if(pruning == 0): classifier = Classifier(classname = "weka.classifiers.trees.J48", options = ["-U"]) else: classifier = Classifier(classname = "weka.classifiers.trees.J48", options = ["-C", "0.25"]) evaluation = Evaluation(data_random) if(persen_train == 0): evaluation.evaluate_train_test_split(classifier, data_random, percentage = 40) elif(persen_train == 1): evaluation.evaluate_train_test_split(classifier, data_random, percentage = 50) elif(persen_train == 2): evaluation.evaluate_train_test_split(classifier, data_random, percentage = 60) else: evaluation.evaluate_train_test_split(classifier, data_random, percentage = 70) f.write(str(count + 1) + str( ". 
" ) + str(evaluation.weighted_true_positive_rate) + str( " " ) + str(evaluation.weighted_recall) + str( " " ) + str(evaluation.weighted_precision) + str( " " ) + str(evaluation.weighted_f_measure) + str( " " ) + str(evaluation.weighted_area_under_roc) + "\n") print count + 1, evaluation.weighted_true_positive_rate, evaluation.weighted_recall, evaluation.weighted_precision, evaluation.weighted_f_measure, evaluation.weighted_area_under_roc list_akurasi.append(evaluation.weighted_true_positive_rate) list_recall.append(evaluation.weighted_recall) list_presisi.append(evaluation.weighted_precision) list_fmeasure.append(evaluation.weighted_f_measure) list_roc.append(evaluation.weighted_area_under_roc) count += 1 time.sleep(1) list_akurasi.sort() list_recall.sort() list_presisi.sort() list_fmeasure.sort() list_roc.sort() f.write( "" + "\n") f.write( "Rata-Rata" + "\n") f.write( "Akurasi:" + str(sum(list_akurasi) / 100.0) + "\n") f.write( "Recall:" + str(sum(list_recall) / 100.0) + "\n") f.write( "Presisi:" + str(sum(list_presisi) / 100.0) + "\n") f.write( "F-Measure:" + str(sum(list_fmeasure) / 100.0) + "\n") f.write( "ROC:" + str(sum(list_roc) / 100.0) + "\n") f.write( "" + "\n") f.write( "Max" + "\n") f.write( "Akurasi:" + str(list_akurasi[-1] ) + "\n") f.write( "Recall:" + str(list_recall[-1] ) + "\n") f.write( "Presisi:" + str(list_presisi[-1] ) + "\n") f.write( "F-Measure:" + str(list_fmeasure[-1] ) + "\n") f.write( "ROC:" + str(list_roc[-1] ) + "\n") f.write( "" + "\n") f.write( "Min" + "\n") f.write( "Akurasi:" + str(list_akurasi[0] ) + "\n") f.write( "Recall:" + str(list_recall[0] ) + "\n") f.write( "Presisi:" + str(list_presisi[0] ) + "\n") f.write( "F-Measure:" + str(list_fmeasure[0] ) + "\n") f.write( "ROC:" + str(list_roc[0] ) + "\n") f.write( "" + "\n") print "" print "Rata-Rata" print "Akurasi:", sum(list_akurasi) / 100.0 print "Recall:", sum(list_recall) / 100.0 print "Presisi:", sum(list_presisi) / 100.0 print "F-Measure:", sum(list_fmeasure) / 100.0 print "ROC:", sum(list_roc) / 100.0 print "" print "Max" print "Akurasi:", list_akurasi[-1] print "Recall:", list_recall[-1] print "Presisi:", list_presisi[-1] print "F-Measure:", list_fmeasure[-1] print "ROC:", list_roc[-1] print "" print "Min" print "Akurasi:", list_akurasi[0] print "Recall:", list_recall[0] print "Presisi:", list_presisi[0] print "F-Measure:", list_fmeasure[0] print "ROC:", list_roc[0] print "" f.close() fitur_hapus -= 1 persen_train += 1 pruning += 1 jvm.stop()