def main():
    """
    Run two attribute selection passes on the "anneal" dataset and print the results.

    The dataset is read from ``anneal.arff`` in the directory returned by
    ``helper.get_data_dir()`` and its last attribute is used as the class.
    The first pass selects an attribute subset (BestFirst search with
    CfsSubsetEval); the second pass ranks attributes (Ranker search with
    InfoGainAttributeEval) using 2-fold cross-validation and seed 42.
    """
    # locate and load the dataset
    arff_path = os.sep.join((helper.get_data_dir(), "anneal.arff"))
    helper.print_info("Loading dataset: " + arff_path)
    arff_loader = Loader("weka.core.converters.ArffLoader")
    dataset = arff_loader.load_file(arff_path)
    dataset.class_is_last()

    # pass 1: subset selection
    helper.print_title("Attribute selection")
    best_first_options = ["-D", "1", "-N", "5"]
    cfs_options = ["-P", "1", "-E", "1"]
    subset_search = ASSearch(
        classname="weka.attributeSelection.BestFirst",
        options=best_first_options,
    )
    subset_evaluator = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval",
        options=cfs_options,
    )
    selector = AttributeSelection()
    selector.search(subset_search)
    selector.evaluator(subset_evaluator)
    selector.select_attributes(dataset)

    # report the selected subset in several representations
    print("# attributes: " + str(selector.number_attributes_selected))
    print("attributes (as numpy array): " + str(selector.selected_attributes))
    print("attributes (as list): " + str(list(selector.selected_attributes)))
    print("result string:\n" + selector.results_string)

    # pass 2: cross-validated ranking
    helper.print_title("Attribute ranking (2-fold CV)")
    ranker_options = ["-N", "-1"]
    rank_search = ASSearch(
        classname="weka.attributeSelection.Ranker",
        options=ranker_options,
    )
    rank_evaluator = ASEvaluation("weka.attributeSelection.InfoGainAttributeEval")
    ranker = AttributeSelection()
    ranker.ranking(True)
    ranker.folds(2)
    ranker.crossvalidation(True)
    ranker.seed(42)
    ranker.search(rank_search)
    ranker.evaluator(rank_evaluator)
    ranker.select_attributes(dataset)

    print("ranked attributes:\n" + str(ranker.ranked_attributes))
    print("result string:\n" + ranker.results_string)
def main():
    """
    Demonstrate attribute subset selection and attribute ranking.

    Reads ``anneal.arff`` from the directory reported by
    ``helper.get_data_dir()``, treats the last attribute as the class and
    prints the outcome of:

    * a BestFirst/CfsSubsetEval subset selection, and
    * a Ranker/InfoGainAttributeEval ranking run with 2-fold
      cross-validation and seed 42.
    """
    # dataset: build the path, announce it, load it
    dataset_file = os.sep.join((helper.get_data_dir(), "anneal.arff"))
    helper.print_info("Loading dataset: " + dataset_file)
    reader = Loader("weka.core.converters.ArffLoader")
    instances = reader.load_file(dataset_file)
    instances.class_is_last()

    # subset selection
    helper.print_title("Attribute selection")
    searcher = ASSearch(
        classname="weka.attributeSelection.BestFirst",
        options=["-D", "1", "-N", "5"],
    )
    subset_eval = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval",
        options=["-P", "1", "-E", "1"],
    )
    selection = AttributeSelection()
    selection.search(searcher)
    selection.evaluator(subset_eval)
    selection.select_attributes(instances)
    print("# attributes: " + str(selection.number_attributes_selected))
    print("attributes: " + str(selection.selected_attributes))
    print("result string:\n" + selection.results_string)

    # ranking with cross-validation
    helper.print_title("Attribute ranking (2-fold CV)")
    searcher = ASSearch(
        classname="weka.attributeSelection.Ranker",
        options=["-N", "-1"],
    )
    info_gain_eval = ASEvaluation("weka.attributeSelection.InfoGainAttributeEval")
    ranking = AttributeSelection()
    ranking.ranking(True)
    ranking.folds(2)
    ranking.crossvalidation(True)
    ranking.seed(42)
    ranking.search(searcher)
    ranking.evaluator(info_gain_eval)
    ranking.select_attributes(instances)
    print("ranked attributes:\n" + str(ranking.ranked_attributes))
    print("result string:\n" + ranking.results_string)
""" data_dir = "\\\\egr-1l11qd2\\CLS_lab\\Junya Zhao\\Data driven model _paper [June 25_2018\\FeatureSelection\\EvlSearch\\" globbed_files = glob.glob(data_dir + "*.csv") for csv in globbed_files: data = converters.load_any_file(csv) data.class_is_last() search = ASSearch(classname="weka.attributeSelection.EvolutionarySearch", options=[ "-population-size", "200", "-generations", "500", "-crossover-probability", "0.6" ]) evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval", options=["-P", "1", "E", "1"]) attsel = AttributeSelection() attsel.folds(10) attsel.crossvalidation(True) attsel.seed(1) attsel.search(search) attsel.evaluator(evaluator) attsel.select_attributes(data) evl = Evaluation(data) print("# attributes: " + str(attsel.number_attributes_selected)) print("attributes: " + str(attsel.selected_attributes)) print("result string:\n" + attsel.results_string) print(evl) # write the report for each file with open(f"{csv}._report.csv", "a") as outfile: outfile.write(attsel.results_string) #with open(f"{csv}._label.txt","a") as output: #output.write(str(attsel.selected_attributes))
# Attribute selection with WrapperSubsetEval + BestFirst, first with the option
# lists taken from `setup`, then with fixed options and cross-validation enabled.
# `setup` and `data` are defined before this section.
wrapper_eval_class = "weka.attributeSelection.WrapperSubsetEval"
best_first_class = "weka.attributeSelection.BestFirst"

# `setup` is unpacked into (evaluator options, search options)
evl, search = setup
aseval = ASEvaluation(classname=wrapper_eval_class, options=evl)
assearch = ASSearch(classname=best_first_class, options=search)

print("\n--> Attribute selection\n")
print(aseval.to_commandline())
print(assearch.to_commandline())

attsel = AttributeSelection()
attsel.evaluator(aseval)
attsel.search(assearch)
attsel.select_attributes(data)
print(attsel.results_string)

# cross-validation
aseval = ASEvaluation(
    classname=wrapper_eval_class,
    options=["-F", "10", "-B", "weka.classifiers.trees.J48"],
)
assearch = ASSearch(
    classname=best_first_class,
    options=["-D", "0", "-N", "5"],
)

print("\n--> Attribute selection (cross-validation)\n")
print(aseval.to_commandline())
print(assearch.to_commandline())

attsel = AttributeSelection()
attsel.evaluator(aseval)
attsel.search(assearch)
attsel.crossvalidation(True)
attsel.select_attributes(data)
print(attsel.results_string)

# shut the JVM down once all selections have been printed
jvm.stop()