Example #1
0
def try_params(n_instances, params, base, train, valid, test, istest):
    """Build a correlation-based AttributeSelectedClassifier and score it.

    ``n_instances`` is passed through ``round`` and ``int``, ``params`` is
    pretty-printed, and a ``CorrelationAttributeEval`` evaluator is combined
    with the search object obtained from the ``rk`` helper.

    :param n_instances: instance budget forwarded to the training helper
    :param params: mapping that must contain ``outputDetailedInfo``
    :param base: wrapper whose ``jobject`` is assigned to the ``base`` property
    :param train: training data handed to the scoring helper
    :param valid: validation data, used when ``istest`` is false
    :param test: test data, used when ``istest`` is true
    :param istest: chooses ``test_weka_classifier`` over
        ``train_and_eval_weka_classifier``
    :return: the value returned by the chosen scoring helper
    """
    n_instances = int(round(n_instances))
    pprint(params)

    # Explicit comparison kept on purpose: only values equal to True add "-D".
    eval_options = ["-D"] if params['outputDetailedInfo'] == True else []

    search = rk.get_class(rk.get_params())
    evaluator = ASEvaluation(
        classname="weka.attributeSelection.CorrelationAttributeEval",
        options=eval_options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")

    for prop, component in (("evaluator", evaluator),
                            ("search", search),
                            ("base", base)):
        clf.set_property(prop, component.jobject)

    if istest:
        return test_weka_classifier(clf, train, test)
    return train_and_eval_weka_classifier(clf, train, valid, n_instances)
Example #2
0
def Feature_Selection(infile):
    """Run CfsSubsetEval/BestFirst attribute selection on a CSV file.

    The file is resolved against the current working directory, loaded with
    Weka's ``CSVLoader``, passed through a ``Remove`` filter and the last
    remaining attribute is used as the class. The textual selection summary
    is printed to stdout.

    The JVM is started for the duration of the call and is always stopped
    again, even when loading or selection raises.

    :param infile: name of the CSV file, relative to the working directory
    :return: None
    """
    csvpath = os.path.join(os.getcwd(), infile)

    jvm.start(packages=True, max_heap_size="4g")
    try:
        # Single-argument print calls produce the same output on Python 2
        # and Python 3.
        print("\n\n")
        print("Loaded file:  %s" % (infile,))
        csvloader = Loader(classname="weka.core.converters.CSVLoader")
        csvdata = csvloader.load_file(csvpath)

        remover = Filter(
            classname="weka.filters.unsupervised.attribute.Remove",
            options=["-R", " 1"])
        remover.inputformat(csvdata)
        filtered_data = remover.filter(csvdata)
        filtered_data.class_is_last()

        search = ASSearch(classname="weka.attributeSelection.BestFirst",
                          options=["-D", "1", "-N", "5"])
        evaluator = ASEvaluation(
            classname="weka.attributeSelection.CfsSubsetEval",
            options=["-P", "1", "-E", "1"])
        attribs = AttributeSelection()
        attribs.search(search)
        attribs.evaluator(evaluator)
        attribs.select_attributes(filtered_data)
        print("Summary of Attribute Selection: ")
        print(attribs.results_string)
    finally:
        # Release the JVM regardless of how the selection ended.
        jvm.stop()
Example #3
0
def try_params(n_instances, params, base, train, valid, test, istest):
    """Build a gain-ratio AttributeSelectedClassifier and score it.

    :param n_instances: instance budget; passed through ``round`` and ``int``
    :param params: mapping with ``missing_merge`` and ``search``; ``search``
        must be ``'GreedyStepwise'``, ``'BestFirst'`` or ``'Ranker'``
    :param base: wrapper whose ``jobject`` is assigned to the ``base`` property
    :param train: training data handed to the scoring helper
    :param valid: validation data, used when ``istest`` is false
    :param test: test data, used when ``istest`` is true
    :param istest: chooses ``test_weka_classifier`` over
        ``train_and_eval_weka_classifier``
    :return: the value returned by the chosen scoring helper
    :raises ValueError: if ``params['search']`` names an unsupported search
    """
    n_instances = int(round(n_instances))
    pprint(params)

    options = []
    if params['missing_merge'] == True:
        options.append("-M")

    search_name = params['search']
    if search_name == 'GreedyStepwise':
        search = gs.get_search(gs.get_params())
    elif search_name == 'BestFirst':
        search = bf.get_search(bf.get_params())
    elif search_name == 'Ranker':
        search = rk.get_search(rk.get_params())
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on ``search``; fail early with a clear message.
        raise ValueError("Unsupported search method: %r" % (search_name,))

    evaluator = ASEvaluation(
        classname="weka.attributeSelection.GainRatioAttributeEval",
        options=options)

    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")

    clf.set_property("evaluator", evaluator.jobject)
    clf.set_property("search", search.jobject)
    clf.set_property("base", base.jobject)

    if istest:
        result = test_weka_classifier(clf, train, test)
    else:
        result = train_and_eval_weka_classifier(clf, train, valid, n_instances)

    return result
Example #4
0
def try_params(n_instances, params, base, train, valid, test, istest):
    """Build a CfsSubsetEval AttributeSelectedClassifier and score it.

    :param n_instances: instance budget; passed through ``round`` and ``int``
    :param params: mapping with ``missingSeparate``, ``locallyPredictive``
        and ``search``; ``'GreedyStepwise'`` selects the ``gs`` helper, any
        other value selects ``bf``
    :param base: wrapper whose ``jobject`` is assigned to the ``base`` property
    :param train: training data handed to the scoring helper
    :param valid: validation data, used when ``istest`` is false
    :param test: test data, used when ``istest`` is true
    :param istest: chooses ``test_weka_classifier`` over
        ``train_and_eval_weka_classifier``
    :return: the value returned by the chosen scoring helper
    """
    n_instances = int(round(n_instances))
    pprint(params)

    options = []

    if params['missingSeparate'] == True:
        options.append("-M")

    if params['locallyPredictive'] == False:
        options.append("-L")

    if params['search'] == 'GreedyStepwise':
        search = gs.get_class(gs.get_params())
    else:
        search = bf.get_class(bf.get_params())

    evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval",
                             options=options)

    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")

    clf.set_property("evaluator", evaluator.jobject)
    # The configured search used to be built and then dropped, so the
    # classifier silently ran with its default search. Attach it, as the
    # other builders in this file do.
    clf.set_property("search", search.jobject)
    clf.set_property("base", base.jobject)

    if istest:
        result = test_weka_classifier(clf, train, test)
    else:
        result = train_and_eval_weka_classifier(clf, train, valid, n_instances)

    return result
Example #5
0
def get_evaluator(params, base):
    """Assemble an AttributeSelectedClassifier around CfsSubsetEval.

    :param params: mapping with ``missingSeparate``, ``locallyPredictive``
        and ``search``; ``'GreedyStepwise'`` picks the ``gs`` helper, anything
        else picks ``bf``
    :param base: wrapper whose ``jobject`` is assigned to the ``base`` property
    :return: the configured classifier wrapper
    """
    pprint(params)

    # (flag, key, value that switches the flag on); comparisons are kept
    # explicit so that only values equal to True / False match.
    switches = (("-M", 'missingSeparate', True),
                ("-L", 'locallyPredictive', False))
    eval_options = [flag for flag, key, wanted in switches
                    if params[key] == wanted]

    search_helper = gs if params['search'] == 'GreedyStepwise' else bf
    search = search_helper.get_search(search_helper.get_params())

    evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval",
                             options=eval_options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")

    for prop, component in (("evaluator", evaluator),
                            ("search", search),
                            ("base", base)):
        clf.set_property(prop, component.jobject)

    return clf
Example #6
0
def get_evaluator(params, base):
    """Assemble an AttributeSelectedClassifier around InfoGainAttributeEval.

    :param params: mapping with ``missingMerge`` (``-M`` is added when it
        equals False) and ``binarizeNumericAttributes`` (``-B`` is added when
        it equals True)
    :param base: wrapper whose ``jobject`` is assigned to the ``base`` property
    :return: the configured classifier wrapper
    """
    pprint(params)

    eval_options = []
    for flag, key, wanted in (("-M", 'missingMerge', False),
                              ("-B", 'binarizeNumericAttributes', True)):
        if params[key] == wanted:
            eval_options.append(flag)

    # The search always comes from the ``rk`` helper.
    search = rk.get_search(rk.get_params())

    evaluator = ASEvaluation(
        classname="weka.attributeSelection.InfoGainAttributeEval",
        options=eval_options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")

    for prop, component in (("evaluator", evaluator),
                            ("search", search),
                            ("base", base)):
        clf.set_property(prop, component.jobject)

    return clf
Example #7
0
def get_evaluator(params, base):
    """Assemble an AttributeSelectedClassifier around CorrelationAttributeEval.

    :param params: mapping with ``outputDetailedInfo``; ``-D`` is passed to
        the evaluator when it equals True
    :param base: wrapper whose ``jobject`` is assigned to the ``base`` property
    :return: the configured classifier wrapper
    """
    pprint(params)

    eval_options = ["-D"] if params['outputDetailedInfo'] == True else []

    # The search always comes from the ``rk`` helper.
    search = rk.get_search(rk.get_params())

    evaluator = ASEvaluation(
        classname="weka.attributeSelection.CorrelationAttributeEval",
        options=eval_options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")

    components = {"evaluator": evaluator, "search": search, "base": base}
    for prop in ("evaluator", "search", "base"):
        clf.set_property(prop, components[prop].jobject)

    return clf
Example #8
0
def get_evaluator(params, base):
    """Assemble an AttributeSelectedClassifier around ReliefFAttributeEval.

    :param params: mapping with ``weightByDistance`` (adds ``-W`` when equal
        to True), ``sampleSize`` (``-M``), ``numNeighbours`` (``-K``) and
        ``sigma`` (``-A``)
    :param base: wrapper whose ``jobject`` is assigned to the ``base`` property
    :return: the configured classifier wrapper
    """
    pprint(params)

    eval_options = ["-W"] if params['weightByDistance'] == True else []
    # Valued options, emitted in this order as "<flag> <str(value)>".
    for flag, key in (("-M", 'sampleSize'),
                      ("-K", 'numNeighbours'),
                      ("-A", 'sigma')):
        eval_options += [flag, str(params[key])]

    search = rk.get_search(rk.get_params())

    evaluator = ASEvaluation(
        classname="weka.attributeSelection.ReliefFAttributeEval",
        options=eval_options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")

    for prop, component in (("evaluator", evaluator),
                            ("search", search),
                            ("base", base)):
        clf.set_property(prop, component.jobject)

    return clf
Example #9
0
def get_evaluator(params, base):
    """Assemble an AttributeSelectedClassifier around
    SymmetricalUncertAttributeEval.

    The search is always taken from the ``rk`` helper; ``params`` is not
    consulted for it.

    :param params: mapping with ``missing_merge``; ``-M`` is passed to the
        evaluator when it equals True
    :param base: wrapper whose ``jobject`` is assigned to the ``base`` property
    :return: the configured classifier wrapper
    """
    pprint(params)

    eval_options = ["-M"] if params['missing_merge'] == True else []

    search = rk.get_search(rk.get_params())

    evaluator = ASEvaluation(
        classname="weka.attributeSelection.SymmetricalUncertAttributeEval",
        options=eval_options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")

    for prop, component in (("evaluator", evaluator),
                            ("search", search),
                            ("base", base)):
        clf.set_property(prop, component.jobject)

    return clf
Example #10
0
def try_params(n_instances, params, base, train, valid, test, istest):
    """Build an InfoGain/Ranker AttributeSelectedClassifier and score it.

    :param n_instances: instance budget; passed through ``round`` and ``int``
    :param params: mapping with ``missingMerge`` (``-M`` added when equal to
        False) and ``binarizeNumericAttributes`` (``-B`` added when equal to
        True)
    :param base: wrapper whose ``jobject`` is assigned to the ``base`` property
    :param train: training data handed to the scoring helper
    :param valid: validation data, used when ``istest`` is false
    :param test: test data, used when ``istest`` is true
    :param istest: chooses ``test_weka_classifier`` over
        ``train_and_eval_weka_classifier``
    :return: the value returned by the chosen scoring helper
    """
    n_instances = int(round(n_instances))
    pprint(params)

    switches = (("-M", 'missingMerge', False),
                ("-B", 'binarizeNumericAttributes', True))
    eval_options = [flag for flag, key, wanted in switches
                    if params[key] == wanted]

    search = ASSearch(classname="weka.attributeSelection.Ranker")
    evaluator = ASEvaluation(
        classname="weka.attributeSelection.InfoGainAttributeEval",
        options=eval_options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")

    for prop, component in (("evaluator", evaluator),
                            ("search", search),
                            ("base", base)):
        clf.set_property(prop, component.jobject)

    if istest:
        return test_weka_classifier(clf, train, test)
    return train_and_eval_weka_classifier(clf, train, valid, n_instances)
Example #11
0
    def evaluator(self):
        """
        Fetches the evaluator from the wrapped Java object.

        :return: the result of the Java ``getEvaluator`` call, wrapped
        :rtype: ASEvaluation
        """
        signature = "()Lweka/attributeSelection/ASEvaluation;"
        java_evaluator = javabridge.call(self.jobject, "getEvaluator", signature)
        return ASEvaluation(jobject=java_evaluator)
Example #12
0
def main():
    """
    Demonstrates subset selection and attribute ranking on the anneal dataset.
    """

    # locate the dataset, announce it, load it and use the last attribute
    # as class
    dataset_path = helper.get_data_dir() + os.sep + "anneal.arff"
    helper.print_info("Loading dataset: " + dataset_path)
    dataset = Loader("weka.core.converters.ArffLoader").load_file(dataset_path)
    dataset.class_is_last()

    # subset selection: BestFirst search driven by CfsSubsetEval
    helper.print_title("Attribute selection")
    subset_search = ASSearch(
        classname="weka.attributeSelection.BestFirst",
        options=["-D", "1", "-N", "5"])
    subset_eval = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval",
        options=["-P", "1", "-E", "1"])
    selector = AttributeSelection()
    selector.search(subset_search)
    selector.evaluator(subset_eval)
    selector.select_attributes(dataset)
    print("# attributes: " + str(selector.number_attributes_selected))
    print("attributes (as numpy array): " + str(selector.selected_attributes))
    print("attributes (as list): " + str(list(selector.selected_attributes)))
    print("result string:\n" + selector.results_string)

    # ranking: Ranker search driven by InfoGainAttributeEval, cross-validated
    helper.print_title("Attribute ranking (2-fold CV)")
    rank_search = ASSearch(
        classname="weka.attributeSelection.Ranker",
        options=["-N", "-1"])
    rank_eval = ASEvaluation("weka.attributeSelection.InfoGainAttributeEval")
    ranker = AttributeSelection()
    ranker.ranking(True)
    ranker.folds(2)
    ranker.crossvalidation(True)
    ranker.seed(42)
    ranker.search(rank_search)
    ranker.evaluator(rank_eval)
    ranker.select_attributes(dataset)
    print("ranked attributes:\n" + str(ranker.ranked_attributes))
    print("result string:\n" + ranker.results_string)
def all_feature(file):
    """Rank the attributes of a dataset with four Weka evaluators.

    The dataset is loaded with ``converters.load_any_file`` and its last
    attribute is used as the class. A single ``Ranker`` search is shared by
    all evaluators. For each evaluator the first column of
    ``ranked_attributes`` is taken and cast to ``int``.

    The JVM is started for the duration of the call and is always stopped
    again, even when loading or selection raises.

    :param file: path of the dataset to load
    :return: tuple ``(chi, info_gain, gain_ratio, symmetric_uncertainty)``
        with one integer array per evaluator
    """
    # Order matters: it defines the order of the returned tuple.
    evaluator_classes = (
        "weka.attributeSelection.ChiSquaredAttributeEval",
        "weka.attributeSelection.InfoGainAttributeEval",
        "weka.attributeSelection.GainRatioAttributeEval",
        "weka.attributeSelection.SymmetricalUncertAttributeEval",
    )

    jvm.start(packages=True)
    try:
        data = converters.load_any_file(file)
        data.class_is_last()

        search = ASSearch(
            classname="weka.attributeSelection.Ranker",
            options=["-T", "-1.7976931348623157E308", "-N", "-1"])
        attsel = AttributeSelection()
        attsel.search(search)

        rankings = []
        for classname in evaluator_classes:
            attsel.evaluator(ASEvaluation(classname=classname))
            attsel.select_attributes(data)
            rankings.append(attsel.ranked_attributes[:, 0].astype(int))
    finally:
        # Release the JVM regardless of how the rankings ended.
        jvm.stop()

    chi, info_gain, gain_ratio, symmetric_uncertainty = rankings
    return chi, info_gain, gain_ratio, symmetric_uncertainty
Example #14
0
 def showAttributeRanking(self, data):
     """Run InfoGain/Ranker attribute selection on ``data`` and print the
     number of selected attributes, the selected attributes and the result
     string.

     :param data: dataset passed to ``select_attributes``
     """
     ranker = ASSearch(
         classname="weka.attributeSelection.Ranker",
         options=["-T", "-1.7976931348623157E308", "-N", "-1"])
     info_gain = ASEvaluation(
         classname="weka.attributeSelection.InfoGainAttributeEval")

     selector = AttributeSelection()
     selector.set_search(ranker)
     selector.set_evaluator(info_gain)
     selector.select_attributes(data)

     selected_count = selector.get_number_attributes_selected()
     print("# attributes: " + str(selected_count))
     selected = selector.get_selected_attributes()
     print("attributes: " + str(selected))
     report = selector.to_results_string()
     print("result string:\n" + report)
Example #15
0
 def filter_data(self, data):
     """Pass ``data`` through Weka's supervised AttributeSelection filter,
     configured with CfsSubsetEval and a BestFirst search.

     :param data: dataset to filter
     :return: the filtered dataset
     """
     print("Filtering Data..\n")
     attsel_filter = Filter(
         classname="weka.filters.supervised.attribute.AttributeSelection")
     evaluator = ASEvaluation(
         classname="weka.attributeSelection.CfsSubsetEval",
         options=["-P", "1", "-E", "1"])
     search = ASSearch(
         classname="weka.attributeSelection.BestFirst",
         options=["-D", "1", "-N", "5"])

     for prop, component in (("evaluator", evaluator), ("search", search)):
         attsel_filter.set_property(prop, component.jobject)

     attsel_filter.inputformat(data)
     return attsel_filter.filter(data)
Example #16
0
 def featureSelection(self):
     """Select attributes of ``self.original_data`` with GeneticSearch and
     CfsSubsetEval.

     Stores the outcome on the instance: ``selected_features``,
     ``num_features`` and ``data_selected`` (the result of
     ``reduce_dimensionality`` on the original data).
     """
     genetic_search = ASSearch(
         classname="weka.attributeSelection.GeneticSearch",
         options=["-Z", "1024", "-G", "20", "-C", "0.6", "-M", "0.3"])
     cfs_eval = ASEvaluation(
         classname="weka.attributeSelection.CfsSubsetEval",
         options=["-P", "1", "-E", "1"])

     selector = AttributeSelection()
     selector.search(genetic_search)
     selector.evaluator(cfs_eval)
     selector.select_attributes(self.original_data)

     self.selected_features = selector.selected_attributes
     self.num_features = selector.number_attributes_selected
     reduced = selector.reduce_dimensionality(self.original_data)
     self.data_selected = reduced
def use_filter(data):
    """
    Performs attribute selection through the AttributeSelection filter and
    prints the filtered dataset.
    :param data: the dataset to filter
    :type data: Instances
    """
    print("\n2. Filter")
    attsel_filter = Filter(
        classname="weka.filters.supervised.attribute.AttributeSelection")
    evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")
    search = ASSearch(
        classname="weka.attributeSelection.GreedyStepwise", options=["-B"])

    # bean properties are set from the wrapped Java objects
    for prop, component in (("evaluator", evaluator), ("search", search)):
        attsel_filter.set_property(prop, component.jobject)

    attsel_filter.inputformat(data)
    print(str(attsel_filter.filter(data)))
def use_low_level(data):
    """
    Performs attribute selection via the AttributeSelection API, configuring
    it through its ``jwrapper``, and prints the selected indices.
    :param data: the dataset to use
    :type data: Instances
    """
    print("\n3. Low-level")
    selector = AttributeSelection()
    evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")
    search = ASSearch(
        classname="weka.attributeSelection.GreedyStepwise", options=["-B"])

    java_selector = selector.jwrapper
    java_selector.setEvaluator(evaluator.jobject)
    selector.jwrapper.setSearch(search.jobject)

    selector.select_attributes(data)
    index_list = selector.selected_attributes.tolist()
    print("selected attribute indices (starting with 0):\n" + str(index_list))
Example #19
0
def cfs(table, cores):
    """Run CfsSubsetEval/BestFirst attribute selection on a CSV table.

    :param table: path of the CSV file; the part after the last ``/`` is
        used in the log message
    :param cores: value passed to the evaluator's ``-P`` and ``-E`` options;
        may be an int or a string
    :return: the selected attributes as a list
    """
    loader = Loader("weka.core.converters.CSVLoader")
    data = loader.load_file(table)
    data.class_is_last()
    logger.info("Running attribute selection for: " + str(table.split("/")[-1]) + ". Please, wait a moment.")

    search = ASSearch(classname="weka.attributeSelection.BestFirst",
                      options=["-D", "0", "-N", "5"])
    # Weka option lists must contain strings, so normalise ``cores`` in case
    # the caller passed an integer.
    cores_option = str(cores)
    evaluation = ASEvaluation(
        classname="weka.attributeSelection.CfsSubsetEval",
        options=["-Z", "-P", cores_option, "-E", cores_option])

    attsel = AttributeSelection()
    attsel.search(search)
    attsel.evaluator(evaluation)
    attsel.select_attributes(data)
    logger.info("Selected attributes: " + str(attsel.selected_attributes))
    data.delete(index=None)  # TODO: deleting the instances does not work yet

    return list(attsel.selected_attributes)
Example #20
0
def relieff(filter_data, feature_names):
    """Select attributes with ReliefF and return their names.

    :param filter_data: dataset passed to ``select_attributes``
    :param feature_names: sequence indexed by the selected attribute indices
    :return: list of feature names for every selected index except the last
    """
    # Ranker search paired with the ReliefF evaluator ("-K" is the number of
    # nearest neighbours).
    ranker = ASSearch(
        classname="weka.attributeSelection.Ranker",
        options=["-T", "-1.7976931348623157E308", "-N", "-1"])
    relieff_eval = ASEvaluation(
        classname="weka.attributeSelection.ReliefFAttributeEval",
        options=["-M", "-1", "-D", "1", "-K", "10"])

    selector = AttributeSelection()
    selector.search(ranker)
    selector.evaluator(relieff_eval)
    selector.select_attributes(filter_data)
    selected = selector.selected_attributes

    # Per the original author, the wrapper appends the class column index to
    # the result, hence the final entry is dropped.
    names = []
    for index in selected[:-1]:
        names.append(feature_names[index])
    return names
Example #21
0
def use_filter(data):
    """
    Performs attribute selection through the AttributeSelection filter, then
    prints the filtered dataset along with the filter's evaluator and search.
    :param data: the dataset to filter
    :type data: Instances
    """
    print("\n2. Filter")
    attsel_filter = wfilters.AttributeSelection()
    cfs_eval = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")
    greedy_search = ASSearch(
        classname="weka.attributeSelection.GreedyStepwise", options=["-B"])

    # configure the filter through its attributes
    attsel_filter.evaluator = cfs_eval
    attsel_filter.search = greedy_search

    attsel_filter.inputformat(data)
    result = attsel_filter.filter(data)
    print(str(result))
    print("Evaluator:\n", attsel_filter.evaluator)
    print("Search:\n", attsel_filter.search)
def feature_selection_weka(x_train, y_train, x_test, input_path, features):
    """Restrict train/test frames to features chosen by Weka's CFS selection.

    The chosen columns are cached in
    ``<input_path>selected_features_weka_<features>.csv``. When that file
    exists only its column names are read. Otherwise constant columns are
    dropped from ``x_train``, the data is written to
    ``Weka/train_weka_format.csv``, a GreedyStepwise/CfsSubsetEval selection
    is run and the cache file is written.

    :param x_train: training DataFrame
    :param y_train: target values stored in the ``target`` column of the
        exported CSV
    :param x_test: DataFrame indexed with the same selected columns
    :param input_path: prefix prepended to the cache file name
    :param features: percentage of the training columns to keep
    :return: ``(x_train_filtered, x_val_filtered)``
    """
    # Computed from the column count *before* constant columns are dropped.
    percent = int(x_train.shape[1] * (features / 100.0))
    if not os.path.exists('Weka'):
        os.mkdir('Weka')

    cache_path = input_path + f'selected_features_weka_{features}.csv'

    if os.path.exists(cache_path):
        selected_features = pd.read_csv(cache_path, index_col=0).columns
    else:
        # keep only columns that differ from the first row somewhere
        varying = (x_train != x_train.iloc[0]).any()
        x_train = x_train.loc[:, varying]

        export = x_train.copy()
        export.columns = [f"{pos}a" for pos in range(export.shape[1])]
        export['target'] = y_train
        export.to_csv('Weka/train_weka_format.csv', index=False)

        from weka.attribute_selection import ASEvaluation, AttributeSelection, ASSearch
        from weka.core.converters import Loader, Saver
        csv_loader = Loader(classname="weka.core.converters.CSVLoader")
        data = csv_loader.load_file('Weka/train_weka_format.csv',
                                    class_index='last')

        search = ASSearch(
            classname="weka.attributeSelection.GreedyStepwise",
            options=["-C", "-R", "-N", f"{percent}"])
        evaluator = ASEvaluation(
            classname="weka.attributeSelection.CfsSubsetEval",
            options=["-P", "1", "-E", "1", "-L"])
        selector = AttributeSelection()
        selector.search(search)
        selector.evaluator(evaluator)
        selector.select_attributes(data)

        ranking = pd.DataFrame(selector.ranked_attributes,
                               columns=['Feature', 'Rank'])
        ranking['Feature'] = ranking['Feature'].astype(int)
        top_positions = ranking.loc[:percent - 1, 'Feature']

        chosen = x_train.iloc[:, top_positions]
        chosen.to_csv(cache_path)
        selected_features = chosen.columns

    return x_train.loc[:, selected_features], x_test.loc[:, selected_features]
Example #23
0
def information_gain(filter_data, feature_names):
    """Select attributes with InfoGainAttributeEval and return their names.

    :param filter_data: dataset passed to ``select_attributes``
    :param feature_names: sequence indexed by the selected attribute indices
    :return: list of feature names for every selected index except the last
    """
    # "-T" is the score threshold and "-N" the number of attributes to keep.
    ranker = ASSearch(
        classname="weka.attributeSelection.Ranker",
        options=["-T", "-1.7976931348623157E308", "-N", "-1"])
    # The evaluator is created without any options.
    info_gain_eval = ASEvaluation(
        classname="weka.attributeSelection.InfoGainAttributeEval", options=[])

    selector = AttributeSelection()
    selector.search(ranker)
    selector.evaluator(info_gain_eval)
    selector.select_attributes(filter_data)
    selected = selector.selected_attributes

    # Per the original author, the wrapper appends the class column index to
    # the result, hence the final entry is dropped.
    names = []
    for index in selected[:-1]:
        names.append(feature_names[index])
    return names
def use_classifier(data):
    """
    Performs attribute selection with the AttributeSelectedClassifier
    meta-classifier and prints a 10-fold cross-validation summary.
    :param data: the dataset to use
    :type data: Instances
    """
    print("\n1. Meta-classifier")
    meta = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")
    cfs_eval = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")
    greedy_search = ASSearch(
        classname="weka.attributeSelection.GreedyStepwise", options=["-B"])
    j48 = Classifier(classname="weka.classifiers.trees.J48")

    # Java objects are handed over as bean properties rather than as nested
    # option strings, which avoids escaping quoted command lines.
    for prop, component in (("classifier", j48),
                            ("evaluator", cfs_eval),
                            ("search", greedy_search)):
        meta.set_property(prop, component.jobject)

    cross_validation = Evaluation(data)
    cross_validation.crossvalidate_model(meta, data, 10, Random(1))
    print(cross_validation.summary())
Example #25
0
def get_IG(ofile_dir, loader):
	"""Rank the attributes of a dataset by information gain.

	The file is loaded with ``loader``, its last attribute is used as the
	class, and ``InfoGainAttributeEval`` is run with a ``Ranker`` search.

	When fewer than two attributes are reported as selected, the scores are
	parsed from the textual report (the lines that follow the one containing
	"Ranked attributes") and ``ofile_dir`` is removed afterwards with
	``rm -r``. Otherwise the scores come from ``ranked_attributes``.

	:param ofile_dir: path of the data file to load
	:param loader: object whose ``load_file`` returns the dataset
	:return: ``(results, mean_score)``: a dict mapping attribute name to
		score, and the mean of the scores
	:raises ZeroDivisionError: if the textual report yields no scores
	"""
	import subprocess

	data = loader.load_file(ofile_dir)
	data.class_is_last()

	evaluator = ASEvaluation(classname="weka.attributeSelection.InfoGainAttributeEval")
	search = ASSearch(classname="weka.attributeSelection.Ranker", options=["-T", "-1.7976931348623157E308", "-N", "-1"])
	attsel = AttributeSelection()
	attsel.search(search)
	attsel.evaluator(evaluator)

	attsel.select_attributes(data)

	results = {}

	if attsel.number_attributes_selected < 2:
		in_ranking = False
		for line in attsel.results_string.split('\n'):
			if in_ranking:
				# The ranking section ends at the first line with fewer
				# than three space-separated fields.
				if len(line.split(' ')) <= 2:
					break
				tokens = [tok for tok in line.split(' ') if tok != '']
				# tokens[0] is the score; everything from the third token on
				# is the (possibly space-containing) attribute name.
				name = ' '.join(tokens[2:]).strip()
				results[str(name)] = float(tokens[0].strip())
			if "Ranked attributes" in line:
				in_ranking = True
		mean_score = sum(results.values())/len(results.values())
		# An argument list avoids the shell, so paths containing spaces or
		# shell metacharacters are removed correctly. The exit status is
		# ignored, as before.
		subprocess.call(["rm", "-r", "--", ofile_dir])
	else:
		ranked = attsel.ranked_attributes
		results = {str(data.attribute(attr[0]).name): attr[1] for attr in ranked}
		mean_score = ranked[:,1].mean()

	return results, mean_score
Example #26
0
def try_params(n_instances, params, base, train, valid, test, istest):
    """Build a ReliefF/Ranker AttributeSelectedClassifier and score it.

    :param n_instances: instance budget; passed through ``round`` and ``int``
    :param params: mapping with ``weightByDistance`` (adds ``-W`` when equal
        to True), ``sampleSize`` (``-M``), ``numNeighbours`` (``-K``) and
        ``sigma`` (``-A``)
    :param base: wrapper whose ``jobject`` is assigned to the ``base`` property
    :param train: training data handed to the scoring helper
    :param valid: validation data, used when ``istest`` is false
    :param test: test data, used when ``istest`` is true
    :param istest: chooses ``test_weka_classifier`` over
        ``train_and_eval_weka_classifier``
    :return: the value returned by the chosen scoring helper
    """
    n_instances = int(round(n_instances))
    pprint(params)

    eval_options = ["-W"] if params['weightByDistance'] == True else []
    # Valued options, emitted in this order as "<flag> <str(value)>".
    for flag, key in (("-M", 'sampleSize'),
                      ("-K", 'numNeighbours'),
                      ("-A", 'sigma')):
        eval_options += [flag, str(params[key])]

    search = ASSearch(classname="weka.attributeSelection.Ranker")
    evaluator = ASEvaluation(
        classname="weka.attributeSelection.ReliefFAttributeEval",
        options=eval_options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")

    for prop, component in (("evaluator", evaluator),
                            ("search", search),
                            ("base", base)):
        clf.set_property(prop, component.jobject)

    if istest:
        return test_weka_classifier(clf, train, test)
    return train_and_eval_weka_classifier(clf, train, valid, n_instances)
Example #27
0
def get_evaluator(params, base):
    """Assemble an AttributeSelectedClassifier around OneRAttributeEval.

    The search is always taken from the ``rk`` helper; ``params`` is not
    consulted for it.

    :param params: mapping with ``use_training`` (adds ``-D`` when equal to
        True), ``seed`` (``-S``) and ``minimum_bucket`` (``-B``)
    :param base: wrapper whose ``jobject`` is assigned to the ``base`` property
    :return: the configured classifier wrapper
    """
    pprint(params)

    eval_options = ["-D"] if params['use_training'] == True else []
    for flag, key in (("-S", 'seed'), ("-B", 'minimum_bucket')):
        eval_options += [flag, str(params[key])]

    search = rk.get_search(rk.get_params())

    evaluator = ASEvaluation(
        classname="weka.attributeSelection.OneRAttributeEval",
        options=eval_options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")

    for prop, component in (("evaluator", evaluator),
                            ("search", search),
                            ("base", base)):
        clf.set_property(prop, component.jobject)

    return clf
Example #28
0
def get_evaluator(params, base):
    """Assemble an AttributeSelectedClassifier around PrincipalComponents.

    The search is always taken from the ``rk`` helper; ``params`` is not
    consulted for it.

    :param params: mapping with ``center`` (adds ``-C`` when equal to True),
        ``max_a`` (``-A``) and ``variance`` (``-R``)
    :param base: wrapper whose ``jobject`` is assigned to the ``base`` property
    :return: the configured classifier wrapper
    """
    pprint(params)

    eval_options = ["-C"] if params['center'] == True else []
    for flag, key in (("-A", 'max_a'), ("-R", 'variance')):
        eval_options += [flag, str(params[key])]

    search = rk.get_search(rk.get_params())

    evaluator = ASEvaluation(
        classname="weka.attributeSelection.PrincipalComponents",
        options=eval_options)
    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")

    for prop, component in (("evaluator", evaluator),
                            ("search", search),
                            ("base", base)):
        clf.set_property(prop, component.jobject)

    return clf
Example #29
0
def get_evaluator(params, base):
    """Assemble an AttributeSelectedClassifier around WrapperSubsetEval.

    :param params: mapping with ``ev_measure`` (``-E``), ``seed`` (``-R``),
        ``threshold`` (``-T``) and ``search``; ``search`` must be
        ``'GreedyStepwise'``, ``'BestFirst'`` or ``'Ranker'``
    :param base: wrapper whose ``jobject`` is assigned to the ``base`` property
    :return: the configured classifier wrapper
    :raises ValueError: if ``params['search']`` names an unsupported search
    """
    pprint(params)

    options = []
    for flag, key in (("-E", 'ev_measure'),
                      ("-R", 'seed'),
                      ("-T", 'threshold')):
        options += [flag, str(params[key])]

    search_name = params['search']
    if search_name == 'GreedyStepwise':
        search = gs.get_search(gs.get_params())
    elif search_name == 'BestFirst':
        search = bf.get_search(bf.get_params())
    elif search_name == 'Ranker':
        search = rk.get_search(rk.get_params())
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on ``search``; fail early with a clear message.
        raise ValueError("Unsupported search method: %r" % (search_name,))

    evaluator = ASEvaluation(
        classname="weka.attributeSelection.WrapperSubsetEval", options=options)

    clf = Classifier(
        classname="weka.classifiers.meta.AttributeSelectedClassifier")

    clf.set_property("evaluator", evaluator.jobject)
    clf.set_property("search", search.jobject)
    clf.set_property("base", base.jobject)

    return clf
    pkg.install_package(chisq_name)
    print("pkg %s installed, please restart" % chisq_name)
    jvm.stop()
    sys.exit(1)
"""
# Directory (UNC path) that holds the CSV inputs; every "*.csv" file in it is
# processed in turn.
data_dir = "\\\\egr-1l11qd2\\CLS_lab\\Junya Zhao\\Data driven model _paper [June 25_2018\\FeatureSelection\\EvlSearch\\"
globbed_files = glob.glob(data_dir + "*.csv")
# NOTE(review): the loop variable ``csv`` shadows the name of the stdlib
# ``csv`` module at module level; kept unchanged because later code may
# rely on it.
for csv in globbed_files:
    data = converters.load_any_file(csv)
    data.class_is_last()
    search = ASSearch(classname="weka.attributeSelection.EvolutionarySearch",
                      options=[
                          "-population-size", "200", "-generations", "500",
                          "-crossover-probability", "0.6"
                      ])
    # The pool-size option flag was written as "E" without its leading dash,
    # so it was not passed as the "-E" option.
    evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval",
                             options=["-P", "1", "-E", "1"])
    # 10-fold cross-validated attribute selection with a fixed seed
    attsel = AttributeSelection()
    attsel.folds(10)
    attsel.crossvalidation(True)
    attsel.seed(1)
    attsel.search(search)
    attsel.evaluator(evaluator)
    attsel.select_attributes(data)
    evl = Evaluation(data)
    print("# attributes: " + str(attsel.number_attributes_selected))
    print("attributes: " + str(attsel.selected_attributes))
    print("result string:\n" + attsel.results_string)
    print(evl)
    # append the textual report next to each input file
    with open(f"{csv}._report.csv", "a") as outfile:
        outfile.write(attsel.results_string)
Example #31
0
        ["-D", "0", "-N", "1"]
    ),
    (
        ["-F", "10", "-T", "-1", "-B", "weka.classifiers.trees.J48"],
        ["-D", "1", "-N", "2"]
    ),
    (
        ["-F", "10", "-T", "-1", "-B", "weka.classifiers.trees.J48"],
        ["-D", "2", "-N", "2"]
    ),
)

# attribute selection: run WrapperSubsetEval + BestFirst once per setup and
# print the command lines used and the resulting report
for setup in setups:
    # each setup is a pair: (evaluator options, search options)
    evl, search = setup
    aseval = ASEvaluation(classname="weka.attributeSelection.WrapperSubsetEval",
                          options=evl)
    assearch = ASSearch(classname="weka.attributeSelection.BestFirst",
                        options=search)
    print("\n--> Attribute selection\n")
    # show the exact configuration of both components
    print(aseval.to_commandline())
    print(assearch.to_commandline())
    attsel = AttributeSelection()
    attsel.evaluator(aseval)
    attsel.search(assearch)
    # ``data`` is defined outside this section of the script
    attsel.select_attributes(data)
    print(attsel.results_string)

# cross-validation
aseval = ASEvaluation(classname="weka.attributeSelection.WrapperSubsetEval",
                      options=["-F", "10", "-B", "weka.classifiers.trees.J48"])
assearch = ASSearch(classname="weka.attributeSelection.BestFirst",