# user configuration
delim = ','
disc_type = "static"
task_name = "discovery"
max_len = 5
beamsize = 100
ncutpoints = 5
target_type = "numeric"

# Names of the CSV files (without extension) under "./data/numeric target/".
datasetnames = [
    "baseball",
    "autoMPG8",
    "dee",
    "ele-1",
    "forestFires",
    "concrete",
    "treasury",
    "wizmir",
    "abalone",
    "puma32h",
    "ailerons",
    "elevators",
    "bikesharing",
    "california",
    "house",
]

# Keyword options shared by every SSDC instance built below; none of them
# depends on the dataset, so they are assembled once.
ssdc_options = {
    "max_depth": max_len,
    "beam_width": beamsize,
    "iterative_beam_width": 1,
    "n_cutpoints": ncutpoints,
    "task": task_name,
    "discretization": disc_type,
    "gain": "normalized",
}

# `results` starts empty and is threaded through attach_results, which
# returns the updated value after each dataset.
results = ""
for datasetname in datasetnames:
    print("Dataset: " + datasetname)
    filename = "./data/numeric target/" + datasetname + ".csv"
    df = pd.read_csv(filename, delimiter=delim)

    model = SSDC(target_type, **ssdc_options)
    model.fit(df)  # fit to new data

    # Record the number of rows used for fitting alongside the model measures.
    model.measures["nsamples_train"] = len(df)
    results = attach_results(model, results, datasetname)

# NOTE(review): the source was whitespace-collapsed; this call is assumed to
# run once after the loop (with the last fitted model) -- confirm nesting.
print2folder(model, results, "SSDpp")
# -*- coding: utf-8 -*-
# Minimal example: fit an SSDC model to a single numeric-target dataset and
# print the fitted model.
import sys

import pandas as pd

# NOTE(review): the original contained a bare `sys.path` expression here,
# which has no effect. If the project imports below need an extra directory
# on the module search path, append it explicitly with sys.path.append(...).
from src.util.results2folder import attach_results, print2folder
from _classes import SSDC

# user configuration
datasetname = "baseball"
delim = ','
task_name = "discovery"
target_type = "numeric"

# load data
filename = "./data/numeric target/" + datasetname + ".csv"
df = pd.read_csv(filename, delimiter=delim)

# load class and fit to data
# target_type is passed as the first positional argument, matching every
# other SSDC(...) call in the experiment scripts; previously it was assigned
# above but never handed to the model.
model = SSDC(target_type, task=task_name)
model.fit(df)
# print("model measures : " + str(model.measures) + "\n")
print(model)
# Search / discretisation settings for the application run.
disc_type = "static"
task_name = "discovery"
max_len = 5
beamsize = 100
ncutpoints = 5
target_type = "numeric"

# load data
# `datasetname` and `delim` are not assigned in this excerpt; they are
# expected to be defined earlier in the script.
filename = "./data/application/" + datasetname + ".csv"
df = pd.read_csv(filename, delimiter=delim)

# load model
# Keyword options are collected first so the constructor call stays short;
# max_rules=4 is passed through to SSDC unchanged.
ssdc_options = {
    "max_depth": max_len,
    "beam_width": beamsize,
    "iterative_beam_width": 1,
    "n_cutpoints": ncutpoints,
    "task": task_name,
    "discretization": disc_type,
    "gain": "normalized",
    "max_rules": 4,
}
model = SSDC(target_type, **ssdc_options)
model.fit(df)
print(model)

#ruleset = model.rule_sets
#overlap = []
#subgroup_sets_support= [set(rset) for rset in ruleset]
#rules_usg = []
#subgroup_sets_usage= []
#statistic_rules = []
#for r in range(len(subgroup_sets_support)):
#    previous_sets = [subgroup_sets_support[ii] for ii in range(r)]
# Fixed search settings; only the maximum depth varies in this experiment.
# `delim`, `task_name` and `disc_type` are not assigned in this excerpt; they
# are expected to be defined earlier in the script.
beamsize = 100
ncutpoints = 5
iterative = 1
target_type = "numeric"
maxdepth_list = list(range(1, 12))  # 1, 2, ..., 11

datasetnames = [
    "baseball",
    "autoMPG8",
    "dee",
    "ele-1",
    "forestFires",
    "concrete",
    "treasury",
    "wizmir",
    "abalone",
    "puma32h",
    "ailerons",
    "elevators",
    "bikesharing",
    "california",
    "house",
]

# Options that stay constant across every (dataset, depth) combination.
fixed_options = {
    "beam_width": beamsize,
    "iterative_beam_width": iterative,
    "n_cutpoints": ncutpoints,
    "task": task_name,
    "discretization": disc_type,
    "gain": "normalized",
}

# `results` starts empty and is threaded through attach_results, which
# returns the updated value after each fitted model.
results = ""
for datasetname in datasetnames:
    print("Dataset name: " + datasetname)
    # The path depends only on the dataset, so it is built once per dataset.
    filename = "./data/numeric target/" + datasetname + ".csv"

    for maxdepth in maxdepth_list:
        print("   maximum depth of search: " + str(maxdepth))
        # The CSV is re-read for every depth, as in the original script, so
        # each fit starts from a freshly loaded DataFrame.
        df = pd.read_csv(filename, delimiter=delim)

        model = SSDC(target_type, max_depth=maxdepth, **fixed_options)
        model.fit(df)

        # Tag the measures with the depth used for this run.
        model.measures["maxdepth"] = maxdepth
        results = attach_results(model, results, datasetname)

# NOTE(review): the source was whitespace-collapsed; the two statements below
# are assumed to run once after both loops -- confirm nesting.
# Strip trailing commas, spaces and newlines from the accumulated text.
results = results.rstrip(", \n")
print2folder(model, results, "maxdepth_results")