예제 #1
0
# Benchmark settings shared by every dataset in this run.
delim = ","
target_type = "numeric"
task_name = "discovery"
disc_type = "static"
max_len = 5
beamsize = 100
ncutpoints = 5
results = ""
datasetnames = [
    "baseball", "autoMPG8", "dee", "ele-1", "forestFires", "concrete",
    "treasury", "wizmir", "abalone", "puma32h", "ailerons", "elevators",
    "bikesharing", "california", "house",
]

# Keyword arguments passed unchanged to every SSDC instance below.
ssdc_options = {
    "max_depth": max_len,
    "beam_width": beamsize,
    "iterative_beam_width": 1,
    "n_cutpoints": ncutpoints,
    "task": task_name,
    "discretization": disc_type,
    "gain": "normalized",
}

# Fit one model per dataset and accumulate its results in `results`.
for datasetname in datasetnames:
    print(f"Dataset: {datasetname}")
    filename = f"./data/numeric target/{datasetname}.csv"
    df = pd.read_csv(filename, delimiter=delim)
    model = SSDC(target_type, **ssdc_options)
    model.fit(df)
    # Record the number of rows of the fitted frame among the model measures.
    model.measures["nsamples_train"] = len(df)
    results = attach_results(model, results, datasetname)

# Only the model of the last iteration is handed over, together with the
# results accumulated over all datasets.
print2folder(model, results, "SSDpp")
예제 #2
0
# -*- coding: utf-8 -*-

import pandas as pd
import sys

sys.path
from src.util.results2folder import attach_results, print2folder

from _classes import SSDC

# Settings for this single-dataset run.
task_name = "discovery"
# NOTE(review): target_type is assigned but never passed to SSDC below;
# confirm that relying on the constructor's default is intended.
target_type = "numeric"
datasetname = "baseball"
delim = ","

# Read the dataset into a DataFrame.
filename = f"./data/numeric target/{datasetname}.csv"
df = pd.read_csv(filename, delimiter=delim)

# Build the model with only the task set, fit it to the data and print it.
model = SSDC(task=task_name)
model.fit(df)
# Optional debug output of the model measures:
# print("model measures : " + str(model.measures) + "\n")
print(model)
예제 #3
0
# Search settings for the application run.
target_type = "numeric"
task_name = "discovery"
disc_type = "static"
max_len = 5
beamsize = 100
ncutpoints = 5

# Read the application dataset. `datasetname` and `delim` are not assigned in
# this section and must already be defined when it runs.
filename = f"./data/application/{datasetname}.csv"
df = pd.read_csv(filename, delimiter=delim)

# Keyword arguments for the model; the rule list is capped via max_rules.
model_options = {
    "max_depth": max_len,
    "beam_width": beamsize,
    "iterative_beam_width": 1,
    "n_cutpoints": ncutpoints,
    "task": task_name,
    "discretization": disc_type,
    "gain": "normalized",
    "max_rules": 4,
}
model = SSDC(target_type, **model_options)
model.fit(df)
print(model)

#ruleset = model.rule_sets
#overlap = []
#subgroup_sets_support= [set(rset) for rset in ruleset]
#rules_usg = []
#subgroup_sets_usage= []
#statistic_rules = []
#for r in range(len(subgroup_sets_support)):
#    previous_sets = [subgroup_sets_support[ii] for ii in range(r)]
# Settings for the maximum-depth sweep. `task_name`, `disc_type` and `delim`
# are not assigned in this section and must already be defined when it runs.
beamsize = 100
ncutpoints = 5
iterative = 1
target_type = "numeric"
maxdepth_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]

datasetnames = [
    "baseball", "autoMPG8", "dee", "ele-1", "forestFires", "concrete",
    "treasury", "wizmir", "abalone", "puma32h", "ailerons", "elevators",
    "bikesharing", "california", "house",
]
results = ""
for datasetname in datasetnames:
    print("Dataset name: " + datasetname)
    # The CSV depends only on the dataset, not on the depth, so it is read
    # once per dataset instead of once per (dataset, depth) pair.
    filename = "./data/numeric target/" + datasetname + ".csv"
    dataset = pd.read_csv(filename, delimiter=delim)
    for maxdepth in maxdepth_list:
        print("   maximum depth of search: " + str(maxdepth))

        # Each fit gets its own copy, so every depth starts from the data
        # exactly as read from disk, as it did when the file was re-read.
        df = dataset.copy()
        model = SSDC(target_type,
                     max_depth=maxdepth,
                     beam_width=beamsize,
                     iterative_beam_width=iterative,
                     n_cutpoints=ncutpoints,
                     task=task_name,
                     discretization=disc_type,
                     gain="normalized")
        model.fit(df)
        # Tag the measures with the depth used for this fit.
        model.measures["maxdepth"] = maxdepth
        results = attach_results(model, results, datasetname)
# Drop any trailing commas, spaces and newlines from the accumulated results.
results = results.rstrip(", \n")
# Only the model of the last iteration is handed over, together with the
# results accumulated over all datasets and depths.
print2folder(model, results, "maxdepth_results")