# Example 1: logistic regression on the Wisconsin breast-cancer data set
# Make the bundled AIFH helper library (../lib/aifh relative to aifh_dir)
# importable.  os.path.join supplies the separators portably.
aifh_dir = os.path.abspath(os.path.join(aifh_dir, "..", "lib", "aifh"))
sys.path.append(aifh_dir)

from normalize import Normalize


# Find the Wisconsin breast cancer data set relative to this script.
# NOTE: the old code concatenated the directory and "../../..." without a
# separator, so abspath resolved from the wrong directory; os.path.join fixes
# that.
dataFile = os.path.dirname(os.path.realpath(__file__))
dataFile = os.path.abspath(
    os.path.join(dataFile, "..", "..", "datasets",
                 "breast-cancer-wisconsin.csv"))

# Normalize the Wisconsin file.

norm = Normalize()
data_file_work = norm.load_csv(dataFile)
# Drop rows containing unknown ("?") fields — they cannot be regressed on.
norm.delete_unknowns(data_file_work)
# Column 0 is the sample id; it carries no predictive information.
norm.col_delete(data_file_work, 0)
# Recode the class column (index 9): 4 -> 1, otherwise -> 0 — presumably
# malignant vs. benign; verify against Normalize.col_replace.
norm.col_replace(data_file_work, 9, 4, 1, 0)

# Convert the nine feature columns to numeric values.
# range() replaces the Python-2-only xrange used previously.
for i in range(0, 9):
    norm.make_col_numeric(data_file_work, i)

df = pd.DataFrame(data_file_work)
df.columns = ["clump_thickness", "size_uniformity", "shape_uniformity", "marginal_adhesion", "epithelial_size",
              "bare_nucleoli", "bland_chromatin", "normal_nucleoli", "mitoses", "class"]

# All columns except the recoded class column are predictors.
train_cols = df.columns[0:9]

# Perform the logistic regression.
logit = sm.Logit(df['class'], df[train_cols])

# fit the model
# Example 2: k-means clustering of the Iris data set
# Make the bundled AIFH helper library (../lib/aifh relative to aifh_dir)
# importable.  os.path.join supplies the separators portably.
aifh_dir = os.path.abspath(os.path.join(aifh_dir, "..", "lib", "aifh"))
sys.path.append(aifh_dir)

from normalize import Normalize

# Number of clusters to form.
k = 3

# Find the Iris data set relative to this script.
# NOTE: the old code concatenated the directory and "../../..." without a
# separator, so abspath resolved from the wrong directory; os.path.join fixes
# that.
irisFile = os.path.dirname(os.path.realpath(__file__))
irisFile = os.path.abspath(
    os.path.join(irisFile, "..", "..", "datasets", "iris.csv"))

# Read the Iris data set.
print('Reading CSV file: ' + irisFile)
norm = Normalize()
iris_data = norm.load_csv(irisFile)

# Prepare the iris data set: extract the species label (column 4), drop it
# from the feature matrix, and convert the four feature columns to numbers.
classes = norm.col_extract(iris_data, 4)
norm.col_delete(iris_data, 4)
for i in range(0, 4):
    norm.make_col_numeric(iris_data, i)

# Cluster the Iris data set.
res, idx = kmeans2(np.array(iris_data), k)

# Print each cluster's members alongside their true species label.
for cluster_num in range(0, k):
    print("Cluster #" + str(cluster_num + 1))
    for i in range(0, len(idx)):
        if idx[i] == cluster_num:
            print(str(iris_data[i]) + "," + classes[i])
# Example 3: k-means clustering of the Iris data set (variant)
                           "aifh")
# aifh_dir is assembled above (truncated in this excerpt); put the AIFH
# helper library on the import path.
sys.path.append(aifh_dir)

from normalize import Normalize

# Number of clusters to form.
k = 3

# Find the Iris data set relative to this script.
# NOTE: the old code concatenated the directory and "../../..." without a
# separator, so abspath resolved from the wrong directory; os.path.join fixes
# that.
irisFile = os.path.dirname(os.path.realpath(__file__))
irisFile = os.path.abspath(
    os.path.join(irisFile, "..", "..", "datasets", "iris.csv"))

# Read the Iris data set.
print('Reading CSV file: ' + irisFile)
norm = Normalize()
iris_data = norm.load_csv(irisFile)

# Prepare the iris data set: extract the species label (column 4), drop it
# from the feature matrix, and convert the four feature columns to numbers.
classes = norm.col_extract(iris_data, 4)
norm.col_delete(iris_data, 4)
for i in range(0, 4):
    norm.make_col_numeric(iris_data, i)

# Cluster the Iris data set.
res, idx = kmeans2(np.array(iris_data), k)

# Print each cluster's members alongside their true species label.
for cluster_num in range(0, k):
    print("Cluster #" + str(cluster_num + 1))
    for i in range(0, len(idx)):
        if idx[i] == cluster_num:
            print(str(iris_data[i]) + "," + classes[i])
# Example 4: logistic regression on the Wisconsin breast-cancer data set (variant)
                           "aifh")
# aifh_dir is assembled above (truncated in this excerpt); put the AIFH
# helper library on the import path.
sys.path.append(aifh_dir)

from normalize import Normalize

# Find the Wisconsin breast cancer data set relative to this script.
# NOTE: the old code concatenated the directory and "../../..." without a
# separator, so abspath resolved from the wrong directory; os.path.join fixes
# that.
dataFile = os.path.dirname(os.path.realpath(__file__))
dataFile = os.path.abspath(
    os.path.join(dataFile, "..", "..", "datasets",
                 "breast-cancer-wisconsin.csv"))

# Normalize the Wisconsin file.

norm = Normalize()
data_file_work = norm.load_csv(dataFile)
# Drop rows containing unknown ("?") fields — they cannot be regressed on.
norm.delete_unknowns(data_file_work)
# Column 0 is the sample id; it carries no predictive information.
norm.col_delete(data_file_work, 0)
# Recode the class column (index 9): 4 -> 1, otherwise -> 0 — presumably
# malignant vs. benign; verify against Normalize.col_replace.
norm.col_replace(data_file_work, 9, 4, 1, 0)

# Convert the nine feature columns to numeric values.
for i in range(0, 9):
    norm.make_col_numeric(data_file_work, i)

df = pd.DataFrame(data_file_work)
df.columns = [
    "clump_thickness", "size_uniformity", "shape_uniformity",
    "marginal_adhesion", "epithelial_size", "bare_nucleoli", "bland_chromatin",
    "normal_nucleoli", "mitoses", "class"
]

# All columns except the recoded class column are predictors.
train_cols = df.columns[0:9]

# Perform the logistic regression.