# Make the AIFH support library (../lib/aifh relative to aifh_dir) importable.
# NOTE(review): aifh_dir must be defined earlier in the file — not visible here.
aifh_dir = os.path.abspath(os.path.join(aifh_dir, "..", "lib", "aifh"))
sys.path.append(aifh_dir)

from normalize import Normalize

# Locate the Wisconsin breast cancer data set relative to this script.
# BUG FIX: the original concatenated "../../datasets/..." directly onto the
# directory string with no separator; "<dir>../.." collapses to only ONE
# directory level up, so it worked by accident. The os.path.join form below
# makes the actual resolution (one level up, then "datasets") explicit.
dataFile = os.path.dirname(os.path.realpath(__file__))
dataFile = os.path.abspath(os.path.join(dataFile, "..", "datasets",
                                        "breast-cancer-wisconsin.csv"))

# Normalize the Wisconsin file.
norm = Normalize()
data_file_work = norm.load_csv(dataFile)
norm.delete_unknowns(data_file_work)          # drop rows with unknown ("?") values
norm.col_delete(data_file_work, 0)            # drop the sample-id column
# Recode the class column (index 9): presumably 4 (malignant) -> 1, else -> 0
# — verify against Normalize.col_replace.
norm.col_replace(data_file_work, 9, 4, 1, 0)
for i in range(0, 9):                         # BUG FIX: xrange is Python 2 only
    norm.make_col_numeric(data_file_work, i)

# Build a DataFrame with named columns for the regression.
df = pd.DataFrame(data_file_work)
df.columns = ["clump_thickness", "size_uniformity", "shape_uniformity",
              "marginal_adhesion", "epithelial_size", "bare_nucleoli",
              "bland_chromatin", "normal_nucleoli", "mitoses", "class"]
train_cols = df.columns[0:9]  # every column except "class" is a predictor

# Perform the logistic regression.
logit = sm.Logit(df['class'], df[train_cols])
# fit the model
# Make the AIFH support library (../lib/aifh relative to aifh_dir) importable.
# NOTE(review): aifh_dir must be defined earlier in the file — not visible here.
aifh_dir = os.path.abspath(os.path.join(aifh_dir, "..", "lib", "aifh"))
sys.path.append(aifh_dir)

from normalize import Normalize

k = 3  # number of clusters to form

# Locate the Iris data set relative to this script.
# BUG FIX: the original concatenated "../../datasets/iris.csv" onto the
# directory string with no separator; "<dir>../.." collapses to only ONE
# level up, so it worked by accident. The os.path.join form below preserves
# the path that was actually resolved, explicitly.
irisFile = os.path.dirname(os.path.realpath(__file__))
irisFile = os.path.abspath(os.path.join(irisFile, "..", "datasets", "iris.csv"))

# Read the Iris data set.
print('Reading CSV file: ' + irisFile)
norm = Normalize()
iris_data = norm.load_csv(irisFile)

# Prepare the iris data set: pull out the species labels (column 4) and
# keep only the four numeric measurement columns.
classes = norm.col_extract(iris_data, 4)
norm.col_delete(iris_data, 4)
for i in range(0, 4):
    norm.make_col_numeric(iris_data, i)

# Cluster the Iris data set with k-means.
res, idx = kmeans2(np.array(iris_data), k)

# Display each cluster's rows alongside their true species label.
for cluster_num in range(0, k):
    print("Cluster #" + str(cluster_num + 1))
    for i in range(0, len(idx)):
        if idx[i] == cluster_num:
            print(str(iris_data[i]) + "," + classes[i])
# Make the AIFH support library importable.
# NOTE(review): the original line was truncated mid-expression ('"aifh")' was
# the tail of this assignment); reconstructed from the identical sibling
# scripts in this file. aifh_dir must be defined earlier — not visible here.
aifh_dir = os.path.abspath(os.path.join(aifh_dir, "..", "lib", "aifh"))
sys.path.append(aifh_dir)

from normalize import Normalize

k = 3  # number of clusters to form

# Locate the Iris data set relative to this script.
# BUG FIX: the original concatenated "../../datasets/iris.csv" onto the
# directory string with no separator; "<dir>../.." collapses to only ONE
# level up, so it worked by accident. The os.path.join form below preserves
# the path that was actually resolved, explicitly.
irisFile = os.path.dirname(os.path.realpath(__file__))
irisFile = os.path.abspath(os.path.join(irisFile, "..", "datasets", "iris.csv"))

# Read the Iris data set.
print('Reading CSV file: ' + irisFile)
norm = Normalize()
iris_data = norm.load_csv(irisFile)

# Prepare the iris data set: pull out the species labels (column 4) and
# keep only the four numeric measurement columns.
classes = norm.col_extract(iris_data, 4)
norm.col_delete(iris_data, 4)
for i in range(0, 4):
    norm.make_col_numeric(iris_data, i)

# Cluster the Iris data set with k-means.
res, idx = kmeans2(np.array(iris_data), k)

# Display each cluster's rows alongside their true species label.
for cluster_num in range(0, k):
    print("Cluster #" + str(cluster_num + 1))
    for i in range(0, len(idx)):
        if idx[i] == cluster_num:
            print(str(iris_data[i]) + "," + classes[i])
# Make the AIFH support library importable.
# NOTE(review): the original line was truncated mid-expression ('"aifh")' was
# the tail of this assignment); reconstructed from the identical sibling
# scripts in this file. aifh_dir must be defined earlier — not visible here.
aifh_dir = os.path.abspath(os.path.join(aifh_dir, "..", "lib", "aifh"))
sys.path.append(aifh_dir)

from normalize import Normalize

# Locate the Wisconsin breast cancer data set relative to this script.
# BUG FIX: the original concatenated "../../datasets/..." directly onto the
# directory string with no separator; "<dir>../.." collapses to only ONE
# directory level up, so it worked by accident. The os.path.join form below
# makes the actual resolution (one level up, then "datasets") explicit.
dataFile = os.path.dirname(os.path.realpath(__file__))
dataFile = os.path.abspath(os.path.join(dataFile, "..", "datasets",
                                        "breast-cancer-wisconsin.csv"))

# Normalize the Wisconsin file.
norm = Normalize()
data_file_work = norm.load_csv(dataFile)
norm.delete_unknowns(data_file_work)          # drop rows with unknown ("?") values
norm.col_delete(data_file_work, 0)            # drop the sample-id column
# Recode the class column (index 9): presumably 4 (malignant) -> 1, else -> 0
# — verify against Normalize.col_replace.
norm.col_replace(data_file_work, 9, 4, 1, 0)
for i in range(0, 9):
    norm.make_col_numeric(data_file_work, i)

# Build a DataFrame with named columns for the regression.
df = pd.DataFrame(data_file_work)
df.columns = ["clump_thickness", "size_uniformity", "shape_uniformity",
              "marginal_adhesion", "epithelial_size", "bare_nucleoli",
              "bland_chromatin", "normal_nucleoli", "mitoses", "class"]
train_cols = df.columns[0:9]  # every column except "class" is a predictor

# Perform the logistic regression.