def cardio_re():
    """Evaluate DBSCAN-based outlier detection on the cardio data set.

    For each eps in ``[3, 4, 5]`` the function loads
    ``../data/cardio.mat``, runs ``dbscan.dbscanp`` on the ``X`` matrix
    (``minpts=10``, ``factor=1``) and reads column 21 of the first element
    of the result as the per-point labels.  Points labelled ``-1`` are
    mapped to class 1 and every other point to class 0; the binary labels
    are then compared with the ``y`` vector from the file.

    Printed per run, in order: the first 12 rows of ``X``, ``y``, the raw
    labels, the binary labels, weighted F1, ``assess.falsealarmrate``,
    adjusted Rand index, Jaccard score and the eps value.  Nothing is
    returned.
    """
    epss = [3, 4, 5]
    for e in epss:
        # The file is re-read for every run so each eps starts from a fresh
        # frame; whether dbscanp modifies its input is not visible here.
        mat = loadmat('../data/cardio.mat')
        y = pd.DataFrame(mat['y'])[0].to_numpy()
        X_car = pd.DataFrame(mat['X'])
        print(X_car.head(12))
        print(y)

        clusterlmat = dbscan.dbscanp(X_car, 21, eps=e, minpts=10, factor=1)

        # copy=True yields a private, writable array.  Writing into the
        # plain to_numpy() result can modify the result frame through a
        # view, and fails outright when pandas returns a read-only array
        # (Copy-on-Write).
        y_t = clusterlmat[0][21].to_numpy(copy=True)
        print(y_t)

        # -1 (presumably DBSCAN noise -- confirm against dbscanp) becomes
        # the positive class, every other label becomes 0.
        noise = y_t == -1
        y_t[noise] = 1
        y_t[~noise] = 0
        print(y_t)

        print(f1_score(y, y_t, average='weighted'))
        print(assess.falsealarmrate(y, [0], y_t, 1))
        print(adjusted_rand_score(y, y_t))
        print(jaccard_score(y, y_t))
        print(e)
def pima_re():
    """Evaluate DBSCAN-based outlier detection on the Pima diabetes CSV.

    Each entry of the parameter grid is a ``(minpts, factor, eps)`` tuple.
    For every entry the function reads
    ``../data/pimaindiansdiabetes.csv`` (no header row), takes column 8 as
    the reference labels ``y`` and calls ``dbscan.dbscanp`` on the whole
    frame with ``Initialization.UNIFORM`` and ``plot=True``.  Column 9 of
    the first element of the result is read as the per-point labels;
    ``-1`` is mapped to class 1 and everything else to class 0.

    Printed per run, in order: the first 12 rows of the data, ``y``, the
    binary labels, weighted F1, ``assess.falsealarmrate``, adjusted Rand
    index, Jaccard score and the eps value.  Nothing is returned.
    """
    epss = [(45, 0.2, 0.9), (35, 0.35, 2), (25, 0.2, 0.0005)]
    for minpts, factor, eps in epss:
        # Re-read per run: the frame is handed straight to dbscanp, and
        # whether that call modifies it is not visible here.
        data = pd.read_csv('../data/pimaindiansdiabetes.csv', header=None)
        print(data.head(12))
        y = data[8].to_numpy()
        print(y)

        clusterlmat = dbscan.dbscanp(
            data, 8,
            eps=eps, minpts=minpts, factor=factor,
            initialization=dbscan.Initialization.UNIFORM,
            plot=True)

        # copy=True yields a private, writable array.  Writing into the
        # plain to_numpy() result can modify the result frame through a
        # view, and fails outright when pandas returns a read-only array
        # (Copy-on-Write).
        y_t = clusterlmat[0][9].to_numpy(copy=True)

        # -1 (presumably DBSCAN noise -- confirm against dbscanp) becomes
        # the positive class, every other label becomes 0.
        noise = y_t == -1
        y_t[noise] = 1
        y_t[~noise] = 0
        print(y_t)

        print(f1_score(y, y_t, average='weighted'))
        print(assess.falsealarmrate(y, [0], y_t, 1))
        print(adjusted_rand_score(y, y_t))
        print(jaccard_score(y, y_t))
        print(eps)
def glass_re():
    """Evaluate DBSCAN-based outlier detection on the glass data set.

    For each eps in ``[0.4, 0.9, 1, 1.2, 2]`` the function loads
    ``../data/glass.mat``, runs ``dbscan.dbscanp`` on the ``X`` matrix
    (``minpts=8``, ``factor=0.5``, ``Initialization.KCENTRE``) and reads
    column 9 of the first element of the result as the per-point labels.
    Points labelled ``-1`` are mapped to class 1 and every other point to
    class 0; the binary labels are compared with ``y`` from the file.

    Printed per run, in order: the first 12 rows of ``X``, ``y``, the
    binary labels, weighted F1, ``assess.falsealarmrate``, adjusted Rand
    index, Jaccard score and the eps value.  Nothing is returned.
    """
    epss = [0.4, 0.9, 1, 1.2, 2]
    for e in epss:
        # The file is re-read for every run so each eps starts from a fresh
        # frame; whether dbscanp modifies its input is not visible here.
        mat = loadmat('../data/glass.mat')
        y = pd.DataFrame(mat['y'])[0].to_numpy()
        X = pd.DataFrame(mat['X'])
        print(X.head(12))
        print(y)

        # NOTE(review): the second argument is 21 here, yet the labels are
        # read from column 9 below.  The other runs in this file pass a
        # value that lines up with the label column they read, so 21 looks
        # copied from the cardio run -- confirm against dbscanp's signature
        # before changing it.
        clusterlmat = dbscan.dbscanp(
            X, 21,
            eps=e, minpts=8, factor=0.5,
            initialization=dbscan.Initialization.KCENTRE)

        # copy=True yields a private, writable array.  Writing into the
        # plain to_numpy() result can modify the result frame through a
        # view, and fails outright when pandas returns a read-only array
        # (Copy-on-Write).
        y_t = clusterlmat[0][9].to_numpy(copy=True)

        # -1 (presumably DBSCAN noise -- confirm against dbscanp) becomes
        # the positive class, every other label becomes 0.
        noise = y_t == -1
        y_t[noise] = 1
        y_t[~noise] = 0
        print(y_t)

        print(f1_score(y, y_t, average='weighted'))
        print(assess.falsealarmrate(y, [0], y_t, 1))
        print(adjusted_rand_score(y, y_t))
        print(jaccard_score(y, y_t))
        print(e)
def pima_pre_norm_re():
    """Evaluate DBSCAN on the min-max-scaled Pima diabetes CSV.

    Each entry of the parameter grid is a ``(minpts, factor, eps)`` tuple.
    For every entry the function reads
    ``../data/pimaindiansdiabetes.csv`` (no header row), takes column 8 as
    the reference labels ``y``, rescales the whole frame with
    ``MinMaxScaler`` and calls ``dbscan.dbscanp`` on the scaled frame with
    ``Initialization.NONE`` and ``plot=False``.  Column 9 of the first
    element of the result is read as the per-point labels; ``-1`` is
    mapped to class 1 and everything else to class 0.

    Printed per run, in order: the first 12 rows of the raw data, ``y``,
    the binary labels, weighted F1, ``assess.falsealarmrate``, adjusted
    Rand index, Jaccard score and the eps value.  Nothing is returned.
    """
    epss = [(45, 0.02, 0.9), (35, 0.0001, 2), (25, 0.2, 0.00005)]
    for minpts, factor, eps in epss:
        data = pd.read_csv('../data/pimaindiansdiabetes.csv', header=None)
        print(data.head(12))
        y = data[8].to_numpy()
        print(y)

        # The scaler is fitted on every column of the frame, label column
        # 8 included; the scaled frame is rebuilt per run so each
        # parameter setting gets its own input object.
        scaler = MinMaxScaler()
        data2 = pd.DataFrame(scaler.fit_transform(data))

        clusterlmat = dbscan.dbscanp(
            data2, 8,
            eps=eps, minpts=minpts, factor=factor,
            initialization=dbscan.Initialization.NONE,
            plot=False)

        # copy=True yields a private, writable array.  Writing into the
        # plain to_numpy() result can modify the result frame through a
        # view, and fails outright when pandas returns a read-only array
        # (Copy-on-Write).
        y_t = clusterlmat[0][9].to_numpy(copy=True)

        # -1 (presumably DBSCAN noise -- confirm against dbscanp) becomes
        # the positive class, every other label becomes 0.
        noise = y_t == -1
        y_t[noise] = 1
        y_t[~noise] = 0
        print(y_t)

        print(f1_score(y, y_t, average='weighted'))
        print(assess.falsealarmrate(y, [0], y_t, 1))
        print(adjusted_rand_score(y, y_t))
        print(jaccard_score(y, y_t))
        print(eps)
def pima_pre_norm_and_pca_re():
    """Evaluate DBSCAN on the scaled, PCA-reduced Pima diabetes CSV.

    Each entry of the parameter grid is a ``(minpts, factor, eps)`` tuple;
    only ``epss3`` is iterated.  For every entry the function reads
    ``../data/pimaindiansdiabetes.csv`` (no header row), takes column 8 as
    the reference labels ``y``, rescales the frame with ``MinMaxScaler``,
    projects columns 0-7 onto 7 principal components and calls
    ``dbscan.dbscanp`` on the projected frame with
    ``Initialization.KCENTRE`` and ``plot=False``.  Column 7 of the first
    element of the result is read as the per-point labels; ``-1`` is
    mapped to class 1 and everything else to class 0.

    Per run the function prints ``y``, the binary labels, eps, weighted
    F1, ``assess.falsealarmrate``, adjusted Rand index, Jaccard score and
    eps again, then appends one row
    ``[minpts, eps, factor, f1, false-alarm, ARI, jaccard, initialization]``
    to ``../data/pima_pca/dbscan.pima.pca.result.csv``.  Nothing is
    returned.
    """
    # epss and epss2 are defined but not used below; only epss3 is run.
    epss = [(270, 0.5, 0.5), (270, 0.5, 0.4), (270, 0.5, 0.35),
            (270, 0.5, 0.3), (270, 0.5, 0.2), (270, 0.5, 0.1)]
    epss2 = [(280, 0.5, 0.5), (280, 0.5, 0.4), (280, 0.5, 0.35),
             (280, 0.5, 0.3), (280, 0.5, 0.2), (280, 0.5, 0.1)]
    epss3 = [(290, 0.5, 0.5), (290, 0.5, 0.4), (290, 0.5, 0.35),
             (290, 0.5, 0.3), (290, 0.5, 0.2), (290, 0.5, 0.1)]

    for minpts, factor, eps in epss3:
        data = pd.read_csv('../data/pimaindiansdiabetes.csv', header=None)
        y = data[8].to_numpy()
        print(y)

        scaler = MinMaxScaler()
        data2 = pd.DataFrame(scaler.fit_transform(data))

        # Only columns 0-7 go into the projection; the frame is rebuilt
        # per run so each parameter setting gets its own input object.
        pca = PCA(n_components=7)
        principalcomponents = pca.fit_transform(data2.iloc[:, 0:8])
        principledf = pd.DataFrame(data=principalcomponents)

        clusterlmat = dbscan.dbscanp(
            principledf, 7,
            eps=eps, minpts=minpts, factor=factor,
            initialization=dbscan.Initialization.KCENTRE,
            plot=False)

        # copy=True yields a private, writable array.  Writing into the
        # plain to_numpy() result can modify the result frame through a
        # view, and fails outright when pandas returns a read-only array
        # (Copy-on-Write).
        y_t = clusterlmat[0][7].to_numpy(copy=True)

        # -1 (presumably DBSCAN noise -- confirm against dbscanp) becomes
        # the positive class, every other label becomes 0.
        noise = y_t == -1
        y_t[noise] = 1
        y_t[~noise] = 0
        print("cluster labels:", y_t)
        print("eps: ", eps)

        f_sc = f1_score(y, y_t, average='weighted')
        fa = assess.falsealarmrate(y, [0], y_t, 1)
        ard = adjusted_rand_score(y, y_t)
        js = jaccard_score(y, y_t)
        print(f_sc)
        print(fa)
        print(ard)
        print(js)
        print(eps)

        rr = [
            minpts, eps, factor, f_sc, fa, ard, js,
            dbscan.Initialization.KCENTRE
        ]
        # newline='' is required by the csv module; without it, platforms
        # that translate line endings get an extra blank row per record.
        with open('../data/pima_pca/dbscan.pima.pca.result.csv', 'a',
                  newline='') as fd:
            writer = csv.writer(fd)
            writer.writerow(rr)