/
test.py
123 lines (82 loc) · 2.68 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import pandas as pd
from sklearn.preprocessing import LabelEncoder, Imputer
from data_generator import gen_test_df
from tabulate import tabulate
from density_analysis import KDE, MeanShift, DBSCAN
import density_analysis as sd
def csv_loader(path="./datasets/abalone.data.csv"):
    """Read a header-less, comma-separated file into a DataFrame.

    Args:
        path: Location of the CSV file. Defaults to the abalone data file,
            resolved relative to the current working directory.

    Returns:
        A pandas DataFrame. Because the file is read with ``header=None``,
        the first row is treated as data and the columns are labelled with
        integer positions.
    """
    return pd.read_csv(path, delimiter=",", header=None)
def load_abalone():
    """Load the abalone CSV, name its columns and label-encode ``sex``.

    Returns:
        The DataFrame produced by ``csv_loader()``. The manager edits that
        same object in place, so the returned frame already carries the
        column names and the encoded ``sex`` column.
    """
    column_names = "sex,length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight,rings".split(",")

    frame = csv_loader()
    manager = DataFrameManager(frame)

    # Attach the column names to the header-less frame.
    manager.set_column_names(column_names)

    # Replace the categorical "sex" column with label codes.
    manager.encode_column_as_label("sex")

    return frame
class DataFrameManager():
    """Small helper that applies in-place edits to a wrapped pandas DataFrame.

    The frame given to the constructor is stored by reference, so every
    mutating method below changes the caller's object directly.
    """

    def __init__(self, df):
        # Stored without copying: edits stay visible through the caller's handle.
        self.df = df

    def set_column_names(self, names):
        """Rename the columns of the wrapped frame in place.

        Args:
            names: Either a dict mapping existing column labels to new ones
                (passed to ``DataFrame.rename``), or an array-like holding
                the complete list of new column labels.
        """
        if isinstance(names, dict):
            self.df.rename(columns=names, inplace=True)
        else:  # expected array-like
            self.df.columns = names

    def encode_column_as_label(self, column_name):
        """Overwrite a column with the codes produced by ``LabelEncoder``.

        Args:
            column_name: Label of the column to encode in place.
        """
        column = self.df[column_name]
        self.df[column_name] = LabelEncoder().fit_transform(column)

    def count_column_as_label(self, column_name):
        """Print, and return, the frequency of each distinct value in a column.

        Args:
            column_name: Label of the column to summarise.

        Returns:
            The ``value_counts()`` result that was printed.
        """
        label_count = self.df[column_name].value_counts()
        # Single-argument call form prints the same on Python 2 and Python 3.
        print(label_count)
        return label_count

    def impute_column_missing_value(self):
        """Construct a mean-strategy ``Imputer``; currently has no effect on the frame."""
        # NOTE(review): the imputer is built but never fitted, applied to
        # self.df, or returned, so calling this method changes nothing.
        # Left as-is because the intended target column(s) are not visible here.
        Imputer(missing_values='NaN', strategy='mean', axis=0, verbose=0, copy=False)
def test_KDE():
    """Smoke-test ``KDE`` on generated data with both model variants.

    Builds a 3-feature test frame, then for the default model and for
    ``model_type="sklearn"`` samples the PDF with ``n_sample=20`` and prints
    the resulting report.
    """
    df = gen_test_df(n_features=3)

    # First pass uses KDE's default model type, second pass the sklearn one.
    for model_kwargs in ({}, {"model_type": "sklearn"}):
        model = KDE(df, **model_kwargs)
        report_df = model.sampling_pdf(n_sample=20)
        # Call form of print works on both Python 2 and Python 3.
        print(report_df)
def test_ClusterAnalysis():
    """Smoke-test ``MeanShift`` on generated data.

    Builds a 3-feature test frame, fits the model wrapper, and prints the
    centers followed by the outliers it reports.
    """
    df = gen_test_df(n_features=3)
    model = MeanShift(df)

    # Query both results before printing, matching the original call order.
    centers = model.get_centers()
    outliers = model.get_outliers()

    # Call form of print works on both Python 2 and Python 3.
    print(centers)
    print(outliers)
def test_GMM():
    """Smoke-test ``sd.report_gmm`` on generated data with five components.

    Prints the reported centers and then the convergence flag.
    """
    df = gen_test_df(n_features=3)
    centers, is_converged = sd.report_gmm(df, n_components=5)

    # Call form of print works on both Python 2 and Python 3.
    print(centers)
    print(is_converged)
def test_report():
    """Run the GMM and mean-shift reports on the abalone data and print them.

    The KDE report is currently disabled; it was invoked as
    ``sd.report_kde(df, model_type="sklearn", n_sample=50,
    type_settings={"rings": "o"})``.
    """
    df = load_abalone()

    # Gaussian-mixture report: centers plus a convergence flag.
    centers, is_converged = sd.report_gmm(df, n_components=5)
    print(centers)
    print(is_converged)

    # Mean-shift report: centers plus detected outliers.
    centers, outliers = sd.report_meanshift(df)
    print(centers)
    print(outliers)
# df = load_abalone()
# df = gen_test_df(n_features=3)
def main():
    """Placeholder entry point; intentionally does nothing."""
    return None
# count
# df_m.count_column_as_label("sex")
# print df
if __name__ == '__main__':
    # Only the abalone report runs by default. The other smoke tests
    # (test_KDE, test_ClusterAnalysis, test_GMM) are disabled; call them
    # here instead when needed.
    test_report()