예제 #1
0

# Standardize the features. The scaler is fitted on the training split only
# and then applied to both splits (StandardScaler standardizes each feature;
# it does not map values into the [0, 1] range).
scaler = preprocessing.StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Per-class feature means for labels 0, 1 and 2, taken from X and y.
# NOTE(review): these come from X, not the standardized X_train, and
# means_init is not passed to either call below -- confirm this is intended.
means_init = np.array([X[y == label].mean(axis=0) for label in range(3)])

# ---------------------------------------------------------------------------
# Expectation-Maximization experiment over 3..11 components.
em(
    X_train,
    X_test,
    y_train,
    y_test,
    component_list=list(range(3, 12)),
    num_class=7,
    file_no="phish",
)

# ---------------------------------------------------------------------------
# KMeans experiment over the same component counts.
kmeans(
    X_train,
    X_test,
    y_train,
    y_test,
    component_list=list(range(3, 12)),
    num_class=7,
    file_no="phish",
)














# Run the EM experiment on the transformed splits, seeded with means_init,
# and keep the seven values it returns.
(component_list, array_aic, array_bic, array_homo_1, array_comp_1,
 array_sil_1, array_avg_log) = em(
    X_train_transformed, X_test_transformed, y_train, y_test,
    init_means=means_init,
    component_list=list(range(3, 12)),
    num_class=2,
    toshow=0,
)

print("KMeans")
# Same setup for KMeans; note that component_list is rebound by this call.
(component_list, array_homo_2, array_comp_2,
 array_sil_2, array_var) = kmeans(
    X_train_transformed, X_test_transformed, y_train, y_test,
    init_means=means_init,
    component_list=list(range(3, 12)),
    num_class=2,
    toshow=0,
)

# In[9]:

# Turn every EM result into an (n, 1) column array -- presumably in
# preparation for the file export that follows (not visible here).
(component_list, array_aic, array_bic, array_homo_1, array_comp_1,
 array_sil_1, array_avg_log) = (
    np.array(series).reshape(-1, 1)
    for series in (component_list, array_aic, array_bic, array_homo_1,
                   array_comp_1, array_sil_1, array_avg_log)
)
# ### 1. NBA games datasets

# In[5]:


# Split the NBA data, holding out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X1,
    Y1,
    test_size=0.2,
)

# Standardize the features. The scaler is fitted on the training split only
# and then applied to both splits (StandardScaler standardizes each feature;
# it does not map values into the [0, 1] range).
scaler = preprocessing.StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Per-class feature means for labels 0 and 1, used to seed KMeans.
# NOTE(review): computed from the full, unscaled X1 rather than the
# standardized X_train -- confirm this is intended.
means_init = np.array([X1[Y1 == label].mean(axis=0) for label in range(2)])

# KMeans experiment over 3..11 clusters; the return value is discarded.
kmeans(
    X_train,
    X_test,
    y_train,
    y_test,
    init_means=means_init,
    component_list=list(range(3, 12)),
    num_class=2,
)


# ### 2. LOL games datasets

# In[6]:


# Split the LOL data, holding out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X2, Y2, test_size=0.2)

# Standardize the features. The scaler is fitted on the training split only
# and then applied to both splits (StandardScaler standardizes each feature;
# it does not map values into the [0, 1] range).
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Per-class feature means for this section's own data set. The original
# indexed X1/Y1 (the NBA data from the previous section), a copy-paste slip
# that would seed the LOL clustering with means from the wrong data.
# Assumes the LOL labels are the integers 0..6 -- TODO confirm.
# NOTE(review): means are taken from the unscaled X2, not the standardized
# X_train -- confirm this is intended.
means_init = np.array([X2[Y2 == i].mean(axis=0) for i in range(7)])
예제 #4
0
# Convert the pandas labels to a NumPy array. `as_matrix()` was removed in
# pandas 1.0; `.values` returns the same array and works on old and new
# pandas versions alike.
y_test = y_test.values

# Standardize the features. The scaler is fitted on the training split only
# and then applied to both splits (StandardScaler standardizes each feature;
# it does not map values into the [0, 1] range).
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Per-class feature means for labels 0, 1 and 2, taken from X and y.
# NOTE(review): only three class means are built while num_class=7 is passed
# below, and the means come from X rather than the standardized X_train --
# confirm both are intended.
means_init = np.array([X[y == i].mean(axis=0) for i in range(3)])

##############################################################################################################################
# Expectation-Maximization experiment over 3..11 components.
em(X_train,
   X_test,
   y_train,
   y_test,
   init_means=means_init,
   component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11],
   num_class=7,
   file_no="wine")

#############################################################################################################################
# KMeans experiment over the same component counts.
kmeans(X_train,
       X_test,
       y_train,
       y_test,
       init_means=means_init,
       component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11],
       num_class=7,
       file_no="wine")
예제 #5
0
# In[31]:

# ---------------------------------------------------------------------------
# Expectation-Maximization experiment over 3..15 components, seeded with
# means_init; the return value is discarded.
em(
    dataset,
    X_train,
    X_test,
    y_train,
    y_test,
    init_means=means_init,
    component_list=list(range(3, 16)),
    num_class=2,
)

# In[32]:

# ---------------------------------------------------------------------------
# KMeans experiment over 3..15 clusters, seeded with means_init; the return
# value is discarded.
kmeans(
    dataset,
    X_train,
    X_test,
    y_train,
    y_test,
    init_means=means_init,
    component_list=list(range(3, 16)),
    num_class=2,
)

# In[ ]:

# In[ ]:
예제 #6
0
print("Clustering PCA")

# Refit PCA on X using the component count of the grid search's best
# pipeline, then project X onto those components.
# NOTE(review): X_transformed_f is not used by the calls below, which read
# X_train_transformed / X_test_transformed -- confirm where those come from.
pca_new = PCA(
    n_components=gridSearch.best_estimator_.named_steps['pca'].n_components
)
pca_new.fit(X)
X_transformed_f = pca_new.transform(X)

# Clustering experiments on the transformed splits.
print("Expected Maximization")
(component_list, array_aic, array_bic, array_homo_1, array_comp_1,
 array_sil_1, array_avg_log) = em(
    X_train_transformed,
    X_test_transformed,
    y_train,
    y_test,
    component_list=list(range(3, 12)),
    num_class=7,
    toshow=0,
    file_no="wine_pca",
)

print("KMeans")
# component_list is rebound by this call.
(component_list, array_homo_2, array_comp_2,
 array_sil_2, array_var) = kmeans(
    X_train_transformed,
    X_test_transformed,
    y_train,
    y_test,
    component_list=list(range(3, 12)),
    num_class=7,
    toshow=0,
    file_no="wine_pca",
)



# Turn every EM and KMeans result into an (n, 1) column array -- presumably
# in preparation for the file export that follows (not visible here).
(component_list, array_aic, array_bic, array_homo_1, array_comp_1,
 array_sil_1, array_avg_log, array_homo_2, array_comp_2, array_sil_2,
 array_var) = (
    np.array(series).reshape(-1, 1)
    for series in (component_list, array_aic, array_bic, array_homo_1,
                   array_comp_1, array_sil_1, array_avg_log, array_homo_2,
                   array_comp_2, array_sil_2, array_var)
)
예제 #7
0
# Convert the pandas labels to NumPy arrays and shift them down by one
# (presumably from 1-based to 0-based class ids -- confirm). `as_matrix()`
# was removed in pandas 1.0; `.values` returns the same array and works on
# old and new pandas versions alike.
y_train = y_train.values - 1
y_test = y_test.values - 1

# Standardize the features. The scaler is fitted on the training split only
# and then applied to both splits (StandardScaler standardizes each feature;
# it does not map values into the [0, 1] range).
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Per-class feature means for labels 0..10, taken from X and y.
# NOTE(review): y here is not the shifted y_train/y_test above, and the means
# come from X rather than the standardized X_train -- confirm the label
# ranges line up.
means_init = np.array([X[y == i].mean(axis=0) for i in range(11)])

##############################################################################################################################
# Expectation-Maximization experiment over 3..16 components; the result is
# discarded.
_ = em(X_train,
       X_test,
       y_train,
       y_test,
       init_means=means_init,
       component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
       num_class=11,
       toshow=1)

#############################################################################################################################
# KMeans experiment over the same component counts; the result is discarded.
_ = kmeans(X_train,
           X_test,
           y_train,
           y_test,
           init_means=means_init,
           component_list=[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
           num_class=11,
           toshow=1)