def cluster():
    """Cluster all databases.

    Call this after major changes to the database (initial load, etc.).
    """
    for key in completion_databases:
        # Look up the completion database for this task and parameter. If it's
        # empty, we don't care.
        completion_database = completion_databases[key]
        if len(completion_database) == 0:
            continue

        # Assemble a database of the completion positions that sklearn can
        # understand.
        position_database = []
        for item in completion_database:
            position_database.append(item.pos)

        # Perform the clustering.
        labels = MyDBSCAN(position_database, EPS, MIN_SAMPLES)

        # Assemble actual cluster lists from the label vector.
        cluster_count = len(set(labels)) - (1 if -1 in labels else 0)
        completion_clusters[key] = []
        key_clusters = completion_clusters[key]
        for i in range(cluster_count):
            key_clusters.append([])
        for index, comp in enumerate(completion_database):
            label = labels[index]
            if label == -1:
                continue
            key_clusters[label - 1].append(comp)
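# Illustrative sketch (not part of the original module): the
# key_clusters[label - 1] indexing above assumes MyDBSCAN marks noise as -1
# and numbers clusters from 1, so cluster label 1 lands in list index 0.
# The label values below are hypothetical.
example_labels = [1, 1, 2, -1, 2]
example_clusters = [[] for _ in range(2)]
for idx, label in enumerate(example_labels):
    if label != -1:
        example_clusters[label - 1].append(idx)
assert example_clusters == [[0, 1], [2, 4]]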
# In[116]:

X_normalized = StandardScaler().fit_transform(X)

# In[117]:

X_normalized

# #### MaxPts is the maximum number of tags each cluster can hold

# In[118]:

print('Running my implementation...')
my_labels, label_index = np.array(
    MyDBSCAN(X_normalized, eps=.2, MinPts=20, MaxPts=100))

# In[119]:

dict(zip(nonNAN_index, list(zip(label_index, my_labels))))

# In[120]:

core_samples_mask = np.zeros_like(my_labels, dtype=bool)
core_samples_mask[np.array(range(0, len(my_labels)))] = True
labels = my_labels

# In[121]:

# Plot result
import matplotlib.pyplot as plt
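# A possible continuation of the truncated plotting cell above (a sketch, not
# the original notebook's code): draw each cluster in its own color, with
# noise points (label -1) in black.
unique = sorted(set(labels))
colors = plt.cm.viridis(np.linspace(0, 1, len(unique)))
for label, color in zip(unique, colors):
    mask = labels == label
    plt.plot(X_normalized[mask, 0], X_normalized[mask, 1], 'o',
             markerfacecolor='k' if label == -1 else tuple(color),
             markeredgecolor='k')
plt.title('MyDBSCAN clusters (noise in black)')
plt.show()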
# Create three Gaussian blobs to use as our clustering data.
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4,
                            random_state=0)

X = StandardScaler().fit_transform(X)

###############################################################################
# My implementation of DBSCAN
#

# Run my DBSCAN implementation.
print('Running my implementation...')
my_labels = MyDBSCAN(X, eps=0.3, MinPts=10)

###############################################################################
# Scikit-learn implementation of DBSCAN
#

print('Running scikit-learn implementation...')
db = DBSCAN(eps=0.3, min_samples=10).fit(X)
skl_labels = db.labels_

# Scikit-learn uses -1 for NOISE, and starts cluster labeling at 0. I start
# numbering at 1, so increment the skl cluster numbers by 1.
for i in range(0, len(skl_labels)):
    if not skl_labels[i] == -1:
        skl_labels[i] += 1
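# Assumed follow-up (a minimal sketch, not part of the original script): with
# both label vectors now on the same numbering scheme, count the points on
# which the two implementations disagree.
num_disagree = 0
for i in range(0, len(skl_labels)):
    if not skl_labels[i] == my_labels[i]:
        num_disagree += 1
if num_disagree == 0:
    print('PASS - all labels match!')
else:
    print('FAIL -', num_disagree, 'labels do not match.')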
else:
    datast.append(
        [row[0], row[3], row[28], row[29], row[30], row[26]])

for r in datamov:
    datamovlat.append([r[2], r[3]])
    datamovspd.append([r[4], r[5]])

datamovlat = StandardScaler().fit_transform(datamovlat)
datamovspd = StandardScaler().fit_transform(datamovspd)

print("Running my implementation...")
#print(datamovlat)
list_labels = MyDBSCAN(datamovlat, datamovspd, eps=0.3, MinPts=5)
db = np.array(list_labels)
#core_samples_mask = np.zeros_like(list_labels, dtype=bool)
#core_samples_mask[db.core_sample_indices_] = True
n_clusters_ = len(set(list_labels)) - (1 if -1 in list_labels else 0)
#print(type(db))
unique_labels = set(list_labels)

x = datamovlat[:, 0]
y = datamovlat[:, 1]

for j in range(len(unique_labels)):
    print(j)
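# A sketch of a possible continuation (an assumption; the snippet cuts off
# after print(j)): report how many points fall under each cluster label,
# treating -1 as noise.
for label in sorted(unique_labels):
    count = int((db == label).sum())
    name = "noise" if label == -1 else "cluster %d" % label
    print("%s: %d points" % (name, count))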
nonNAN_index = plot_df[['cL_Lat', 'cL_Long']].dropna().index
X = np.array(plot_df.iloc[nonNAN_index, :][['cL_Lat', 'cL_Long']])

# In[6]:

from dbscan import MyDBSCAN
from sklearn.preprocessing import StandardScaler

# In[7]:

X_normalized = StandardScaler().fit_transform(X)
my_labels = np.array(MyDBSCAN(X_normalized, eps=.4, MinPts=20, MaxPts=100))
core_samples_mask = np.zeros_like(my_labels, dtype=bool)
core_samples_mask[np.array(range(0, len(my_labels)))] = True
labels = my_labels

# Count how many points landed in each cluster label.
clusterDict = {}
for label in labels:
    if label in clusterDict.keys():
        clusterDict[label] += 1
    else:
        clusterDict[label] = 1

# .copy() avoids pandas' SettingWithCopyWarning when adding the label column.
dbscan_labeled = check_result_df.iloc[nonNAN_index, :].copy()
dbscan_labeled["Gateway_Label"] = my_labels

# In[8]:
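# Equivalent one-liner (an illustrative alternative, not a cell from the
# original notebook): collections.Counter builds the same label -> count
# mapping as the counting loop above.
from collections import Counter
clusterDict_alt = dict(Counter(labels))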
def predict():
    """Run MyDBSCAN over the fitted feature matrix and return its labels."""
    X = fit()
    labels = MyDBSCAN(X, eps=0.2, min_pts=3)
    return labels
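# fit() is not shown in this snippet. A hypothetical stand-in (purely an
# assumption, so predict() has something runnable to call): load a feature
# matrix and standardize it.
import numpy as np
from sklearn.preprocessing import StandardScaler

def fit():
    # Hypothetical data source; replace with the real feature-loading step.
    X_raw = np.random.RandomState(0).randn(100, 2)
    return StandardScaler().fit_transform(X_raw)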