def cluster():
    """Cluster all databases. Call this after major changes to the database
    (initial load, etc.).
    """
    for key, completion_database in completion_databases.items():
        # An empty completion database has nothing to cluster.
        if not completion_database:
            continue

        # Collect the raw positions in the list form the clustering
        # routine accepts.
        position_database = [entry.pos for entry in completion_database]

        # Run the custom DBSCAN over the positions.
        labels = MyDBSCAN(position_database, EPS, MIN_SAMPLES)

        # Number of real clusters; label -1 marks noise and is not a cluster.
        cluster_count = len(set(labels)) - (1 if -1 in labels else 0)

        # One (initially empty) member list per cluster.
        key_clusters = [[] for _ in range(cluster_count)]
        completion_clusters[key] = key_clusters

        # Distribute each completion into its cluster. Labels are 1-based
        # (noise is -1 and is skipped), hence the `label - 1` index.
        for comp, label in zip(completion_database, labels):
            if label == -1:
                continue
            key_clusters[label - 1].append(comp)
# In[116]:

# Standardize the feature matrix X (zero mean, unit variance per column)
# before clustering. X is built earlier from the non-NaN coordinate rows.
X_normalized = StandardScaler().fit_transform(X)

# In[117]:

# Bare expression: in the original notebook this cell displayed X_normalized.
X_normalized

# #### MaxPts is the maximum number of tags each cluster can hold

# In[118]:

print('Running my implementation...')
# NOTE(review): this unpacks two values through np.array(), which implies
# MyDBSCAN returns a pair (something label-like plus an index sequence) that
# np.array turns into a 2-row array — confirm against MyDBSCAN's definition.
my_labels, label_index = np.array(
    MyDBSCAN(X_normalized, eps=.2, MinPts=20, MaxPts=100))

# In[119]:

# Bare expression (notebook display cell): maps each original row index to
# its (label_index, label) pair.
dict(zip(nonNAN_index, list(zip(label_index, my_labels))))

# In[120]:

# Mark every point as a core sample (the mask is set True for all indices),
# then alias the labels under the name the plotting code expects.
core_samples_mask = np.zeros_like(my_labels, dtype=bool)
core_samples_mask[np.array(range(0, len(my_labels)))] = True
labels = my_labels

# In[121]:

# Plot result
import matplotlib.pyplot as plt

# Create three gaussian blobs to use as our clustering data. The fixed
# random_state keeps the demo reproducible.
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=750,
                            centers=centers,
                            cluster_std=0.4,
                            random_state=0)

# Standardize so eps is expressed in units of standard deviations.
X = StandardScaler().fit_transform(X)

###############################################################################
# My implementation of DBSCAN
#

# Run my DBSCAN implementation.
print('Running my implementation...')
my_labels = MyDBSCAN(X, eps=0.3, MinPts=10)

###############################################################################
# Scikit-learn implementation of DBSCAN
#

print('Running scikit-learn implementation...')
db = DBSCAN(eps=0.3, min_samples=10).fit(X)
skl_labels = db.labels_

# Scikit-learn uses -1 for NOISE and starts cluster labeling at 0. I start
# numbering at 1, so increment the skl cluster numbers by 1.
for i in range(len(skl_labels)):
    if skl_labels[i] != -1:
        skl_labels[i] += 1
            else:
                datast.append(
                    [row[0], row[3], row[28], row[29], row[30], row[26]])

    for r in datamov:
        datamovlat.append([r[2], r[3]])
        datamovspd.append([r[4], r[5]])

    datamovlat = StandardScaler().fit_transform(datamovlat)
    datamovspd = StandardScaler().fit_transform(datamovspd)

print("Running my implementation...")

# Cluster the movement data: positional features plus speed features are
# both handed to the custom DBSCAN.
list_labels = MyDBSCAN(datamovlat, datamovspd, eps=0.3, MinPts=5)
db = array(list_labels)

# Distinct labels produced by the run; -1 marks noise, so it is excluded
# from the cluster count.
unique_labels = set(list_labels)
n_clusters_ = len(unique_labels) - (1 if -1 in list_labels else 0)

# Split the (scaled) positional matrix into its two coordinate columns.
x = datamovlat[:, 0]
y = datamovlat[:, 1]

# Print one sequential index per distinct label (0 .. n-1).
for j in range(len(unique_labels)):
    print(j)
# Row indices where both centre-line coordinates are present; only those
# rows feed the clustering (presumably plot_df is a pandas DataFrame with
# 'cL_Lat'/'cL_Long' columns — TODO confirm against the caller).
nonNAN_index = plot_df[['cL_Lat', 'cL_Long']].dropna().index
X = np.array(plot_df.iloc[nonNAN_index,:][['cL_Lat', 'cL_Long']] )


# In[6]:


from dbscan import MyDBSCAN
from sklearn.preprocessing import StandardScaler


# In[7]:


# Normalize the coordinates and run the custom DBSCAN over them.
X_normalized = StandardScaler().fit_transform(X)
my_labels = np.array(MyDBSCAN(X_normalized, eps=.4, MinPts=20, MaxPts=100))

# Treat every point as a core sample: the mask is flipped to True for all
# indices right after being allocated.
core_samples_mask = np.zeros_like(my_labels, dtype=bool)
core_samples_mask[np.array(range(0, len(my_labels)))] = True
labels = my_labels

# Tally how many points landed in each cluster label.
clusterDict = {}
for label in labels:
    clusterDict[label] = clusterDict.get(label, 0) + 1

# Attach the labels to the rows that were actually clustered.
# NOTE(review): assigning into an .iloc slice can raise pandas'
# SettingWithCopyWarning — confirm whether a .copy() is intended here.
dbscan_labeled = check_result_df.iloc[nonNAN_index,:]
dbscan_labeled["Gateway_Label"] = my_labels

# In[8]:
# Beispiel #6
# 0
def predict():
    """Fit the data and return the DBSCAN cluster labels for it."""
    features = fit()
    return MyDBSCAN(features, eps=0.2, min_pts=3)