Code example #1
File: outliers.py Project: hilfialkaff/nfl-combine
def scan_points(points):
    """Fit one DBScan model per position and return them keyed by position."""
    scans = {}
    for pos in points:
        X = points[pos]
        # Position-specific parameters: neighborhood radius and minimum points
        (eps, min_pts) = get_params(pos)

        scan = DBScan(eps, min_pts)
        scan.fit(X)

        scans[pos] = scan
    return scans
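A minimal usage sketch for scan_points; the position names and random feature matrices below are made-up stand-ins, and get_params and DBScan are assumed to be importable from the same project:

import numpy as np

rng = np.random.default_rng(0)
# Hypothetical combine measurements: one (n_samples, n_features) matrix per position
points = {
    'QB': rng.normal(size=(40, 5)),
    'WR': rng.normal(size=(60, 5)),
}
scans = scan_points(points)
print(scans.keys())  # one fitted DBScan model per position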
Code example #2
File: estimatePose.py Project: JamesKlee/part3
    def dbscan_estimate(self, pf):
        eps = 0.5
        minposes = 4

        # Run DBSCAN over the particle cloud
        db = DBScan(pf.particlecloud, eps, minposes)
        db.run()

        # Get the estimate
        guess = db.getguess()
        self.largestClusterSize = db.largestClusterSize

        return guess
Code example #3
import numpy as np
from numpy import genfromtxt


def main():
    # Feature matrix: all columns except the label (column 4)
    data_iris = genfromtxt('iris-data.csv', delimiter=',')
    data_iris_values = np.delete(data_iris, np.s_[4], 1)

    # Label vector: drop the four feature columns and flatten
    data_iris = genfromtxt('iris-data.csv', delimiter=',', dtype=int)
    data_iris_labels = np.delete(data_iris, np.s_[0:4], 1)
    data_iris_labels = np.concatenate(data_iris_labels)

    # Cluster on the feature values
    labels = DBScan(data_iris_values, 1, 15)
    print(labels)

    score = 0
    maxscore = 0
    for i in range(0, len(data_iris_labels)):
        if data_iris_labels[i] == labels[i]:
            score += 1
        maxscore += 1
    accuracy = score / maxscore
    print('Accuracy score (DBScan): %.2f' % accuracy)
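Note that DBSCAN assigns arbitrary cluster IDs, so comparing them position-by-position against the class labels, as above, can understate the match. A sketch of one common workaround, remapping each cluster to the majority true label inside it before scoring (remap_to_majority is a hypothetical helper, not part of the example's project):

import numpy as np

def remap_to_majority(cluster_labels, true_labels):
    # Replace each cluster ID with the most frequent true label in that cluster
    cluster_labels = np.asarray(cluster_labels)
    true_labels = np.asarray(true_labels)
    mapped = cluster_labels.copy()
    for c in np.unique(cluster_labels):
        mask = cluster_labels == c
        values, counts = np.unique(true_labels[mask], return_counts=True)
        mapped[mask] = values[np.argmax(counts)]
    return mapped

# accuracy = np.mean(remap_to_majority(labels, data_iris_labels) == data_iris_labels)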
Code example #4
plt.figure()
sum_squared_errors = []
for n_clusters in range(2, 10):
    kmeans = KMeans(n_clusters=n_clusters, max_iter=100)
    kmeans.fit(iris_data, normalize=True)
    sse = kmeans.sum_squared_error()
    sum_squared_errors.append(sse)

plt.plot(range(2, 10), sum_squared_errors)  # x-axis: actual cluster counts, not list indices
plt.xlabel('# of clusters')
plt.ylabel('SSE')
plt.show()


# TODO 7: DBSCAN on the Iris data; plot the results the same way as for K-means
dbscan = DBScan(epsilon=0.5, min_points=3)
dbscan.fit(iris_data)

colors = {0: 'red', 1: 'green', 2: 'blue'}
plt.figure()

for idx, cluster in enumerate(dbscan.clusters):
    for datum in cluster.data:  # plot the points
        plt.scatter(datum[0], datum[1], c=colors[idx % 3])  # wrap in case more than three clusters are found

plt.xlabel('Sepal width')
plt.ylabel('Petal length')
plt.show()

Code example #5
def test_dbscan(db, radius=0.3, min_pts=50):
    dbscan = DBScan(db, radius, min_pts)
    clusters = dbscan.cluster()
    plot_clusters(clusters)

    print('Found %d clusters' % len(clusters))
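A usage sketch for this test harness; the two Gaussian blobs plus uniform background noise are illustrative data only, and DBScan and plot_clusters are assumed to come from the surrounding module:

import numpy as np

rng = np.random.default_rng(42)
# Two dense 2-D blobs that DBSCAN should separate, plus sparse noise points
db = np.vstack([
    rng.normal(loc=(0.0, 0.0), scale=0.05, size=(200, 2)),
    rng.normal(loc=(1.0, 1.0), scale=0.05, size=(200, 2)),
    rng.uniform(-1.0, 2.0, size=(20, 2)),
])
test_dbscan(db, radius=0.3, min_pts=50)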
Code example #6
    data.append((x1, y1))
    data.append((x2, y2))

plt.show()

# TODO 5: K-means on this data
kmeans = KMeans(n_clusters=2, max_iter=100)
kmeans.fit(data)

colors = {0: 'red', 1: 'green'}
plt.figure()
for idx, cluster in enumerate(kmeans.clusters):
    plt.scatter(cluster.center[0], cluster.center[1], c=colors[idx], marker='x', s=200)  # plot the cluster centers
    for datum in cluster.data:  # plot the points
        plt.scatter(datum[0], datum[1], c=colors[idx])

plt.show()

# TODO 7: DBSCAN on this data
dbscan = DBScan(epsilon=1.2, min_points=3)
dbscan.fit(data)

colors = {0: 'red', 1: 'pink', 2: 'yellow', 3: 'cyan', 4: 'green', 5: 'blue'}
plt.figure()

for idx, cluster in enumerate(dbscan.clusters):
    for datum in cluster.data:  # plot the points
        plt.scatter(datum[0], datum[1], c=colors[idx % 6])

plt.show()
Code example #7
import pandas as pd
from sklearn.decomposition import PCA

# read_datasets, scale_datasets, split_data, DBScan and the plotting/IO
# helpers are assumed to come from this project's own modules.


class Main:

    exp_execute = 4

    ############################################
    # Experiment DBScan 1
    # Verify distances with and without scale
    ############################################

    if exp_execute == 1:

        for d, scale in zip([0, 0, 1, 1], [False, True, False, True]):

            datasets = read_datasets()

            if scale:
                datasets = scale_datasets(datasets)

            datasets = split_data(datasets)

            dist = 1.0
            min_neighb = 3

            model = DBScan(distance=dist, min_neighbors=min_neighb)
            dataset = datasets[d]

            model.fit(dataset['train'].to_numpy())
            res = model.get_description_dist()
            print(res)

    ############################################
    # Experiment DBScan 2
    # Execute model for Dataset 1
    ############################################

    if exp_execute == 2:

        datasets = read_datasets()

        datasets = scale_datasets(datasets)
        datasets = split_data(datasets)

        for dist in [0.05, 0.1, 0.15]:

            df_merge = datasets[0]['train'].copy()
            df_merge['Type'] = -2
            df_merge['Cluster'] = -2
            df_merge['Experiment'] = 'remove'

            for min_neighb in [3, 5, 8]:

                model = DBScan(distance=dist, min_neighbors=min_neighb)
                dataset = datasets[0]

                res = model.fit(dataset['train'].to_numpy())
                pred = model.predict(dataset['train'].to_numpy(), res,
                                     dataset['test'].to_numpy())

                # plot(dataset['train'].copy(), res, file_name=f'dbscan_vardist_{dist}_{min_neighb}.png')

                write_results(datasets,
                              f'dbscan_vardist_{dist}_{min_neighb}_datasets')
                write_results(res, f'dbscan_vardist_{dist}_{min_neighb}_res')
                write_results(pred, f'dbscan_vardist_{dist}_{min_neighb}_pred')

                df_merge = pd.concat([
                    df_merge,
                    merge_result(dataset['train'].copy(), res,
                                 f'Distance: {dist} Neighbor: {min_neighb}')
                ])

            plot_all(df_merge, file_name=f'all_dbscan_{dist}.png')

    ############################################
    # Experiment DBScan 3
    # Execute model for Dataset 1
    ############################################

    if exp_execute == 3:

        datasets = read_datasets()

        datasets = scale_datasets(datasets)
        datasets = split_data(datasets)

        dist = 0.1
        min_neighb = 3

        model = DBScan(distance=dist, min_neighbors=min_neighb)
        dataset = datasets[0]

        res = model.fit(dataset['train'].to_numpy())
        pred = model.predict(dataset['train'].to_numpy(), res,
                             dataset['test'].to_numpy())

        plot_pred(dataset['test'].copy(),
                  pred,
                  file_name=f'dbscan_pred_{dist}_{min_neighb}.png')

    ############################################
    # Experiment DBScan 4
    # Execute model for Dataset 2
    ############################################

    if exp_execute == 4:

        datasets = read_datasets()

        datasets = scale_datasets(datasets)
        datasets = split_data(datasets)

        for dist in [0.05, 0.1, 0.15]:

            df_merge = datasets[1]['train'].copy()
            df_merge['Type'] = -2
            df_merge['Cluster'] = -2
            df_merge['Experiment'] = 'remove'

            for min_neighb in [3, 5, 8]:
                model = DBScan(distance=dist, min_neighbors=min_neighb)
                dataset = datasets[1]

                res = model.fit(dataset['train'].to_numpy())
                pred = model.predict(dataset['train'].to_numpy(), res,
                                     dataset['test'].to_numpy())

                # plot(dataset['train'].copy(), res, file_name=f'dbscan_vardist_{dist}_{min_neighb}.png')

                pca = PCA(n_components=2, random_state=42)
                df_pca = pca.fit_transform(dataset['train'].copy())

                write_results(
                    datasets, f'dbscan_vardist_{dist}_{min_neighb}_datasets_2')
                write_results(res, f'dbscan_vardist_{dist}_{min_neighb}_res_2')
                write_results(pred,
                              f'dbscan_vardist_{dist}_{min_neighb}_pred_2')

                df_merge = pd.concat([
                    df_merge,
                    merge_result(df_pca, res,
                                 f'Distance: {dist} Neighbor: {min_neighb}')
                ])

            plot_all(df_merge, file_name=f'all_dbscan_{dist}_2.png')