Python DBSCAN.items Examples

Programming Language: Python

Namespace/Package Name: sklearn.cluster

Class/Type: DBSCAN

Method/Function: items

Examples at hotexamples.com: 1

Python DBSCAN.items - 1 example found. This is the top-rated real-world Python example of sklearn.cluster.DBSCAN.items extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

fit_predict(30)

DBSCAN(30)

fit(30)

get_params(13)

max(11)

labels_(7)

fit_transform(3)

components_(2)

__class__(2)

core_sample_indices_(2)

append(2)

_estimator_type(2)

__init__(2)

as_matrix(1)

create_clusters(1)

_get_predict_signature(1)

_get_covars(1)

_fitid(1)

get_max_distance(1)

items(1)

__str__(1)

start(1)

Example #1

Show file

File: cluster_algo.py Project: zcyyc/PUAD

def dbscan(sim_matrix, radius, min_samples, dist_measure, max_radius):
    """Cluster KPIs with DBSCAN, then reassign DBSCAN's noise points by medoid.

    Steps:
      1. Fit ``DBSCAN(metric='precomputed')`` on ``sim_matrix``.
      2. For every cluster found, collect its KPIs (looked up in ``TRAIN_KPI``)
         and compute a medoid with ``get_the_medoids``.
      3. For every point DBSCAN labelled ``-1``, call ``assignment`` against
         the medoids and move the point into the class it returns.
      4. Persist the results (see "Side effects").

    Args:
        sim_matrix: Square pairwise matrix passed to ``DBSCAN.fit`` and to
            ``get_the_medoids``.
            NOTE(review): with ``metric='precomputed'`` scikit-learn interprets
            this as a *distance* matrix -- confirm the values are distances
            despite the "sim" name.
        radius: Passed to DBSCAN as ``eps``.
        min_samples: Passed to DBSCAN as ``min_samples``.
        dist_measure: Forwarded to ``assignment`` as ``dist_category``.
        max_radius: Only used to build the names of the output HDF files.

    Returns:
        tuple: ``(medoids, labels_cal)`` where ``medoids`` is the list of
        per-cluster medoids (position == cluster id) and ``labels_cal`` is the
        label array after noise reassignment.

    Side effects:
        * Appends to ``all_kpi``, ``all_cla`` and ``all_dist``, which are not
          defined in this function (presumably module-level lists -- verify),
          and writes them to a hard-coded CSV path.
        * Writes ``cluster_result_r<max_radius>.hdf`` and
          ``medoids_r<max_radius>.hdf`` under ``EXP_ROOT``.
        * Prints one line per reassigned KPI to stdout and logs progress.
    """
    logger = getLogger(__name__)

    start = datetime.now()
    model = DBSCAN(eps=radius,
                   min_samples=min_samples,
                   metric='precomputed',
                   n_jobs=-1).fit(sim_matrix)
    end = datetime.now()
    logger.info('dbscan running time:%s', end - start)

    # This is an alias, not a copy: the reassignment of noise points below
    # also rewrites model.labels_ in place.
    labels_cal = model.labels_

    # Number of clusters found by DBSCAN, ignoring the noise label if present.
    num_clusters = len(set(labels_cal)) - (1 if -1 in labels_cal else 0)
    logger.info('number of clusters: %d', num_clusters)

    cluster = {}
    medoids = []

    for cla in range(num_clusters):
        index = [idx for idx, label in enumerate(labels_cal) if label == cla]
        logger.info('class %d: %d', cla, len(index))
        cluster[cla] = [TRAIN_KPI[idx] for idx in index]

        medoid, min_dist = get_the_medoids(sim_matrix, index)
        medoids.append(medoid)
        logger.info(medoid)
        logger.info(min_dist)

    # Reassign the curves DBSCAN marked as noise according to their relation to
    # each cluster's medoid. The -1 bucket is pre-created so that `assignment`
    # can keep a point as noise; presumably it returns -1 when the point is
    # too far from every medoid -- verify against `assignment`.
    # (An alternative strategy, assigning each noise point to its nearest
    # already-clustered curve via `assign_to_nearest`, is not used here.)
    index = [idx for idx, label in enumerate(labels_cal) if label == -1]
    cluster[-1] = []
    for uuid in index:
        kpi = TRAIN_KPI[uuid]
        data_arr = data_dict[kpi]
        cla, it_dist = assignment(medoids,
                                  data_arr,
                                  dist_category=dist_measure)
        cluster[cla].append(kpi)
        labels_cal[uuid] = cla
        print('KPI %s belongs to class %d' % (kpi, cla))
        logger.info('KPI %s belongs to class %d', kpi, cla)
        all_kpi.append(kpi)
        all_cla.append(cla)
        all_dist.append(it_dist)

    # NOTE(review): all_kpi/all_cla/all_dist come from outside this function,
    # so repeated calls may accumulate rows; the output path is hard-coded.
    dataframe = pd.DataFrame({
        'uuid': all_kpi,
        'cluster': all_cla,
        'dist': all_dist
    })
    dataframe.to_csv("/home/jialingxiang/NewDTWFrame/SplitKPI/all_dist.csv",
                     index=False,
                     sep=',')

    # Flatten {cluster id: [kpi, ...]} into {kpi: cluster id}. Clusters are
    # visited in insertion order (0..k-1, then -1), so a KPI listed in more
    # than one bucket keeps the last one visited.
    kpi_to_cluster = {}
    for key, members in cluster.items():
        logger.info('class %d: %d', key, len(members))
        for value in members:
            kpi_to_cluster[value] = key
    result_df = pd.DataFrame(list(kpi_to_cluster.items()),
                             columns=['uuid', 'cluster'])
    result_df.to_hdf(EXP_ROOT + 'cluster_result_r%f.hdf' % max_radius,
                     key='/cluster_result',
                     mode='w',
                     format='table')

    # One row per cluster: (cluster id, medoid).
    medoids_df = pd.DataFrame(list(enumerate(medoids)),
                              columns=['cluster', 'medoid'])
    medoids_df.to_hdf(EXP_ROOT + 'medoids_r%f.hdf' % max_radius,
                      key='/medoids',
                      mode='w',
                      format='table')

    return medoids, labels_cal