Python OPTICS Beispiele, sklearn.cluster.OPTICS Python Beispiele

Beispiel #1

0

Datei anzeigen

    def sort_bacteria_in_cluster(self):
        """
        Group the bacteria of the biofilm into the clusters found by OPTICS.

        The transposed ``self.position_matrix`` is clustered with
        ``OPTICS(min_samples=2, metric='euclidean')`` and every entry of
        ``self.bacteria`` is appended to the list that belongs to its label.

        Returns:
            A list of lists of bacteria, one list per distinct label. Lists
            for the labels ``0..k-1`` come first, in label order. If OPTICS
            flagged any sample as noise (label ``-1``), those bacteria are
            collected in one additional list at the very end.

        Raises:
            ValueError: if the number of grouped bacteria differs from
                ``len(self.bacteria)`` (e.g. when there are more bacteria
                than clustered samples, because ``zip`` stops early).
        """
        # Transpose so that each row handed to OPTICS is one sample
        # (assumes position_matrix is stored as coordinates x bacteria - TODO confirm).
        data = self.position_matrix.transpose()

        model = OPTICS(min_samples=2, metric='euclidean')
        labels = model.fit_predict(data)

        n_groups = len(np.unique(labels))
        clusters = [[] for _ in range(n_groups)]
        for bacteria, label in zip(self.bacteria, labels):
            # The noise label -1 is mapped onto the last list explicitly
            # instead of relying on negative list indexing.
            slot = label if label >= 0 else n_groups - 1
            clusters[slot].append(bacteria)

        # check that every bacterium ended up in exactly one list
        assigned = sum(len(cluster) for cluster in clusters)
        if assigned != len(self.bacteria):
            raise ValueError(f"{abs(assigned - len(self.bacteria))} bacteria where not sorted in a cluster.")

        return clusters

Beispiel #2

0

Datei anzeigen

Datei: test_modeling.py Projekt: Coldwater30/bachelor-thesis

    def setUp(self):
        """Fit OPTICS on six synthetic blobs and attach the result to a ``modeling.TBH`` fixture.

        Each blob is a ``(250, 3)`` array whose first column stays zero and
        whose last two columns hold a centre plus scaled standard-normal
        noise. Only those last two columns are stacked and clustered; the
        full three-column arrays are stored on the fixture as ``locH``.
        """
        n_points_per_cluster = 250
        np.random.seed(0)

        # (centre, spread) per blob; the order fixes the order of the random draws.
        blob_specs = (
            ([-5, -2], .8),
            ([4, -1], .1),
            ([0, -2], .2),
            ([-2, 3], .3),
            ([3, -2], 1.6),
            ([5, 6], 2),
        )
        locations = []
        for centre, spread in blob_specs:
            blob = np.zeros((n_points_per_cluster, 3))
            blob[:, 1:3] = (centre + spread * np.random.randn(n_points_per_cluster, 2))
            locations.append(blob)

        X = np.vstack([blob[:, 1:3] for blob in locations])

        fitted = OPTICS(min_samples=50, xi=.05, min_cluster_size=.05)
        fitted.fit(X)

        self.tbhg = modeling.TBH()
        self.tbhg.optics = fitted
        self.tbhg.locH = tuple(locations)

Beispiel #3

0

Datei anzeigen

Datei: utils.py Projekt: anuprulez/clade_prediction

def find_cluster_indices(output_seqs, batch_size, datatype="train_y"):
    """Cluster the output sequences with OPTICS and group their indices by label.

    The sequences are converted with ``convert_to_array`` and clustered with
    ``OPTICS(min_samples=2, min_cluster_size=2)``. A table of
    (sequence, label) rows is also written to
    ``data/generated_files/clustered_output_seqs_data_<datatype>.csv``.

    Parameters:
        output_seqs: indexable collection of sequences; element ``i`` must
            correspond to row ``i`` of the converted feature array.
        batch_size: unused by this function; kept for interface compatibility.
        datatype: tag used in the log line and in the CSV file name.

    Returns:
        ``(cluster_labels, cluster_indices_dict, scatter_df)`` where
        ``cluster_indices_dict`` maps each label to the list of sample
        indices carrying it and ``scatter_df`` has the columns
        ``output_seqs`` and ``clusters``.
    """
    print("Clustering {}".format(datatype))
    features = convert_to_array(output_seqs)
    clustering_type = OPTICS(min_samples=2, min_cluster_size=2)
    cluster_labels = clustering_type.fit_predict(features)
    # Every distinct label is counted here, including the noise label if present.
    print("Number of clusters: {}".format(len(set(cluster_labels))))

    # label -> indices of the samples that received this label
    cluster_indices_dict = dict()
    for index, label in enumerate(cluster_labels):
        cluster_indices_dict.setdefault(label, []).append(index)

    sequences = [output_seqs[index] for index in range(len(cluster_labels))]
    scatter_df = pd.DataFrame(list(zip(sequences, cluster_labels)),
                              columns=["output_seqs", "clusters"])
    scatter_df.to_csv(
        "data/generated_files/clustered_output_seqs_data_{}.csv".format(
            datatype))
    return cluster_labels, cluster_indices_dict, scatter_df

Beispiel #4

0

Datei anzeigen

Datei: demo93_clusteringevaluation_optics_visualaid.py Projekt: mahnooranjum/Programming_DataScience

def visual(c, X, y):
    """Cluster ``X`` with OPTICS, print evaluation output and plot true vs. predicted groups.

    Two scatter plots of the first two columns of ``X`` are shown: one
    coloured by the reference labels ``y`` ("Dataset") and one coloured by
    the OPTICS prediction ("Cluster").

    Parameters:
        c: unused by this function; kept for interface compatibility.
        X: 2-D array of samples; columns 0 and 1 are plotted.
        y: reference label per sample.
    """
    from sklearn.cluster import OPTICS
    cluster_object = OPTICS(min_cluster_size=100)
    y_pred = cluster_object.fit_predict(X)
    clusters = np.unique(y_pred)
    print("Cluster Labels")
    print(clusters)
    print("Evaluation")
    evaluation_labels(y, y_pred)
    evaluation(X, y_pred)

    # Reference plot: iterate over the labels that actually occur in y.
    # Iterating over the predicted labels here would drop or mix true classes
    # whenever the two label sets differ (e.g. the OPTICS noise label -1).
    for label in np.unique(y):
        row_idx = np.where(y == label)
        plt.scatter(X[row_idx, 0], X[row_idx, 1], label=str(label))
    plt.title('Dataset')
    plt.xlabel('X1')
    plt.ylabel('X2')
    # The scatter calls carry labels so that the legend has entries to show.
    plt.legend()
    plt.show()

    plt.figure()
    for cluster in clusters:
        row_idx = np.where(y_pred == cluster)
        plt.scatter(X[row_idx, 0], X[row_idx, 1], label=str(cluster))
    plt.title('Cluster')
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.legend()
    plt.show()

Beispiel #5

0

Datei anzeigen

def perform_optics_clustering(data, program_options: Options) -> ClusteredData:
    """Cluster ``data`` with OPTICS and wrap the outcome in a ``ClusteredData``.

    Every non-negative label becomes one ``FULL_CLUSTER`` whose centre is the
    mean of its member rows (computed here, since the fitted model is only
    read for ``labels_``). Each sample labelled ``-1`` is added individually
    as an ``UNCLASSIFIED_NODE_CLUSTER`` with itself as centre.
    """
    result = ClusteredData(data, list(), program_options=program_options)

    fitted = OPTICS(min_samples=program_options.OPTICS_MIN_SAMPLES, n_jobs=-1)
    fitted.fit(data)
    labels = fitted.labels_

    for label in range(labels.max() + 1):
        members = data[labels == label]
        result.add_cluster(
            Cluster(cluster_centre=members.mean(axis=0),
                    nodes=members,
                    cluster_type=ClusterType.FULL_CLUSTER,
                    program_options=program_options))

    # Nothing was left unclassified: done.
    if labels.min() != -1:
        return result

    for node in data[labels == -1]:
        result.add_unclassified_node(
            Cluster(node, [node],
                    cluster_type=ClusterType.UNCLASSIFIED_NODE_CLUSTER,
                    program_options=program_options))

    return result

Beispiel #6

0

Datei anzeigen

Datei: cluster_models.py Projekt: rupakc/Large-Scale-Preprocessing-Evaluation

def get_clustered_data(data_matrix,
                       clustering_algorithm=model_constants.KMEANS,
                       distance_metric='euclidean',
                       num_clusters=3):
    """Fit the requested clustering estimator and return ``(labels, fitted_model)``.

    ``clustering_algorithm`` is compared case-insensitively against the
    ``model_constants`` names; any value that matches none of them falls back
    to ``KMeans`` with ``random_state=42``. ``distance_metric`` is forwarded
    to the estimators constructed with it below, ``num_clusters`` to those
    that take a cluster count.
    """
    requested = clustering_algorithm.lower()

    if requested == model_constants.AFFINITY_PROP:
        estimator = AffinityPropagation(affinity=distance_metric)
    elif requested == model_constants.DBSCAN:
        estimator = DBSCAN(metric=distance_metric)
    elif requested == model_constants.OPTICS:
        estimator = OPTICS(metric=distance_metric)
    elif requested == model_constants.MEANSHIFT:
        estimator = MeanShift()
    elif requested == model_constants.BIRCH:
        estimator = Birch(n_clusters=num_clusters)
    elif requested == model_constants.AGGLOMERATIVE:
        estimator = AgglomerativeClustering(n_clusters=num_clusters,
                                            affinity=distance_metric)
    else:
        estimator = KMeans(n_clusters=num_clusters, random_state=42)

    # Fitting and the return shape are the same for every estimator.
    estimator.fit(data_matrix)
    return estimator.labels_, estimator
Beispiel #7

0

Datei anzeigen

Datei: actions.py Projekt: bykov-alexei/Coursework

def cameras():
    """Render ``cameras.html`` with all cameras and one picture per person seen today.

    Today's occurrences (rows with a non-NULL ``e1``) are clustered with
    OPTICS on their ``e1`` .. ``e128`` columns. For every cluster with at
    least four members one member is picked at random and its
    ``human_picture`` path, with the first path segment replaced by a
    leading ``/``, is passed to the template.

    Clustering is skipped when there are fewer occurrences than the OPTICS
    ``min_samples`` setting, because fitting would raise in that case; the
    page is then rendered with an empty picture list.
    """
    conn, cursor = connect()
    try:
        cursor.execute("SELECT title, x, y, rstp, F, current_frame FROM cameras")
        camera_rows = cursor.fetchall()

        cursor.execute("SELECT * FROM occurrences WHERE DATE(`timestamp`)=CURDATE() AND e1 IS NOT NULL")
        today_occurrences = cursor.fetchall()
    finally:
        # Both result sets are fully fetched, so the connection can be
        # released here - also when one of the queries fails.
        conn.close()

    result_occurrences = []
    model = OPTICS()
    if len(today_occurrences) >= model.min_samples:
        arr = np.array([[to["e%i" % i] for i in range(1, 129)] for to in today_occurrences])
        model.fit(arr)
        indices = np.arange(len(today_occurrences))
        for i in range(np.max(model.labels_) + 1):
            person_indices = indices[model.labels_ == i]
            print(person_indices)
            if len(person_indices) < 4:
                continue
            index = np.random.choice(person_indices)
            result_occurrences.append('/'+'/'.join(today_occurrences[index]['human_picture'].split('/')[1:]))

    print(len(result_occurrences))

    return render_template('cameras.html', cameras=camera_rows, today_occurrences=result_occurrences)

Beispiel #8

0

Datei anzeigen

def optics_clustering(principal_components, principal_df):
    """Cluster the principal components with OPTICS, plot them and return a labelled frame.

    A copy of ``principal_df`` gets a ``Segment`` column holding the OPTICS
    label of every row, and its columns ``0``/``1``/``2``/``'y'`` are renamed
    to ``PC1``/``PC2``/``PC3``/``Race``. Each label is drawn as its own
    scatter series over the first two components. ``add_race_labels`` and
    ``calc_silhouette`` are called afterwards; the cluster count handed to
    ``calc_silhouette`` is the number of distinct labels.
    """
    segments_df = pd.concat([principal_df], axis=1)

    # NOTE(review): per the scikit-learn docs `eps` is only used with
    # cluster_method='dbscan'; with the default method it looks unused - confirm.
    optics_model = OPTICS(eps=5, min_samples=2)
    predicted = optics_model.fit_predict(principal_components)
    labels_found = unique(predicted)
    segments_df['Segment'] = optics_model.labels_

    # one scatter series per label so that each gets its own colour
    for label in labels_found:
        members = where(predicted == label)
        plt.scatter(principal_components[members, 0],
                    principal_components[members, 1],
                    s=75)

    column_names = {0: 'PC1', 1: 'PC2', 2: 'PC3', 'y': 'Race'}
    segments_df.rename(column_names, axis=1, inplace=True)
    print(segments_df)

    plt.title("OPTICS Clustering")
    add_race_labels(segments_df)
    calc_silhouette(data=principal_components,
                    prediction=predicted,
                    n_clusters=len(labels_found))
    return segments_df

Beispiel #9

0

Datei anzeigen

Datei: bindingdb.py Projekt: prtos/prot_repr

def cluster_proteins_by_sim(prot_graph_fname):
    """Cluster the matrix stored in ``prot_graph_fname`` and save two 2-D cluster plots.

    The file is expected to unpickle to a ``(nodes, adj_mat)`` pair.
    ``adj_mat`` is clustered with ``OPTICS(min_cluster_size=5, n_jobs=-1)``
    and then embedded twice - with ``eGTM`` and with ``TSNE`` - each
    embedding being written as a PNG coloured by cluster label next to the
    input file.
    """
    print('here')
    # NOTE(review): if `pkl` is the stdlib pickle module, loading can execute
    # arbitrary code from the file - only use with trusted inputs.
    with open(prot_graph_fname, 'rb') as fd:
        nodes, adj_mat = pkl.load(fd)

    clusters = OPTICS(min_cluster_size=5, n_jobs=-1).fit_predict(adj_mat)
    print(Counter(clusters))

    target_dir = os.path.dirname(prot_graph_fname)

    def _save_cluster_plot(transformer, file_name):
        # Embed adj_mat with `transformer` and write the coloured scatter plot.
        x, y = transformer.fit_transform(adj_mat).T
        cmap = plt.get_cmap('jet', np.max(clusters) + 2)
        cmap.set_under('gray')

        fig, ax = plt.subplots()
        ax.scatter(x, y, c=clusters, s=10, cmap=cmap)
        plt.savefig(os.path.join(target_dir, file_name))
        plt.close()

    _save_cluster_plot(eGTM(), 'protein_egtm_clusters.png')
    _save_cluster_plot(TSNE(n_components=2, n_iter_without_progress=10),
                       'protein_tsne_clusters.png')

Beispiel #10

0

Datei anzeigen

Datei: cluster.py Projekt: fwzhuang/hair_modeling

def optics(params):
    """Run OPTICS on a precomputed distance matrix loaded from a text file.

    ``params["distance_path"]`` names a file readable by ``np.loadtxt``.
    The matrix is clustered with
    ``OPTICS(eps=0.03, min_samples=10, metric='precomputed')``; the path,
    matrix shape, estimator, labels and the number of non-noise clusters are
    printed along the way.

    Returns:
        The label of every sample as a tuple of Python ints.
    """
    # Concatenating onto '' keeps the requirement that the path is a str.
    distance_path = '' + params["distance_path"]
    print(distance_path)
    distance = np.loadtxt(distance_path, dtype=np.float32)
    print(distance.shape)

    op = OPTICS(eps=0.03, min_samples=10, metric='precomputed')
    print(op)

    op.fit(distance)
    labels = op.labels_
    print(labels, labels.shape)

    # the noise label -1 does not count as a cluster
    distinct = set(labels)
    no_clusters = len(distinct) - (1 if -1 in distinct else 0)
    print(no_clusters, "no_clusters")

    return tuple(labels.tolist())

Beispiel #11

0

Datei anzeigen

 def clustering(self, min_cluster_size=5, min_samples=3, eps=1, cpu_threads=-1):
     """Cluster the amino acids of alignment column ``self.col`` and score the split.

     For every alignment row whose entry in the column is not ``'-'`` and
     whose amino acid has both ``start`` and ``end`` set, the two values are
     concatenated into one feature row. The rows are clustered with the
     estimator selected by ``self.method`` (``'optics'``, ``'hdbscan'`` or
     ``'dbscan'``).

     Sets ``self.mols_and_aa`` (the ``(molecule, amino acid index)`` pairs
     that were used) and ``self.lab`` (the labels). ``self.sil``,
     ``self.diam`` and ``self.score`` are only computed when the labels
     consist of exactly two distinct values without any noise point, or of
     three distinct values with exactly one noise point; otherwise all three
     are ``None``.

     Parameters:
         min_cluster_size: forwarded to HDBSCAN only.
         min_samples: forwarded to all three estimators.
         eps: forwarded to DBSCAN only.
         cpu_threads: forwarded as ``n_jobs`` to OPTICS and DBSCAN.

     Raises:
         ValueError: if ``self.method`` is none of the supported names.
     """
     clust_matr = []
     self.mols_and_aa = []
     for i, row in enumerate(alignment.alignment):
         num_of_aa = row[self.col]
         # gaps must be skipped before the amino acid lookup below
         if num_of_aa == '-':
             continue
         molecule = alignment.molecules[i]
         amino_acid = molecule.amino_acids[num_of_aa]
         if amino_acid.start is None or amino_acid.end is None:
             continue
         self.mols_and_aa.append((molecule, num_of_aa))
         clust_matr.append(np.hstack((amino_acid.start, amino_acid.end)))
     clust_matr = np.array(clust_matr)

     if self.method == 'optics':
         clusterer = OPTICS(metric='euclidean', n_jobs=cpu_threads, min_samples=min_samples)
     elif self.method == 'hdbscan':
         clusterer = hdbscan.HDBSCAN(metric='euclidean', min_cluster_size=min_cluster_size, min_samples=min_samples)
     elif self.method == 'dbscan':
         clusterer = DBSCAN(metric='euclidean', n_jobs=cpu_threads, eps=eps, min_samples=min_samples)
     else:
         # Without this branch an unknown method surfaced as an unbound local below.
         raise ValueError("Unsupported clustering method: {!r}".format(self.method))

     self.lab = clusterer.fit(clust_matr).labels_
     labels = self.lab
     n_noise = list(labels).count(-1)
     n_distinct = len(set(labels))
     if (n_noise == 0 and n_distinct == 2) or (n_noise == 1 and n_distinct == 3):
         kept = labels != -1
         self.sil = silhouette_score(clust_matr[kept], labels[kept], metric='euclidean')
         # full pairwise distance matrix between all feature rows
         dist_matr = np.array([[distance.euclidean(a, b) for a in clust_matr] for b in clust_matr])
         cluster_ids = [label for label in set(labels) if label != -1]
         # mean distance between the members of every ordered pair of distinct clusters
         mean_diams_clusters = [dist_matr[labels == i].T[labels == j].mean()
                                for i in cluster_ids for j in cluster_ids if i != j]
         self.diam = max(mean_diams_clusters)
         self.score = self.sil * self.diam
     else:
         self.sil = None
         self.diam = None
         self.score = None

Beispiel #12

0

Datei anzeigen

def plot_bacteria_as_clusters(data: pd.DataFrame,
                              save_path: Path,
                              save_fig: bool = False,
                              time_point=None):
    """Show the bacteria positions in 3-D, first plain and then coloured by OPTICS cluster.

    Parameters:
        data: frame whose ``'position'`` column holds, per bacterium, a
            sequence of positions indexable as ``[time_point][0..2]``.
        save_path: path whose parent directory receives ``cluster_plot.png``
            when ``save_fig`` is true.
        save_fig: additionally write the cluster figure to disk.
        time_point: index into each position sequence; ``None`` selects the
            last entry.
    """
    import numpy as np  # local import: only needed for the array conversion below

    if time_point is None:
        # set to last time step
        time_point = -1

    position_matrix = []
    for bac in data['position'].index:
        point = data['position'][bac][time_point]
        position_matrix.append([point[0], point[1], point[2]])
    # Plotting and clustering need the extracted coordinates, not the frame
    # itself: ``data[:, 0]`` is not valid indexing on a DataFrame.
    positions = np.array(position_matrix)

    fig = plt.figure()
    # add_subplot attaches the 3-D axes to the figure; a bare Axes3D(fig)
    # is no longer added automatically by current matplotlib releases.
    ax = fig.add_subplot(projection='3d')
    ax.scatter(positions[:, 0], positions[:, 1], positions[:, 2], s=30)
    ax.view_init(azim=200)
    plt.show()

    model = OPTICS(min_samples=2, metric='euclidean')
    labels = model.fit_predict(positions)

    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    ax.scatter(positions[:, 0], positions[:, 1], positions[:, 2], c=labels, s=30)
    ax.view_init(azim=200)
    if save_fig:
        # Save before showing: after a blocking show() the figure may
        # already be closed and the file would come out empty.
        fig.savefig(Path(save_path).parent / 'cluster_plot.png')
    plt.show()
    plt.close(fig)

Beispiel #13

0

Datei anzeigen

Datei: test_inferring.py Projekt: Coldwater30/bachelor-thesis

    def test_collect_Loactions(self):
        """``collect_locations`` returns the expected clusters for arguments 1 to 4.

        A tree is built from a hand-written cluster hierarchy and the clusters
        collected for each argument are compared, ignoring order, with the
        expected lists.
        """
        # TODO: mock optics or use namedtuple
        class OPTICS:
            """Bare stand-in that only carries ``cluster_hierarchy_``."""

        optics = OPTICS()
        optics.cluster_hierarchy_ = [
            [1, 7], [8, 15], [16, 20], [0, 25], [32, 40],
            [30, 49], [0, 50], [85, 98], [80, 99], [0, 99],
        ]

        root = util.build_tree(optics)

        def clusters_at(level):
            return [cnode.cluster for cnode in inferring.collect_locations(level, root)]

        actual = [clusters_at(level) for level in (1, 2, 3, 4)]
        expected = [
            [[0, 50], [80, 99]],
            [[0, 25], [30, 49], [85, 98]],
            [[1, 7], [8, 15], [16, 20], [32, 40]],
            [],
        ]
        for i, (found, wanted) in enumerate(zip(actual, expected)):
            with self.subTest(i=i):
                self.assertCountEqual(found, wanted, "i")

Beispiel #14

0

Datei anzeigen

def optics_fit_predict(X, min_samples=50, cluster_method='dbscan', eps=2):
    """Fit OPTICS on ``X`` and return one cluster label per sample.

    Parameters
    ----------
    X               : array, shape (n_samples, n_features), or (n_samples, n_samples)
    min_samples     : The number of samples in a neighborhood for a point to be considered as a core point.
    cluster_method  : 'dbscan' by default. Other available: 'xi'
    eps             : Distance threshold of the DBSCAN-style extraction. Only used when
                      ``cluster_method`` is not 'xi'.

    Returns
    -------
    labels: Prediction/labels. With ``cluster_method='xi'`` these are the labels of the
            fitted estimator; otherwise they are extracted from the fitted reachability
            data with ``cluster_optics_dbscan`` at ``eps``.
    """
    method = str(cluster_method)
    opt = OPTICS(min_samples=min_samples, cluster_method=method)
    opt.fit(X)

    if method == 'xi':
        # Honour the requested method: previously the xi result was always
        # discarded in favour of the DBSCAN extraction below.
        return opt.labels_

    labels = cluster_optics_dbscan(reachability=opt.reachability_,
                                   core_distances=opt.core_distances_,
                                   ordering=opt.ordering_,
                                   eps=eps)

    return labels

Beispiel #15

0

Datei anzeigen

Datei: clustering_helper.py Projekt: aswinvisva/dnn_vessel_heterogeneity

    def fit_model(self):
        '''
        Fit the model selected by ``self.method``, or load a previously saved one.

        When ``self.pretrained`` is false the model is built and fitted on
        ``self.data`` ("kmeans", "dbscan", "optics") or computed with
        ``linkage`` ("hierarichal"), and pickled to
        ``trained_models/<method>_model.pkl`` if ``self.save`` is set. For any
        other method name ``self.model`` is left untouched. When
        ``self.pretrained`` is true the model is unpickled from that file
        instead.

        :return: None
        '''

        if self.show_plots:
            self.elbow_method()

        model_path = "trained_models/%s_model.pkl" % self.method

        if self.pretrained:
            # NOTE(review): pickle.load can execute arbitrary code; only load
            # model files from a trusted location.
            with open(model_path, "rb") as model_file:
                self.model = pickle.load(model_file)
            return

        if self.method == "kmeans":
            self.model = KMeans(n_clusters=self.n_clusters)
            self.model.fit(self.data)
        elif self.method == "dbscan":
            self.model = DBSCAN(metric=self.metric, eps=0.15)
            self.model.fit(self.data)
        elif self.method == "optics":
            self.model = OPTICS(metric=self.metric)
            self.model.fit(self.data)
        elif self.method == "hierarichal":
            self.model = linkage(self.data, metric=self.metric)

        if self.save:
            # the context manager closes the file handle deterministically
            with open(model_path, "wb") as model_file:
                pickle.dump(self.model, model_file)

Beispiel #16

0

Datei anzeigen

Datei: mlpairs.py Projekt: streater512/oct_applications

    def find_pairs(self):
        """
        Cluster the securities with OPTICS and collect all within-cluster pairs.

        OPTICS is fitted on the transposed ``self.components_``. Securities
        labelled ``-1`` (noise) are dropped; for every remaining cluster all
        unordered pairs of its securities are generated.

        Sets ``self.pairs`` (a ``pd.Series`` of 2-tuples) and
        ``self.cluster_labels_`` (the raw OPTICS labels, noise included).

        Raises:
            ValueError: if ``self.returns_reduced`` is ``None``.
        """

        if self.returns_reduced is None:
            # Adjacent literals instead of a backslash continuation, which
            # embedded the source indentation in the message.
            raise ValueError("returns_reduced not found: must run "
                             ".reduce_PCA() before this function")

        # Initialize and fit OPTICS cluster to PCA components
        clustering = OPTICS()
        clustering.fit(self.components_.T)

        # Create cluster data frame and identify trading pairs
        clusters = pd.DataFrame({
            'security': self.securities,
            'cluster': clustering.labels_
        })
        # clusters with label == -1 are 'noise'
        clusters = clusters[clusters['cluster'] != -1]

        # All unordered pairs within each cluster, flattened into one list
        pairs = [
            pair
            for _, members in clusters.groupby('cluster')['security']
            for pair in combinations(members, 2)
        ]

        print(f"Found {len(pairs)} potential pairs")

        self.pairs = pd.Series(pairs)
        self.cluster_labels_ = clustering.labels_

Beispiel #17

0

Datei anzeigen

    def __init__(self, algorithm: str, n_clusters: int = 5, verbose=False):
        """
        Initialize the classifier
        :param algorithm: The name of the clustering algorithm: "KMeans",
            "OPTICS", or "AgglomerativeClustering_<linkage>" where the part
            after the underscore is passed on as the linkage method
        :param n_clusters: Number of clusters. Ignored for density based algorithms
        :param verbose: Print more...
        :raises ValueError: if ``algorithm`` is not supported, or if an
            agglomerative name does not contain exactly one underscore
        """
        self.data = None
        self.verbose = verbose

        if algorithm == "KMeans":
            self.sklearn_clustering = KMeans(verbose=verbose,
                                             n_clusters=n_clusters)
        elif algorithm.startswith("AgglomerativeClustering"):
            # only the suffix after the underscore is needed
            _, linkage_method = algorithm.split("_")
            self.sklearn_clustering = AgglomerativeClustering(
                linkage=linkage_method, n_clusters=n_clusters)
        elif algorithm == "OPTICS":
            self.sklearn_clustering = OPTICS(min_samples=5)
        else:
            # ValueError is a subclass of Exception, so existing handlers
            # that catch Exception keep working.
            raise ValueError(
                "Unsupported clustering type {0}. Use one of {1}".format(
                    algorithm, self.supported_algos))

        self.algorithm = algorithm
        self.count_vectorizer = None
        self.tfidf_transformer = None

Beispiel #18

0

Datei anzeigen

def make_autoencoder(data, lr=0.001, enc_dim=100):
    """Build an autoencoder, its encoder and a cluster-probability head.

    OPTICS is fitted on ``minmax.fit_transform(data)``; the number of
    distinct labels it produces sets the width of the softmax output of the
    probability head.

    Returns:
        ``(autoencoder, encoder, probability_model, fitted_optics)``. Only the
        autoencoder is compiled (Adam with learning rate ``lr``, MAE loss,
        MSE metric).
    """
    # Auto encoder layers
    features_in = Input(shape=products_shape, name='FeaturesInput')
    encoded = Dense(enc_dim,
                    activation='relu',
                    kernel_initializer=he_normal(1),
                    name='AE_feature_reduction')(features_in)
    decoded = Dense(products_shape[0], activation='relu', name='AE_3')(encoded)

    # inspired by https://www.frontiersin.org/articles/10.3389/fgene.2018.00585/full
    # clustering layers (sized with the help of OPTICS): the head outputs the
    # probability of one product belonging to each of the found clusters
    opt = OPTICS()
    opt.fit(minmax.fit_transform(data))
    n_outputs = len(np.unique(opt.labels_))
    print('Optimal number of cluster:', n_outputs)

    hidden = Dense(enc_dim // 2,
                   activation='relu',
                   kernel_initializer=he_normal(1))(encoded)
    normalised = BatchNormalization()(hidden)
    membership = Dense(n_outputs, activation='softmax',
                       name='Probability_Product')(normalised)

    autoencoder_ = Model(inputs=features_in, outputs=decoded)
    encoder_ = Model(inputs=features_in, outputs=encoded)
    p_prob = Model(inputs=features_in, outputs=membership)

    autoencoder_.compile(optimizer=Adam(learning_rate=lr),
                         loss='mae',
                         metrics=['mse'])

    return autoencoder_, encoder_, p_prob, opt

Beispiel #19

0

Datei anzeigen

Datei: optics_exploratory.py Projekt: rabi3elbeji/fakenews

def exploratory_analysis(dataset: str, samples=0.1, eps=np.inf) -> None:
    """Standardise a CSV data set, cluster it with OPTICS and print the cluster count.

    Parameters:
        dataset: path of a comma-separated, UTF-8 encoded numeric file.
        samples: forwarded to OPTICS as ``min_samples``.
        eps: forwarded to OPTICS as ``max_eps``.

    The printed count excludes the noise label ``-1``.
    """
    X = np.genfromtxt(dataset, delimiter=',', encoding='utf8')
    scaler = StandardScaler(copy=False)
    X_transformed = scaler.fit_transform(X)
    clust = OPTICS(min_samples=samples, max_eps=eps, n_jobs=2)
    # Cluster the scaled data explicitly instead of relying on copy=False
    # having modified X in place.
    labels = clust.fit_predict(X_transformed)
    # -1 marks noise samples, not a cluster
    n_clusters = len(set(labels) - {-1})
    print("# clusters: {0}".format(n_clusters))

Beispiel #20

0

Datei anzeigen

def cluster_embedded_maps_optics(aligned_maps):
    """Flatten every map into one row, cluster the rows with OPTICS and return the labels."""
    flattened = [xmap.flatten() for xmap in aligned_maps]
    features = np.vstack(flattened).astype(np.float64)

    model = OPTICS()
    model.fit(features)
    return model.labels_

Beispiel #21

0

Datei anzeigen

 def create_clusters(self, min_samples):
     """Cluster ``self.performance_features`` with OPTICS and derive the cluster-based pairs.

     The labels are stored in ``self._clusters`` as a ``pd.Series`` indexed by
     ``self.performance.columns``; ``self._create_cluster_based_pairs()`` is
     called afterwards.

     Parameters:
         min_samples: forwarded to ``OPTICS``.
     """
     optics = OPTICS(min_samples=min_samples)
     clustering = optics.fit(self.performance_features)
     # A noise-ratio expression used to be evaluated here and thrown away;
     # it had no effect and was removed.
     self._clusters = pd.Series(clustering.labels_,
                                index=self.performance.columns)
     self._create_cluster_based_pairs()

Beispiel #22

0

Datei anzeigen

Datei: plot_optics.py Projekt: as133/scikit-learn

import matplotlib.pyplot as plt

# Generate sample data

# Fixed seed so that the random blobs below are reproducible.
np.random.seed(0)
n_points_per_cluster = 250

# Six 2-D blobs: a centre offset plus scaled standard-normal noise each.
C1 = [-5, -2] + .8 * np.random.randn(n_points_per_cluster, 2)
C2 = [4, -1] + .1 * np.random.randn(n_points_per_cluster, 2)
C3 = [1, -2] + .2 * np.random.randn(n_points_per_cluster, 2)
C4 = [-2, 3] + .3 * np.random.randn(n_points_per_cluster, 2)
C5 = [3, -2] + 1.6 * np.random.randn(n_points_per_cluster, 2)
C6 = [5, 6] + 2 * np.random.randn(n_points_per_cluster, 2)
# All blobs stacked into one sample matrix.
X = np.vstack((C1, C2, C3, C4, C5, C6))

# NOTE(review): `rejection_ratio` and `extract_dbscan` (used below) look like
# a pre-release OPTICS API - confirm against the installed scikit-learn.
clust = OPTICS(min_samples=9, rejection_ratio=0.5)

# Run the fit
clust.fit(X)

# Two additional labelings for the arguments 0.25 and 0.75; the first
# element of each returned pair is discarded.
_, labels_025 = clust.extract_dbscan(0.25)
_, labels_075 = clust.extract_dbscan(0.75)

# Reachability values and labels re-arranged into the OPTICS ordering,
# plus a matching 0..n-1 index axis.
space = np.arange(len(X))
reachability = clust.reachability_[clust.ordering_]
labels = clust.labels_[clust.ordering_]

# 2x3 grid: ax1 spans the whole top row, ax2 is the bottom-left cell.
plt.figure(figsize=(10, 7))
G = gridspec.GridSpec(2, 3)
ax1 = plt.subplot(G[0, :])
ax2 = plt.subplot(G[1, 0])

Beispiel #23

0

Datei anzeigen

Datei: plot_optics.py Projekt: daniel-perry/scikit-learn

import numpy as np

# Generate sample data

# Fixed seed so that the random blobs below are reproducible.
np.random.seed(0)
n_points_per_cluster = 250

# Six 2-D blobs: a centre offset plus scaled standard-normal noise each.
C1 = [-5, -2] + .8 * np.random.randn(n_points_per_cluster, 2)
C2 = [4, -1] + .1 * np.random.randn(n_points_per_cluster, 2)
C3 = [1, -2] + .2 * np.random.randn(n_points_per_cluster, 2)
C4 = [-2, 3] + .3 * np.random.randn(n_points_per_cluster, 2)
C5 = [3, -2] + 1.6 * np.random.randn(n_points_per_cluster, 2)
C6 = [5, 6] + 2 * np.random.randn(n_points_per_cluster, 2)
# All blobs stacked into one sample matrix.
X = np.vstack((C1, C2, C3, C4, C5, C6))

clust = OPTICS(min_samples=50, xi=.05, min_cluster_size=.05)

# Run the fit
clust.fit(X)

# Two DBSCAN-style labelings derived from the same fitted reachability data,
# one with eps=0.5 and one with eps=2; no refit is needed for either.
labels_050 = cluster_optics_dbscan(reachability=clust.reachability_,
                                   core_distances=clust.core_distances_,
                                   ordering=clust.ordering_, eps=0.5)
labels_200 = cluster_optics_dbscan(reachability=clust.reachability_,
                                   core_distances=clust.core_distances_,
                                   ordering=clust.ordering_, eps=2)

# Reachability values and labels re-arranged into the OPTICS ordering,
# plus a matching 0..n-1 index axis.
space = np.arange(len(X))
reachability = clust.reachability_[clust.ordering_]
labels = clust.labels_[clust.ordering_]