Example #1
def map_clusters(n_list, n_clusters):
    # define the model
    model = Birch(threshold=0.01, n_clusters=n_clusters)
    # fit the model
    model.fit(n_list)
    # assign a cluster to each example
    yhat = model.predict(n_list)
    # retrieve unique clusters
    clusters = unique(yhat)
    dic = {}
    # collect the row indexes of the samples in each cluster
    for cluster in clusters:
        row_ix = where(yhat == cluster)
        dic[cluster] = row_ix[0]
    return dic
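For instance, assuming numpy (as np and its unique/where) and sklearn's Birch are imported at module level as the snippet expects, a minimal call could look like this (hypothetical data; the cluster numbering is arbitrary):

points = np.array([[0.0, 0.0], [0.1, 0.0], [5.0, 5.0]])
print(map_clusters(points, n_clusters=2))  # e.g. {0: array([0, 1]), 1: array([2])}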
    def _runAlgorithm(self):
        birch = Birch(branching_factor=50,
                      n_clusters=self.params['birch'],
                      threshold=0.5)
        birch.fit(self.m_data)
        self.m_resultLabels = birch.labels_
Example #3
def birch_clustering(principal_components, principal_df, number_of_clusters):
    final_df = principal_df.copy()  # concat of a single frame is just a copy
    model = Birch(threshold=0.01, n_clusters=number_of_clusters)
    # fit the model
    model.fit(principal_components)
    # assign a cluster to each example
    yhat = model.predict(principal_components)
    # retrieve unique clusters
    clusters = unique(yhat)
    final_df['Segment'] = model.labels_
    # create scatter plot for samples from each cluster
    for cluster in clusters:
        # get row indexes for samples with this cluster
        row_ix = where(yhat == cluster)
        # create scatter of these samples
        plt.scatter(principal_components[row_ix, 0],
                    principal_components[row_ix, 1],
                    s=75)
    final_df.rename({
        0: 'PC1',
        1: 'PC2',
        2: 'PC3',
        'y': 'Race'
    },
                    axis=1,
                    inplace=True)
    plt.title("BIRCH Clustering")
    add_race_labels(final_df)
    calc_silhouette(data=principal_components,
                    prediction=yhat,
                    n_clusters=len(clusters))
    return final_df
Example #4
    def birch_enrich(self, input_clustering, numclusters=10, threshold=1.7):
        """Enrich the training set with the BIRCH clustering algorithm.
        BIRCH (balanced iterative reducing and clustering using hierarchies) is an unsupervised data mining algorithm
        used to perform hierarchical clustering over particularly large data sets. An advantage of BIRCH is its ability
        to incrementally and dynamically cluster incoming multi-dimensional metric data points in an attempt to produce
        the best-quality clustering for a given set of resources (memory and time constraints). In most cases, BIRCH
        only requires a single scan of the database.
        :param numclusters: Number of clusters
        :type numclusters: int
        :param threshold: The radius of the subcluster obtained by merging a new sample and the closest subcluster
        must be less than this threshold.
        :type threshold: float
        """
        self.X = self.X.astype(float)
        birch = Birch(threshold=threshold, n_clusters=numclusters)
        birch.fit(input_clustering)
        labels = birch.labels_
        cluster_centers = birch.subcluster_centers_
        n_features = len(self.vocabulary)

        total = 0
        for x in range(len(self.X)):
            total = total + np.count_nonzero(self.X[x])
        mean_n_features_in_docs = total / len(self.X)

        for x in range(len(self.X)):
            # gamma scales the update by document length: shorter documents get a stronger pull
            gamma = mean_n_features_in_docs / np.count_nonzero(self.X[x])
            x_label = labels[x]
            center_vector = cluster_centers[x_label]
            for i in range(n_features):
                self.X[x][i] = self.X[x][i] + gamma * center_vector[i]
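The per-feature inner loop above can be vectorized. Here is a minimal standalone sketch of the same enrichment step; the names (X, rng) are illustrative stand-ins for the class attributes, not part of any library API, and n_clusters=None is used so that labels index subcluster_centers_ directly:

import numpy as np
from sklearn.cluster import Birch

rng = np.random.default_rng(0)
X = rng.random((20, 5))

# n_clusters=None keeps the raw subclusters, so each label is a valid row of subcluster_centers_
birch = Birch(threshold=0.25, n_clusters=None)
labels = birch.fit_predict(X)
centers = birch.subcluster_centers_

# gamma per row: mean non-zero count over this row's non-zero count
nonzero = np.count_nonzero(X, axis=1)
gamma = nonzero.mean() / nonzero

# add each row's cluster center, scaled by gamma, in one vectorized step
X += gamma[:, None] * centers[labels]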
def clusteringReminMost(window):
    brc = Birch(branching_factor=50,
                n_clusters=3,
                threshold=0.5,
                compute_labels=True)
    brc.fit(window)
    Class = brc.predict(window)
    #Tally each cluster, find the most populous one, and keep its rows to reinforce the historical data
    num0 = 0
    num1 = 0
    num2 = 0

    for i in Class:
        if i == 0:
            num0 += 1
        elif i == 1:
            num1 += 1
        else:
            num2 += 1
    label = chooseMax(num0, num1, num2)
    newwindow = window[0:1]
    for i in range(1, len(Class)):
        if Class[i] == label:  # row i belongs to the target cluster, so keep it
            # DataFrame.append was removed from pandas; concat the matching row instead
            newwindow = pd.concat([newwindow, window[i:i + 1]])  # all pandas DataFrames
    return newwindow
Example #6
def birchclustering(datalist):
    brc = Birch(branching_factor=50,
                n_clusters=None,
                threshold=0.17,
                compute_labels=True)
    brc.fit(datalist)
    return brc
def BirchModel(data, actualLabels):
    pca = PCA(n_components=2).fit(data)
    pca_2d = pca.transform(data)
    birch_model = Birch(threshold=0.1, n_clusters=10)
    t0 = time()
    birch_model.fit(pca_2d)
    labels = birch_model.labels_
    centroids = birch_model.subcluster_centers_
    n_clusters = np.unique(labels).size
    print('% 9s' % 'init'
          '    time   homo   compl  v-meas     ARI AMI  silhouette')
    print(
        '% 9s   %.2fs   %.3f   %.3f   %.3f   %.3f   %.3f   %.3f' %
        ('Birch Model', (time() - t0),
         metrics.homogeneity_score(actualLabels, birch_model.labels_),
         metrics.completeness_score(actualLabels, birch_model.labels_),
         metrics.v_measure_score(actualLabels, birch_model.labels_),
         metrics.adjusted_rand_score(actualLabels, birch_model.labels_),
         metrics.adjusted_mutual_info_score(actualLabels, birch_model.labels_),
         metrics.silhouette_score(
             data, birch_model.labels_, metric='euclidean',
             sample_size=10000)))

    scatter = plt.scatter(pca_2d[:, 0], pca_2d[:, 1], c=labels, marker='*')
    plt.plot(centroids[:, 0],
             centroids[:, 1],
             'X',
             markeredgecolor='k',
             markersize=3)
    plt.colorbar(scatter)

    plt.title('Birch Model Clustering')
    plt.show()
    def scan_callback(self, msg):
        pose = self.pose.copy()
        bearings = self.bearings.copy()

        ranges = np.array(msg.ranges)
        inf_flag = (-1 * np.isinf(ranges).astype(int) + 1)
        ranges = np.nan_to_num(ranges) * inf_flag

        euc_coord_x = pose[0] + np.cos(bearings - pose[2]) * ranges
        euc_coord_y = pose[1] + np.sin(bearings - pose[2]) * ranges
        dist_flag = np.where( (euc_coord_x-pose[0])**2 + \
                        (euc_coord_y-pose[1])**2 != 0.0)[0]
        points = np.array([euc_coord_x, euc_coord_y]).T
        points = points[dist_flag]

        self.obsv = []
        if len(points) > 0:
            brc = Birch(n_clusters=None, threshold=0.05)
            brc.fit(points)
            labels = brc.predict(points)
            u_labels = np.unique(labels)
            for l in u_labels:
                seg_idx = np.where(labels == l)
                seg = points[seg_idx]
                if seg.shape[0] <= 1:
                    fit_cov = 10
                else:
                    fit_cov = np.trace(np.cov(seg.T))
                if fit_cov < 0.001 and seg.shape[0] >= 3:
                    self.obsv.append(seg.mean(axis=0))
            print(self.obsv)
Example #9
def birch_ad_with_smoothing(latency_df, threshold):
    # anomaly detection on the response time of service invocations
    # input: response times of service invocations, threshold for BIRCH clustering
    # output: list of anomalous service invocations

    anomalies = []
    for svc, latency in latency_df.items():
        # no anomaly detection on the timestamp column, unnamed columns, or db/queue services
        if svc != 'timestamp' and 'Unnamed' not in svc and 'rabbitmq' not in svc and 'db' not in svc:
            latency = latency.rolling(window=smoothing_window, min_periods=1).mean()
            x = np.array(latency)
            x = np.where(np.isnan(x), 0, x)
            normalized_x = preprocessing.normalize([x])

            X = normalized_x.reshape(-1, 1)

            brc = Birch(branching_factor=50, n_clusters=None, threshold=threshold, compute_labels=True)
            brc.fit(X)
            brc.predict(X)

            labels = brc.labels_
            n_clusters = np.unique(labels).size
            # a single cluster means a flat latency profile; more than one flags an anomaly
            if n_clusters > 1:
                anomalies.append(svc)
    return anomalies
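A toy invocation of the function above; note that smoothing_window is read as a module-level global, and the service column names here are purely illustrative:

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.cluster import Birch

smoothing_window = 12  # assumed global used by the rolling mean

latency_df = pd.DataFrame({
    'timestamp': range(100),
    'front-end_orders': np.r_[np.ones(90), 10.0 * np.ones(10)],  # latency spike at the end
    'orders_orders-db': np.ones(100),                            # skipped: name contains 'db'
})
print(birch_ad_with_smoothing(latency_df, threshold=0.05))
# expected to print ['front-end_orders']: the spike splits into a second BIRCH cluster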
class BirchColorExtractor:
    def __init__(self,
                 n_colors=None,
                 threshold=0.5,
                 branching_factor=50,
                 compute_labels=True,
                 copy=True):
        self.birch = Birch(n_clusters=n_colors,
                           threshold=threshold,
                           branching_factor=branching_factor,
                           compute_labels=compute_labels,
                           copy=copy)

    def extract(self, img):
        img_array = np.array(img, dtype=np.float64) / 255

        # Load Image and transform to a 2D numpy array.
        w, h, d = tuple(img_array.shape)
        assert d == 3
        image_array = np.reshape(img_array, (w * h, d))

        print("Fitting model on a small sub-sample of the data")
        # manually fit on batches
        self.birch.fit(image_array)

        # Get labels for all points
        print("Predicting color indices on the full image (birch)")
        labels = self.birch.labels_

        main_color_array = 255 * self.birch.subcluster_centers_
        return [
            dict(color=dict(r=color[0], g=color[1], b=color[2]),
                 count=labels[labels == i].shape[0])
            for i, color in enumerate(main_color_array)
        ]
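Hypothetical usage of the extractor with a Pillow image ('photo.jpg' is a placeholder path; the class defaults to n_colors=None, which keeps one entry per BIRCH subcluster):

from PIL import Image

img = Image.open('photo.jpg').convert('RGB')
extractor = BirchColorExtractor(threshold=0.1)
palette = extractor.extract(img)
for entry in sorted(palette, key=lambda e: -e['count'])[:5]:
    print(entry['color'], entry['count'])  # the five most common colors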
Example #11
def birch_clusters(textdata,
                   trained_doc2vec,
                   n_clusters,
                   start_alpha=0.025,
                   infer_epoch=100,
                   branching_factor=10,
                   threshold=0.01,
                   compute_labels=True,
                   metric='cosine',
                   **kwargs):
    infer_list = []

    for doc in textdata:
        infer_list.append(
            trained_doc2vec.infer_vector(doc,
                                         alpha=start_alpha,
                                         steps=infer_epoch,
                                         **kwargs))
    brc = Birch(branching_factor=branching_factor,
                n_clusters=int(n_clusters),
                threshold=threshold,
                compute_labels=compute_labels)

    brc.fit(infer_list)
    clusters = brc.predict(infer_list)
    birch_labels = brc.labels_

    silhouette_score = metrics.silhouette_score(infer_list,
                                                birch_labels,
                                                metric=metric)

    return silhouette_score, clusters
Example #12
def cluster_latlon(n_clusters, data):
    #split the data between "around NYC" and "other locations", basically our first two clusters
    data_c = data[(data.longitude > -74.05) & (data.longitude < -73.75) &
                  (data.latitude > 40.4) & (data.latitude < 40.9)]
    data_e = data[~((data.longitude > -74.05) & (data.longitude < -73.75) &
                    (data.latitude > 40.4) & (data.latitude < 40.9))]
    #put it in matrix form (DataFrame.as_matrix was removed from pandas)
    coords = data_c[['latitude', 'longitude']].to_numpy()

    brc = Birch(branching_factor=100,
                n_clusters=n_clusters,
                threshold=0.01,
                compute_labels=True)

    brc.fit(coords)
    clusters = brc.predict(coords)
    data_c["cluster_" + str(n_clusters)] = clusters
    data_e["cluster_" + str(
        n_clusters)] = -1  #assign cluster label -1 for the non NYC listings
    data = pd.concat([data_c, data_e])
    plt.scatter(data_c["longitude"],
                data_c["latitude"],
                c=data_c["cluster_" + str(n_clusters)],
                s=10,
                linewidth=0.1)
    plt.title(str(n_clusters) + " Neighbourhoods from clustering")
    plt.show()
    return data
Example #13
def birch_skm_part1_helper(data, m, k, delta):
    """
    The function receive data and calculates k centers using the birch function in sklearn, and their quantile radius
    :param data: numpy array
    :param m: Size of the data
    :param k: Number of centers.
    :param delta: int
    :return: tuple of two numpy array. (k_medoids, k_distances).
    """
    birch_instance = Birch(n_clusters=k, threshold=0.1)  # birch instance
    birch_instance.fit(data)  # Run birch on the data
    labels = birch_instance.predict(data) # calculate the cluster number for each point
    l_medoids = []
    # since birch does not return centers, I have to calculate them
    for label in range(np.unique(labels).size):
        # calculate the center of each cluster
        cluster = data[labels == label]
        kmedoids_instance_for_birch = kmedoids(cluster.tolist(), init_centers(cluster, 1))
        kmedoids_instance_for_birch.process()
        l_medoids.append(cluster[kmedoids_instance_for_birch.get_medoids()][0])
    l_medoids = np.array(l_medoids)
    q = calc_q(m, delta)  # calculate q
    # calculate the distance to the quantile points around each center
    l_distances = calc_quantile_radius_around_centers(data, l_medoids, q, k)
    return l_medoids, l_distances
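For reference, the medoid that pyclustering computes for each cluster above is just the member minimizing the total distance to the rest. A minimal numpy equivalent (illustrative, not the code used above):

import numpy as np

def medoid(cluster: np.ndarray) -> np.ndarray:
    # pairwise Euclidean distances, then the row with the smallest total
    d = np.linalg.norm(cluster[:, None, :] - cluster[None, :, :], axis=-1)
    return cluster[d.sum(axis=1).argmin()]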
Example #14
    def birch(self, x, threshold=0.01):
        """Fit a Birch model on x using the given threshold and self.max_clusters clusters."""
        model = Birch(threshold=threshold, n_clusters=self.max_clusters)
        model.fit(x)
        return model
Example #15
def train(feature, weights, cluster_num, feature_path=None, down=0.006, up=0.0085, bf_index=2):
    if feature_path is not None:
        feature = pd.read_csv(feature_path)
    X = []
    print("Training...\n")
    for i in range(len(feature[feature.columns[0]])):
        f = np.array(feature.iloc[i][1:])
        key = f[bf_index]
        if key > up:
            f_w = combine(feature.iloc[i][1:], weights)
            X.append(f_w)
    clf = Birch(n_clusters=cluster_num)
    clf.fit(X)
    pred = []
    for i in range(len(feature[feature.columns[0]])):
        f = np.array(feature.iloc[i][1:])
        key = f[bf_index]
        if key > up:
            p = clf.predict([combine(f, weights)])
            pred.append(p[0])
        elif key < down:
            pred.append(cluster_num)
        else:  # down <= key <= up
            pred.append(cluster_num + 1)
    joblib.dump(clf, 'curve_model_Birch.pkl')
    print(pred)
    return pred
def add_cluster_column(train_df, test_df, n_clusters):
    train_df['source'] = 'train'
    test_df['source'] = 'test'

    total_rows = train_df.shape[0] + test_df.shape[0]

    data = pd.concat([train_df, test_df])

    #split the data between "around NYC" and "other locations"
    data_c = data[(data.longitude > -74.05) & (data.longitude < -73.75) &
                  (data.latitude > 40.4) & (data.latitude < 40.9)]
    data_e = data[~((data.longitude > -74.05) & (data.longitude < -73.75) &
                    (data.latitude > 40.4) & (data.latitude < 40.9))]
    #put it in matrix form (DataFrame.as_matrix was removed from pandas)
    coords = data_c[['latitude', 'longitude']].to_numpy()

    brc = Birch(branching_factor=100,
                n_clusters=n_clusters,
                threshold=0.01,
                compute_labels=True)

    brc.fit(coords)
    clusters = brc.predict(coords)
    data_c["num_cluster_" + str(n_clusters)] = clusters
    data_e["num_cluster_" + str(
        n_clusters)] = -1  #assign cluster label -1 for the non NYC listings
    data = pd.concat([data_c, data_e])

    print('lost: {}'.format(total_rows -
                            data[data['source'] == 'train'].shape[0] -
                            data[data['source'] == 'test'].shape[0]))
    return data[data['source'] == 'train'], data[data['source'] == 'test']
def get_clustered_data(data_matrix,
                       clustering_algorithm=model_constants.KMEANS,
                       distance_metric='euclidean',
                       num_clusters=3):
    if clustering_algorithm.lower() == model_constants.AFFINITY_PROP:
        aff_prop = AffinityPropagation(affinity=distance_metric)
        aff_prop.fit(data_matrix)
        return aff_prop.labels_, aff_prop
    elif clustering_algorithm.lower() == model_constants.DBSCAN:
        dbscan = DBSCAN(metric=distance_metric)
        dbscan.fit(data_matrix)
        return dbscan.labels_, dbscan
    elif clustering_algorithm.lower() == model_constants.OPTICS:
        optics = OPTICS(metric=distance_metric)
        optics.fit(data_matrix)
        return optics.labels_, optics
    elif clustering_algorithm.lower() == model_constants.MEANSHIFT:
        mean_shift = MeanShift()
        mean_shift.fit(data_matrix)
        return mean_shift.labels_, mean_shift
    elif clustering_algorithm.lower() == model_constants.BIRCH:
        birch = Birch(n_clusters=num_clusters)
        birch.fit(data_matrix)
        return birch.labels_, birch
    elif clustering_algorithm.lower() == model_constants.AGGLOMERATIVE:
        agglomerative = AgglomerativeClustering(n_clusters=num_clusters,
                                                affinity=distance_metric)
        agglomerative.fit(data_matrix)
        return agglomerative.labels_, agglomerative
    else:
        kmeans = KMeans(n_clusters=num_clusters, random_state=42)
        kmeans.fit(data_matrix)
        return kmeans.labels_, kmeans
Example #18
def birchcluster(X):
    brc = Birch()
    brc.fit(X)
    # Plot result
    labels = brc.labels_
    centroids = brc.subcluster_centers_
    n_clusters = np.unique(labels).size
    print("n_clusters : %d" % n_clusters)
    return labels
def birch_algo(X, threshold=1.7, clustering=None):
    birch = Birch(threshold=threshold, n_clusters=clustering)
    t = time()
    birch.fit(X)
    time_ = time() - t
    labels = birch.labels_
    centroids = birch.subcluster_centers_
    n_clusters = np.unique(labels).size
    print(" The number of clusters is : %d" % n_clusters)
def birch_algo(X, threshold=1.7, clustering=None):
    birch = Birch(threshold=threshold, n_clusters=clustering)
    birch.fit(X)
    labels = birch.labels_
    centroids = birch.subcluster_centers_
    labels_unique = np.unique(labels)
    n_clusters = labels_unique.size
    print(" The number of clusters is : %d" % n_clusters)
    return labels, centroids, n_clusters
Example #22
def birch(data, threshold, branching_factor):
    db = Birch(threshold=threshold, branching_factor=branching_factor)
    # fit_predict fits the model and returns the labels in one call
    pred = db.fit_predict(data)
    score = sil_score(data, pred)
    print(score)
    return db, pred, score
Example #23
    def cluster_birch(self):
        print("Starting Birch clustering")
        brc = Birch(branching_factor=10,
                    n_clusters=40,
                    threshold=self.cluster_distance,
                    compute_labels=False)
        brc.fit(self.all_frames_xy)
        clusters = brc.predict(self.all_frames_xy)
        return clusters
def test_feature_names_out():
    """Check `get_feature_names_out` for `Birch`."""
    X, _ = make_blobs(n_samples=80, n_features=4, random_state=0)
    brc = Birch(n_clusters=4)
    brc.fit(X)
    n_clusters = brc.subcluster_centers_.shape[0]

    names_out = brc.get_feature_names_out()
    assert_array_equal([f"birch{i}" for i in range(n_clusters)], names_out)
Example #26
def test_n_samples_leaves_roots():
    # Sanity check for the number of samples in leaves and roots
    X, y = make_blobs(n_samples=10)
    brc = Birch()
    brc.fit(X)
    n_samples_root = sum([sc.n_samples_ for sc in brc.root_.subclusters_])
    n_samples_leaves = sum([sc.n_samples_ for leaf in brc._get_leaves()
                            for sc in leaf.subclusters_])
    assert n_samples_leaves == X.shape[0]
    assert n_samples_root == X.shape[0]
def test_threshold():
    # Test that the radius of each leaf subcluster stays below the threshold
    X, y = make_blobs(n_samples=80, centers=4)
    brc = Birch(threshold=0.5, n_clusters=None)
    brc.fit(X)
    check_threshold(brc, 0.5)

    brc = Birch(threshold=5.0, n_clusters=None)
    brc.fit(X)
    check_threshold(brc, 5.0)
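check_threshold is a helper from sklearn's test suite. A sketch of what it does, walking the linked list of leaf nodes (these are internal Birch attributes, so treat this as illustrative):

def check_threshold(birch_instance, threshold):
    # traverse the leaf linked list and check every subcluster's radius
    current_leaf = birch_instance.dummy_leaf_.next_leaf_
    while current_leaf:
        for sc in current_leaf.subclusters_:
            assert sc.radius <= threshold
        current_leaf = current_leaf.next_leaf_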
Example #28
def update_k_clusters(attrname, old, new):
    k_cluster = int(k_slider.value)
    brc = Birch(branching_factor=50,
                n_clusters=k_cluster,
                threshold=0.5,
                compute_labels=True)
    brc.fit(tweet_vecs)
    predictions = brc.predict(tweet_vecs)
    colors = get_colors(predictions)
    brc_data.data = dict(colors=colors, x=tsne_vecs[:, 0], y=tsne_vecs[:, 1])
Example #29
def BIRCH2_duplicate_removal(dataframe, threshold=0.8):
    # Note this method now takes a dataframe as input

    if len(dataframe) < 2:
        # nothing to do
        return dataframe

    Crater_data = dataframe
    # extract axes
    x = Crater_data[0].values.tolist()
    y = Crater_data[1].values.tolist()
    r = Crater_data[2].values.tolist()
    p = Crater_data[3].values.tolist()
    Points = []

    X = np.column_stack((x, y))
    brc = Birch(branching_factor=50,
                n_clusters=int(threshold * len(x)),
                threshold=0.5,
                compute_labels=True)
    brc.fit(X)
    groups_pred = brc.predict(X)

    for c in set(groups_pred):
        idx = [i for i, e in enumerate(groups_pred) if e == c]

        Group_x = []
        Group_y = []
        Group_r = []
        Group_p = []
        index = []

        for i in idx:
            if i in range(0, len(x)):
                Group_x.append(x[i])
                Group_y.append(y[i])
                Group_r.append(r[i])
                Group_p.append(p[i])
                index.append(i)

        # after group is defined, extract its elements from list
        Points.append([Group_x, Group_y, Group_r, Group_p])

    # now reduce groups
    center_size = []
    for i, (Xs, Ys, Rr, Ps) in enumerate(Points):
        # we take the point with best prediction confidence
        best_index = np.argmax(Ps)
        x_center = Xs[best_index]
        y_center = Ys[best_index]
        radius = Rr[best_index]
        prob = Ps[best_index]
        center_size += [[x_center, y_center, radius, prob]]

    return pd.DataFrame(center_size)
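A toy run of the duplicate removal above, assuming columns 0-3 hold x, y, radius, and detection confidence:

import pandas as pd

craters = pd.DataFrame([[10.0, 10.0, 3.0, 0.9],
                        [10.2, 10.1, 3.1, 0.7],   # near-duplicate of the first crater
                        [50.0, 50.0, 5.0, 0.8]])
print(BIRCH2_duplicate_removal(craters, threshold=0.8))
# two rows survive: the highest-confidence member of each spatial group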
def compute_clusters(data: List) -> np.ndarray:
    print("--->Computing clusters")
    birch = Birch(branching_factor=50,
                  n_clusters=5,
                  threshold=0.3,
                  copy=True,
                  compute_labels=True)

    birch.fit(data)
    predictions = np.array(birch.predict(data))
    return predictions
class BirchSklearn(AbstractClusteringAlgorithm):
    def __init__(self, **kwargs):
        from sklearn.cluster import Birch
        self.model = Birch(**kwargs)

    def fit(self, x: [np.ndarray]):
        self.model.fit(x)

    @property
    def labels_(self):
        return self.model.labels_
Example #32
def skitleanBirch():
    data = pd.read_csv("soy_rock.csv", header=None)
    X = data.values.tolist()
    randomm = randint(5, 20)

    brc = Birch(branching_factor=randomm,
                n_clusters=4,
                threshold=0.1,
                compute_labels=True)
    brc.fit(X)
    pred = brc.predict(X)
    return pred
def birch(x, n_clusters=None, threshold=0.5, branching_factor=5):
  birch_model = Birch(
    threshold=threshold, 
    n_clusters=n_clusters, 
    branching_factor=branching_factor
  )
  birch_model.fit(x)

  centroids = birch_model.subcluster_centers_
  c = birch_model.labels_
  k = len(centroids)

  return birch_model, (centroids, c, k)
def main():
    #remove sub folders
    removeSubFolders(path+algorithm+'\\')
    
    for file in os.listdir(path):
        if file.endswith("-d.txt"):
            text_file = open(path+file,'r')
            
            ar = (text_file.readline().split(' '))
            ar.remove('\n')
            if(len(ar)>0):
                #print map(int,ar)
                row = map(int,ar);
                data.append(row)
                fileNames.append(file)
            #print(row)

    #create np array

    npData = np.array(data)
    n_samples, n_features = npData.shape
    brc = Birch(branching_factor=50, n_clusters=n_digits, threshold=0.5,compute_labels=True)
    #kmeans = KMeans(init='random', n_clusters=n_digits, n_init=500)
    brc.fit(npData)
    list1 = brc.labels_
    list2 = fileNames
    print(brc.labels_)
    print(fileNames)

    list1, list2 = zip(*sorted(zip(list1, list2)))

    print(list1)
    print(list2)
    '''
    k=0
    lim = len(list1)-1
    for i in range(0,n_digits):
        
        while(list1[k]==i):
            # want to copy these into folders
            copychar(list1[k],list2[k])
            print(list1[k], list2[k])
            k+=1
            if k==lim:
                break
    '''
    for i in range(0, len(list1)):
        print(list1[i], list2[i])
        copychar(list1[i], list2[i])
def runBrich(K_cluster, cluster_input):
    # clustering by topic-probability vector of each category
    t0 = time()
    bri = Birch(n_clusters=K_cluster)
    bri.fit(cluster_input)
    print("done in %0.3fs" % (time() - t0))

    with open('result/brich_cluster_' + str(K_cluster) + '.txt', 'w') as f:
        f.write("cluster_centers\n")
        f.write(str(bri.subcluster_centers_))
        f.write("\n==========\n")
        f.write("labels (sequence of cluster # which input belongs to )\n")
        f.write(str(bri.labels_))
        f.write("\n==========\n")
        f.write("inertia\n")
        f.write(str(bri.subcluster_labels_))
        f.write("\n==========\n")

    return bri.labels_
    def split_birch(self, branching_factor, threshold):

        # Extract dataset from files
        dataset = [f.dataset for f in self.files]

        # Initialize classifier
        classifier = Birch(branching_factor=branching_factor, n_clusters=None, threshold=threshold)

        classifier.fit(dataset)

        # Get index
        index = classifier.predict(dataset)

        count = max(index) + 1

        # Create new clusters
        clusters = [Cluster(self.directory, self.name + '-' + str(i)) for i in range(count)]
        for i in range(0, len(self.files), 1):
            clusters[index[i]].add_file(self.files[i])

        return clusters
Example #37
def build_model(df, cluster_type="kmeans", seed=1):
    if cluster_type == "birch":
        model = Birch(n_clusters=N_CLUSTERS)
        res = model.fit_predict(df)
    elif cluster_type == "minibatch":
        model = MiniBatchKMeans(n_clusters=N_CLUSTERS, random_state=seed)
        res = model.fit_predict(df)
    elif cluster_type == "em":
        model = mixture.GMM(n_components=N_CLUSTERS)
        model.fit(df)
        res = model.predict(df)
    elif cluster_type == 'lda':
        model = lda.LDA(n_topics=N_CLUSTERS, n_iter=1500, random_state=seed)
        data_to_cluster = np.array(df).astype(int)
        lda_res = model.fit_transform(data_to_cluster)
        res = []
        for i in lda_res:  #for now - do hard clustering, take the highest probability
            res.append(i.argmax())
    else:
        model = KMeans(n_clusters=N_CLUSTERS, random_state=seed)
        res = model.fit_predict(df)
        df_array = np.array(df)

        dis_dict = {}
        for i in range(N_CLUSTERS):
            dis_dict[i] = model.cluster_centers_[i]
        all_dist = []
        for line_idx in range(len(df_array)):
            label = model.labels_[line_idx]
            dist = calc_distance(df_array[line_idx],dis_dict[label])
            all_dist.append(dist)
        df["distance_from_cluster"] = all_dist

    #clusters = model.labels_.tolist()
    #print ("clusters are:",clusters)
    print(""">>>> model is: %s, # of clusters:%s, and %s""" %(cluster_type,N_CLUSTERS,Counter(res)))
    res = [str(i) for i in res]
    docs_clusteres = zip(df.index,res)
    return docs_clusteres
Example #38
def test_birch_with_depot_calculation():
    points = points_from_file('tsps/berlin52.txt')
    matrix = load_matrix(points)
    X = [[p[1],p[2]] for p in points]
    est = Birch(n_clusters=3)
    est.fit(X)
    labels = est.labels_
    hl_matrix, clusters, G = load_matrices_from_labels(points,labels)
    depots, C = compute_depots(clusters, matrix, G, per_cluster=True)
    depots_actual, _ = compute_depots(clusters, matrix, G)
    cluster_optimal_cost, R, hl_route = clustered_tsp_solve(points, 3, labels=labels, depots=depots)
    cluster_optimal_cost += C

    print(depots_actual)
    print(R,C)

    for depot in depots_actual:
        for r in R:
            if r[1][0] == depot:
                for point in r[1]:
                    print(matrix.points[point])
        print('')
Example #39
    def obtainCodebook(self, sampled_x, x):

        print('Obtaining codebook using Birch from sklearn...')

        scaled_x_sampled = StandardScaler().fit_transform(sampled_x)
        scaled_x = StandardScaler().fit_transform(x)

        brc = BIRCH(branching_factor=self.branching_factor, n_clusters=self.nclusters, threshold=self.threshold, compute_labels=True)

        # obtain the codebook and the projections of the images onto it (clusters of words)
        codebook = brc.fit(scaled_x_sampled)
        clusters = brc.predict(scaled_x)

        print('Clusters obtained.')

        return codebook, clusters
Example #40
    def obtainClusters(self, hist):

        print('Obtaining clusters using Birch from sklearn...')

        hist = np.array(hist)
        hist = hist.astype(float)
        scaled_vec = StandardScaler().fit_transform(hist)

        brc = BIRCH(branching_factor=self.branching_factor, n_clusters=self.nclusters, threshold=self.threshold, compute_labels=True)

        # obtain the projections of the images onto the codebook (clusters of words)
        codebook = brc.fit(scaled_vec)
        clusters = brc.predict(scaled_vec)

        print('Clusters obtained.')

        return clusters
import numpy as np
from sklearn.cluster import Birch
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from itertools import cycle

# Generates random vectors to cluster
n_samples = 50
centers = [[0, 1], [4, -2], [-2, 2], [0, -1]]
X, _ = make_blobs(n_samples=n_samples, centers=centers, cluster_std=0.2)

# Creates the Birch clusterer and fits it on the vectors
brc = Birch(branching_factor=50, n_clusters=None, threshold=0.8, compute_labels=True)
brc.fit(X)

labels = brc.labels_
cluster_centers = brc.subcluster_centers_
labels_unique = np.unique(labels)
n_clusters_ = len(labels_unique)

# Plot the clustered points
plt.figure(1)
plt.clf()

colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
for k, col in zip(range(n_clusters_), colors):
    my_members = labels == k
    plt.plot(X[my_members, 0], X[my_members, 1], col + '.')
plt.axis([-4, 12, -4, 12])
plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()
def cluster_junctions(juncs):
    birch_model = Birch(threshold=3, n_clusters=None)
    X = np.array(juncs)
    birch_model.fit(X)

    return birch_model.labels_
Example #45
station_array = np.array(station_list)
dsp_array = np.array(dsp_list)

# extract the unique station names
stations = np.unique(station_array)
print(stations)

for sta in stations:
    events = event_array[station_array == sta, :]
    dsp_shortlist = dsp_array[station_array == sta]
    print(sta, events.shape, dsp_shortlist.shape)

    # cluster on events so as to compare dispersion curves for nearby
    # events
    brc = Birch(branching_factor=50, n_clusters=None, threshold=dist, compute_labels=True)
    brc.fit(events)
    labels = brc.predict(events)
    print(np.max(labels))
    for lab in np.unique(labels):
        dsp_this_label_list = dsp_shortlist[labels == lab]
        cluster_name = os.path.join(dirname, "cluster_%s_%03d" % (sta, lab))
        plot_all_dsp(dsp_this_label_list, legend=False, fname="%s_gvel.png" % cluster_name)
        plot_all_map(dsp_this_label_list, fname="%s_map.png" % cluster_name, legend=False)
        f = open("%s_info.txt" % cluster_name, "w")
        for (dsp, dsp_dict) in dsp_this_label_list:
            f.write(
                "%s %s %d %03d %02d %02d %.3f %.3f\n"
                % (
                    dsp_dict["STA"],
                    dsp_dict["COMP"],
                    dsp_dict["YEAR"],