def map_clusters(n_list, n_clusters):
    # x = np.array([[28.596596, 77.344098], [28.574783, 77.333393]])
    # x = np.append(x, [[28.596596, 77.344098], [28.574783, 77.333393], [28.582515, 77.246735],
    #                   [28.582915, 77.215735], [28.635639, 77.201197], [28.464873, 76.995451]], axis=0)
    x = np.array([[28.596596, 0], [28.574783, 0], [28.996596, 0], [28.674783, 0],
                  [28.582515, 0], [28.582915, 0], [28.635639, 0], [28.464873, 0]])
    # x = np.append(x, n_list, axis=0)
    # define the model
    model = Birch(threshold=0.01, n_clusters=n_clusters)
    # fit the model
    model.fit(n_list)
    # assign a cluster to each example
    yhat = model.predict(n_list)
    # retrieve unique clusters
    clusters = unique(yhat)
    dic = {}
    # map each cluster label to the row indexes of its samples
    for cluster in clusters:
        # get row indexes for samples with this cluster
        row_ix = where(yhat == cluster)
        dic[cluster] = row_ix[0]
        # pyplot.scatter(x[row_ix, 0], x[row_ix, 1])
    # print(dic)
    # pyplot.show()
    return dic
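# A quick, hypothetical usage sketch for map_clusters above. It assumes
# `unique` and `where` are numpy's functions imported at module level, and
# the sample data is made up.
import numpy as np
from numpy import unique, where
from sklearn.cluster import Birch

pts = np.random.default_rng(0).normal(size=(30, 2))
cluster_to_rows = map_clusters(pts, n_clusters=3)
print(cluster_to_rows)  # {0: array([...]), 1: array([...]), 2: array([...])}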
def _runAlgorithm(self):
    birch = Birch(branching_factor=50, n_clusters=self.params['birch'], threshold=0.5)
    birch.fit(self.m_data)
    self.m_resultLabels = birch.labels_
def birch_clustering(principal_components, principal_df, number_of_clusters):
    final_df = pd.concat([principal_df], axis=1)
    model = Birch(threshold=0.01, n_clusters=number_of_clusters)
    # fit the model
    model.fit(principal_components)
    # assign a cluster to each example
    yhat = model.predict(principal_components)
    # retrieve unique clusters
    clusters = unique(yhat)
    final_df['Segment'] = model.labels_
    # create scatter plot for samples from each cluster
    for cluster in clusters:
        # get row indexes for samples with this cluster
        row_ix = where(yhat == cluster)
        # create scatter of these samples
        plt.scatter(principal_components[row_ix, 0],
                    principal_components[row_ix, 1], s=75)
    final_df.rename({0: 'PC1', 1: 'PC2', 2: 'PC3', 'y': 'Race'},
                    axis=1, inplace=True)
    plt.title("BIRCH Clustering")
    add_race_labels(final_df)
    calc_silhouette(data=principal_components, prediction=yhat,
                    n_clusters=len(clusters))
    return final_df
def birch_enrich(self, input_clustering, numclusters=10, threshold=1.7):
    """Enrich the training set with the BIRCH clustering algorithm.

    BIRCH (balanced iterative reducing and clustering using hierarchies) is
    an unsupervised data mining algorithm used to perform hierarchical
    clustering over particularly large data sets. An advantage of BIRCH is
    its ability to incrementally and dynamically cluster incoming,
    multi-dimensional metric data points in an attempt to produce the best
    quality clustering for a given set of resources (memory and time
    constraints). In most cases, BIRCH requires only a single scan of the
    database.

    :param numclusters: Number of clusters.
    :type numclusters: int
    :param threshold: The radius of the subcluster obtained by merging a new
        sample and the closest subcluster should be less than the threshold.
    :type threshold: float
    """
    self.X = self.X.astype(float)
    birch = Birch(threshold=threshold, n_clusters=numclusters)
    birch.fit(input_clustering)
    labels = birch.labels_
    cluster_centers = birch.subcluster_centers_
    n_features = len(self.vocabulary)
    total_nonzero = 0
    for x in range(len(self.X)):
        total_nonzero += np.count_nonzero(self.X[x])
    mean_n_features_in_docs = total_nonzero / len(self.X)
    for x in range(len(self.X)):
        # gamma scales the enrichment by document length: shorter documents
        # receive a stronger pull toward their cluster center
        gamma = mean_n_features_in_docs / np.count_nonzero(self.X[x])
        x_label = labels[x]
        center_vector = cluster_centers[x_label]
        for i in range(n_features):
            self.X[x][i] = self.X[x][i] + gamma * center_vector[i]
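# A minimal, standalone sketch of the enrichment idea in birch_enrich. All
# names here (docs, enriched, mean_nnz) are illustrative, not from the code
# above. Per-cluster means are computed from the final labels directly rather
# than by indexing subcluster_centers_ with a label, since Birch subclusters
# need not align one-to-one with the final cluster labels.
import numpy as np
from sklearn.cluster import Birch

rng = np.random.default_rng(0)
docs = rng.integers(0, 3, size=(20, 8)).astype(float)  # toy document-term matrix

birch = Birch(threshold=0.5, n_clusters=5)
labels = birch.fit_predict(docs)
centers = np.vstack([docs[labels == l].mean(axis=0) for l in np.unique(labels)])

mean_nnz = np.count_nonzero(docs, axis=1).mean()
enriched = docs.copy()
for i, row in enumerate(docs):
    gamma = mean_nnz / max(1, np.count_nonzero(row))  # shorter docs get a larger boost
    enriched[i] = row + gamma * centers[labels[i]]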
def clusteringReminMost(window):
    brc = Birch(branching_factor=50, n_clusters=3, threshold=0.5, compute_labels=True)
    brc.fit(window)
    Class = brc.predict(window)
    # Count the members of each cluster, find the largest cluster, and keep
    # its rows, thereby reinforcing the historical data.
    num0 = 0
    num1 = 0
    num2 = 0
    for i in Class:
        if i == 0:
            num0 += 1
        elif i == 1:
            num1 += 1
        else:
            num2 += 1
    label = chooseMax(num0, num1, num2)
    newwindow = window[0:0]  # empty frame with the same columns
    for i in range(len(Class)):
        if Class[i] == label:  # row belongs to the target cluster, so append it
            newwindow = newwindow.append(window[i:i + 1])  # all values are pandas objects
    return newwindow
def birchclustering(datalist):
    brc = Birch(branching_factor=50, n_clusters=None, threshold=0.17, compute_labels=True)
    brc.fit(datalist)
    return brc
def BirchModel(data, actualLabels):
    pca = PCA(n_components=2).fit(data)
    pca_2d = pca.transform(data)
    birch_model = Birch(threshold=0.1, n_clusters=10)
    t0 = time()
    birch_model.fit(pca_2d)
    labels = birch_model.labels_
    centroids = birch_model.subcluster_centers_
    n_clusters = np.unique(labels).size
    print('%11s  time   homo   compl  v-meas  ARI    AMI    silhouette' % 'init')
    print('%11s  %.2fs  %.3f  %.3f  %.3f  %.3f  %.3f  %.3f'
          % ('Birch Model', (time() - t0),
             metrics.homogeneity_score(actualLabels, birch_model.labels_),
             metrics.completeness_score(actualLabels, birch_model.labels_),
             metrics.v_measure_score(actualLabels, birch_model.labels_),
             metrics.adjusted_rand_score(actualLabels, birch_model.labels_),
             metrics.adjusted_mutual_info_score(actualLabels, birch_model.labels_),
             metrics.silhouette_score(data, birch_model.labels_,
                                      metric='euclidean', sample_size=10000)))
    scatter = plt.scatter(pca_2d[:, 0], pca_2d[:, 1], c=labels, marker='*')
    plt.plot(centroids[:, 0], centroids[:, 1], 'X', markeredgecolor='k', markersize=3)
    plt.colorbar(scatter)
    plt.title('Birch Model Clustering')
    plt.show()
def scan_callback(self, msg):
    pose = self.pose.copy()
    bearings = self.bearings.copy()
    ranges = np.array(msg.ranges)
    inf_flag = (-1 * np.isinf(ranges).astype(int) + 1)
    ranges = np.nan_to_num(ranges) * inf_flag
    euc_coord_x = pose[0] + np.cos(bearings - pose[2]) * ranges
    euc_coord_y = pose[1] + np.sin(bearings - pose[2]) * ranges
    dist_flag = np.where((euc_coord_x - pose[0])**2 +
                         (euc_coord_y - pose[1])**2 != 0.0)[0]
    points = np.array([euc_coord_x, euc_coord_y]).T
    points = points[dist_flag]
    self.obsv = []
    if len(points) > 0:
        brc = Birch(n_clusters=None, threshold=0.05)
        brc.fit(points)
        labels = brc.predict(points)
        u_labels = np.unique(labels)
        for l in u_labels:
            seg_idx = np.where(labels == l)
            seg = points[seg_idx]
            if seg.shape[0] <= 1:
                fit_cov = 10
            else:
                fit_cov = np.trace(np.cov(seg.T))
            if fit_cov < 0.001 and seg.shape[0] >= 3:
                self.obsv.append(seg.mean(axis=0))
    print(self.obsv)
def birch_ad_with_smoothing(latency_df, threshold):
    # anomaly detection on response time of service invocation
    # input: response times of service invocations, threshold for birch clustering
    # output: anomalous service invocations
    anomalies = []
    for svc, latency in latency_df.items():
        # No anomaly detection in db
        if svc != 'timestamp' and 'Unnamed' not in svc and 'rabbitmq' not in svc and 'db' not in svc:
            latency = latency.rolling(window=smoothing_window, min_periods=1).mean()
            x = np.array(latency)
            x = np.where(np.isnan(x), 0, x)
            normalized_x = preprocessing.normalize([x])
            X = normalized_x.reshape(-1, 1)
            # threshold = 0.05
            brc = Birch(branching_factor=50, n_clusters=None,
                        threshold=threshold, compute_labels=True)
            brc.fit(X)
            brc.predict(X)
            labels = brc.labels_
            # centroids = brc.subcluster_centers_
            n_clusters = np.unique(labels).size
            if n_clusters > 1:
                anomalies.append(svc)
    return anomalies
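# A self-contained sketch of the same detection idea: a service is flagged as
# anomalous when Birch (n_clusters=None) finds more than one cluster in its
# normalized latency series. The data, window size, and threshold below are
# illustrative assumptions, not values from the function above.
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.cluster import Birch

smoothing_window = 12
latency_df = pd.DataFrame({
    'svc_a': np.r_[np.full(50, 20.0), np.full(50, 80.0)],  # latency jump -> anomalous
    'svc_b': np.full(100, 20.0),                           # flat -> normal
})

for svc, latency in latency_df.items():
    smoothed = latency.rolling(window=smoothing_window, min_periods=1).mean()
    X = preprocessing.normalize([smoothed.to_numpy()]).reshape(-1, 1)
    brc = Birch(branching_factor=50, n_clusters=None, threshold=0.05)
    labels = brc.fit_predict(X)
    print(svc, 'anomalous' if np.unique(labels).size > 1 else 'normal')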
class BirchColorExtractor:
    def __init__(self, n_colors=None, threshold=0.5, branching_factor=50,
                 compute_labels=True, copy=True):
        self.birch = Birch(n_clusters=n_colors, threshold=threshold,
                           branching_factor=branching_factor,
                           compute_labels=compute_labels, copy=copy)

    def extract(self, img):
        # Load image and transform to a 2D numpy array of RGB rows in [0, 1].
        img_array = np.array(img, dtype=np.float64) / 255
        w, h, d = tuple(img_array.shape)
        assert d == 3
        image_array = np.reshape(img_array, (w * h, d))
        print("Fitting model on the image pixels")
        self.birch.fit(image_array)
        # Get labels for all points
        print("Predicting color indices on the full image (birch)")
        labels = self.birch.labels_
        main_color_array = 255 * self.birch.subcluster_centers_
        return [
            dict(color=dict(r=color[0], g=color[1], b=color[2]),
                 count=labels[labels == i].shape[0])
            for i, color in enumerate(main_color_array)
        ]
def birch_clusters(textdata, trained_doc2vec, n_clusters, start_alpha=0.025,
                   infer_epoch=100, branching_factor=10, threshold=0.01,
                   compute_labels=True, metric='cosine', **kwargs):
    infer_list = []
    for doc in textdata:
        infer_list.append(trained_doc2vec.infer_vector(doc, alpha=start_alpha,
                                                       steps=infer_epoch, **kwargs))
    brc = Birch(branching_factor=branching_factor, n_clusters=int(n_clusters),
                threshold=threshold, compute_labels=compute_labels)
    brc.fit(infer_list)
    clusters = brc.predict(infer_list)
    birch_labels = brc.labels_
    silhouette_score = metrics.silhouette_score(infer_list, birch_labels, metric=metric)
    return silhouette_score, clusters
def cluster_latlon(n_clusters, data):
    # split the data between "around NYC" and "other locations",
    # basically our first two clusters
    in_nyc = ((data.longitude > -74.05) & (data.longitude < -73.75) &
              (data.latitude > 40.4) & (data.latitude < 40.9))
    data_c = data[in_nyc]
    data_e = data[~in_nyc]
    # put it in matrix form
    coords = data_c[['latitude', 'longitude']].to_numpy()
    brc = Birch(branching_factor=100, n_clusters=n_clusters, threshold=0.01,
                compute_labels=True)
    brc.fit(coords)
    clusters = brc.predict(coords)
    data_c["cluster_" + str(n_clusters)] = clusters
    data_e["cluster_" + str(n_clusters)] = -1  # cluster label -1 for the non-NYC listings
    data = pd.concat([data_c, data_e])
    plt.scatter(data_c["longitude"], data_c["latitude"],
                c=data_c["cluster_" + str(n_clusters)], s=10, linewidth=0.1)
    plt.title(str(n_clusters) + " Neighbourhoods from clustering")
    plt.show()
    return data
def birch_skm_part1_helper(data, m, k, delta):
    """Receive data, calculate k centers using sklearn's Birch, and compute
    their quantile radii.

    :param data: numpy array
    :param m: Size of the data.
    :param k: Number of centers.
    :param delta: int
    :return: Tuple of two numpy arrays: (k_medoids, k_distances).
    """
    birch_instance = Birch(n_clusters=k, threshold=0.1)
    birch_instance.fit(data)
    labels = birch_instance.predict(data)  # cluster number for each point
    # Birch does not return centers, so calculate a medoid for each cluster.
    l_medoids = []
    for label in range(np.unique(labels).size):
        cluster = data[labels == label]
        kmedoids_instance_for_birch = kmedoids(cluster.tolist(), init_centers(cluster, 1))
        kmedoids_instance_for_birch.process()
        l_medoids.append(cluster[kmedoids_instance_for_birch.get_medoids()][0])
    l_medoids = np.array(l_medoids)
    q = calc_q(m, delta)  # calculate q
    # calculate the distance to the quantile points around each center
    l_distances = calc_quantile_radius_around_centers(data, l_medoids, q, k)
    return l_medoids, l_distances
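# A minimal sketch of the same "centers from Birch" idea without the
# pyclustering helpers used above: for each Birch cluster, take the member
# closest to the cluster mean as its medoid. Purely illustrative.
import numpy as np
from sklearn.cluster import Birch

data = np.random.default_rng(1).normal(size=(200, 2))
labels = Birch(n_clusters=3, threshold=0.1).fit_predict(data)

medoids = []
for label in np.unique(labels):
    cluster = data[labels == label]
    mean = cluster.mean(axis=0)
    medoids.append(cluster[np.argmin(np.linalg.norm(cluster - mean, axis=1))])
medoids = np.array(medoids)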
def birch(self, x, threshold=0.01):
    """Fit a Birch model on x with the configured maximum number of clusters."""
    model = Birch(threshold=threshold, n_clusters=self.max_clusters)
    model.fit(x)
    return model
def train(feature, weights, cluster_num, feature_path=None, down=0.006, up=0.0085, bf_index=2):
    if feature_path is not None:
        feature = pd.read_csv(feature_path)
    X = []
    print("Training...\n")
    for i in range(len(feature[feature.columns[0]])):
        f = np.array(feature.iloc[i][1:])
        key = f[bf_index]
        if key > up:
            f_w = combine(feature.iloc[i][1:], weights)
            X.append(f_w)
    clf = Birch(n_clusters=cluster_num)
    clf = KMeans(n_clusters=cluster_num)  # note: this overwrites the Birch model, so KMeans is what actually trains
    clf.fit(X)
    pred = []
    for i in range(len(feature[feature.columns[0]])):
        f = np.array(feature.iloc[i][1:])
        key = f[bf_index]
        if key > up:
            p = clf.predict([combine(f, weights)])
            pred.append(p[0])
        if key < down:
            pred.append(cluster_num)
        if down < key < up:
            pred.append(cluster_num + 1)
    joblib.dump(clf, 'curve_model_Birch.pkl')
    print(pred)
    return pred
def add_cluster_column(train_df, test_df, n_clusters):
    train_df['source'] = 'train'
    test_df['source'] = 'test'
    total_rows = train_df.shape[0] + test_df.shape[0]
    data = pd.concat([train_df, test_df])
    # split the data between "around NYC" and "other locations"
    in_nyc = ((data.longitude > -74.05) & (data.longitude < -73.75) &
              (data.latitude > 40.4) & (data.latitude < 40.9))
    data_c = data[in_nyc]
    data_e = data[~in_nyc]
    # put it in matrix form
    coords = data_c[['latitude', 'longitude']].to_numpy()
    brc = Birch(branching_factor=100, n_clusters=n_clusters, threshold=0.01,
                compute_labels=True)
    brc.fit(coords)
    clusters = brc.predict(coords)
    data_c["num_cluster_" + str(n_clusters)] = clusters
    data_e["num_cluster_" + str(n_clusters)] = -1  # cluster label -1 for the non-NYC listings
    data = pd.concat([data_c, data_e])
    print('lost: {}'.format(total_rows
                            - data[data['source'] == 'train'].shape[0]
                            - data[data['source'] == 'test'].shape[0]))
    return data[data['source'] == 'train'], data[data['source'] == 'test']
def get_clustered_data(data_matrix, clustering_algorithm=model_constants.KMEANS,
                       distance_metric='euclidean', num_clusters=3):
    if clustering_algorithm.lower() == model_constants.AFFINITY_PROP:
        aff_prop = AffinityPropagation(affinity=distance_metric)
        aff_prop.fit(data_matrix)
        return aff_prop.labels_, aff_prop
    elif clustering_algorithm.lower() == model_constants.DBSCAN:
        dbscan = DBSCAN(metric=distance_metric)
        dbscan.fit(data_matrix)
        return dbscan.labels_, dbscan
    elif clustering_algorithm.lower() == model_constants.OPTICS:
        optics = OPTICS(metric=distance_metric)
        optics.fit(data_matrix)
        return optics.labels_, optics
    elif clustering_algorithm.lower() == model_constants.MEANSHIFT:
        mean_shift = MeanShift()
        mean_shift.fit(data_matrix)
        return mean_shift.labels_, mean_shift
    elif clustering_algorithm.lower() == model_constants.BIRCH:
        birch = Birch(n_clusters=num_clusters)
        birch.fit(data_matrix)
        return birch.labels_, birch
    elif clustering_algorithm.lower() == model_constants.AGGLOMERATIVE:
        agglomerative = AgglomerativeClustering(n_clusters=num_clusters,
                                                affinity=distance_metric)
        agglomerative.fit(data_matrix)
        return agglomerative.labels_, agglomerative
    else:
        kmeans = KMeans(n_clusters=num_clusters, random_state=42)
        kmeans.fit(data_matrix)
        return kmeans.labels_, kmeans
def birchcluster(X):
    brc = Birch()
    brc.fit(X)
    labels = brc.labels_
    centroids = brc.subcluster_centers_
    n_clusters = np.unique(labels).size
    print("n_clusters : %d" % n_clusters)
    return labels
def birch_algo(X, threshold=1.7, clustering=None):
    birch = Birch(threshold=threshold, n_clusters=clustering)
    t = time()
    birch.fit(X)
    time_ = time() - t
    labels = birch.labels_
    centroids = birch.subcluster_centers_
    n_clusters = np.unique(labels).size
    print(" The number of clusters is : %d" % n_clusters)
def birch_algo(X, threshold=1.7, clustering=None):
    birch = Birch(threshold=threshold, n_clusters=clustering)
    birch.fit(X)
    labels = birch.labels_
    centroids = birch.subcluster_centers_
    labels_unique = np.unique(labels)
    n_clusters = labels_unique.size
    print(" The number of clusters is : %d" % n_clusters)
    return labels, centroids, n_clusters
def birch(data, threshold, branching_factor):
    # bir = Birch(threshold=args['threshold'], branching_factor=int(args['branching_factor']))
    db = Birch(threshold=threshold, branching_factor=branching_factor)
    pred = db.fit_predict(data)  # fits the model and returns the labels in one step
    score = sil_score(data, pred)
    print(score)
    return db, pred, score
def cluster_birch(self):
    print("Starting Birch clustering")
    brc = Birch(branching_factor=10, n_clusters=40,
                threshold=self.cluster_distance, compute_labels=False)
    brc.fit(self.all_frames_xy)
    clusters = brc.predict(self.all_frames_xy)
    return clusters
def test_feature_names_out():
    """Check `get_feature_names_out` for `Birch`."""
    X, _ = make_blobs(n_samples=80, n_features=4, random_state=0)
    brc = Birch(n_clusters=4)
    brc.fit(X)
    n_clusters = brc.subcluster_centers_.shape[0]
    names_out = brc.get_feature_names_out()
    assert_array_equal([f"birch{i}" for i in range(n_clusters)], names_out)
def test_n_samples_leaves_roots():
    # Sanity check for the number of samples in leaves and roots
    X, y = make_blobs(n_samples=10)
    brc = Birch()
    brc.fit(X)
    n_samples_root = sum([sc.n_samples_ for sc in brc.root_.subclusters_])
    n_samples_leaves = sum([sc.n_samples_ for leaf in brc._get_leaves()
                            for sc in leaf.subclusters_])
    assert n_samples_leaves == X.shape[0]
    assert n_samples_root == X.shape[0]
def test_threshold():
    # Test that the leaf subclusters have a radius no greater than the threshold
    X, y = make_blobs(n_samples=80, centers=4)
    brc = Birch(threshold=0.5, n_clusters=None)
    brc.fit(X)
    check_threshold(brc, 0.5)

    brc = Birch(threshold=5.0, n_clusters=None)
    brc.fit(X)
    check_threshold(brc, 5.0)
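# For reference, a sketch of what the check_threshold helper used above can
# look like: walk the CF-tree's leaf linked list and assert that every leaf
# subcluster's radius stays within the threshold. Treat this as illustrative
# of the idea, not as the canonical helper.
def check_threshold(birch_instance, threshold):
    """Use the leaf linked list for traversal."""
    current_leaf = birch_instance.dummy_leaf_.next_leaf_
    while current_leaf:
        for sc in current_leaf.subclusters_:
            assert sc.radius <= threshold
        current_leaf = current_leaf.next_leaf_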
def update_k_clusters(attrname, old, new):
    k_cluster = int(k_slider.value)
    brc = Birch(branching_factor=50, n_clusters=k_cluster, threshold=0.5,
                compute_labels=True)
    brc.fit(tweet_vecs)
    predictions = brc.predict(tweet_vecs)
    colors = get_colors(predictions)
    brc_data.data = dict(colors=colors, x=tsne_vecs[:, 0], y=tsne_vecs[:, 1])
def BIRCH2_duplicate_removal(dataframe, threshold=0.8):
    # Note: this method now takes a dataframe as input
    if len(dataframe) < 2:
        # nothing to do
        return dataframe
    Crater_data = dataframe
    # extract axes
    x = Crater_data[0].values.tolist()
    y = Crater_data[1].values.tolist()
    r = Crater_data[2].values.tolist()
    p = Crater_data[3].values.tolist()
    Points = []
    X = np.column_stack((x, y))
    brc = Birch(branching_factor=50, n_clusters=int(threshold * len(x)),
                threshold=0.5, compute_labels=True)
    brc.fit(X)
    groups_pred = brc.predict(X)
    for c in set(groups_pred):
        idx = [i for i, e in enumerate(groups_pred) if e == c]
        Group_x = []
        Group_y = []
        Group_r = []
        Group_p = []
        index = []
        for i in idx:
            if i in range(0, len(x)):
                Group_x.append(x[i])
                Group_y.append(y[i])
                Group_r.append(r[i])
                Group_p.append(p[i])
                index.append(i)
        # after the group is defined, extract its elements from the lists
        Points.append([Group_x, Group_y, Group_r, Group_p])
    # now reduce each group to a single detection
    center_size = []
    for i, (Xs, Ys, Rr, Ps) in enumerate(Points):
        # we take the point with the best prediction confidence
        best_index = np.argmax(Ps)
        x_center = Xs[best_index]
        y_center = Ys[best_index]
        radius = Rr[best_index]
        prob = Ps[best_index]
        center_size += [[x_center, y_center, radius, prob]]
    return pd.DataFrame(center_size)
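# A small, self-contained illustration of the same deduplication idea:
# cluster near-identical (x, y) detections with Birch and keep the row with
# the highest confidence in each cluster. The data below is made up.
import numpy as np
import pandas as pd
from sklearn.cluster import Birch

detections = pd.DataFrame([[10.0, 10.1, 3.0, 0.7],
                           [10.1, 10.0, 3.2, 0.9],   # near-duplicate of the row above
                           [50.0, 40.0, 5.0, 0.8]])  # columns: x, y, r, confidence
labels = Birch(n_clusters=None, threshold=0.5).fit_predict(detections[[0, 1]].to_numpy())
deduplicated = detections.loc[detections.groupby(labels)[3].idxmax()]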
def compute_clusters(data: List) -> np.ndarray:
    print("--->Computing clusters")
    birch = Birch(branching_factor=50, n_clusters=5, threshold=0.3,
                  copy=True, compute_labels=True)
    birch.fit(data)
    predictions = np.array(birch.predict(data))
    return predictions
class BirchSklearn(AbstractClusteringAlgorithm):
    def __init__(self, **kwargs):
        from sklearn.cluster import Birch
        self.model = Birch(**kwargs)

    def fit(self, x: np.ndarray):
        self.model.fit(x)

    @property
    def labels_(self):
        return self.model.labels_
def skitleanBirch():
    data = pd.read_csv("soy_rock.csv", header=None)
    X = data.values.tolist()
    randomm = randint(5, 20)
    brc = Birch(branching_factor=randomm, n_clusters=4, threshold=0.1,
                compute_labels=True)
    brc.fit(X)
    pred = brc.predict(X)
    return pred
def birch(x, n_clusters=None, threshold=0.5, branching_factor=5):
    birch_model = Birch(threshold=threshold, n_clusters=n_clusters,
                        branching_factor=branching_factor)
    birch_model.fit(x)
    centroids = birch_model.subcluster_centers_
    c = birch_model.labels_
    k = len(centroids)
    return birch_model, (centroids, c, k)
def main():
    # remove sub folders
    removeSubFolders(path + algorithm + '\\')
    for file in os.listdir(path):
        if file.endswith("-d.txt"):
            text_file = open(path + file, 'r')
            ar = text_file.readline().split(' ')
            ar.remove('\n')
            if len(ar) > 0:
                # print(list(map(int, ar)))
                row = list(map(int, ar))
                data.append(row)
                fileNames.append(file)
                # print(row)
    # create np array
    npData = np.array(data)
    n_samples, n_features = npData.shape
    brc = Birch(branching_factor=50, n_clusters=n_digits, threshold=0.5, compute_labels=True)
    # kmeans = KMeans(init='random', n_clusters=n_digits, n_init=500)
    brc.fit(npData)
    list1 = brc.labels_
    list2 = fileNames
    print(brc.labels_)
    print(fileNames)
    list1, list2 = zip(*sorted(zip(list1, list2)))
    print(list1)
    print(list2)
    '''
    k = 0
    lim = len(list1) - 1
    for i in range(0, n_digits):
        while list1[k] == i:
            # want to copy these into folders
            copychar(list1[k], list2[k])
            print(list1[k], list2[k])
            k += 1
            if k == lim:
                break
    '''
    for i in range(0, len(list1)):
        print(list1[i], list2[i])
        copychar(list1[i], list2[i])
def runBrich(K_cluster, cluster_input):
    # clustering by topic-probability vector of each category
    t0 = time()
    bri = Birch(n_clusters=K_cluster)
    bri.fit(cluster_input)
    print("done in %0.3fs" % (time() - t0))
    with open('result/brich_cluster_' + str(K_cluster) + '.txt', 'w') as f:
        f.write("cluster_centers\n")
        f.write(str(bri.subcluster_centers_))
        f.write("\n==========\n")
        f.write("labels (sequence of cluster # which each input belongs to)\n")
        f.write(str(bri.labels_))
        f.write("\n==========\n")
        f.write("subcluster_labels\n")
        f.write(str(bri.subcluster_labels_))
        f.write("\n==========\n")
    return bri.labels_
def split_birch(self, branching_factor, threshold):
    # Extract dataset from files
    dataset = [f.dataset for f in self.files]
    # Initialize classifier
    classifier = Birch(branching_factor=branching_factor, n_clusters=None,
                       threshold=threshold)
    classifier.fit(dataset)
    # Get cluster index for each file
    index = classifier.predict(dataset)
    count = max(index) + 1
    # Create new clusters
    clusters = [Cluster(self.directory, self.name + '-' + str(i))
                for i in range(count)]
    for i in range(0, len(self.files), 1):
        clusters[index[i]].add_file(self.files[i])
    return clusters
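# Illustrative sketch of the same split pattern with plain lists instead of
# the project-specific Cluster/file objects above.
import numpy as np
from sklearn.cluster import Birch

vectors = np.random.default_rng(2).normal(size=(12, 4))
index = Birch(n_clusters=None, threshold=2.0).fit_predict(vectors)
groups = [[] for _ in range(index.max() + 1)]
for i, label in enumerate(index):
    groups[label].append(vectors[i])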
def build_model(df, cluster_type="kmeans", seed=1):
    if cluster_type == "birch":
        model = Birch(n_clusters=N_CLUSTERS)
        res = model.fit_predict(df)
    elif cluster_type == "minibatch":
        model = MiniBatchKMeans(n_clusters=N_CLUSTERS, random_state=seed)
        res = model.fit_predict(df)
    elif cluster_type == "em":
        model = mixture.GMM(n_components=N_CLUSTERS)
        model.fit(df)
        res = model.predict(df)
    elif cluster_type == 'lda':
        model = lda.LDA(n_topics=N_CLUSTERS, n_iter=1500, random_state=seed)
        data_to_cluster = np.array(df).astype(int)
        lda_res = model.fit_transform(data_to_cluster)
        res = []
        for i in lda_res:
            # for now - do hard clustering, take the highest probability
            res.append(i.argmax())
    else:
        model = KMeans(n_clusters=N_CLUSTERS, random_state=seed)
        res = model.fit_predict(df)
    df_array = np.array(df)
    # map each cluster label to its center; this assumes a KMeans-style model
    # that exposes cluster_centers_ and labels_
    dis_dict = {}
    for i in range(N_CLUSTERS):
        dis_dict[i] = model.cluster_centers_[i]
    all_dist = []
    for line_idx in range(len(df_array)):
        label = model.labels_[line_idx]
        dist = calc_distance(df_array[line_idx], dis_dict[label])
        all_dist.append(dist)
    df["distance_from_cluster"] = all_dist
    # clusters = model.labels_.tolist()
    # print("clusters are:", clusters)
    print(""">>>> model is: %s, # of clusters:%s, and %s"""
          % (cluster_type, N_CLUSTERS, Counter(res)))
    res = [str(i) for i in res]
    docs_clusteres = zip(df.index, res)
    return docs_clusteres
def test_birch_with_depot_calculation():
    points = points_from_file('tsps/berlin52.txt')
    matrix = load_matrix(points)
    X = [[p[1], p[2]] for p in points]
    est = Birch(n_clusters=3)
    est.fit(X)
    labels = est.labels_
    hl_matrix, clusters, G = load_matrices_from_labels(points, labels)
    depots, C = compute_depots(clusters, matrix, G, per_cluster=True)
    depots_actual, _ = compute_depots(clusters, matrix, G)
    cluster_optimal_cost, R, hl_route = clustered_tsp_solve(points, 3,
                                                            labels=labels,
                                                            depots=depots)
    cluster_optimal_cost += C
    print(depots_actual)
    print(R, C)
    for depot in depots_actual:
        for r in R:
            if r[1][0] == depot:
                for point in r[1]:
                    print(matrix.points[point])
                print('')
def obtainCodebook(self, sampled_x, x):
    print('Obtaining codebook using Birch from sklearn...')
    scaled_x_sampled = StandardScaler().fit_transform(sampled_x)
    scaled_x = StandardScaler().fit_transform(x)
    brc = BIRCH(branching_factor=self.branching_factor, n_clusters=self.nclusters,
                threshold=self.threshold, compute_labels=True)
    # obtain the codebook and the projections of the images on the codebook
    # (clusters of words)
    codebook = brc.fit(scaled_x_sampled)
    clusters = brc.predict(scaled_x)
    print('Clusters obtained.')
    return codebook, clusters
def obtainClusters(self, hist):
    print('Obtaining clusters using Birch from sklearn...')
    hist = np.array(hist).astype(float)
    scaled_vec = StandardScaler().fit_transform(hist)
    brc = BIRCH(branching_factor=self.branching_factor, n_clusters=self.nclusters,
                threshold=self.threshold, compute_labels=True)
    # obtain the projections of the histograms on the codebook (clusters of words)
    codebook = brc.fit(scaled_vec)
    clusters = brc.predict(scaled_vec)
    print('Clusters obtained.')
    return clusters
import numpy as np
from sklearn.cluster import Birch
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from itertools import cycle

# Generate random vectors to cluster
n_samples = 50
centers = [[0, 1], [4, -2], [-2, 2], [0, -1]]
X, _ = make_blobs(n_samples=n_samples, centers=centers, cluster_std=0.2)

# Create the Birch classifier and give it the vectors
brc = Birch(branching_factor=50, n_clusters=None, threshold=0.8, compute_labels=True)
brc.fit(X)

labels = brc.labels_
cluster_centers = brc.subcluster_centers_
labels_unique = np.unique(labels)
n_clusters_ = len(labels_unique)

# Plot the generated points, one color per cluster
plt.figure(1)
plt.clf()
colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
for k, col in zip(range(n_clusters_), colors):
    my_members = labels == k
    cluster_center = cluster_centers[k]
    plt.plot(X[my_members, 0], X[my_members, 1], col + '.')
plt.axis([-4, 12, -4, 12])
plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()
def cluster_junctions(juncs):
    birch_model = Birch(threshold=3, n_clusters=None)
    X = np.array(juncs)
    birch_model.fit(X)
    return birch_model.labels_
station_array = np.array(station_list)
dsp_array = np.array(dsp_list)

# extract the unique station names
stations = np.unique(station_array)
print(stations)

for sta in stations:
    events = event_array[station_array == sta, :]
    dsp_shortlist = dsp_array[station_array == sta]
    print(sta, events.shape, dsp_shortlist.shape)
    # cluster on events so as to compare dispersion curves for nearby events
    brc = Birch(branching_factor=50, n_clusters=None, threshold=dist,
                compute_labels=True)
    brc.fit(events)
    labels = brc.predict(events)
    print(np.max(labels))
    for lab in np.unique(labels):
        dsp_this_label_list = dsp_shortlist[labels == lab]
        cluster_name = os.path.join(dirname, "cluster_%s_%03d" % (sta, lab))
        plot_all_dsp(dsp_this_label_list, legend=False,
                     fname="%s_gvel.png" % cluster_name)
        plot_all_map(dsp_this_label_list, fname="%s_map.png" % cluster_name,
                     legend=False)
        f = open("%s_info.txt" % cluster_name, "w")
        for (dsp, dsp_dict) in dsp_this_label_list:
            f.write("%s %s %d %03d %02d %02d %.3f %.3f\n" % (
                dsp_dict["STA"], dsp_dict["COMP"], dsp_dict["YEAR"],