Ejemplos de KM en Python: usos de sklearn.cluster.KMeans importado con el alias KM

Ejemplo n.º 1

0

Mostrar archivo

Archivo: analysis2.py Proyecto: jj4192/MLp3

def km(tx, ty, rx, ry, add="", times=5):
    """Sweep k-means cluster counts, chart the error and pass labels to ``nn``.

    A two-cluster model fitted on ``ry`` supplies the reference labelling.
    For every cluster count in ``range(2, times)`` a model is fitted on
    ``tx`` and used to label ``rx``; each cluster is mapped to the rounded
    mean reference label of its members (0 for an empty cluster) and the
    mean squared difference to the reference labelling is recorded.

    The recorded errors are handed to ``plot``.  The cluster assignments of
    the last fitted model are appended as an extra column to ``tx`` and
    ``rx`` before calling ``nn``.
    """
    # Reference labelling that every candidate clustering is scored against.
    reference = KM(n_clusters=2)
    reference.fit(ry)
    truth = reference.predict(ry)

    errs = []
    for n_clusters in range(2, times):
        model = KM(n_clusters=n_clusters)
        model.fit(tx)
        test = model.predict(tx)
        result = model.predict(rx)

        # Group the positions of the rx samples by their assigned cluster.
        members = {label: [] for label in range(n_clusters)}
        for position, label in enumerate(result):
            members[label].append(position)

        # Majority-style vote: rounded mean reference label per cluster.
        mapper = {}
        for label in range(n_clusters):
            positions = members[label]
            if positions:
                mapper[label] = round(sum(truth[v] for v in positions)/float(len(positions)))
            else:
                mapper[label] = 0

        processed = [mapper[label] for label in result]
        errs.append(sum((processed-truth)**2) / float(len(ry)))

    axis_limits = [0, times, min(errs)-.1, max(errs)+.1]
    series = [range(2, times), errs, "ro"]
    plot(axis_limits, series, "Number of Clusters", "Error Rate", "KMeans clustering error", "KM"+add)

    # Cluster ids of the last model become one additional feature column.
    train_column = np.reshape(test, (test.size, 1))
    test_column = np.reshape(result, (result.size, 1))
    newtx = np.append(tx, train_column, 1)
    newrx = np.append(rx, test_column, 1)
    nn(newtx, ty, newrx, ry, add="onKM"+add)

Ejemplo n.º 2

0

Mostrar archivo

 def KMeansRatio(self):
     '''
     Show a two-cluster K-Means scatter plot of ``self.allCoord``.

     Y-axis: No Reaction
     X-axis: Reaction

     Nothing happens unless ``self.authenticated`` is truthy.
     '''
     if not self.authenticated:
         return
     from sklearn.cluster import KMeans as KM
     model = KM(n_clusters=2)
     points = self.allCoord
     assignment = model.fit_predict(points)
     # Cluster 0 is drawn green, cluster 1 red.
     for cluster_id, colour in ((0, "green"), (1, "red")):
         mask = assignment == cluster_id
         plt.scatter(points[mask, 0], points[mask, 1], c=colour)
     centres = model.cluster_centers_
     plt.scatter(centres[:, 0], centres[:, 1], c="black", marker="*")
     # Label every point with the entry of self.labels at the same position.
     for position, text in enumerate(self.labels):
         plt.annotate(text, (points[position][0], points[position][1]))
     plt.ylabel("NO REACTION")
     plt.xlabel("REACTION")
     plt.annotate("NO INFLAMMATION", centres[0])
     plt.annotate("CAUSES INFLAMMATION", centres[1])
     plt.title("K-Means: Reaction, No Reaction")
     plt.show()

Ejemplo n.º 3

0

Mostrar archivo

 def KMeansPartPercent(self):
     '''
     Show a two-cluster K-Means scatter plot of ``self.partPercent``.

     Y-axis: % Reactions
     X-axis: # Reactions

     Nothing happens unless ``self.authenticated`` is truthy.
     '''
     if not self.authenticated:
         return
     from sklearn.cluster import KMeans as KM
     model = KM(n_clusters=2)
     points = self.partPercent
     assignment = model.fit_predict(points)
     # Cluster 0 is drawn green, cluster 1 red.
     for cluster_id, colour in ((0, "green"), (1, "red")):
         mask = assignment == cluster_id
         plt.scatter(points[mask, 0], points[mask, 1], c=colour)
     centres = model.cluster_centers_
     plt.scatter(centres[:, 0], centres[:, 1], c="black", marker="*")
     # Label every point with the entry of self.labels at the same position.
     for position, text in enumerate(self.labels):
         plt.annotate(text, (points[position][0], points[position][1]))
     plt.ylabel("PERCENT")
     plt.xlabel("NUM OF INFLAMS")
     plt.annotate("NO INFLAMMATION", centres[0])
     plt.annotate("CAUSES INFLAMMATION", centres[1])
     plt.title("K-Means: # Reactions, % Reactions")
     plt.show()

Ejemplo n.º 4

0

Mostrar archivo

Archivo: color_card.py Proyecto: Xinrui-Zhang/ProPainting

def extractColors(stream, maxColor):
    """Cluster the pixels of ``stream`` and return ``getrgbAndHex`` of the centres.

    ``stream`` is converted to an array that must unpack to a three-element
    shape ``(h, w, d)``.  It is flattened to ``h * w`` rows of ``d`` values and
    clustered into ``maxColor`` groups.  The cluster centres, cast to
    ``uint8`` and turned into nested lists, are handed to ``getrgbAndHex``.
    """
    pixels = np.array(stream)
    height, width, depth = pixels.shape
    flat = np.reshape(pixels, (height * width, depth))

    model = KM(n_clusters=maxColor)
    model.fit(flat)

    centres = np.array(model.cluster_centers_, dtype=np.uint8)
    return getrgbAndHex(centres.tolist())

Ejemplo n.º 5

0

Mostrar archivo

Archivo: kmeans.py Proyecto: mrjakobdk/fraud_detector

    def cluster(self, inputs):
        """Fit K-Means on ``inputs`` and return the cluster index of each sample.

        The model uses ``self.num_clusters`` clusters and ``self.cluster_init``
        as the initialisation, with ``max_iter=1000`` and ``tol=1e-6``.
        Progress and the elapsed wall-clock time are reported through
        ``helper._print``.

        Returns:
            The result of ``fit_predict(inputs)``.
        """
        started = time()
        helper._print('Training clusters (KMeans)...')
        kmeans = KM(n_clusters=self.num_clusters, init=self.cluster_init, max_iter=1000, tol=0.000001)
        cluster_pred = kmeans.fit_predict(inputs)
        # Read the clock once so minutes and seconds describe the same instant.
        minutes, seconds = divmod(int(time() - started), 60)
        helper._print(f'Done training clusters. Finished in {minutes} minutes and {seconds} seconds!')

        return cluster_pred

Ejemplo n.º 6

0

Mostrar archivo

 def __init__(self, data, k, t, iter, maxE):
     """Fit a K-Means model on ``data`` and keep its labels and centres.

     The arguments are forwarded to ``KM`` by keyword, using the mapping the
     former positional call produced: ``k`` -> ``n_clusters``, ``t`` ->
     ``init``, ``iter`` -> ``n_init``, ``maxE`` -> ``max_iter``.

     Sets ``self.kmean`` (the fitted model), ``self.labels`` and
     ``self.clusters`` (the cluster centres).
     """
     # Recent scikit-learn releases accept only ``n_clusters`` positionally,
     # so every further argument is passed by keyword.
     # NOTE(review): the names ``iter``/``maxE`` suggest ``max_iter``/``tol``
     # may have been intended instead of ``n_init``/``max_iter`` -- confirm
     # with the callers before changing the mapping.
     # ``t`` is passed through unchanged; the earlier 0 -> 'k-means++' /
     # otherwise 'random' translation stays disabled, as in the original.
     self.kmean = KM(n_clusters=k, init=t, n_init=iter, max_iter=maxE).fit(np.array(data))
     self.labels = self.kmean.labels_
     self.clusters = self.kmean.cluster_centers_

Ejemplo n.º 7

0

Mostrar archivo

 def __init__(self, pixData, maxColor, useSklearn=True):
     """Store the flattened pixel data and create the clustering backend.

     ``pixData`` must have a three-element shape ``(h, w, d)``; it is kept
     as ``self.pixData`` reshaped to ``(h * w, d)``.  ``self._KMeans`` is a
     ``KM`` instance when ``useSklearn`` is truthy, otherwise a ``KMDiy``
     instance, both created with ``n_clusters=maxColor``.
     """
     super(KMeans, self).__init__()
     height, width, depth = pixData.shape
     self.pixData = np.reshape(pixData, (height * width, depth))
     self.maxColor = maxColor
     backend = KM if useSklearn else KMDiy
     self._KMeans = backend(n_clusters=maxColor)

Ejemplo n.º 8

0

Mostrar archivo

Archivo: Corpusobject.py Proyecto: ar667/paston

    def do_KM(self, letter, k, iter=200, dump=True, parr=-2):
        """Run K-Means with ``k`` clusters on the vectors selected by ``letter``.

        Args:
            letter: a ``str`` (resolved with ``self.find_by_start``) or a
                ``list`` (resolved with ``self.find_by_list``).  Any other
                type is ignored and nothing is stored.
            k: number of clusters.
            iter: forwarded to ``KM`` as ``max_iter``.
            dump: when truthy, the labels are also written with ``self.dump``
                under a timestamped filename ending in ``-k<k>``.
            parr: forwarded to ``KM`` as ``n_jobs``.

        Side effects:
            Stores the selected labels in ``self.km_labels`` and the cluster
            ids in ``self.km_clusters``.  For list input, additionally fills
            ``self.predicted_clusters``, ``self.pred_prep`` and
            ``self.predicted``.
        """
        if isinstance(letter, str):
            l, v = self.find_by_start(letter)
        elif isinstance(letter, list):
            l, v = self.find_by_list(letter)
        else:
            # Unsupported selector types were silently ignored before too.
            return

        # NOTE(review): ``n_jobs`` is no longer accepted by KMeans in recent
        # scikit-learn releases -- confirm the pinned version before upgrading.
        km_obj = KM(n_clusters=k, max_iter=iter, n_jobs=parr)
        results = km_obj.fit(v)

        if isinstance(letter, list):
            # Built without a ``for v, k in ...`` loop: that loop used to
            # rebind ``k``, so the dump filename below ended in the last
            # cluster label instead of the requested cluster count.
            self.predicted_clusters = dict(zip(l, results.labels_))
            self.pred_prep = sorted(self.predicted_clusters.items(),
                                    key=itemgetter(0))
            self.predicted = [cluster for _, cluster in self.pred_prep]

        if dump:
            filename = DT.now().strftime(
                '%d%m%y-%H%M%S') + self.generate_filename() + '-k' + str(k)
            self.dump(l, results.labels_, filename)

        self.km_labels = l
        self.km_clusters = results.labels_
        print('Stored KM output in self.km_labels, self.km_clusters')

Ejemplo n.º 9

0

Mostrar archivo

Archivo: KMeans.py Proyecto: danielrbk/Discretisation

 def set_bin_ranges_for_property(
         self, property_to_entities: Dict[int, Set[Entity]],
         class_to_entities: Dict[int, Set[Entity]],
         property_to_timestamps: Dict[int,
                                      List[TimeStamp]], property_id: int):
     """Return the sorted K-Means centroids of one property's values.

     If ``property_to_timestamps`` is empty it is first populated through
     ``self.load_property_to_timestamps``.  The ``value`` of every timestamp
     stored under ``property_id`` is clustered into ``self.bin_count - 1``
     groups, and the first coordinate of each cluster centre is returned in
     ascending order.  ``property_to_entities`` and ``class_to_entities``
     are not used here.
     """
     if not property_to_timestamps:
         self.load_property_to_timestamps(property_to_timestamps,
                                          property_id)
     stamps = property_to_timestamps[property_id]
     # One column, one row per timestamp value.
     column = np.array([stamp.value for stamp in stamps]).reshape(-1, 1)
     model = KM(n_clusters=self.bin_count - 1)
     model.fit(column)
     cutpoints = [centre[0] for centre in model.cluster_centers_]
     cutpoints.sort()
     return cutpoints

Ejemplo n.º 10

0

Mostrar archivo

Archivo: cluster.py Proyecto: hatooku/synapti-vision

def findClusterCenters(data, n_clusters=8):
    """Cluster the non-zero positions of ``data`` and return the centres.

    Args:
        data: array whose non-zero entries are clustered.  ``np.nonzero``
            must yield exactly two index arrays (one per axis of a 2-D
            input); otherwise nothing is computed.
        n_clusters: number of K-Means clusters, and also the minimum number
            of non-zero entries required.  Defaults to 8, the value that
            was previously hard-coded.

    Returns:
        The cluster centres with their two columns swapped (``np.fliplr``),
        i.e. second-axis index first, or ``None`` when the input does not
        have two index axes or has fewer than ``n_clusters`` non-zero
        entries.
    """
    # prepare data
    nonzero_xy = np.nonzero(data)
    if len(nonzero_xy) != 2:
        return None
    # K-Means cannot form more clusters than there are points.
    if nonzero_xy[0].size < n_clusters:
        return None

    processed_data = np.vstack((nonzero_xy[0], nonzero_xy[1])).transpose()

    # K-Means algorithm
    kmeans = KM(n_clusters=n_clusters, max_iter=20, n_init=5)
    kmeans.fit(processed_data)
    return np.fliplr(kmeans.cluster_centers_)

Ejemplo n.º 11

0

Mostrar archivo

Archivo: understanding.py Proyecto: GenericP3rson/InflaTracker-CAC-Challenge

 def KMeans(self):
     """Save a two-cluster K-Means scatter of ``self.allCoord`` under ``static/``.

     Returns early when ``self.printPoints()`` yields a falsy value.
     Otherwise the points, their cluster assignment and the label/point
     counts are printed, the figure is saved to ``"static/" + NAME`` and
     ``self.src`` is set to ``NAME``.
     """
     if not self.printPoints():
         return
     model = KM(n_clusters=2)
     points = self.allCoord
     assignment = model.fit_predict(points)
     print(points)
     print(assignment)
     # Cluster 0 is drawn green, cluster 1 red.
     for cluster_id, colour in ((0, "green"), (1, "red")):
         mask = assignment == cluster_id
         plt.scatter(points[mask, 0], points[mask, 1], c=colour)
     centres = model.cluster_centers_
     plt.scatter(centres[:, 0], centres[:, 1], c="black", marker="*")
     print(len(self.labels), len(points))
     for position, text in enumerate(self.labels):
         plt.annotate(text, (points[position][0], points[position][1]))
     plt.annotate("NO INFLAMMATION", centres[0])
     plt.annotate("CAUSES INFLAMMATION", centres[1])
     plt.savefig("static/" + NAME)
     self.src = NAME

Ejemplo n.º 12

0

Mostrar archivo

    def KMeansPercentTotal(self):
        '''
        Write a two-cluster K-Means scatter of ``self.percentTotal`` to HTML.

        Y-axis: % Reactions
        X-axis: # Observations

        Nothing happens unless ``self.authenticated`` is truthy.  The figure
        is rendered to PNG in memory, base64-encoded and written as a single
        ``<img>`` tag to ``KMeansPercentTotal.html``.
        '''
        if not self.authenticated:
            return
        from sklearn.cluster import KMeans as KM
        model = KM(n_clusters=2)
        fig = plt.figure()
        points = self.percentTotal
        assignment = model.fit_predict(points)
        # Cluster 0 is drawn green, cluster 1 red.
        for cluster_id, colour in ((0, "green"), (1, "red")):
            mask = assignment == cluster_id
            plt.scatter(points[mask, 0], points[mask, 1], c=colour)
        centres = model.cluster_centers_
        plt.scatter(centres[:, 0], centres[:, 1], c="black", marker="*")
        for position, text in enumerate(self.labels):
            plt.annotate(text, (points[position][0], points[position][1]))
        plt.ylabel("PERCENT")
        plt.xlabel("TOTAL")
        plt.annotate("NO INFLAMMATION", centres[0])
        plt.annotate("CAUSES INFLAMMATION", centres[1])
        plt.title("K-Means: # Observations, % Reactions")

        # Render into memory rather than to an image file on disk.
        buffer = BytesIO()
        fig.savefig(buffer, format='png')
        payload = base64.b64encode(buffer.getvalue()).decode("utf-8")
        html = '<img src=\'data:image/png;base64,{}\'>'.format(payload)

        with open('KMeansPercentTotal.html', 'w') as f:
            f.write(html)

Ejemplo n.º 13

0

Mostrar archivo

def ttsf(ID, CONTENTS):
    """Return the IDs whose text lands in the same cluster as the first text.

    Every entry of ``CONTENTS`` is segmented with ``jieba.cut`` and joined
    with spaces, the segmented texts are TF-IDF vectorised, and the dense
    matrix is clustered into three K-Means groups.  The entries of ``ID``
    at positions 1 and up whose cluster equals that of position 0 are
    collected, printed and returned.
    """
    length = len(ID)
    segmented = [' '.join(jieba.cut(text, cut_all=False)) for text in CONTENTS]
    dense = TfidfVectorizer().fit_transform(segmented).toarray()
    assignment = KM(3).fit_predict(dense)
    # Position 0 is the reference document and is never reported itself.
    results = [
        ID[position] for position in range(1, length)
        if assignment[position] == assignment[0]
    ]
    print(results)
    return results

Ejemplo n.º 14

0

Mostrar archivo

def passl_local_graph_partial(site, loc_param_indices, params):
    """Build a site's local affinity graph and K-Means partition in ``site.buff``.

    ``params`` unpacks to ``(K, rbf_sigma, local_graph_index, n_cluster,
    centers_index, point_cluster_index, inter_graph_index,
    member_id_index)``.  Using the data at
    ``site.buff[loc_param_indices[0]]`` the function stores:

    * at ``local_graph_index``: the ``K + 1`` nearest-neighbour distance
      graph with weights ``exp(-d**2 / rbf_sigma)`` and the diagonal zeroed;
    * at ``point_cluster_index``: the cluster id of every point;
    * at ``centers_index``: the cluster centres;
    * at ``inter_graph_index``: an empty dict;
    * at ``member_id_index``: one index array per cluster.
    """
    X = site.buff[loc_param_indices[0]]
    (K, rbf_sigma, local_graph_index, n_cluster, centers_index,
     point_cluster_index, inter_graph_index, member_id_index) = params

    neighbours = NN(K + 1, None, metric='euclidean').fit(X)
    graph = neighbours.kneighbors_graph(neighbours._fit_X, K + 1, mode='distance')
    # Turn distances into affinities, then drop the self-loops.
    graph.data = np.exp(-graph.data**2 / rbf_sigma)
    graph[np.diag_indices(graph.shape[0])] = 0
    site.buff[local_graph_index] = graph

    clusterer = KM(n_cluster)
    point_cluster = clusterer.fit_predict(X)
    site.buff[point_cluster_index] = point_cluster
    site.buff[centers_index] = clusterer.cluster_centers_
    site.buff[inter_graph_index] = {}
    site.buff[member_id_index] = [
        np.where(point_cluster == cluster_id)[0]
        for cluster_id in range(n_cluster)
    ]

Ejemplo n.º 15

0

Mostrar archivo

Archivo: kmeans.py Proyecto: parsareal/News-Search-Engine

    def cluster_list_k(self, k_list, data):
        """Run K-Means for every ``k`` in ``k_list`` and persist the results.

        ``data`` is stacked into one CSR matrix.  For each ``k`` a model with
        ``n_init=1`` and ``max_iter=30`` is fitted; its labels, centres and
        inertia are appended to ``self.labels_list``,
        ``self.centroids_list`` and ``self.RSS_list``.  ``self.k_index`` is
        reset to 0.

        The inertia curve is plotted, saved to ``./cluster_rss`` and shown,
        and the four result lists are written with ``np.save`` using the
        prefixes ``index``, ``label``, ``rss`` and ``cent`` followed by
        ``file_post``.
        """
        data = sp.vstack(data, format='csr')

        self.k_index = 0
        self.labels_list = []
        self.RSS_list = []
        self.centroids_list = []
        fitted_ks = []
        for n_clusters in k_list:
            # Progress marker; the line is intentionally left open.
            print('\n' + str(n_clusters) + ":", end=" ")
            model = KM(n_clusters, n_init=1, max_iter=30)
            model.fit(data)
            self.labels_list.append(np.array(model.labels_))
            self.centroids_list.append(model.cluster_centers_)
            self.RSS_list.append(model.inertia_)
            fitted_ks.append(n_clusters)

        plt.plot(fitted_ks, self.RSS_list)
        plt.savefig('./cluster_rss')
        plt.show()

        outputs = (
            ("index", [self.k_index]),
            ("label", self.labels_list),
            ("rss", self.RSS_list),
            ("cent", self.centroids_list),
        )
        for prefix, payload in outputs:
            np.save(prefix + file_post, payload)

Ejemplo n.º 16

0

Mostrar archivo

def em(tx, ty, rx, ry, reduced_data, add="", times=5, dataset="", alg=""):
    """Run the EM clustering experiment and draw its diagnostic figures.

    Steps, in order:

    1. Fit ``EM`` with ``times`` components on ``reduced_data`` and show the
       predicted component over a mesh covering its first two columns.
    2. Fit ``EM`` with ``times`` components on ``tx``, label ``tx`` and
       ``rx``, and pass those labels to ``myNN``.
    3. For every component count in ``range(2, times)`` fit ``EM`` on
       ``tx``, label ``rx`` and record the adjusted Rand, V-measure,
       Fowlkes-Mallows and homogeneity scores against ``ry``; plot them and
       save the figure as ``"EMMetrics" + dataset + "_" + alg + ".png"``.
    4. Fit a two-cluster ``KM`` on ``reduced_data`` and show its decision
       regions with the centroids marked.

    ``ry`` must provide ``reshape``/``ravel``; ``add`` is not used.
    Nothing is returned.
    """
    clf = EM(n_components=times)
    clf.fit(reduced_data)
    # Step size of the mesh. Decrease to increase the quality of the VQ.
    h = .02     # point in the mesh [x_min, x_max]x[y_min, y_max].
    x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
    y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.clf()
    plt.imshow(Z, interpolation='nearest',
               extent=(xx.min(), xx.max(), yy.min(), yy.max()),
               cmap=plt.cm.Paired,
               aspect='auto', origin='lower')

    plt.plot(reduced_data[:, 0], reduced_data[:, 1], 'k.', markersize=2)
    plt.title(dataset + ': EM clustering (' + alg + '-reduced data)')
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.xticks(())
    plt.yticks(())
    plt.show()

    clf = EM(n_components=times)
    clf.fit(tx)  #fit it to our data
    test = clf.predict(tx)
    result = clf.predict(rx)
    # NOTE(review): this reference model is overwritten before it is used;
    # it is kept so the sequence of fits (and any shared random state they
    # consume) stays exactly as before.
    checker = EM(n_components=times)
    ry = ry.reshape(-1,1)
    checker.fit(ry)
    truth = checker.predict(ry)
    # The network is trained on the raw component labels.
    myNN(test, ty, result, ry, alg="EM_"+alg)
    errs = []
    scores = []
    # this is what we will compare to
    checker = EM(n_components=2)
    ry = ry.reshape(-1,1)
    checker.fit(ry)
    truth = checker.predict(ry)
    adj_rand = []
    v_meas = []
    mutual_info = []
    adj_mutual_info = []
    # so we do this a bunch of times
    for i in range(2,times):
        clusters = {x:[] for x in range(i)}
        # create a clusterer
        clf = EM(n_components=i)
        clf.fit(tx)  #fit it to our data
        test = clf.predict(tx)
        result = clf.predict(rx)  # and test it on the testing set
        for index, val in enumerate(result):
            clusters[val].append(index)
        # Map each component to the rounded mean reference label of its members.
        mapper = {x: round(sum(truth[v] for v in clusters[x])/float(len(clusters[x]))) if clusters[x] else 0 for x in range(i)}
        processed = [mapper[val] for val in result]
        errs.append(sum((processed-truth)**2) / float(len(ry)))
        scores.append(clf.score(tx, ty))
        adj_rand.append(metrics.adjusted_rand_score(ry.ravel(), result))
        v_meas.append(metrics.v_measure_score(ry.ravel(), result))
        # The list names are historical: they hold the Fowlkes-Mallows and
        # homogeneity scores, as the plot legend below states.
        mutual_info.append(metrics.fowlkes_mallows_score(ry.ravel(), result))
        adj_mutual_info.append(metrics.homogeneity_score(ry.ravel(), result))

    # other metrics
    plt.figure()
    plt.title(dataset+": EM Clustering measures - "+alg)
    plt.xlabel('Number of clusters')
    plt.ylabel('Score value')
    plt.plot(range(2,times),adj_rand, label="Adjusted Random")
    plt.plot(range(2,times),v_meas, label="V Measure")
    plt.plot(range(2,times),mutual_info, label = "Fowlkes Mallows Score")
    plt.plot(range(2,times),adj_mutual_info, label="Homogeneity Score")
    plt.legend()
    plt.savefig("EMMetrics"+dataset+"_"+alg+".png")

    kmeans = KM(n_clusters=2)
    kmeans.fit(reduced_data)

    # Step size of the mesh. Decrease to increase the quality of the VQ.
    h = .02     # point in the mesh [x_min, x_max]x[y_min, y_max].

    # Plot the decision boundary. For that, we will assign a color to each
    x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
    y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Obtain labels for each point in mesh. Use last trained model.
    Z = kmeans.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.clf()
    plt.imshow(Z, interpolation='nearest',
               extent=(xx.min(), xx.max(), yy.min(), yy.max()),
               cmap=plt.cm.Paired,
               aspect='auto', origin='lower')

    plt.plot(reduced_data[:, 0], reduced_data[:, 1], 'k.', markersize=2)
    # Plot the centroids as a white X
    centroids = kmeans.cluster_centers_
    plt.scatter(centroids[:, 0], centroids[:, 1],
                marker='x', s=169, linewidths=3,
                color='w', zorder=10)
    plt.title(dataset + ': EM clustering (' + alg + '-reduced data)\n'
              'Centroids are marked with a white cross')
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.xticks(())
    plt.yticks(())
    plt.show()

Ejemplo n.º 17

0

Mostrar archivo

                                      categories=categories,
                                      shuffle=True,
                                      random_state=None,
                                      remove=('headers', 'footers'))
fp = open('Result/metrics.txt', 'w+')

# Question 1
data = dataset.data
target = dataset.target
data_matrix = tf_idf_matrix(data)
fp.write('tf_idf_matrix shape: {}\n'.format(data_matrix.shape))

# Question 2
target = [1 if i > 3 else 0 for i in target]

cluster = KM(n_clusters=2, random_state=0, max_iter=1000, n_init=30)
cluster.fit(data_matrix)
predict_target = cluster.labels_
fp.write("Contingency table: \n{}\n".format(
    contingency_matrix(target, predict_target)))

# Question 3
metrics_dict = {
    "homogeneity_score": homogeneity_score,
    'completeness_score': completeness_score,
    "v_measure_score": v_measure_score,
    "adjusted_rand_score": adjusted_rand_score,
    "adjusted_mutual_info_score": adjusted_mutual_info_score
}

for metrics_name in metrics_dict.keys():

Ejemplo n.º 18

0

Mostrar archivo

# SCALE THE DATA
scaler = SS()
scaler.fit(df)
scaled_data = scaler.transform(df)

# PCA: project the scaled data onto two components for plotting
pca = PCA(n_components=2)
pca.fit(scaled_data)
x_pca = pca.transform(scaled_data)

# K MEANS - MAKES A LOT OF SENSE TO APPLY THIS ON TOP OF PCA
# NOTE(review): the model below is fitted on the raw ``df``, not on
# ``scaled_data`` or ``x_pca`` -- confirm which input was intended.

from sklearn.cluster import KMeans as KM

model = KM(n_clusters=2)
model.fit(df)
fig, axes = plt.subplots(1, 2, figsize=(10, 6))
fig.suptitle('Breast Cancer', fontsize=20)

# Each panel is titled after the data that colours it: the titles used to
# be swapped, labelling the k-means clusters 'Diagnosis' and the target 'KMC'.
axes[0].set_title('KMC')
axes[0].scatter(
    x_pca[:, 0],
    x_pca[:, 1],
    c=model.labels_,
    cmap='coolwarm',
)
axes[1].set_title('Diagnosis')
axes[1].scatter(x_pca[:, 0], x_pca[:, 1], c=cancer['target'], cmap='coolwarm')

plt.show()

Ejemplo n.º 19

0

Mostrar archivo

def km(tx, ty, rx, ry, reduced_data, add="", times=5, dataset="", alg=""):
    """Run the K-Means clustering experiment and draw its diagnostic figures.

    In order, the function: sweeps cluster counts ``range(2, times)`` on
    ``tx``/``rx`` recording agreement scores against ``ry`` and the model
    inertia; saves both curves as PNG files; plots a 4-cluster model fitted
    on ``X``; shows a 3-D scatter of the 3-cluster distance transform of
    ``tx``; re-derives ``reduced_data`` with a 2-component PCA of ``tx`` and
    shows the 4-cluster decision regions; prints a mapped error for a
    4-cluster model; and finally hands cluster labels to ``myNN`` and ``nn``.

    NOTE(review): ``X`` is neither a parameter nor assigned in this
    function, so it must exist at module level -- confirm.
    NOTE(review): the ``reduced_data`` argument is overwritten before it is
    ever read.
    """
    processed = []
    adj_rand = []
    v_meas = []
    mutual_info = []
    adj_mutual_info = []
    sil = []
    inertia = []
    # Sweep the cluster count; test/result keep the labels of the last model.
    for i in range(2,times):
        clusters = {x:[] for x in range(i)}
        clf = KM(n_clusters=i)
        clf.fit(tx)
        test = clf.predict(tx)
        result = clf.predict(rx)

        adj_rand.append(metrics.adjusted_rand_score(ry.ravel(), result))
        v_meas.append(metrics.v_measure_score(ry.ravel(), result))
        # The list names are historical: they hold the Fowlkes-Mallows and
        # homogeneity scores, as the plot legend below states.
        mutual_info.append(metrics.fowlkes_mallows_score(ry.ravel(), result))
        adj_mutual_info.append(metrics.homogeneity_score(ry.ravel(), result))
        inertia.append(clf.inertia_)
    plots = [adj_rand, v_meas, mutual_info, adj_mutual_info]
    plt.title(dataset+": KM Clustering measures - "+alg)
    plt.xlabel('Number of clusters')
    plt.ylabel('Score value')
    plt.plot(range(2,times), adj_rand, label="Adjusted Random")
    plt.plot(range(2,times), v_meas, label="V Measure")
    plt.plot(range(2,times), mutual_info, label = "Fowlkes Mallows Score")
    plt.plot(range(2,times), adj_mutual_info, label="Homogeneity Score")
    plt.legend()
    plt.ylim(ymin=-0.05, ymax=1.05)
    plt.savefig("KMeansMetric"+dataset+"_"+alg+".png")

    plt.figure()
    plt.title(dataset+": KMeans Inertia - "+alg)
    plt.xlabel('Number of clusters')
    plt.ylabel('Inertia')
    plt.plot(range(2,times), inertia)
    plt.savefig("KM-Inertia-"+dataset+"-"+alg+".png")

    # Labels of the last sweep model as an extra feature column.
    # These two arrays are rebound near the end of the function before use.
    td = np.reshape(test, (test.size, 1))
    rd = np.reshape(result, (result.size, 1))
    newtx = np.append(tx, td, 1)
    newrx = np.append(rx, rd, 1)

    h = .02     # point in the mesh [x_min, x_max]x[y_min, y_max].
        # Plot the decision boundary. For that, we will assign a color to each
    # NOTE(review): X is not defined in this function -- see the docstring.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    best_clusterer = KM(n_clusters=4)
    best_clusterer.fit(X)
    Z = best_clusterer.predict(X)
    print(len(Z))
    print(len(X))
    plt.figure(1)
    plt.clf()
    colors = ['r', 'g', 'b', 'y', 'c', 'm','#eeefff', '#317c15', '#4479b4', '#6b2b9c',
'#63133b', '#6c0d22', '#0c7c8c', '#67c50e','#c5670e', '#946c47', '#58902a', '#54b4e4',
'#e4549e', '#2b2e85'  ]
    # One plot call per point, coloured by its cluster id.
    for i in range(0, len(X)):
        plt.plot(X[i][0], X[i][1], marker='.', color=colors[Z[i]], markersize=2)
    #plt.plot(X[:, 0], X[:, 1], 'k.', markersize=2)
    # Plot the centroids as a black X
    centroids = best_clusterer.cluster_centers_
    plt.scatter(centroids[:, 0], centroids[:, 1],
                marker='x', s=169, linewidths=3,
                color='k', zorder=10)
    plt.title('K-means Clusters ' + alg)
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.xticks(())
    plt.yticks(())
    plt.show()
    # 3-D view: each axis is one column of the 3-cluster model's transform of tx.
    kmeans = KM(n_clusters=3)
    kmeans.fit(tx)
    result=pd.DataFrame(kmeans.transform(tx), columns=['KM%i' % i for i in range(3)])
    my_color = pd.Series(ty).astype('category').cat.codes
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(result['KM0'], result['KM1'], result['KM2'], c=my_color, cmap="Dark2_r", s=60)
    plt.show()
    # The reduced_data argument is replaced by a fresh 2-component PCA of tx.
    reduced_data = PCA(n_components=2).fit_transform(tx)
    kmeans = KM(n_clusters=4)
    kmeans.fit(reduced_data)
    # Step size of the mesh. Decrease to increase the quality of the VQ.
    h = .02     # point in the mesh [x_min, x_max]x[y_min, y_max].
    x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
    y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = kmeans.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.clf()
    plt.imshow(Z, interpolation='nearest',
               extent=(xx.min(), xx.max(), yy.min(), yy.max()),
               cmap=plt.cm.Paired,
               aspect='auto', origin='lower')

    plt.plot(reduced_data[:, 0], reduced_data[:, 1], 'k.', markersize=2)
    centroids = kmeans.cluster_centers_
    plt.scatter(centroids[:, 0], centroids[:, 1],
                marker='x', s=169, linewidths=3,
                color='w', zorder=10)
    plt.title(dataset + ': K-means clustering (' + alg + '-reduced data)\n'
              'Centroids are marked with a white cross')
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.xticks(())
    plt.yticks(())
    plt.show()

    # Mapped error of a 4-cluster model against a 2-cluster labelling of ry.
    checker = KM(n_clusters=2)
    ry = ry.reshape(-1,1)
    checker.fit(ry)
    truth = checker.predict(ry)
    clusters = {x:[] for x in range(4)}
    clf = KM(n_clusters=4)
    clf.fit(tx)  #fit it to our data
    test = clf.predict(tx)
    result = clf.predict(rx)  # and test it on the testing set
    for index, val in enumerate(result):
        clusters[val].append(index)
    # Each cluster maps to the rounded mean reference label of its members.
    mapper = {x: round(sum(truth[v] for v in clusters[x])/float(len(clusters[x]))) if clusters[x] else 0 for x in range(4)}
    processed = [mapper[val] for val in result]
    print(sum((processed-truth)**2) / float(len(ry)))
    clf = KM(n_clusters=times)
    clf.fit(tx)  #fit it to our data
    test = clf.predict(tx)
    result = clf.predict(rx)
    checker = KM(n_clusters=times)
    ry = ry.reshape(-1,1)
    checker.fit(ry)
    truth = checker.predict(ry)
    td = np.reshape(test, (test.size, 1))
    rd = np.reshape(result, (result.size, 1))
    # NOTE(review): np.append is documented to take both ``arr`` and
    # ``values``; with a single argument these two calls look like they
    # raise TypeError -- confirm the intended inputs (the earlier
    # ``np.append(tx, td, 1)`` form may be what was meant).
    newtx = np.append(td)
    newrx = np.append(rd)
    myNN(test, ty, result, ry, alg="KM_"+alg)
    nn(newtx, ty, newrx, ry, add="onKM"+add)

Ejemplo n.º 20

0

Mostrar archivo

            best_model = model
            best_error = model.error
        else:
            if best_error > model.error:
                best_error = model.error
                best_model = model

    label = best_model.predict(X)
    center = best_model.center

    ####### plot ######################
    plt.figure(1)
    plt.subplot(121)
    plt.title('my KMeans')
    plt.scatter(X[:, 0], X[:, 1], c=label)
    plt.scatter(center[:, 0], center[:, 1], c='r', marker='+')

    ####### compare to scikit ########
    from sklearn.cluster import KMeans as KM

    km = KM(n_clusters=k)
    km.fit(X)
    plt.subplot(122)
    plt.title('scikit KMeans')
    plt.scatter(X[:, 0], X[:, 1], c=km.labels_)
    plt.scatter(km.cluster_centers_[:, 0],
                km.cluster_centers_[:, 1],
                c='r',
                marker='+')
    plt.show()

Ejemplo n.º 21

0

Mostrar archivo

 def __init__(self, imgPixels, K):
     """Keep ``imgPixels`` and fit a ``K``-cluster model on it.

     The fitted model (``random_state=0``) is stored as ``self.KM``.
     """
     self.imgPixels = imgPixels
     estimator = KM(n_clusters=K, random_state=0)
     self.KM = estimator.fit(self.imgPixels)

Ejemplo n.º 22

0

Mostrar archivo

def kmtable(tx, ty, rx, ry, dataset=""):
    """Plot K-Means V-measure against cluster count for four reductions.

    ``tx`` is reduced to half its feature count with ``PCA``, ``ICA``,
    ``RandomProjection`` and ``best`` (each fitted on ``tx``/``ty`` and
    applied to both ``tx`` and ``rx``).  For every cluster count in
    ``range(2, 8)`` a ``KM`` model is fitted on each reduced training set,
    used to label the matching reduced ``rx``, and scored against ``ry``
    with ``metrics.v_measure_score``.

    The four curves are saved as ``"KM_DR_" + dataset + "_VM.png"``.
    ``ry`` must provide ``ravel``.  Nothing is returned.
    """
    # Floor division: a component count must be an integer, and plain ``/``
    # yields a float on Python 3.
    n_components = tx[1].size // 2

    # (legend label, reducer), in the order the reducers were fitted before.
    reducers = [
        ("PCA", PCA(n_components=n_components)),
        ("ICA", ICA(n_components=n_components)),
        ("RP", RandomProjection(n_components)),
        ("KB", best(k=n_components)),
    ]
    projections = []
    for label, compressor in reducers:
        compressor.fit(tx, y=ty)
        projections.append(
            (label, compressor.transform(tx), compressor.transform(rx)))

    cluster_counts = range(2, 8)
    scores = {label: [] for label, _ in reducers}
    # Cluster count is the outer loop so the models are fitted in the same
    # order as before (this matters when they share a global random state).
    for i in cluster_counts:
        for label, reduced_tx, reduced_rx in projections:
            clf = KM(n_clusters=i)
            clf.fit(reduced_tx)
            result = clf.predict(reduced_rx)
            scores[label].append(metrics.v_measure_score(ry.ravel(), result))

    plt.figure()
    plt.title(dataset+": KM Clustering & DR")
    plt.xlabel('Number of clusters')
    plt.ylabel('V Measure Score value')
    for label, _ in reducers:
        plt.plot(cluster_counts, scores[label], label=label)
    plt.legend()
    plt.ylim(ymin=-0.05, ymax=0.5)
    plt.savefig("KM_DR_"+dataset+"_VM.png", dpi=300)

Ejemplo n.º 23

0

Mostrar archivo

lower conversion price and more dilution to BankAmerica stock
holders, noted Daniel Williams, analyst with Sutro Group.
    Several analysts said that while they believe the Brazilian
debt problem will continue to hang over the banking industry
through the quarter, the initial shock reaction is likely to
ease over the coming weeks.
    Nevertheless, BankAmerica, which holds about 2.70 billion
dlrs in Brazilian loans, stands to lose 15-20 mln dlrs if the
interest rate is reduced on the debt, and as much as 200 mln
dlrs if Brazil pays no interest for a year, said Joseph
Arsenio, analyst with Birr, Wilson and Co.
    He noted, however, that any potential losses would not show
up in the current quarter.
'''

kmeans = KM(n_clusters=32, init='random', n_init=1, verbose=1)
kmeans.fit(features)

print kmeans.labels_

new_post_vector = vectorizer.transform([new_post_2])
new_post_label = kmeans.predict(new_post_vector)[0]

print "new posts label", new_post_label

similar_indices = (kmeans.labels_ == new_post_label).nonzero()[0]

similar = []

for i in similar_indices:
    dist = sp.linalg.norm((new_post_vector - features[i]).toarray())

Ejemplo n.º 24

0

Mostrar archivo

Archivo: k-means.py Proyecto: MetelevEvgenii/k-means


if __name__ == '__main__':
    # Elbow plot: K-Means inertia against the number of clusters for the
    # pixels of an image from dir_from.  The script exits after the first
    # file has been plotted.
    dir_from = '../drosophila_kc167_1_images'
    dir_to = '../KMeansResults'
    number_clusters = list(range(2, 70, 2))
    for file_name in os.listdir(dir_from):
        array_time = []
        path_file = '/'.join((dir_from, file_name))
        name_without_extension = file_name[:-4]
        dir_save = '/'.join((dir_to, name_without_extension)) + '/'
        data = KMeans(path_file).pixels
        sse = []
        for K in number_clusters:
            # Rebound on every pass but not read again afterwards.
            path_file = '../drosophila_kc167_1_images/CPvalid1_48_40x_Tiles_p0003DAPI.TIF'
            km = KM(K)
            km.fit(data)
            sse.append(km.inertia_)

        # Plot sse against k
        plt.figure(figsize=(6, 6))
        plt.plot(number_clusters, sse, '-o')
        plt.xlabel(r'Number of clusters *k*')
        plt.ylabel('Sum of squared distance')
        plt.show()
        exit()

Ejemplo n.º 25

0

Mostrar archivo

X_train_people_nmf = nmf_decomp.transform(X_train_people)
X_test_people_nmf = nmf_decomp.transform(X_test_people)

nmf_decomp = NMF(n_components=5, init='nndsvd',
                 random_state=37).fit(X_train_energy)
X_train_energy_nmf = nmf_decomp.transform(X_train_energy)
X_test_energy_nmf = nmf_decomp.transform(X_test_energy)

nmf_decomp = NMF(n_components=100, random_state=37).fit(X_train_mnist)
X_train_mnist_nmf = nmf_decomp.transform(X_train_mnist)
X_test_mnist_nmf = nmf_decomp.transform(X_test_mnist)
print('nmf')

#k-Means Clustering
from sklearn.cluster import KMeans as KM
# For each dataset: fit a 10-cluster k-means model on the training split,
# then pass both splits through the fitted model's transform().

# People data.
km_cluster = KM(n_clusters=10, random_state=37)
km_cluster.fit(X_train_people)
X_train_people_km = km_cluster.transform(X_train_people)
X_test_people_km = km_cluster.transform(X_test_people)

# Energy data (this one explicitly uses 5 initialisations).
km_cluster = KM(n_clusters=10, n_init=5, random_state=37)
km_cluster.fit(X_train_energy)
X_train_energy_km = km_cluster.transform(X_train_energy)
X_test_energy_km = km_cluster.transform(X_test_energy)

# MNIST data.
km_cluster = KM(n_clusters=10, random_state=37)
km_cluster.fit(X_train_mnist)
X_train_mnist_km = km_cluster.transform(X_train_mnist)
X_test_mnist_km = km_cluster.transform(X_test_mnist)

# Progress marker for the k-means stage.
print('km\n')
"""
##############################Classification##############################
"""
from sklearn.neighbors import KNeighborsClassifier as KNC

Ejemplo n.º 26

0

Mostrar archivo

Archivo: 聚类模块.py Proyecto: abjiesir/voice_system

 def __init__(self):
     """Transform the module-level ``ma`` with ``TF`` and cluster the result.

     Sets three attributes:
       * ``ma1``   - output of ``TF.fit_transform(ma)``
       * ``model`` - the ``KM(6)`` estimator
       * ``res``   - result of ``model.fit_predict(ma1)``
     """
     transformed = TF.fit_transform(ma)
     self.ma1 = transformed
     clusterer = KM(6)
     self.model = clusterer
     self.res = clusterer.fit_predict(transformed)

Ejemplo n.º 27

0

Mostrar archivo

Archivo: 5-聚类 kmeans.py Proyecto: ssdwawa/python-

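# First, randomly drop a few points into the data to act as cluster centers, then compute every point's distance to those centers.
# Each center ends up with boundary points; next, compute the middle of the region assigned to each center and shift the center there, which forms new boundaries.
# After many iterations the centers stabilize.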
import pandas as pda
import numpy as np
from sklearn.cluster import KMeans as KM
if __name__ == '__main__':
    # Load the data set and cluster it into two groups, printing the
    # cluster label assigned to each row.
    data = pda.read_csv('luqu.csv')
    # Columns 1..3 (the slice end is exclusive) are used as features.
    # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
    # same ndarray and works on both old and current pandas.
    x = data.iloc[:, 1:4].values
    # The n_jobs argument was removed from KMeans in scikit-learn 1.0, so
    # passing it raises TypeError on current versions.
    km = KM(n_clusters=2)
    print(km.fit_predict(x))

Ejemplo n.º 28

0

Mostrar archivo

    # NOTE(review): the enclosing "def" line is not part of this snippet;
    # rgb_ and min_colors come from the missing part (min_colors is
    # presumably an initially empty dict - confirm).
    # Compare rgb_ with every colour in webcolors' CSS 2.1 hex-to-name table.
    for key, name in webcolors.css21_hex_to_names.items():
        # key is the hex code; split it into its three channel values.
        r_c, g_c, b_c = webcolors.hex_to_rgb(key)
        # Squared difference per channel.
        rd = (r_c - rgb_[0])**2
        gd = (g_c - rgb_[1])**2
        bd = (b_c - rgb_[2])**2
        # Keyed by the summed squared differences, so two colours at exactly
        # the same distance overwrite each other (the later one wins).
        min_colors[(rd + gd + bd)] = name
    # Return the name stored under the smallest distance.
    return min_colors[min(min_colors.keys())]


# Construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("--image", required=True, help="Input Image Path")
ap.add_argument("--clusters", required=True, type=int, help="# of clusters")
args = vars(ap.parse_args())

# cv2.imread does not raise on a missing or unreadable file; it returns None,
# which would otherwise surface as an obscure error inside cvtColor.
image = cv2.imread(args["image"])
if image is None:
    ap.error("could not read image: {}".format(args["image"]))
# OpenCV loads images as BGR; convert so the cluster centers are RGB triples.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Reshape the image to be a list of pixels (one row per pixel, 3 channels)
image = image.reshape((-1, 3))

# K-Means Clustering
clt = KM(n_clusters=args["clusters"])
clt.fit(image)

# Get dominant colors: each cluster center, truncated to 8-bit channel values
colors = clt.cluster_centers_.astype("uint8").tolist()
for rgb in colors:
    color_name = get_color_name(rgb)
    print("Dominant color :", color_name)

Ejemplo n.º 29

0

Mostrar archivo

Archivo: Clustering_PCA_coverType.py Proyecto: maco668/Assignment3

# Standardise the features: the scaler is fitted on the training split only
# and the same fitted scaler is then applied to the test split.
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# One hot encode target values
one_hot = OneHotEncoder()

# reshape(-1, 1) turns the 1-D targets into a single column; .todense()
# converts the encoder output to a dense matrix.
y_train_hot = one_hot.fit_transform(y_train.reshape(-1, 1)).todense()
y_test_hot = one_hot.transform(y_test.reshape(-1, 1)).todense()

# Alias for the training targets.
labels = y_train

# Numbers of PCA components to try.
dim = [2, 3, 4, 5]

# One estimator of each kind, re-parameterised inside the loop below.
km = KM(random_state=42)
gmm = GMM(random_state=42)

# Result containers; nothing is written to them in the visible part of this
# snippet (the loop below is cut off).
Score = defaultdict(list)
adjMI = defaultdict(list)
S_homog = defaultdict(list)
S_adjMI = defaultdict(list)
S_vm = defaultdict(list)

for i in dim:
    # Reduce the scaled training data to i principal components.
    reduced_X = PCA(n_components=i,
                    random_state=42).fit_transform(X_train_scaled)
    # The cluster / mixture-component count is fixed at 30 for every i.
    k = 30
    km.set_params(n_clusters=k)
    gmm.set_params(n_components=k)
    # Only the k-means fit is visible here; the snippet ends at this line.
    km.fit(reduced_X)

Ejemplo n.º 30

0

Mostrar archivo

Archivo: CustomerSegregation.py Proyecto: div-yansh/CustomerSegregation

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans as KM

# Load the customer data; columns 3 and 4 (the slice end is exclusive) are
# the two features that get clustered and plotted.
file = pd.read_csv("Mall_Customers.csv")
X = file.iloc[:, 3:5].values

# Elbow method: record the k-means inertia for 1 to 10 clusters.
elbow = []
for n_clusters in range(1, 11):
    km = KM(n_clusters=n_clusters)
    km.fit(X)
    elbow.append(km.inertia_)

plt.plot(range(1, 11), elbow)
plt.xlabel("No. of Clusters")
plt.ylabel("Cost")
plt.show()

# One colour per cluster; the final model's cluster count is derived from
# this list so the two cannot drift apart.
colors = ["red", "blue", "green", "yellow", "silver"]

km = KM(n_clusters=len(colors))
res = km.fit_predict(X)

for i, color in enumerate(colors):
    plt.scatter(X[res == i, 0], X[res == i, 1], c=color)
# plt.gca() returns the Axes holding the scatter plot. A bare plt.axes()
# adds a new Axes in current Matplotlib, which would cover the plot.
ax = plt.gca()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
plt.show()