Python VonMisesFisherMixture.fitの例、spherecluster.VonMisesFisherMixture.fit Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_von_mises_fisher_mixture.py プロジェクト: dfafalis/fiberDirectionality

def test_integration_dense(params_in):
    """Fit a vMF mixture on random dense unit vectors and sanity-check it.

    Args:
        params_in: dict of ``VonMisesFisherMixture`` keyword arguments. It is
            updated in place with ``n_clusters`` before the model is built.
    """
    n_clusters = 5
    n_examples = 20
    n_features = 100

    # Gaussian rows, each rescaled in place to unit L2 norm.
    X = np.random.randn(n_examples, n_features)
    for row in X:
        row /= np.linalg.norm(row)

    params_in.update({'n_clusters': n_clusters})
    movmf = VonMisesFisherMixture(**params_in)
    movmf.fit(X)

    # One center / concentration / weight per cluster, one label per example.
    assert movmf.cluster_centers_.shape == (n_clusters, n_features)
    assert len(movmf.concentrations_) == n_clusters
    assert len(movmf.weights_) == n_clusters
    assert len(movmf.labels_) == n_examples

    for center in movmf.cluster_centers_:
        assert_almost_equal(np.linalg.norm(center), 1.0)

    for concentration in movmf.concentrations_:
        assert concentration > 0

    for weight in movmf.weights_:
        assert not np.isnan(weight)

    # predict() on the training data must reproduce the fitted labels.
    assert_array_equal(movmf.predict(X), movmf.labels_)

    # The fitted label of each example must be the row index with the largest
    # value in that example's column of log_likelihood(X).
    ll = movmf.log_likelihood(X)
    ll_labels = np.zeros(movmf.labels_.shape)
    for example_idx in range(n_examples):
        ll_labels[example_idx] = np.argmax(ll[:, example_idx])

    assert_array_equal(ll_labels, movmf.labels_)

コード例 #2

0

ファイルを表示

ファイル: gen.py プロジェクト: sanjanag/hatespeech-detection

def label_expansion(class_labels, write_path, vocabulary_inv, embedding_mat):
    """Expand each class's seed words and fit one vMF distribution per class.

    The expansion size is grown one step at a time (via ``seed_expansion``)
    until the expanded word sets of different classes start to share a word;
    the last size before that overlap is then used for the final expansion.
    A single-component ``VonMisesFisherMixture`` is fitted on the embedding
    rows of every expanded class.

    Args:
        class_labels: sequence of per-class seed word-id sequences. Every
            class must be non-empty (its length is used as a divisor).
        write_path: directory prefix for the ``class<j>.txt`` output files, or
            ``None`` to skip writing. The file name is appended by plain
            string concatenation, so the prefix is expected to end with a
            path separator.
        vocabulary_inv: mapping/sequence from word id to word string.
        embedding_mat: array indexed by word id along its first axis.

    Returns:
        Tuple ``(expanded_array, centers, kappas)``: the final expansion
        returned by ``seed_expansion`` and, per class, the fitted center and
        concentration.
    """
    print("Retrieving top-t nearest words...")
    n_classes = len(class_labels)
    prob_sup_array = []
    current_szes = []
    all_class_labels = []
    for class_label in class_labels:
        current_sz = len(class_label)
        current_szes.append(current_sz)
        # Uniform weight over the seed words of this class.
        prob_sup_array.append([1 / current_sz] * current_sz)
        all_class_labels += list(class_label)
    current_sz = np.min(current_szes)

    # Grow the expansion until two classes share a word (a duplicate shows up
    # in the flattened list).
    while len(all_class_labels) == len(set(all_class_labels)):
        current_sz += 1
        expanded_array = seed_expansion(class_labels, prob_sup_array,
                                        current_sz, None, vocabulary_inv,
                                        embedding_mat)
        all_class_labels = [w for w_class in expanded_array for w in w_class]

    # Step back to the last size before the overlap appeared.
    expanded_array = seed_expansion(class_labels, prob_sup_array,
                                    current_sz - 1, None, vocabulary_inv,
                                    embedding_mat)
    print("Final expansion size t = {}".format(len(expanded_array[0])))

    centers = []
    kappas = []
    print("Top-t nearest words for each class:")
    for i in range(n_classes):
        expanded_class = expanded_array[i]
        vocab_expanded = [vocabulary_inv[w] for w in expanded_class]
        print("Class {}:".format(i))
        print(vocab_expanded)
        expanded_mat = embedding_mat[np.asarray(expanded_class)]
        vmf_soft = VonMisesFisherMixture(n_clusters=1)
        vmf_soft.fit(expanded_mat)
        centers.append(vmf_soft.cluster_centers_[0])
        kappas.append(vmf_soft.concentrations_[0])

    if write_path is not None:
        for j, expanded_class in enumerate(expanded_array):
            # exist_ok avoids the check-then-create race of os.path.exists().
            os.makedirs(write_path, exist_ok=True)
            # The context manager closes the file even if a lookup fails.
            with open(write_path + 'class' + str(j) + '.txt', 'w') as f:
                f.write(''.join(vocabulary_inv[word] + ' '
                                for word in expanded_class))
    print("Finished vMF distribution fitting.")
    return expanded_array, centers, kappas

コード例 #3

0

ファイルを表示

    def estimate_vMF_params(self):
        """Fit a one-component soft vMF mixture to ``self.samples_xyz``.

        On success the fitted concentration is stored in ``self.kappa`` and
        the fitted center in ``self.mean``; both are then printed, cast to
        int (the mean after conversion by ``self._cartesian2spherical``).
        If an ``AttributeError`` is raised anywhere in that sequence, a hint
        about missing orientation data is printed instead.

        Returns:
            None
        """
        model = VonMisesFisherMixture(n_clusters=1, posterior_type='soft')
        try:
            model.fit(self.samples_xyz)
            self.kappa = model.concentrations_[0]
            self.mean = model.cluster_centers_[0]
            kappa_as_int = self.kappa.astype(int)
            mean_spherical = self._cartesian2spherical(self.mean).astype(int)
            print('concentration parameter ', kappa_as_int,
                  'mean direction ', mean_spherical)
        except AttributeError:
            print(
                'object has no orientations. Use add_orientations to load orientation data manually or sample from a vMF distribution with the .sample method'
            )

コード例 #4

0

ファイルを表示

def mandatory_questions():
    """Fit a 2-cluster ``VonMisesFisherMixture`` to the angular stroke data.

    The data comes from ``unistroke_to_angular(read_data(""))``. Nothing is
    returned; the fitted model only lives in a local variable.
    """
    # 1.2.1 (histogram plots, currently disabled)
    # ang_data = unistroke_to_angular(read_data(""))
    # plt.hist(ang_data[:, 0], 150, density=True)
    # plt.hist(ang_data[:, 1], 150, density=True)

    # 1.2.5
    mixture = VonMisesFisherMixture(2)
    angular_data = unistroke_to_angular(read_data(""))
    mixture = mixture.fit(angular_data)

コード例 #5

0

ファイルを表示

ファイル: orientation_distribution.py プロジェクト: RothkopfLab/imgstats-frontiersin

def fit_movmf(theta, **kwargs):
    """Fit a two-component von Mises-Fisher mixture to a set of angles.

    Each angle is doubled and embedded on the unit circle as
    ``(cos(2*theta), sin(2*theta))`` before fitting.

    Args:
        theta (np.array): angles
        **kwargs: keyword arguments forwarded to
            spherecluster.VonMisesFisherMixture()

    Returns:
        pi, mu, kappa: mixture weights, center angles (``arctan2`` of the
        fitted centers, i.e. still in the doubled-angle domain) and
        concentrations.
    """
    doubled = theta * 2
    X = np.array([np.cos(doubled), np.sin(doubled)]).T

    model = VonMisesFisherMixture(n_clusters=2, **kwargs)
    model.fit(X)

    center_y = model.cluster_centers_[:, 1]
    center_x = model.cluster_centers_[:, 0]
    mu = np.arctan2(center_y, center_x)

    return model.weights_, mu, model.concentrations_

コード例 #6

0

ファイルを表示

ファイル: test_von_mises_fisher_mixture.py プロジェクト: MeMAD-project/trecvid-vsum

def test_integration_sparse(params_in):
    """Fit a vMF mixture on random sparse unit vectors and sanity-check it.

    Args:
        params_in: dict of ``VonMisesFisherMixture`` keyword arguments. It is
            updated in place with ``n_clusters`` before the model is built.
    """
    n_clusters = 5
    n_examples = 20
    n_features = 100
    n_nonzero = 10
    X = sp.sparse.csr_matrix((n_examples, n_features))
    for ee in range(n_examples):
        # Draw the column indices WITHOUT replacement. With replacement,
        # repeated indices overwrite each other, leaving fewer than n_nonzero
        # entries and a row whose norm is below 1.
        ridx = np.random.choice(n_features, size=n_nonzero, replace=False)
        random_values = np.random.randn(n_nonzero)
        random_values = random_values / np.linalg.norm(random_values)
        X[ee, ridx] = random_values

    params_in.update({"n_clusters": n_clusters})
    movmf = VonMisesFisherMixture(**params_in)
    movmf.fit(X)

    # One center / concentration / weight / posterior row per cluster and one
    # label per example.
    assert movmf.cluster_centers_.shape == (n_clusters, n_features)
    assert len(movmf.concentrations_) == n_clusters
    assert len(movmf.weights_) == n_clusters
    assert len(movmf.labels_) == n_examples
    assert len(movmf.posterior_) == n_clusters

    for center in movmf.cluster_centers_:
        assert_almost_equal(np.linalg.norm(center), 1.0)

    for concentration in movmf.concentrations_:
        assert concentration > 0

    for weight in movmf.weights_:
        assert not np.isnan(weight)

    # predict() on the training data must reproduce the fitted labels.
    plabels = movmf.predict(X)
    assert_array_equal(plabels, movmf.labels_)

    # The fitted label of each example must be the row index with the largest
    # value in that example's column of log_likelihood(X).
    ll = movmf.log_likelihood(X)
    ll_labels = np.zeros(movmf.labels_.shape)
    for ee in range(n_examples):
        ll_labels[ee] = np.argmax(ll[:, ee])

    assert_array_equal(ll_labels, movmf.labels_)

コード例 #7

0

ファイルを表示

def soft_clustering(
        terms: List[str], wv: Dict[str, np.ndarray], n_clusters: int
) -> Tuple[List[int], List[str], "VonMisesFisherMixture"]:
    """Cluster the word vectors of ``terms`` with a soft vMF mixture.

    Terms missing from ``wv`` are skipped and only counted for the debug log.

    Args:
        terms: candidate terms to cluster.
        wv: mapping from term to its embedding vector.
        n_clusters: number of mixture components.

    Returns:
        A 3-tuple ``(labels, kept_terms, model)``: the result of
        ``model.predict`` on the kept embeddings, the in-vocabulary terms in
        the same order, and the fitted ``VonMisesFisherMixture``.
    """
    X = []
    X_terms = []
    n_out_of_vocab = 0
    for term in terms:
        try:
            emb = wv[term]
        except KeyError:
            n_out_of_vocab += 1
            continue
        X.append(emb)
        X_terms.append(term)

    logger.debug(f"{n_out_of_vocab} / {len(terms)} words out of vocab")
    logger.debug(f"Clustering {len(X)} words")
    vmf_soft = VonMisesFisherMixture(n_clusters=n_clusters,
                                     posterior_type='soft')
    vmf_soft.fit(X)

    return vmf_soft.predict(X), X_terms, vmf_soft

コード例 #8

0

ファイルを表示

ファイル: gen.py プロジェクト: yuzhimanhua/HIMECat

def label_expansion(relevant_nodes,
                    write_path,
                    vocabulary_inv,
                    embedding_mat,
                    manual_num=None,
                    fitting='mix'):
    """Expand node keywords and fit vMF distributions per relevant node.

    Each relevant node contributes its children (or itself when it has none)
    as "child" nodes. ``seed_expansion`` is called on those children; the code
    afterwards reads each child's ``.expanded`` attribute (presumably
    populated by ``seed_expansion`` -- confirm against its definition).

    Args:
        relevant_nodes: nodes exposing ``children``, ``keywords``,
            ``expanded`` and ``name``. Every child must have at least one
            keyword (its keyword count is used as a divisor).
        write_path: directory for ``expanded.txt``, or ``None`` to skip
            writing. An existing ``expanded.txt`` is truncated first.
        vocabulary_inv: dict mapping word id to word string.
        embedding_mat: array indexed by word id along its first axis.
        manual_num: fixed expansion size; when ``None`` the size is grown
            until the children's expansions share a word, then stepped back
            by one.
        fitting: ``'mix'`` fits one mixture with as many components as the
            node has children; ``'separate'`` fits one single-component model
            per child with uniform weights. Any other value fits nothing for
            the node.

    Returns:
        Tuple ``(centers, kappas, weights)`` with one entry per relevant node
        that was fitted.
    """
    print("Retrieving top-t nearest words...")
    vocab_dict = {v: k for k, v in vocabulary_inv.items()}
    prob_sup_array = []
    current_szes = []
    all_class_keywords = []
    children_nodes = []
    for relevant_node in relevant_nodes:
        if relevant_node.children:
            children_nodes += relevant_node.children
        else:
            children_nodes += [relevant_node]
    for children_node in children_nodes:
        current_sz = len(children_node.keywords)
        current_szes.append(current_sz)
        # Uniform weight over the seed keywords of this child.
        prob_sup_array.append([1 / current_sz] * current_sz)
        all_class_keywords += children_node.keywords
    current_sz = np.min(current_szes)
    if manual_num is None:
        # Grow the expansion until two children share a word.
        while len(all_class_keywords) == len(set(all_class_keywords)):
            print(f'current_sz: {current_sz}')
            current_sz += 1
            seed_expansion(children_nodes, prob_sup_array, current_sz,
                           vocab_dict, embedding_mat)
            all_class_keywords = [
                w for relevant_node in children_nodes
                for w in relevant_node.expanded
            ]
        # Step back to the last size before the overlap appeared.
        seed_expansion(children_nodes, prob_sup_array, current_sz - 1,
                       vocab_dict, embedding_mat)
        print(f"Final expansion size t = {len(children_nodes[0].expanded)}")
    else:
        seed_expansion(children_nodes, prob_sup_array, manual_num, vocab_dict,
                       embedding_mat)
        print(f"Manual expansion size t = {manual_num}")

    centers = []
    kappas = []
    weights = []
    expanded_path = None
    if write_path is not None:
        expanded_path = os.path.join(write_path, 'expanded.txt')
        if not os.path.exists(write_path):
            os.makedirs(write_path)
        else:
            # Truncate any previous output; lines are appended per node below.
            with open(expanded_path, 'w'):
                pass
    for relevant_node in relevant_nodes:
        children_nodes = (relevant_node.children
                          if relevant_node.children else [relevant_node])
        num_children = len(children_nodes)
        expanded_class = []
        # Reset per node so an unrecognised `fitting` value can neither hit an
        # unbound name nor write the previous node's words.
        vocab_expanded = []
        if fitting == 'mix':
            for child in children_nodes:
                expanded_class = np.concatenate(
                    (expanded_class, child.expanded))
                print([vocabulary_inv[w] for w in child.expanded])
            vocab_expanded = [vocabulary_inv[w] for w in expanded_class]
            # De-duplicate word ids before gathering the embedding rows.
            expanded_mat = embedding_mat[np.asarray(list(set(expanded_class)),
                                                    dtype='int32')]
            vmf_soft = VonMisesFisherMixture(n_clusters=num_children,
                                             n_jobs=15,
                                             random_state=0)
            vmf_soft.fit(expanded_mat)
            center = vmf_soft.cluster_centers_
            kappa = vmf_soft.concentrations_
            weight = vmf_soft.weights_
            print(f'weight: {weight}')
            print(f'kappa: {kappa}')
            centers.append(center)
            kappas.append(kappa)
            weights.append(weight)
        elif fitting == 'separate':
            center = []
            kappa = []
            weight = []
            for child in children_nodes:
                assert child.expanded != []
                expanded_class = np.concatenate(
                    (expanded_class, child.expanded))
                expanded_mat = embedding_mat[np.asarray(child.expanded,
                                                        dtype='int32')]
                vmf_soft = VonMisesFisherMixture(n_clusters=1,
                                                 n_jobs=15,
                                                 random_state=0)
                vmf_soft.fit(expanded_mat)
                center.append(vmf_soft.cluster_centers_[0])
                kappa.append(vmf_soft.concentrations_[0])
                weight.append(1 / num_children)
            vocab_expanded = [vocabulary_inv[w] for w in expanded_class]
            print(f'Class {relevant_node.name}:')
            print(vocab_expanded)
            print(f'weight: {weight}')
            print(f'kappa: {kappa}')
            centers.append(center)
            kappas.append(kappa)
            weights.append(weight)
        if expanded_path is not None:
            # The context manager closes the file even if a write fails.
            with open(expanded_path, 'a') as f:
                f.write(relevant_node.name + '\t')
                f.write(' '.join(vocab_expanded) + '\n')

    print("Finished vMF distribution fitting.")
    return centers, kappas, weights

コード例 #9

0

ファイルを表示

ファイル: main.py プロジェクト: edwinyi/Directional-Grid-Maps

def sim_multimodal():
    """Fit and plot a multimodal vMF mixture for each of 40 grid cells.

    For every cell the angles in column 4 of the cell data are embedded on
    the unit circle, clustered with DBSCAN to obtain the number of modes and
    initial centers, and fitted with a soft ``VonMisesFisherMixture``. The
    mixture density is evaluated on 360 query angles and a three-panel figure
    is saved under ``outputs/``.

    Cells with at most one data row, or with no DBSCAN cluster, are skipped.
    Any other failure inside a cell is reported and the loop moves on.
    ``to_save`` and ``data_cells`` are accumulated but not returned or
    written by this function.
    """
    to_save = []
    data_cells = []

    # Data pre-processing
    for i in range(40):
        read_sim(i,
                 f_in='datasets/multimodal_sim2.npy',
                 f_out='datasets/transformed/multimodal_sim2_cell' + str(i))

    # Angles to query
    Thq = np.linspace(-np.pi, np.pi, 360)[:, None]
    Xq = np.hstack((np.cos(Thq), np.sin(Thq)))

    # Fit one cell at a time
    for i in range(40):
        print('\ncell no={}'.format(i))

        try:
            # Read data; the context manager releases the .npz file handle.
            with np.load('datasets/transformed/multimodal_sim2_cell' +
                         str(i) + '.npz') as cell_file:
                data, xx, yy = (cell_file['data'], cell_file['xx'],
                                cell_file['yy'])
            if data.shape[0] <= 1:
                continue

            # Data
            Th = data[:, 4][:, None]
            X = np.hstack((np.cos(Th), np.sin(Th)))
            db = DBSCAN().fit(X)
            core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
            core_samples_mask[db.core_sample_indices_] = True
            labels = db.labels_
            # Number of clusters in labels, ignoring noise if present.
            n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
            unique_labels = set(labels)
            print("n_clusters_={}, labels={}".format(n_clusters_,
                                                     unique_labels))

            # Mean of the core samples of every non-noise cluster, in label
            # order. The list is rebuilt per cell so the result neither
            # depends on set iteration order nor carries over a previous
            # cell's centers.
            cluster_means = []
            for k in sorted(unique_labels):
                if k == -1:  # noisy samples
                    continue
                class_member_mask = (labels == k)
                xy = X[class_member_mask & core_samples_mask]
                cluster_means.append(np.mean(xy, axis=0)[None, :])
            if not cluster_means:
                # Only noise was found: nothing to initialise the mixture.
                print(' skipped...')
                continue
            db_centers = np.concatenate(cluster_means, axis=0)
            print("db_centers=", db_centers)

            # TBD: "NOTE:: play with max_iter if you get the denom=inf error"

            # Mixture of von Mises Fisher clustering (soft)
            vmf_soft = VonMisesFisherMixture(n_clusters=n_clusters_,
                                             posterior_type='soft',
                                             init=db_centers,
                                             n_init=1,
                                             verbose=True,
                                             max_iter=20)
            vmf_soft.fit(X)

            # Weighted sum of the component densities at the query angles.
            y = 0
            for cn in range(n_clusters_):
                y += vmf_soft.weights_[cn] * np.exp(
                    von_mises_fisher_mixture._vmf_log(
                        Xq, vmf_soft.concentrations_[cn],
                        vmf_soft.cluster_centers_[cn]))
            yq = np.array(y)[:, None]
            to_save.append(yq)
            data_cells.append(i)

            # Plot
            pl.figure(figsize=(15, 4))

            pl.subplot(131)
            mesh = np.vstack((xx.ravel(), yy.ravel())).T
            pl.scatter(mesh[:, 0], mesh[:, 1], c='k', marker='.')
            pl.scatter(data[:, 1],
                       data[:, 2],
                       c=data[:, 0],
                       marker='*',
                       cmap='jet')
            pl.colorbar()
            pl.xlim([0, 20])
            pl.ylim([-5, 30])
            pl.title('data')

            pl.subplot(132)
            pl.scatter(Xq[:, 0], Xq[:, 1], c=yq[:], cmap='jet')
            pl.colorbar()
            pl.scatter(X[:, 0] * 0.9, X[:, 1] * 0.9, c='k', marker='+')
            pl.title('data and extimated distribution')

            pl.subplot(133, projection='polar')
            pl.polar(Thq, yq)
            pl.title('polar plot')
            pl.savefig('outputs/multimodal_sim2_cell{}'.format(i))
            #pl.show()
            # Release the figure; otherwise up to 40 figures stay in memory.
            pl.close()
        except Exception as exc:
            # Best-effort per cell, but Ctrl-C / SystemExit still propagate
            # and the reason for skipping is shown.
            print(' skipped...')
            print('  reason: {!r}'.format(exc))

コード例 #10

0

ファイルを表示

ファイル: main.py プロジェクト: edwinyi/Directional-Grid-Maps

def sim_unimodal():
    """Fit and plot a single vMF distribution for each of 40 grid cells.

    For every cell the angles in column 4 of the cell data are embedded on
    the unit circle and fitted with a one-component soft
    ``VonMisesFisherMixture``. The density is evaluated on 360 query angles
    and a three-panel figure is saved under ``outputs/``.

    Cells with at most one data row are skipped. Any failure inside a cell is
    reported and the loop moves on. ``to_save`` and ``data_cells`` are
    accumulated but not returned or written by this function.
    """
    to_save = []
    data_cells = []

    # Data pre-processing
    for i in range(40):
        read_sim(i,
                 f_in='datasets/unimodal_sim1.npy',
                 f_out='datasets/transformed/unimodal_sim1_cell' + str(i))

    # Angles to query
    Thq = np.linspace(-np.pi, np.pi, 360)[:, None]
    Xq = np.hstack((np.cos(Thq), np.sin(Thq)))

    # Fit one cell at a time
    for i in range(40):
        print('cell no={}'.format(i))

        try:
            # Read data; the context manager releases the .npz file handle.
            with np.load('datasets/transformed/unimodal_sim1_cell' +
                         str(i) + '.npz') as cell_file:
                data, xx, yy = (cell_file['data'], cell_file['xx'],
                                cell_file['yy'])
            if data.shape[0] <= 1:
                continue

            # Data
            Th = data[:, 4][:, None]
            X = np.hstack((np.cos(Th), np.sin(Th)))

            # Von Mises clustering (soft)
            vmf_soft = VonMisesFisherMixture(n_clusters=1,
                                             posterior_type='soft',
                                             n_init=20)
            vmf_soft.fit(X)
            # Unweighted component density (y0) and its weighted version (y).
            y0 = np.exp(
                von_mises_fisher_mixture._vmf_log(
                    Xq, vmf_soft.concentrations_[0],
                    vmf_soft.cluster_centers_[0]))
            y = y0 * vmf_soft.weights_[0]

            # Query
            yq = np.array(y)[:, None]
            to_save.append(yq)
            data_cells.append(i)

            # Plot
            pl.figure(figsize=(15, 4))

            pl.subplot(131)
            mesh = np.vstack((xx.ravel(), yy.ravel())).T
            pl.scatter(mesh[:, 0], mesh[:, 1], c='k', marker='.')
            pl.scatter(data[:, 1],
                       data[:, 2],
                       c=data[:, 0],
                       marker='*',
                       cmap='jet')
            pl.colorbar()
            pl.xlim([0, 20])
            pl.ylim([-5, 30])
            pl.title('data')

            pl.subplot(132)
            pl.scatter(Xq[:, 0], Xq[:, 1], c=y0[:], cmap='jet')
            pl.colorbar()
            pl.scatter(X[:, 0] * 0.9, X[:, 1] * 0.9, c='k', marker='+')
            pl.title('data and extimated distribution')

            pl.subplot(133, projection='polar')
            pl.polar(Thq, yq)
            pl.title('polar plot')
            #pl.show()
            pl.savefig('outputs/unimodal_sim1_cell{}'.format(i))
            # Release the figure; otherwise up to 40 figures stay in memory.
            pl.close()
        except Exception as exc:
            # Best-effort per cell, but Ctrl-C / SystemExit still propagate
            # and the reason for skipping is shown.
            print(' skipped...')
            print('  reason: {!r}'.format(exc))

コード例 #11

0

ファイルを表示

ファイル: small_mix_3d.py プロジェクト: MeMAD-project/trecvid-vsum

# Match the spherical k-means centers to the true means: the center closest
# to mus[0] is taken as its estimate and the other center as that of mus[1].
cdists = [np.linalg.norm(mus[0] - center) for center in skm.cluster_centers_]

skm_mu_0_idx = np.argmin(cdists)
skm_mu_1_idx = 1 - skm_mu_0_idx

skm_mu_0_error = np.linalg.norm(mus[0] - skm.cluster_centers_[skm_mu_0_idx])
skm_mu_1_error = np.linalg.norm(mus[1] - skm.cluster_centers_[skm_mu_1_idx])

###############################################################################
# Mixture of von Mises Fisher clustering (soft)
vmf_soft = VonMisesFisherMixture(n_clusters=2,
                                 posterior_type='soft',
                                 n_init=20)
vmf_soft.fit(X)

# Same center-to-mean matching as above, for the soft mixture.
cdists = [
    np.linalg.norm(mus[0] - center) for center in vmf_soft.cluster_centers_
]

vmf_soft_mu_0_idx = np.argmin(cdists)
vmf_soft_mu_1_idx = 1 - vmf_soft_mu_0_idx

vmf_soft_mu_0_error = np.linalg.norm(
    mus[0] - vmf_soft.cluster_centers_[vmf_soft_mu_0_idx])
vmf_soft_mu_1_error = np.linalg.norm(
    mus[1] - vmf_soft.cluster_centers_[vmf_soft_mu_1_idx])

###############################################################################
# Mixture of von Mises Fisher clustering (hard)

コード例 #12

0

ファイルを表示

class Clusterer:
    """Cluster row vectors and rank the members of each cluster.

    Wraps either ``SphericalKMeans`` (``method="spk"``) or a hard/soft
    ``VonMisesFisherMixture`` (``"hard-movMF"`` / ``"soft-movMF"``) and, after
    fitting, ranks every cluster's members by cosine similarity to the
    cluster center.
    """

    def __init__(self,
                 data,
                 n_cluster,
                 method="soft-movMF",
                 init="random-class",
                 n_init=10,
                 n_jobs=1):
        """Store the data and build the underlying clustering model.

        Args:
            data: 2-D array of row vectors (``.shape`` and row indexing are
                used).
            n_cluster: number of clusters.
            method: ``"spk"``, ``"hard-movMF"`` or ``"soft-movMF"``.
            init, n_init, n_jobs: forwarded to ``VonMisesFisherMixture``
                (not used for ``"spk"``).

        Raises:
            ValueError: if ``method`` is not one of the supported names.
        """
        self.data = data
        self.n_cluster = n_cluster
        self.method = method

        if method == "spk":
            self.clus = SphericalKMeans(n_clusters=n_cluster)
        elif method in ("hard-movMF", "soft-movMF"):
            posterior_type = 'hard' if method == "hard-movMF" else 'soft'
            self.clus = VonMisesFisherMixture(n_clusters=n_cluster,
                                              posterior_type=posterior_type,
                                              init=init,
                                              n_init=n_init,
                                              n_jobs=n_jobs)
        else:
            # Fail here instead of with an AttributeError on self.clus later.
            raise ValueError(
                "Unknown clustering method %r; expected 'spk', 'hard-movMF' "
                "or 'soft-movMF'" % (method,))

        # cluster id -> dict(element_id: cosine similarity to center)
        self.clusters = {}
        # cluster id -> list of (element_id, similarity), best first
        self.clusters_phrase = {}
        self.membership = None  # cluster label of every data point
        self.center_ids = None  # ids of the cluster centers (not filled here)
        self.inertia_scores = None

    def fit(self, debug=False):
        """Fit the model and build the per-cluster similarity rankings.

        Args:
            debug: when true, print the labels and (for the movMF methods)
                the fitted concentrations, weights and posterior.
        """
        start = time.time()
        self.clus.fit(self.data)
        end = time.time()
        print("Finish fitting data of size %s using %s seconds" %
              (self.data.shape, (end - start)))
        self.inertia_scores = self.clus.inertia_
        print('Clustering inertia score (smaller is better):',
              self.inertia_scores)

        labels = self.clus.labels_
        self.membership = labels

        if debug:
            print("Labels:", labels)
            # print("cluster_centers_:", self.clus.cluster_centers_)
            if self.method != "spk":
                print("concentrations_:", self.clus.concentrations_)
                print("weights_:", self.clus.weights_)
                print("posterior_:", self.clus.posterior_)

        for idx, label in enumerate(labels):
            cluster_center = self.clus.cluster_centers_[int(label)]
            cosine_sim = self.calc_cosine(self.data[idx], cluster_center)
            self.clusters.setdefault(label, {})[idx] = cosine_sim

        for cluster_id in range(self.n_cluster):
            # A cluster may end up with no members; rank it as empty instead
            # of raising KeyError.
            members = self.clusters.get(cluster_id, {})
            self.clusters_phrase[cluster_id] = sorted(members.items(),
                                                      key=lambda x: -x[1])

        # self.center_ids = self.gen_center_idx()

    # find the idx of each cluster center
    def gen_center_idx(self):
        """Return ``[(cluster_id, member_idx_closest_to_center), ...]``."""
        return [(cluster_id,
                 self.find_center_idx_for_one_cluster(cluster_id))
                for cluster_id in range(self.n_cluster)]

    def find_center_idx_for_one_cluster(self, cluster_id):
        """Return the member index most similar to the cluster center.

        Returns ``-1`` when the cluster has no members (or none with a
        similarity above -1).
        """
        query_vec = self.clus.cluster_centers_[cluster_id]
        members = self.clusters.get(cluster_id, {})
        best_similarity, ret = -1, -1
        for member_idx in members:
            member_vec = self.data[member_idx]
            cosine_sim = self.calc_cosine(query_vec, member_vec)
            if cosine_sim > best_similarity:
                best_similarity = cosine_sim
                ret = member_idx
        return ret

    def calc_cosine(self, vec_a, vec_b):
        """Return ``1 - cosine(vec_a, vec_b)``, i.e. the cosine similarity."""
        return 1 - cosine(vec_a, vec_b)

    def find_phrase_rank(self, phrase, cluster_id):
        """Return the 1-based rank of ``phrase`` in a cluster, or -1."""
        for idx, ele in enumerate(self.clusters_phrase[cluster_id]):
            if ele[0] == phrase:
                return (idx + 1)
        return -1

    def senity_check(self):
        """Print the first (phrase_id, similarity) pair below 0 and stop.

        Every data point is compared against every cluster center; nothing is
        printed when all similarities are non-negative.
        """
        for phrase_id in range(self.data.shape[0]):
            for cluster_id in range(self.n_cluster):
                sim = self.calc_cosine(self.data[phrase_id],
                                       self.clus.cluster_centers_[cluster_id])
                if sim < 0:
                    print(phrase_id, sim)
                    return

    def explore(self, keyword2id=None, id2keyword=None, iteractive=False):
        """Print the top-10 members per cluster, optionally with a prompt.

        Args:
            keyword2id: mapping from keyword to element id; used by the
                interactive loop.
            id2keyword: mapping from element id to keyword. When ``None`` the
                raw element id is printed instead.
            iteractive: when true, repeatedly prompt for a keyword and print
                its cluster and its rank/similarity in every cluster until an
                empty line is entered.
        """
        for cluster_id in range(self.n_cluster):
            print("Cluster %s top keywords" % cluster_id)
            for rank, (member_idx, score) in enumerate(
                    self.clusters_phrase[cluster_id][0:10]):
                keyword = (member_idx
                           if id2keyword is None else id2keyword[member_idx])
                print("Rank:%s keywords:%s (score=%s)" %
                      (rank + 1, keyword, score))
            print("=" * 80)

        if iteractive:
            while True:
                phrase = input(
                    'Input keyword (use "_" to concat tokens for phrase): ')
                if len(phrase) == 0:
                    break

                if phrase not in keyword2id:
                    print("Out of vocabulary keyword, please try again")
                    continue

                phrase_id = keyword2id[phrase]
                cluster_member = self.membership[phrase_id]
                result = []
                for cluster_id in range(self.n_cluster):
                    cluster_rank = self.find_phrase_rank(
                        phrase_id, cluster_id)
                    sim = self.calc_cosine(
                        self.data[phrase_id],
                        self.clus.cluster_centers_[cluster_id])
                    result.append((cluster_id, cluster_rank, sim))
                print("Put in cluster: %s" % cluster_member)
                print("Rank information in all clusters: %s" % str(result))

コード例 #13

0

ファイルを表示


input_X_s, input_Y_s, input_mu_s, processMode, verboseMode = parseArguments()

# Samples, their labels and the reference mean directions.
X_s = np.load(input_X_s)
Y_s = np.load(input_Y_s)
mu_s = np.load(input_mu_s)

# One mixture component per distinct label.
num_clusters = np.unique(Y_s).size

print(num_clusters)

# Reject unsupported modes explicitly; otherwise the script only fails later
# with a NameError on `vmf_model`.
if processMode not in ('soft', 'hard'):
    raise ValueError(
        "processMode must be 'soft' or 'hard', got %r" % (processMode,))

vmf_model = VonMisesFisherMixture(n_clusters=num_clusters,
                                  posterior_type=processMode)
vmf_model.fit(X_s)

estimated_kappa_s = vmf_model.concentrations_
estimated_mu_s = vmf_model.cluster_centers_

print(estimated_kappa_s)

# Cosine distance between every estimated center and every reference mean.
cross_distance_s = cdist(estimated_mu_s, mu_s, metric='cosine')
print(cross_distance_s)

#print( vmf_model.labels_          )
#print( vmf_model.weights_         )

コード例 #14

0

ファイルを表示

ファイル: moVMF_clustering.py プロジェクト: AnshThakur/DE

# In[6]:

import scipy.io as sio
from spherecluster import VonMisesFisherMixture  # using spherecluster package : https://pypi.org/project/spherecluster/0.1.2/

# In[10]:

# Load the segmented frames or superframes stored under the 'ps' key.
train = sio.loadmat('./vmf_data.mat')
data = train['ps']
print(data.shape)

# In[11]:

# Soft mixture of 15 von Mises-Fisher components.
vmf_soft = VonMisesFisherMixture(n_clusters=15, posterior_type='soft')
vmf_soft.fit(data)

# In[15]:

mean_directions = vmf_soft.cluster_centers_
print(mean_directions.shape)

# In[18]:

# Keep the mean directions of the (up to) 10 components with the largest
# concentration, ordered from most to least concentrated.
concen = vmf_soft.concentrations_
ind = concen.argsort()[::-1][:10]
print(concen)
print(ind)
mean_directions = mean_directions[ind, :]
print(mean_directions.shape)