Example #1
def get_cv_cpv(x: str, percent: float) -> float:
    global model_goal
    # Get dataset number
    dataset_num = get_dataset_num(x)

    # Get number of pcs for CPV > 0.8 and CPV > 0.99
    if percent == 0.99:
        pcs_cpv = df_selection.loc[dataset_num, "Cum. Perc. Var. (0.99)"]
    else:
        pcs_cpv = df_selection.loc[dataset_num, "Cum. Perc. Var. (0.8)"]

    # Get df_results
    df = pd.read_csv(x)
    idx = df.features_kept == pcs_cpv
    try:
        return df.loc[idx].cv.values[0]
    except IndexError:  # no row with that many PCs; recompute from scratch
        inputs = Inputs(paths)
        inputs.random_seed = 1969
        inputs.get_df_split(dataset_num)

        pca_model = get_pca_model(inputs)

        cluster_model = Clustering(inputs.num_cluster, 100, inputs.random_seed)
        cluster_model.fit(pca_model.pcs_train.loc[:, :pcs_cpv - 1])
        cluster_prediction = cluster_model.predict(
            pca_model.pcs_test.loc[:, :pcs_cpv - 1])
        cluster_performances = cluster_model.get_cluster_performances(
            inputs.df_test.copy(),
            cluster_prediction,
            pcs_cpv,
            inputs.num_cluster,
            model_goal=model_goal)
        # Coefficient of variation of the recomputed cluster performances.
        return variation(cluster_performances)
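The fallback branch recomputes the clustering and summarizes the per-cluster performances with variation. A minimal sketch of what that call returns, assuming variation is scipy.stats.variation (the coefficient of variation, std / mean; the cv column read in the try branch suggests the same quantity):

import numpy as np
from scipy.stats import variation

# Hypothetical per-cluster performance scores.
performances = [0.82, 0.79, 0.91, 0.85]
print(variation(performances))                       # ~0.0527
print(np.std(performances) / np.mean(performances))  # same value by definition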
Example #2
def cluster(self, shapelets):
    """
    Uses a clustering algorithm to reduce the number of shapelets.
    :param shapelets: list of shapelet candidates
    :type shapelets: np.array, shape = (len(shapelets), len(s), len(dim(s)))
    :return: list of remaining shapelet candidates
    :rtype: np.array, shape = (|remaining candidates|, len(s), len(dim(s)))
    """
    clustering = Clustering(self.d_max)
    clustering.fit(shapelets)
    return clustering.nn_centers()
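Clustering here is project-specific, but the pattern (cluster the candidates, then keep only the member nearest each cluster center) can be sketched with scikit-learn's KMeans. reduce_candidates and its parameters are hypothetical stand-ins, not the project's API:

import numpy as np
from sklearn.cluster import KMeans

def reduce_candidates(candidates, n_clusters=5, seed=0):
    # Flatten each (len(s), len(dim(s))) candidate into one feature vector.
    flat = candidates.reshape(len(candidates), -1)
    km = KMeans(n_clusters=n_clusters, n_init=10, random_state=seed).fit(flat)
    keep = []
    for c in range(n_clusters):
        # Keep the member closest to this cluster's center.
        members = np.where(km.labels_ == c)[0]
        dists = np.linalg.norm(flat[members] - km.cluster_centers_[c], axis=1)
        keep.append(members[np.argmin(dists)])
    return candidates[np.sort(keep)]

# 40 random "shapelets" of length 20 with 2 dimensions; 5 survive.
print(reduce_candidates(np.random.rand(40, 20, 2)).shape)  # (5, 20, 2)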
Example #3
def perform_clustering(
        term_ids_to_embs: Dict[int, List[float]]) -> Dict[int, Set[int]]:
    """Cluster the given terms into 5 clusters.

    Args:
        term_ids_to_embs: A dictionary mapping term-ids to their
            embeddings.
    Returns:
        A dictionary mapping each cluster label to its cluster.
        Each cluster is a set of term-ids.
    """
    # Case: fewer than 5 terms to cluster.
    num_terms = len(term_ids_to_embs)
    if num_terms < 5:
        clusters = {}
        for i, tid in enumerate(term_ids_to_embs):
            clusters[i] = {tid}
        return clusters

    # Case: 5 or more terms to cluster.
    c = Clustering()
    term_ids_embs_items = list(term_ids_to_embs.items())
    results = c.fit([it[1] for it in term_ids_embs_items])
    labels = results['labels']
    print('  Density:', results['density'])
    clusters = defaultdict(set)
    for (term_id, _), label in zip(term_ids_embs_items, labels):
        clusters[label].add(term_id)
    return clusters
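A quick usage sketch that exercises only the fewer-than-5-terms branch, so it runs without the project's Clustering class (the function itself still needs defaultdict and the typing names imported at module level):

from collections import defaultdict
from typing import Dict, List, Set

# Three terms: each becomes its own singleton cluster.
term_ids_to_embs = {101: [0.1, 0.2], 102: [0.3, 0.4], 103: [0.5, 0.6]}
print(perform_clustering(term_ids_to_embs))
# {0: {101}, 1: {102}, 2: {103}}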
Example #4
def clustering(x, df, n_clusters=10, distance='angular', method='K-medians'):
    """
  Do the clustering, based on the 91 features.
  Args:
	  x: array of features
	  df: dataframe of features
	  n_clusters: number of clusters
	  distance: could be 'angular' or 'euclidean';
      method: could be 'K-medians', 'K-means', 'Hierarchical'
  Output:
	  new_df: the labeled dataframe, according to the clustering algorithm
	  relevant_features_cs: a list with the relevant features (angles of the consecutive limbs) of the centroids
	  cs: dictionary with the centroid features 
  """

    relevant_features_id = [
        0, 3, 5, 13, 15, 17, 25, 46, 47, 56, 64, 65, 76, 77, 83, 85, 90
    ]
    keys_dict = [
        '0-1', '0-4', '0-6', '1-2', '1-4', '1-6', '2-3', '4-5', '4-6', '5-7',
        '6-8', '6-9', '8-9', '8-10', '9-12', '10-11', '12-13'
    ]

    clustering_ = Clustering(k=n_clusters, distance=distance, method=method)
    cs, cls = clustering_.fit(x)

    assert len(list(cls.keys())) == n_clusters

    d = pd.DataFrame()
    labels = []
    for i in range(n_clusters):
        df1 = pd.DataFrame(cls[i])
        d = pd.concat([d, df1], sort=False)
        labels += [i] * len(cls[i])

    d.columns = df.columns
    d.insert(91, 'label', labels)

    new_df = df.reset_index().merge(d).set_index('index')

    relevant_features_cs = []
    if method != 'Hierarchical':
        for i in range(len(cs)):
            cs_rf = cs[i][relevant_features_id]
            relevant_features_cs.append(dict(zip(keys_dict, cs_rf)))

    return new_df, relevant_features_cs, cs
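The merge step above is the non-obvious part: the labeled rows in d come back in cluster order, and joining on the shared feature columns re-attaches each label to the matching original row, while reset_index()/set_index('index') preserves df's index. A self-contained pandas sketch of that trick (it assumes feature rows are unique, otherwise the merge duplicates them):

import pandas as pd

df = pd.DataFrame({'a': [1, 3, 5], 'b': [2, 4, 6]})   # original rows
d = pd.DataFrame({'a': [5, 1, 3], 'b': [6, 2, 4],     # same rows, in
                  'label': [1, 0, 0]})                 # cluster order

new_df = df.reset_index().merge(d).set_index('index')
print(new_df)  # columns a, b, label, with df's original row order and index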
Example #5
def clustering(x, n_clusters):
    """
  Do the clustering, based on the 91 features. 
  We compute the reconstructed poses only with the following default parameters:
    method: 'K-Medians'
    distance: 'angular'
  Args:
    x: array of features
    n_clusters: number of clusters
  Output:
    new_df: the labeled dataframe, according to the clustering algorithm
    relevant_features_cs: a list with the relevant features (angles of the consecutive limbs) of the centroids
    cs: dictionary with the centroid features 
  """

    clustering_ = Clustering(k=n_clusters)
    cs, cls = clustering_.fit(x)
    # Note: this variant relies on the module-level df, relevant_features_id
    # and keys_dict defined alongside Example #4.
    d = pd.DataFrame()
    labels = []
    for i in range(len(cs)):
        df1 = pd.DataFrame(cls[i])
        d = pd.concat([d, df1], sort=False)
        labels += [i] * len(cls[i])

    d.columns = df.columns
    d.insert(91, 'label', labels)

    new_df = df.reset_index().merge(d).set_index('index')

    assert len(cs) == n_clusters

    relevant_features_cs = []
    for i in range(len(cs)):
        cs_rf = cs[i][relevant_features_id]
        relevant_features_cs.append(dict(zip(keys_dict, cs_rf)))

    return new_df, relevant_features_cs, cs
Example #6
from clustering import Clustering
# from embeddings import *

words = [
    'computer', 'algorithm', 'program', 'bear', 'cat', 'snake', 'fish', 'tree',
    'flower', 'gras', 'tea', 'water', 'milk'
]

# embedder = FastTextE()
# embedder.load_model()
emb_dict = {}
with open('fasttext-wiki-news-300d-1M.vec', 'r', encoding='utf8') as f:
    next(f)  # skip the "<vocab_size> <dim>" header line of the .vec format
    for line in f:
        sp_line = line.rstrip().split(' ')
        # Parse the vector components into floats (they are read as strings).
        token, vector = sp_line[0], [float(x) for x in sp_line[1:]]
        emb_dict[token] = vector
word_embeddings = [emb_dict[word] for word in words]
clus = Clustering()
print(clus)
print(clus.clus_type)
print(clus.affinity)
print(clus.fit(word_embeddings))
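Reading the full one-million-word .vec file into a dict is slow. As an alternative sketch, gensim's KeyedVectors can parse the same word2vec text format, and its limit argument caps how many vectors are loaded (gensim is an extra dependency not used by the original snippet):

from gensim.models import KeyedVectors

kv = KeyedVectors.load_word2vec_format('fasttext-wiki-news-300d-1M.vec',
                                       binary=False, limit=200000)
word_embeddings = [kv[word] for word in words if word in kv]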