Example #1
import ast

# CAE, AutomaticSegmentation, Clustering, FeatureExtraction, and SvmClassifier
# are assumed to be project-local imports.

def main(args):
    #-----------------------------------------------------#
    #             2D/3D Convolutional Autoencoder         #
    #-----------------------------------------------------#
    if args.program == 'CAE':
        cae = CAE(input_dir=args.data_dir,
                  patch_size=ast.literal_eval(args.patch_size),
                  batch_size=args.batch_size,
                  test_size=args.test_size,
                  prepare_batches=args.prepare_batches)

        cae.prepare_data(args.sampler_type, args.max_patches, args.resample,
                         ast.literal_eval(args.patch_overlap),
                         args.min_lab_vox, args.label_prob, args.load_data)
        # Train from scratch only when no pretrained model directory is given.
        if args.model_dir is None:
            cae.train(args.epochs)
        cae.predict(args.model_dir)

    #-----------------------------------------------------#
    #               Patient classification                #
    #-----------------------------------------------------#
    """
    if args.program=='AutSeg':
        asg = AutomaticSegmentation(    model_name=args.model_name,
                                        patch_size=args.patch_size,
                                        patch_overlap=args.patch_overlap,
                                        input_dir=args.data_dir, 
                                        model_dir=args.model_dir   )
        asg.run()
        asg.run_postprocessing()

"""
    if args.program == 'CLUS':
        clustering = Clustering(num_iters=args.iterations,
                                num_clusters=args.num_clusters,
                                input_dir=args.data_dir)
        clustering.run()

    if args.program == 'FeEx':
        fe = FeatureExtraction(model_name=args.model_name,
                               patch_size=ast.literal_eval(args.patch_size),
                               patch_overlap=ast.literal_eval(
                                   args.patch_overlap),
                               num_clusters=args.num_clusters,
                               cluster_selection=args.cluster_selection,
                               resample=args.resample,
                               encoded_layer_num=args.encoded_layer_num,
                               model_dir=args.model_dir,
                               input_dir=args.data_dir)
        fe.run(batch_size=20)

    if args.program == 'SVM':
        svm = SvmClassifier(feature_dir=args.feature_dir,
                            ffr_dir=args.ffr_dir,
                            ffr_filename=args.ffr_filename,
                            input_dir=args.data_dir,
                            ffr_cut_off=args.ffr_cut_off,
                            test_size=args.test_size)
        svm.train()
        svm.predict()
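main(args) expects an argparse-style namespace. Below is a minimal sketch of how the entry point might be wired; the flag names mirror the attributes used above, but the subset shown, the defaults, and the types are assumptions:

import argparse

def parse_args():
    # Hypothetical CLI wiring; only some of the flags used by main() are shown.
    p = argparse.ArgumentParser()
    p.add_argument('--program', choices=['CAE', 'CLUS', 'FeEx', 'SVM'], required=True)
    p.add_argument('--data_dir', required=True)
    p.add_argument('--model_dir', default=None)
    p.add_argument('--patch_size', default='(32, 32, 32)')    # parsed later with ast.literal_eval
    p.add_argument('--patch_overlap', default='(0, 0, 0)')    # parsed later with ast.literal_eval
    p.add_argument('--batch_size', type=int, default=32)
    p.add_argument('--test_size', type=float, default=0.2)
    p.add_argument('--epochs', type=int, default=100)
    p.add_argument('--num_clusters', type=int, default=2)
    return p.parse_args()

if __name__ == '__main__':
    main(parse_args())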
Example #2
    def generate(self, keys, url):
        # Assumes: import time; from concurrent.futures import ThreadPoolExecutor.
        # json_work is a project helper that reads ("r") or writes ("w") a JSON file.
        json_work("other_files/work_file.json", "w", [])  # reset the work file

        print(f'Keys received: {len(keys)}')

        if len(keys) > 0:
            # Generate pre-templates from the keys, keeping unique stems only.
            self.generate_pretmp(keys)
            print(f'Keys after deduplication: {len(self.work_file)}')
            time.sleep(2)
            if len(self.work_file) > 0:
                with ThreadPoolExecutor(5) as executor:
                    for _ in executor.map(self.template_generated,
                                          self.work_file):
                        pass
                work = json_work("other_files/work_file.json", "r")
                if len(work) > 0:
                    # Sort the new templates by basic frequency and persist them.
                    gen_data = sorted(work,
                                      key=lambda x: x["frequency"]["basic"],
                                      reverse=True)
                    json_work("other_files/work_file.json", "w", gen_data)
                    # Merge into main.json and keep the combined list sorted.
                    gen_data += json_work("other_files/main.json", "r")
                    gen_data = sorted(gen_data,
                                      key=lambda x: x["frequency"]["basic"],
                                      reverse=True)
                    json_work("other_files/main.json", "w", gen_data)
                    print(f"url {url} processed")
                    clustering = Clustering(
                        json_work("other_files/work_file.json", "r"), url)
                    clustering.run()
            else:
                print("Перехожу к следующему url")
        return
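The snippet relies on a json_work helper that is not shown. Here is a minimal sketch consistent with the call sites above (mode "r" returns the parsed file, mode "w" overwrites it with data); the actual project implementation may differ:

import json

def json_work(path, mode, data=None):
    # "r": return the parsed JSON contents; "w": overwrite the file with `data`.
    if mode == "r":
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)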
Example #3
# X2Blocker, X4Blocker, Clustering, create_pairs, and get_scores are assumed
# to be project-local imports.
def run_pipeline(data,
                 y,
                 dataset_id,
                 evaluate=False,
                 verbose=False,
                 optimize_method=False,
                 **kwargs):
    """
    It performs the basic logic pipeline for getting the data, creates blocking and clustering
    and stores the matching pairs.

    :param data: pd.DataFrame, the input dataset
    :param y: pd.DataFrame, the dataset with the actual pairs
    :param dataset_id: int, the id of the input dataset
    :param evaluate: boolean, if evaluation should be run
    :param verbose: boolean, if logging
    :return: pd.DataFrame, with the predicted matching pairs
    """
    cluster_n = kwargs.get('cluster_num', 0)
    distance_threshold = kwargs.get('distance_threshold', 0)
    method = kwargs.get('clustering_method', 'agglomerative')
    encoding = kwargs.get('encoding', 'use')

    # blocking
    if 'title' in data.columns:  # Instantiate correct blocker based on dataset
        blocker = X2Blocker()
        # X3 is identified by "source" appearing in the first instance_id
        if optimize_method and ("source" in data.instance_id[0]):
            method = 'cosine'
    elif 'name' in data.columns:
        blocker = X4Blocker()
        data["title"] = data["name"]
        if optimize_method:
            method = 'agglomerative'
            cluster_n = 0
    else:
        raise ValueError("Please add a valid dataset id")

    blocker.fit(data=data)
    # blocks is of type [[instance_id]]
    # list of lists of instance_ids belonging to same group
    # transform returns modified data frame for further use
    blocks, data = blocker.transform()

    # apply clustering to each block to get matching pairs
    clusters = []

    print("Method: ", method)
    print("Encoding: ", encoding, "\n")
    cls = Clustering(method=method,
                     cluster_n=cluster_n,
                     distance_threshold=distance_threshold,
                     encoding=encoding)
    for block in blocks:
        if len(block) > 1:
            # filter data based on the instance_ids present in the block
            block_df = data[data['instance_id'].isin(block)]
            clusters_l = cls.run(block_df)
            clusters.extend(clusters_l)

    # create pairs from clusters
    pairs_pred_df = create_pairs(clusters)

    dataset_scores = dict()
    if evaluate:
        # run performance evaluation
        dataset_scores = get_scores(actual=y, pred=pairs_pred_df)
        if verbose:
            print('Precision: {:.3f}'.format(
                dataset_scores['precision_score']))
            print('Recall: {:.3f}'.format(dataset_scores['recall_score']))
            print('F1 score: {:.3f}'.format(dataset_scores['f1_score']))

        dataset_scores['cluster_n'] = cluster_n
        dataset_scores['method'] = method
        dataset_scores['dataset'] = dataset_id
        dataset_scores['encoding'] = encoding
        dataset_scores['threshold'] = distance_threshold

    return pairs_pred_df, dataset_scores
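create_pairs is not shown above. A plausible sketch, assuming each cluster is a list of instance_ids and every unordered pair within a cluster counts as a predicted match; the column names are guesses, not taken from the original project:

from itertools import combinations

import pandas as pd

def create_pairs(clusters):
    # Emit every unordered pair of instance_ids within each cluster.
    # Column names are assumptions.
    pairs = [(a, b)
             for cluster in clusters
             for a, b in combinations(sorted(cluster), 2)]
    return pd.DataFrame(pairs, columns=['left_instance_id', 'right_instance_id'])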