Ejemplo n.º 1
0
def predict(master, input, center, centers):
    """
    Predict the closest clusters for the datapoints in input.

    Builds a single-stage ``Estimate`` job (stage "k_cluster_predict",
    processed by ``predict_map``), runs it on `input` and returns the
    result of ``job.wait()``.

    Parameters:
        master:  not used by this function; kept so existing callers
                 keep working.
        input:   handed unchanged to ``job.run(input=...)``.
        center:  parameter mapping for the job. A shallow copy is used,
                 so the caller's object is not modified.
        centers: stored in the job parameters under the key 'centers'.
    """
    import copy
    from kclustering_pipeline import Estimate

    job = Estimate()
    job.pipeline = [("split",
                     Stage("k_cluster_predict",
                           input_chain = [task_input_stream, reader],
                           init = simple_init,
                           process = predict_map))]

    # Work on a copy: assigning `center` directly and then adding the
    # 'centers' key would write that key into the caller's object.
    job.params = copy.copy(center)
    job.params['centers'] = centers

    job.run(input = input)
    return job.wait()
Ejemplo n.º 2
0
def _run_and_collect_centers(job, job_input):
    """
    Run `job` on `job_input`, gather its results into a list of
    2-tuples and purge the job.
    """
    job.run(input = job_input)
    # The results are read into a list before the job is purged.
    collected = [(i, c) for i, c in result_iterator(job.wait())]
    job.purge()
    return collected


def estimate(master, input, center, k, iterations):
    """
    Optimize k-clustering for `iterations` iterations with cluster
    center definitions as given in `center`.

    An initialisation job (``random_init_map`` followed by
    ``estimate_reduce``) produces the first list of centers; after that,
    `iterations` further jobs (``estimate_map`` followed by
    ``estimate_reduce``) are run, each receiving the centers produced by
    the previous job through the 'centers' job parameter.

    Parameters:
        master:     not used by this function; kept so existing callers
                    keep working.
        input:      handed unchanged to every ``job.run(input=...)``.
        center:     parameter mapping for the jobs. A shallow copy is
                    used, so the caller's object is not modified.
        k:          stored in the job parameters under the key 'k'.
        iterations: number of map/reduce refinement jobs to run after
                    the initialisation job.

    Returns the list of 2-tuples read from the last job that was run.
    """
    import copy
    from kclustering_pipeline import Estimate

    # One working copy shared by all jobs: the jobs see the same
    # parameters as if `center` itself were used (including 'seed'
    # remaining set for the iteration jobs), but the 'seed', 'k' and
    # 'centers' keys no longer end up in the caller's object.
    params = copy.copy(center)

    job = Estimate()
    job.pipeline = [("split",
                     Stage("k_cluster_init_map",
                           input_chain = [task_input_stream, reader],
                           init = map_init,
                           process = random_init_map)),
                    ('group_label',
                     Stage("k_cluster_init_reduce",
                           process = estimate_reduce,
                           init = simple_init))]
    job.params = params
    job.params['seed'] = 0
    job.params['k'] = k
    centers = _run_and_collect_centers(job, input)

    for j in range(iterations):
        job = Estimate()
        job.params = params
        job.params['k'] = k
        job.params['centers'] = centers

        job.pipeline = [('split',
                         Stage("kcluster_map_iter_%s" % (j,),
                               input_chain = [task_input_stream, reader],
                               process = estimate_map,
                               init = simple_init)),
                        ('group_label',
                         Stage("kcluster_reduce_iter_%s" % (j,),
                               process = estimate_reduce,
                               init = simple_init))]
        centers = _run_and_collect_centers(job, input)

    return centers