Example #1
def kmeans_frag(numV, k, dim, epsilon, maxIterations, numFrag):
    from pycompss.api.api import compss_wait_on
    from pycompss.functions.reduce import merge_reduce
    import time
    size = int(numV / numFrag)

    startTime = time.time()
    X = [genFragment(size, dim) for _ in range(numFrag)]
    print("Points generation Time {} (s)".format(time.time() - startTime))

    mu = init_random(dim, k)
    oldmu = []
    n = 0
    startTime = time.time()
    while not has_converged(mu, oldmu, epsilon, n, maxIterations):
        oldmu = mu
        # Assign each point in every fragment to its nearest center
        clusters = [
            cluster_points_partial(X[f], mu, f * size) for f in range(numFrag)
        ]
        # Accumulate per-cluster (count, sum) pairs within each fragment
        partialResult = [
            partial_sum(X[f], clusters[f], f * size) for f in range(numFrag)
        ]

        # Merge the partials, then recompute each center as sum / count
        mu = merge_reduce(reduceCentersTask, partialResult)
        mu = compss_wait_on(mu)
        mu = [mu[c][1] / mu[c][0] for c in mu]
        print(mu)
        n += 1
    print("Kmeans Time {} (s)".format(time.time() - startTime))
    return (n, mu)
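
For context, partial_sum above evidently produces a mapping from cluster index to a (count, coordinate-sum) pair, since the new centers are computed as mu[c][1] / mu[c][0]. A minimal sketch of what the merging task could look like is below; this is an assumption for illustration, not the actual reduceCentersTask from the example's module.

def reduce_centers_sketch(a, b):
    # Merge two partial-result dicts: add element counts and
    # coordinate sums for every cluster index present in b.
    for key, (count, vec_sum) in b.items():
        if key in a:
            a[key] = (a[key][0] + count, a[key][1] + vec_sum)
        else:
            a[key] = (count, vec_sum)
    return a
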
Example #2
def test_merge_reduce():
    from pycompss.functions.reduce import merge_reduce

    data = list(range(11))

    def accumulate(a, b):
        return a + b

    result = merge_reduce(accumulate, data)

    assert result == 55, "ERROR: Got unexpected result with merge_reduce."
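
merge_reduce(function, data) collapses the list by applying the binary function pairwise in a tree structure until a single value remains. A plain-Python sketch with equivalent sequential behavior, for intuition only (the real library schedules the pairwise merges as COMPSs tasks):

from functools import reduce

def merge_reduce_sketch(function, data):
    # Fold the binary function over the list; sequentially
    # equivalent to the tree-shaped reduction merge_reduce builds.
    return reduce(function, data)

assert merge_reduce_sketch(lambda a, b: a + b, list(range(11))) == 55
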
Example #3
def pearson(data_x, data_y, mx, my):
    """
    Calculate the pearson coefficient.

    :param data_x: X data elements
    :param data_y: Y data elements
    :param mx: MX
    :param my: MY
    :return: The pearson coefficient.
    """

    xs = [_norm(x, mx) for x in data_x]
    ys = [_norm(y, my) for y in data_y]
    xxs = [_pow(x, 2) for x in xs]
    yys = [_pow(y, 2) for y in ys]

    suma = merge_reduce(reduce_add,
                        [mult_frag(a, b) for (a, b) in zip(xs, ys)])

    sum_x = merge_reduce(reduce_add, map(_add, xxs))
    sum_y = merge_reduce(reduce_add, map(_add, yys))
    r = op_task(sum_x, sum_y, suma)
    return r
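
The tasks above distribute the textbook formula r = sum((x - mx) * (y - my)) / sqrt(sum((x - mx)**2) * sum((y - my)**2)). A sequential sketch over flat (unchunked) lists, with illustrative names only, not the module's tasks:

import math

def pearson_sketch(data_x, data_y, mx, my):
    # Center both series on their means, then combine the
    # cross-product sum with the two squared-deviation sums.
    xs = [x - mx for x in data_x]
    ys = [y - my for y in data_y]
    num = sum(a * b for a, b in zip(xs, ys))
    den = math.sqrt(sum(a * a for a in xs) * sum(b * b for b in ys))
    return num / den
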
Example #4
def mean(data, wait=False):
    """
    Arithmetic mean.

    :param data: chunked data
    :param wait: whether to wait for the result (default: False)
    :return: mean of data.
    """

    n = _list_lenght(data)
    result = merge_reduce(reduce_add, [_mean(x, n) for x in data])
    if wait:
        from pycompss.api.api import compss_wait_on
        result = compss_wait_on(result)
    return result
Example #5
def mean(data, n, wait=False):
    """
    Calculate the mean of a list.

    :param data: List of elements
    :param n: Number of elements
    :param wait: <Boolean> Wait for the result
    :return: Mean
    """

    result = merge_reduce(reduce_add, [_mean(x, n) for x in data])
    if wait:
        from pycompss.api.api import compss_wait_on
        result = compss_wait_on(result)
    return result
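
Both mean variants rely on the mean being a linear reduction: each chunk contributes sum(chunk) / n independently, and adding the partial contributions yields the global mean. A sequential sketch of that composition (assuming _mean(x, n) returns sum(x) / n for a chunk x):

def mean_sketch(chunks, n):
    # Sum each chunk's partial contribution sum(chunk) / n;
    # the partials add up to the mean of all elements.
    return sum(sum(chunk) / n for chunk in chunks)

assert mean_sketch([[1, 2], [3, 4]], 4) == 2.5
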
Example #6
def std(data, m, n, wait=False):
    """
    Calculate the standard deviation.

    :param data: List of elements
    :param m: Mean of the elements
    :param n: Number of elements
    :param wait: Wait for the result
    :return: the standard deviation
    """

    xs = [_norm(x, m) for x in data]
    xp = [_pow(x, 2) for x in xs]
    sum_a = merge_reduce(reduce_add, [_mean(x, n) for x in xp])
    if wait:
        from pycompss.api.api import compss_wait_on
        sum_a = compss_wait_on(sum_a)
    return sum_a
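
As written, the reduction accumulates the mean of the squared deviations from m, i.e. the variance; presumably the square root is applied downstream if the deviation itself is needed. A sequential sketch of both quantities:

import math

def std_sketch(data, m, n):
    # Mean of squared deviations from m (the variance),
    # followed by the square root for the standard deviation.
    variance = sum((x - m) ** 2 for x in data) / n
    return math.sqrt(variance)
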
Example #7
def kmeans(data, k, num_frag=-1, max_iterations=10, epsilon=1e-4,
           init_mode='random'):
    """
    kmeans: starting with a set of randomly chosen initial centers,
    one repeatedly assigns each input point to its nearest center, and
    then recomputes the centers given the point assignment. This local
    search, called Lloyd's iteration, continues until the solution does
    not change between two consecutive rounds or iteration > max_iterations.

    :param data: data
    :param k: number of centroids
    :param num_frag: number of fragments; if -1, data is considered chunked
    :param max_iterations: max iterations
    :param epsilon: error threshold
    :param init_mode: initialization mode
    :return: list of centroids
    """

    from pycompss.api.api import compss_wait_on

    # Data is already fragmented
    if num_frag == -1:
        num_frag = len(data)
    else:
        # fragment data (integer chunk size for Python 3)
        data = [d for d in chunks(data, len(data) // num_frag)]

    mu = init(data, k, init_mode)
    old_mu = []
    n = 0
    size = int(len(data) / num_frag)
    while not has_converged(mu, old_mu, epsilon, n, max_iterations):
        old_mu = list(mu)
        clusters = [cluster_points_partial(data[f], mu, f * size) for f in range(num_frag)]
        partial_result = [partial_sum(data[f], clusters[f], f * size) for f in range(num_frag)]

        mu = merge_reduce(reduce_centers_task, partial_result)
        mu = compss_wait_on(mu)
        mu = [mu[c][1] / mu[c][0] for c in mu]
        n += 1
    return mu
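
The chunks helper used above is not shown on this page; a common sketch (an assumption, not necessarily the module's implementation) slices the list into fixed-size pieces:

def chunks(lst, size):
    # Yield successive slices of (at most) `size` elements.
    size = max(1, int(size))
    for i in range(0, len(lst), size):
        yield lst[i:i + size]
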
Example #8
 def test_merge_reduce(self):
     data = [1, 2, 3, 4]
     res = merge_reduce(sumTask, data)
     res = compss_wait_on(res)
     data = compss_wait_on(data)
     self.assertEqual(res, 10)
Example #9
 def test_merge_reduce_seq(self):
     data = [1, 2, 3, 4]
     res = merge_reduce(lambdaFunction, data)
     self.assertEqual(res, 10)
Example #10
 def test_merge_reduce(self):
     from pycompss.api.api import compss_wait_on
     res = merge_reduce(self.methodFunction, self.data)
     res = compss_wait_on(res)
     self.assertEqual(res, sum(self.data))
Example #11
 def test_merge_reduce_seq(self):
     res = merge_reduce(self.lambdaFunction, self.data)
     self.assertEqual(res, sum(self.data))
Example #12
 def testMapMergeReduce(self):
     initial = [1, 2, 3, 4, 5]
     partial = list(map(increment, initial))
     result = merge_reduce(accumulate, partial)
     result = compss_wait_on(result)
     self.assertEqual(result, 20)
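
The increment and accumulate helpers used in this test would typically be @task-decorated functions so that both the map and the reduction run as COMPSs tasks. A minimal sketch, an assumption about their definitions rather than the test module's actual code:

from pycompss.api.task import task

@task(returns=1)
def increment(value):
    # Map step: add one to a single element.
    return value + 1

@task(returns=1)
def accumulate(a, b):
    # Reduce step: combine two partial results.
    return a + b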