Example 1
def pearson(X, Y, mx, my):
    # Center each fragment of X and Y around its global mean
    xs = [_norm(x, mx) for x in X]
    ys = [_norm(y, my) for y in Y]
    # Square the centered fragments
    xxs = [_pow(x, 2) for x in xs]
    yys = [_pow(y, 2) for y in ys]

    # Numerator: sum over all fragments of (x - mx) * (y - my)
    suma = mergeReduce(reduce_add, [multFrag(a, b) for (a, b) in zip(xs, ys)])

    # Denominator terms: sums of squared deviations (list() keeps the
    # result indexable for mergeReduce under Python 3)
    sum_x = mergeReduce(reduce_add, list(map(_add, xxs)))
    sum_y = mergeReduce(reduce_add, list(map(_add, yys)))
    r = op_task(sum_x, sum_y, suma)
    return r
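The helpers used here (_norm, _pow, multFrag, _add, reduce_add, op_task) are PyCOMPSs tasks defined elsewhere in the source. A minimal sketch of what they might look like; the names mirror the call sites above, but the bodies are assumptions, not the original implementations:

import math
from pycompss.api.task import task

@task(returns=list)
def _norm(x, m):
    # Center one fragment around the global mean m (assumed behavior)
    return [e - m for e in x]

@task(returns=list)
def _pow(x, p):
    # Element-wise power over one fragment
    return [e ** p for e in x]

@task(returns=float)
def multFrag(a, b):
    # Partial dot product of two centered fragments
    return sum(ai * bi for ai, bi in zip(a, b))

@task(returns=float)
def _add(x):
    # Sum the elements of one fragment
    return sum(x)

@task(returns=float)
def reduce_add(a, b):
    # Binary reducer consumed by mergeReduce
    return a + b

@task(returns=float)
def op_task(sum_x, sum_y, suma):
    # Combine the partial sums into the Pearson coefficient
    return suma / (math.sqrt(sum_x) * math.sqrt(sum_y))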
Example 2
def kmeans_frag(numV, k, dim, epsilon, maxIterations, numFrag):
    from pycompss.api.api import compss_wait_on
    import time
    size = int(numV / numFrag)

    startTime = time.time()
    X = [genFragment(size, dim) for _ in range(numFrag)]
    print("Points generation Time {} (s)".format(time.time() - startTime))

    mu = init_random(dim, k)
    oldmu = []
    n = 0
    startTime = time.time()
    while not has_converged(mu, oldmu, epsilon, n, maxIterations):
        oldmu = mu
        # Assign each fragment's points to their nearest centroid
        clusters = [
            cluster_points_partial(X[f], mu, f * size) for f in range(numFrag)
        ]
        # Per-fragment partial sums and point counts per cluster
        partialResult = [
            partial_sum(X[f], clusters[f], f * size) for f in range(numFrag)
        ]

        # Tree-merge the partial results and synchronize on the outcome
        mu = mergeReduce(reduceCentersTask, partialResult)
        mu = compss_wait_on(mu)
        # New centroid = accumulated point sum / point count
        mu = [mu[c][1] / mu[c][0] for c in mu]
        print(mu)
        n += 1
    print("Kmeans Time {} (s)".format(time.time() - startTime))
    return (n, mu)
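Both k-means variants in this collection depend on a has_converged helper that is not shown. A plausible sketch, assuming the centroids are NumPy-compatible arrays; the stopping rule (total centroid shift below epsilon) is an assumption modeled on the canonical PyCOMPSs k-means example:

import numpy as np

def has_converged(mu, oldmu, epsilon, n, maxIterations):
    # Stop when iterations are exhausted or the centroids barely moved
    if n >= maxIterations:
        return True
    if not len(oldmu):
        return False  # first iteration: nothing to compare against yet
    shift = sum(np.linalg.norm(np.asarray(m) - np.asarray(o))
                for m, o in zip(mu, oldmu))
    return shift < epsilon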
Example 3
def std(X, m, n, wait=False):
    # Accumulates sum((x - m)**2) / n across fragments, i.e. the variance;
    # the square root is presumably taken downstream.
    xs = [_norm(x, m) for x in X]
    xp = [_pow(x, 2) for x in xs]
    suma = mergeReduce(reduce_add, [_mean(x, n) for x in xp])
    if wait:
        from pycompss.api.api import compss_wait_on
        suma = compss_wait_on(suma)
    return suma
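A hypothetical composition of these statistics tasks over chunked data, assuming X is a list of fragments and N its total element count; mean here is the variant that takes n explicitly (Example 7 below):

import math

m = mean(X, N, wait=True)        # global mean of the chunked data
var = std(X, m, N, wait=True)    # accumulated variance (see comment above)
sigma = math.sqrt(var)           # standard deviation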
Example 4
def waze_jams(trainfile, hypers, Ntrain, script, gridsList, grid, numFrag, output):
    """
    prepare():
        It contains both the data to be used for hyperparameter learning and
        inference as information regarding the GP prior distribution.

    trainGP():
        It outputs two items per cell: forecasts and hypers. The first items
        contains a Tx2 matrix with predictive mean and variance, where T is
        the number of time intervals required for testing.

        Predictions are in the interval [-1,+1], where predictions closer to -1
        indicate greater probability of being associated with label -1 and
        predictions closer to +1 indicate the opposite scenario.

        These predictions can be turned into probabilities by turning them into
        the interval [0,1]. The second item consists of a vector with learned
        hyperparameters.

    """

    import time
    import numpy as np
    timestr = str(time.strftime("%Y%m%d_%Hh"))
    # Keep only the grid cells flagged as valid (second loaded column == 1)
    gridsList = np.loadtxt(gridsList, delimiter=',', dtype=int,
                           skiprows=1, usecols=(4, 5))
    gridsList = gridsList[:, 0][gridsList[:, 1] == 1]
    print("[INFO] - {} valid grids".format(len(gridsList)))
    config = prepare(trainfile, Ntrain)

    if grid == -1:
        frag_cells = np.array_split(gridsList, numFrag)
    else:
        if grid in gridsList:
            frag_cells = np.array([[grid]])
        else:
            print "[INFO] - Grid #{} is not valid".format(grid)
            return

    output_forecast = ['{}forecasts_part{}_{}.txt'.format(output, f, timestr)
                       for f in range(len(frag_cells))]

    if len(hypers) > 0:
        # Hyperparameters already learned: load them and run inference only
        frag_cells = [load_hypers(hypers, frag_cells[i])
                      for i in range(len(frag_cells))]
        for f in range(len(frag_cells)):
            GP(script, config, frag_cells[f], output_forecast[f])
    else:
        # Learn hyperparameters per fragment and merge the partial results
        output_hyper = [GP_hyper(script, config, frag_cells[f], output_forecast[f])
                        for f in range(len(frag_cells))]
        results = mergeReduce(mergelists, output_hyper)

        from pycompss.api.api import compss_wait_on
        results = compss_wait_on(results)
        np.savetxt( '{}hypers_{}.txt'.format(output,timestr),
                    np.asarray(results), delimiter=',',
                    fmt="%i,%f,%f,%f,%f,%f,%f,%f")
Example 5
def mean(X, wait=False):
    """
    Arithmetic mean
    :param X: chunked data
    :param wait: if we want to wait for result. Default False
    :return: mean of X.
    """
    n = _list_lenght(X)
    result = mergeReduce(reduce_add, [_mean(x, n) for x in X])
    if wait:
        from pycompss.api.api import compss_wait_on
        result = compss_wait_on(result)
    return result
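The _mean and _list_lenght helpers (the latter's spelling follows the source) are not shown. A plausible sketch, illustrative rather than the original code:

from pycompss.api.task import task

@task(returns=float)
def _mean(x, n):
    # One fragment's contribution to the global mean
    return sum(x) / float(n)

def _list_lenght(X):
    # Total number of elements across all fragments
    # (misspelled name kept to match the call site above)
    return sum(len(x) for x in X)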
Example 6
def kmeans(data,
           k,
           numFrag=-1,
           maxIterations=10,
           epsilon=1e-4,
           initMode='random'):
    """
    kmeans: starting with a set of randomly chosen initial centers,
    one repeatedly assigns each imput point to its nearest center, and
    then recomputes the centers given the point assigment. This local
    search called Lloyd's iteration, continues until the solution does
    not change between two consecutive rounds or iteration > maxIterations.
    :param data: data
    :param k: num of centroids
    :param numFrag: num fragments, if -1 data is considered chunked
    :param maxIterations: max iterations
    :param epsilon: error threshold
    :return: list os centroids
    """
    from pycompss.api.api import compss_wait_on

    # Data is already fragmented
    if numFrag == -1:
        numFrag = len(data)
    else:
        # fragment data
        data = list(chunks(data, len(data) // numFrag))

    mu = init(data, k, initMode)
    oldmu = []
    n = 0
    size = int(len(data) / numFrag)
    while not has_converged(mu, oldmu, epsilon, n, maxIterations):
        oldmu = list(mu)
        clusters = [
            cluster_points_partial(data[f], mu, f * size)
            for f in range(numFrag)
        ]
        partialResult = [
            partial_sum(data[f], clusters[f], f * size) for f in range(numFrag)
        ]

        mu = mergeReduce(reduceCentersTask, partialResult)
        mu = compss_wait_on(mu)
        # New centroid = accumulated point sum / point count
        mu = [mu[c][1] / mu[c][0] for c in mu]
        n += 1
    return mu
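A hypothetical driver for this version, letting kmeans chunk a flat list of points itself:

import random

# 1000 random 2-D points, split into 4 fragments by kmeans itself
points = [[random.random(), random.random()] for _ in range(1000)]
centroids = kmeans(points, k=4, numFrag=4, maxIterations=10,
                   epsilon=1e-4, initMode='random')
print(centroids)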
Example 7
def mean(X, n, wait=False):
    # Variant of the mean above where the total element count n is
    # supplied by the caller instead of being computed from X.
    result = mergeReduce(reduce_add, [_mean(x, n) for x in X])
    if wait:
        from pycompss.api.api import compss_wait_on
        result = compss_wait_on(result)
    return result
Example 8
 def test_mergeReduce(self):
     from pycompss.api.api import compss_wait_on
     res = mergeReduce(self.methodFunction, self.data)
     res = compss_wait_on(res)
     self.assertEqual(res, sum(self.data))
Example 9
 def test_mergeReduce_seq(self):
     res = mergeReduce(self.lambdaFunction, self.data)
     self.assertEqual(res, sum(self.data))
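Every example above funnels its partial results through mergeReduce. It is not part of the PyCOMPSs API itself but a user-level helper that merges partials pairwise in a binary tree, so independent merges at the same tree level can run in parallel when the reducer is a task. A typical implementation, along the lines of the one shipped with the PyCOMPSs tutorial applications:

from collections import deque

def mergeReduce(function, data):
    # Apply a binary function pairwise over data, tree style.
    # Each call to function may be a PyCOMPSs task, so independent
    # merges can be scheduled concurrently by the runtime.
    q = deque(range(len(data)))
    while q:
        x = q.popleft()
        if q:
            y = q.popleft()
            data[x] = function(data[x], data[y])
            q.append(x)
        else:
            return data[x]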
Example 10
    parser.add_argument('-g', '--grids', required=True,
                        help='Path to the grids list file.')
    parser.add_argument('-w', '--window', required=False, type=int, default=3600,
                        help='The window time (in seconds) to take into account (default: 3600)')
    parser.add_argument('-f', '--numFrag', required=False, type=int, default=4,
                        help='Number of workers (cores)')
    arg = vars(parser.parse_args())

    filename    = arg['input']
    grids       = arg['grids']
    window_time = arg['window']
    numFrag     = arg['numFrag']

    print """
        Running: Waze-jams's preprocessing script with the following parameters:
         - Input file:   {}
         - Grids file:   {}
         - Window time:       {} seconds
         - Number of workers: {}

    """.format(filename,grids,window_time,numFrag)


    grids = np.genfromtxt(grids, delimiter=',', dtype=None, names=True)


    # Preprocess each input part in parallel, then tree-merge the matrices
    partial_grid = [preprocessing(grids, window_time, "{}_{}".format(filename, f))
                    for f in range(numFrag)]
    jam_grids_p = mergeReduce(mergeMatrix, partial_grid)

    from pycompss.api.api import compss_wait_on
    jam_grids_p = compss_wait_on(jam_grids_p)
    jam_grids, events = updateJamGrid(jam_grids_p)

    jam_grids.to_csv("output_training.csv", sep=",", index=True, header=False)
    events.to_csv("output_counts.csv", sep=",")