Ejemplo n.º 1
0
def srumma(g_a, g_b, g_c, chunk_size, multiplier, g_counter):
    """Accumulate block products of g_a and g_b into g_c, task by task.

    Work is handed out dynamically: every ``ga.read_inc(g_counter, 0)`` call
    yields the index of the next task this process should run, and the
    process keeps asking until the index reaches ``multiplier**3``.  The
    non-blocking gets for the next pair of blocks are issued before the
    current pair is multiplied, so transfer and computation can overlap.

    Args:
        g_a, g_b: handles of the global arrays holding the two operands.
        g_c: handle of the global array the products are accumulated into.
        chunk_size, multiplier: forwarded to ``get_task_list``; the number
            of tasks is taken to be ``multiplier**3``.
        g_counter: handle of the global array whose element 0 serves as the
            shared task counter (presumably zeroed by the caller -- confirm).

    Returns:
        None.  ``ga.sync()`` is called on every path out of the function.
    """
    task_list = get_task_list(chunk_size, multiplier)
    ntasks = multiplier**3
    # tasks are claimed dynamically from the shared counter
    task_id = ga.read_inc(g_counter, 0)
    if task_id >= ntasks:
        # More processes than tasks: there is nothing to fetch, but this
        # process must still reach the final sync like everybody else.
        ga.sync()
        return
    # the srumma algorithm, more or less
    task_prev = task_list[task_id]
    a_prev, a_nb_prev = ga.nbget(g_a, task_prev.alo, task_prev.ahi)
    b_prev, b_nb_prev = ga.nbget(g_b, task_prev.blo, task_prev.bhi)
    task_id = ga.read_inc(g_counter, 0)
    while task_id < ntasks:
        # start fetching the next operands before working on the current ones
        task_next = task_list[task_id]
        a_next, a_nb_next = ga.nbget(g_a, task_next.alo, task_next.ahi)
        b_next, b_nb_next = ga.nbget(g_b, task_next.blo, task_next.bhi)
        ga.nbwait(a_nb_prev)
        ga.nbwait(b_nb_prev)
        result = np.dot(a_prev, b_prev)
        ga.acc(g_c, result, task_prev.clo, task_prev.chi)
        task_prev = task_next
        a_prev, a_nb_prev = a_next, a_nb_next
        b_prev, b_nb_prev = b_next, b_nb_next
        task_id = ga.read_inc(g_counter, 0)
    # drain the pipeline: the last fetched pair has not been multiplied yet
    ga.nbwait(a_nb_prev)
    ga.nbwait(b_nb_prev)
    result = np.dot(a_prev, b_prev)
    ga.acc(g_c, result, task_prev.clo, task_prev.chi)
    ga.sync()
Ejemplo n.º 2
0
def time_acc(g_a, lo, hi, buf, chunk, jump, local):
    """Return the average wall-clock time of one 2D ``ga.acc`` call.

    The region ``lo``..``hi`` is walked in ``chunk`` x ``chunk`` patches whose
    origins are ``chunk + jump`` apart in both dimensions, and one
    accumulate is issued per patch.

    Args:
        g_a: handle of the global array being accumulated into.
        lo, hi: two-element lower/upper bounds of the region to walk.
        buf: local buffer sliced with the same indices as the patch.
        chunk: edge length of each patch.
        jump: extra distance inserted between consecutive patches.
        local: if true, each patch is accumulated at its own coordinates;
            otherwise the destination is displaced by one of three offsets
            (by ``rows``, by ``cols``, or by both), chosen by ``count % 3``.

    Returns:
        Elapsed seconds divided by the number of accumulates, or ``0.0``
        when the region is too small to hold a single patch.
    """
    count = 0
    rows = hi[0] - lo[0]
    cols = hi[1] - lo[1]
    shifti = [rows, 0, rows]
    shiftj = [0, cols, cols]
    seconds = time.time()
    # distance between consecutive patches increased by jump
    # to destroy locality of reference
    for ilo in range(lo[0], hi[0] - chunk - jump + 1, chunk + jump):
        ihi = ilo + chunk
        for jlo in range(lo[1], hi[1] - chunk - jump + 1, chunk + jump):
            jhi = jlo + chunk
            count += 1
            if local:
                llo = [ilo, jlo]
                lhi = [ihi, jhi]
                ga.acc(g_a, buf[ga.zip(llo, lhi)], llo, lhi, 1)
            else:
                index = count % 3
                llo = [ilo + shifti[index], jlo + shiftj[index]]
                lhi = [ihi + shifti[index], jhi + shiftj[index]]
                ga.acc(g_a, buf[ilo:ihi, jlo:jhi], llo, lhi, 1)
    seconds = time.time() - seconds
    if count == 0:
        # no patch fitted, so there is no per-call average to report
        return 0.0
    return seconds / count
Ejemplo n.º 3
0
def time_acc(g_a, lo, hi, buf, chunk, jump, local):
    """Measure the mean duration of a 2D ``ga.acc`` over strided patches.

    Patches of ``chunk`` x ``chunk`` elements are taken from the region
    ``lo``..``hi`` with a stride of ``chunk+jump`` in each dimension; every
    patch triggers exactly one accumulate into ``g_a``.

    Args:
        g_a: handle of the target global array.
        lo, hi: two-element lower/upper bounds of the region.
        buf: local buffer, sliced with the patch indices.
        chunk: patch edge length.
        jump: gap added between consecutive patches.
        local: true to accumulate at the patch's own coordinates; false to
            accumulate at a displaced location (three displacements built
            from ``rows`` and ``cols`` are cycled through via ``count%3``).

    Returns:
        Seconds per accumulate, or ``0.0`` if no patch fits in the region.
    """
    count = 0
    rows = hi[0]-lo[0]
    cols = hi[1]-lo[1]
    shifti = [rows, 0, rows]
    shiftj = [0, cols, cols]
    seconds = time.time()
    # distance between consecutive patches increased by jump
    # to destroy locality of reference
    for ilo in range(lo[0], hi[0]-chunk-jump+1, chunk+jump):
        ihi = ilo + chunk
        for jlo in range(lo[1], hi[1]-chunk-jump+1, chunk+jump):
            jhi = jlo + chunk
            count += 1
            if local:
                llo = [ilo,jlo]
                lhi = [ihi,jhi]
                ga.acc(g_a, buf[ga.zip(llo,lhi)], llo, lhi, 1)
            else:
                index = count%3
                llo = [ilo+shifti[index],jlo+shiftj[index]]
                lhi = [ihi+shifti[index],jhi+shiftj[index]]
                ga.acc(g_a, buf[ilo:ihi,jlo:jhi], llo, lhi, 1)
    seconds = time.time() - seconds
    if count == 0:
        # nothing was timed; avoid dividing by zero
        return 0.0
    return seconds/count
def srumma(g_a, g_b, g_c, chunk_size, multiplier, g_counter):
    """Run the block matrix multiply, claiming tasks from a shared counter.

    Each process repeatedly calls ``ga.read_inc(g_counter, 0)`` to obtain
    the index of its next task and stops once that index is no longer
    below ``multiplier**3``.  Operand blocks for the next task are requested
    with non-blocking gets before the current blocks are multiplied.

    Args:
        g_a, g_b: handles of the operand global arrays.
        g_c: handle of the global array receiving the accumulated products.
        chunk_size, multiplier: passed to ``get_task_list``; ``multiplier**3``
            is used as the task count.
        g_counter: handle of the global array used as the task counter
            (element 0; presumably reset by the caller -- confirm).

    Returns:
        None.  Every exit path goes through ``ga.sync()``.
    """
    task_list = get_task_list(chunk_size, multiplier)
    ntasks = multiplier**3
    # tasks are handed out dynamically through the shared counter
    task_id = ga.read_inc(g_counter, 0)
    if task_id >= ntasks:
        # No task left for this process (more processes than tasks);
        # indexing task_list here would fail, so just join the final sync.
        ga.sync()
        return
    # the srumma algorithm, more or less
    task_prev = task_list[task_id]
    a_prev,a_nb_prev = ga.nbget(g_a, task_prev.alo, task_prev.ahi)
    b_prev,b_nb_prev = ga.nbget(g_b, task_prev.blo, task_prev.bhi)
    task_id = ga.read_inc(g_counter, 0)
    while task_id < ntasks:
        # prefetch the next operands, then finish the current task
        task_next = task_list[task_id]
        a_next,a_nb_next = ga.nbget(g_a, task_next.alo, task_next.ahi)
        b_next,b_nb_next = ga.nbget(g_b, task_next.blo, task_next.bhi)
        ga.nbwait(a_nb_prev)
        ga.nbwait(b_nb_prev)
        result = np.dot(a_prev,b_prev)
        ga.acc(g_c, result, task_prev.clo, task_prev.chi)
        task_prev = task_next
        a_prev,a_nb_prev = a_next,a_nb_next
        b_prev,b_nb_prev = b_next,b_nb_next
        task_id = ga.read_inc(g_counter, 0)
    # the last prefetched task is still pending
    ga.nbwait(a_nb_prev)
    ga.nbwait(b_nb_prev)
    result = np.dot(a_prev,b_prev)
    ga.acc(g_c, result, task_prev.clo, task_prev.chi)
    ga.sync()
Ejemplo n.º 5
0
def srumma(g_a, g_b, g_c, chunk_size, multiplier):
    """Accumulate block products of g_a and g_b into g_c, statically split.

    The ``multiplier**3`` tasks from ``get_task_list`` are divided into
    contiguous ranges of equal size, one per process; the last process also
    takes the remainder.  Inside a range the gets for the next task are
    issued before the current pair of blocks is multiplied.

    Args:
        g_a, g_b: handles of the operand global arrays.
        g_c: handle of the global array receiving the accumulated products.
        chunk_size, multiplier: forwarded to ``get_task_list``.

    Relies on the module-level names ``me`` and ``nproc`` (assumed to be
    this process's rank and the process count -- confirm).

    Returns:
        None.  ``ga.sync()`` is called on every path out of the function.
    """
    # statically partition the task list among nprocs
    task_list = get_task_list(chunk_size, multiplier)
    total = multiplier**3
    ntasks = total // nproc
    start = me * ntasks
    stop = (me + 1) * ntasks
    if me + 1 == nproc:
        stop += total % nproc
    if start >= stop:
        # Empty share (more processes than tasks).  Running task_list[start]
        # anyway would repeat a task owned by another process and add its
        # product to g_c twice, so only join the final sync.
        ga.sync()
        return
    # the srumma algorithm, more or less
    task_prev = task_list[start]
    a_prev, a_nb_prev = ga.nbget(g_a, task_prev.alo, task_prev.ahi)
    b_prev, b_nb_prev = ga.nbget(g_b, task_prev.blo, task_prev.bhi)
    for i in range(start + 1, stop):
        # prefetch the next operands, then finish the current task
        task_next = task_list[i]
        a_next, a_nb_next = ga.nbget(g_a, task_next.alo, task_next.ahi)
        b_next, b_nb_next = ga.nbget(g_b, task_next.blo, task_next.bhi)
        ga.nbwait(a_nb_prev)
        ga.nbwait(b_nb_prev)
        result = np.dot(a_prev, b_prev)
        ga.acc(g_c, result, task_prev.clo, task_prev.chi)
        task_prev = task_next
        a_prev, a_nb_prev = a_next, a_nb_next
        b_prev, b_nb_prev = b_next, b_nb_next
    # drain the pipeline: the last fetched pair has not been multiplied yet
    ga.nbwait(a_nb_prev)
    ga.nbwait(b_nb_prev)
    result = np.dot(a_prev, b_prev)
    ga.acc(g_c, result, task_prev.clo, task_prev.chi)
    ga.sync()
Ejemplo n.º 6
0
def srumma(g_a, g_b, g_c, chunk_size, multiplier, g_counter):
    """Run the block matrix multiply with tasks drawn from ``g_counter``.

    The task index is obtained with ``ga.read_inc(g_counter, 0)`` before the
    first task and again after each task is queued; the loop ends once the
    index reaches ``multiplier**3``.  Operands for the next task are
    requested with non-blocking gets before the current ones are multiplied.

    Args:
        g_a, g_b: handles of the operand global arrays.
        g_c: handle of the global array receiving the accumulated products.
        chunk_size, multiplier: forwarded to ``get_task_list``.
        g_counter: handle of the global array used as the task counter
            (element 0; presumably reset by the caller -- confirm).

    Returns:
        None.  Every exit path goes through ``ga.sync()``.
    """
    task_list = get_task_list(chunk_size, multiplier)
    ntasks = multiplier**3
    # get first integer from g_counter and assign to 'task_id'
    task_id = ga.read_inc(g_counter, 0)
    if task_id >= ntasks:
        # no task left for this process; still take part in the final sync
        ga.sync()
        return
    # the srumma algorithm, more or less
    task_prev = task_list[task_id]
    a_prev,a_nb_prev = ga.nbget(g_a, task_prev.alo, task_prev.ahi)
    b_prev,b_nb_prev = ga.nbget(g_b, task_prev.blo, task_prev.bhi)
    # get next integer from g_counter and assign to 'task_id'
    task_id = ga.read_inc(g_counter, 0)
    while task_id < ntasks:
        task_next = task_list[task_id]
        a_next,a_nb_next = ga.nbget(g_a, task_next.alo, task_next.ahi)
        b_next,b_nb_next = ga.nbget(g_b, task_next.blo, task_next.bhi)
        ga.nbwait(a_nb_prev)
        ga.nbwait(b_nb_prev)
        result = np.dot(a_prev,b_prev)
        ga.acc(g_c, result, task_prev.clo, task_prev.chi)
        task_prev = task_next
        a_prev,a_nb_prev = a_next,a_nb_next
        b_prev,b_nb_prev = b_next,b_nb_next
        # get next integer from g_counter and assign to 'task_id'
        task_id = ga.read_inc(g_counter, 0)
    # the last prefetched task is still pending
    ga.nbwait(a_nb_prev)
    ga.nbwait(b_nb_prev)
    result = np.dot(a_prev,b_prev)
    ga.acc(g_c, result, task_prev.clo, task_prev.chi)
    ga.sync()
Ejemplo n.º 7
0
def srumma(g_a, g_b, g_c, chunk_size, multiplier):
    """Run the block matrix multiply over this process's fixed task range.

    ``multiplier**3`` tasks are split into equal contiguous ranges, one per
    process, with the remainder appended to the last process's range.  The
    operands of the next task are requested with non-blocking gets before
    the current operands are multiplied and accumulated into ``g_c``.

    Args:
        g_a, g_b: handles of the operand global arrays.
        g_c: handle of the global array receiving the accumulated products.
        chunk_size, multiplier: forwarded to ``get_task_list``.

    Relies on the module-level names ``me`` and ``nproc`` (assumed to be
    this process's rank and the process count -- confirm).

    Returns:
        None.  Every exit path goes through ``ga.sync()``.
    """
    # statically partition the task list among nprocs
    task_list = get_task_list(chunk_size, multiplier)
    total = multiplier**3
    ntasks = total // nproc
    start = me*ntasks
    stop = (me+1)*ntasks
    if me+1 == nproc:
        stop += total % nproc
    if start >= stop:
        # This process owns no tasks (nproc exceeds the task count).
        # Executing task_list[start] regardless would duplicate another
        # process's contribution to g_c, so skip straight to the sync.
        ga.sync()
        return
    # the srumma algorithm, more or less
    task_prev = task_list[start]
    a_prev,a_nb_prev = ga.nbget(g_a, task_prev.alo, task_prev.ahi)
    b_prev,b_nb_prev = ga.nbget(g_b, task_prev.blo, task_prev.bhi)
    for i in range(start+1,stop):
        task_next = task_list[i]
        a_next,a_nb_next = ga.nbget(g_a, task_next.alo, task_next.ahi)
        b_next,b_nb_next = ga.nbget(g_b, task_next.blo, task_next.bhi)
        ga.nbwait(a_nb_prev)
        ga.nbwait(b_nb_prev)
        result = np.dot(a_prev,b_prev)
        ga.acc(g_c, result, task_prev.clo, task_prev.chi)
        task_prev = task_next
        a_prev,a_nb_prev = a_next,a_nb_next
        b_prev,b_nb_prev = b_next,b_nb_next
    # the last prefetched task is still pending
    ga.nbwait(a_nb_prev)
    ga.nbwait(b_nb_prev)
    result = np.dot(a_prev,b_prev)
    ga.acc(g_c, result, task_prev.clo, task_prev.chi)
    ga.sync()
Ejemplo n.º 8
0
def time_acc1(g_a, lo, hi, buf, chunk, jump, local):
    """Return the average wall-clock time of one 1D ``ga.acc`` call.

    Chunks of ``chunk`` elements are taken from ``lo[0]``..``hi[0]`` with a
    stride of ``chunk+jump``; each chunk triggers one accumulate into ``g_a``.

    Args:
        g_a: handle of the target global array.
        lo, hi: bounds of the range; only element 0 of each is used.
        buf: local 1D buffer, sliced with the chunk indices.
        chunk: number of elements per accumulate.
        jump: gap added between consecutive chunks.
        local: true to accumulate at the chunk's own indices; false to
            accumulate at indices displaced by one, two or three times the
            range length, chosen by ``count%3``.

    Returns:
        Seconds per accumulate, or ``0.0`` if no chunk fits in the range.
    """
    # Note: differs from test.F because the passed buffer must be the same
    # size/shape as the patch. The slicing should be fast as the buffer is 1D
    # and contiguous (and so is the slice).
    count = 0
    rows = hi[0]-lo[0]
    shift = [rows, 2*rows, 3*rows]
    seconds = time.time()
    # distance between consecutive patches increased by jump
    # to destroy locality of reference
    for ilo in range(lo[0], hi[0]-chunk-jump+1, chunk+jump):
        ihi = ilo+chunk
        count += 1
        if local:
            ga.acc(g_a, buf[ilo:ihi], [ilo], [ihi], 1.0)
        else:
            index = count%3
            ga.acc(g_a, buf[ilo:ihi], ilo+shift[index], ihi+shift[index], 1.0)
    seconds = time.time() - seconds
    if count == 0:
        # nothing was timed; avoid dividing by zero
        return 0.0
    return seconds/count
Ejemplo n.º 9
0
def time_acc1(g_a, lo, hi, buf, chunk, jump, local):
    """Measure the mean duration of a 1D ``ga.acc`` over strided chunks.

    The range ``lo[0]``..``hi[0]`` is walked in pieces of ``chunk`` elements
    whose starts are ``chunk + jump`` apart, with one accumulate per piece.

    Args:
        g_a: handle of the global array being accumulated into.
        lo, hi: bounds of the range; only element 0 of each is used.
        buf: local 1D buffer sliced with the same indices as the piece.
        chunk: number of elements per accumulate.
        jump: extra distance inserted between consecutive pieces.
        local: if true, each piece is accumulated at its own indices;
            otherwise the destination is displaced by one, two or three
            times the range length, selected by ``count % 3``.

    Returns:
        Elapsed seconds divided by the number of accumulates, or ``0.0``
        when the range is too short to hold a single piece.
    """
    # Note: differs from test.F because the passed buffer must be the same
    # size/shape as the patch. The slicing should be fast as the buffer is 1D
    # and contiguous (and so is the slice).
    count = 0
    rows = hi[0] - lo[0]
    shift = [rows, 2 * rows, 3 * rows]
    seconds = time.time()
    # distance between consecutive patches increased by jump
    # to destroy locality of reference
    for ilo in range(lo[0], hi[0] - chunk - jump + 1, chunk + jump):
        ihi = ilo + chunk
        count += 1
        if local:
            ga.acc(g_a, buf[ilo:ihi], [ilo], [ihi], 1.0)
        else:
            index = count % 3
            ga.acc(g_a, buf[ilo:ihi], ilo + shift[index], ihi + shift[index],
                   1.0)
    seconds = time.time() - seconds
    if count == 0:
        # no piece fitted, so there is no per-call average to report
        return 0.0
    return seconds / count
Ejemplo n.º 10
0
def check_accumulate_overlap(gatype):
    if 0 == me:
        print '> Checking overlapping accumulate ...',
    g_a = create_global_array(gatype)
    ga.zero(g_a)
    ga.acc(g_a, [1], (n/2,n/2), (n/2+1,n/2+1), 1)
    ga.sync()
    if MIRROR:
        if 0 == iproc:
            x = abs(ga.get(g_a, (n/2,n/2), (n/2+1,n/2+1))[0,0] - lprocs)
            if not 0 == x:
                ga.error('overlapping accumulate failed -- expected %s got %s'%(
                        x, lprocs))
    else:
        if 0 == me:
            x = abs(ga.get(g_a, (n/2,n/2), (n/2+1,n/2+1))[0,0] - nproc)
            if not 0 == x:
                ga.error('overlapping accumulate failed -- expected %s got %s'%(
                        x, nproc))
    if 0 == me:
        print 'OK'
    ga.destroy(g_a)
Ejemplo n.º 11
0
def check_accumulate_disjoint(gatype):
    """Each node accumulates into disjoint sections of the array."""
    if 0 == me:
        print '> Checking disjoint accumulate ...',
    g_a = create_global_array(gatype)
    a = create_local_a(gatype)
    b = np.fromfunction(lambda i,j: i+j+2, (n,n), dtype=ga.dtype(gatype))
    if 0 == me:
        ga.put(g_a, a)
    ga.sync()
    inc = (n-1)/20 + 1
    ij = 0
    for i in range(0,n,inc):
        for j in range(0,n,inc):
            x = 10.0
            lo = [i,j]
            hi = [min(i+inc,n), min(j+inc,n)]
            piece = b[ga.zip(lo,hi)]
            check = False
            if MIRROR:
                check = ij % lprocs == iproc
            else:
                check = ij % nproc == me
            if check:
                ga.acc(g_a, piece, lo, hi, x)
            ga.sync()
            ij += 1
            # each process applies all updates to its local copy
            a[ga.zip(lo,hi)] += x * piece
    ga.sync()
    # all nodes check all of a
    if not np.all(ga.get(g_a) == a):
        ga.error('acc failed')
    if 0 == me:
        print 'OK'
    ga.destroy(g_a)
Ejemplo n.º 12
0
# Driver: every process adds its partial result from comp_pi into a
# one-element global array, and rank 0 reads and prints the total.
nprocs = ga.nnodes()
myrank = ga.nodeid()

g_pi = ga.create(ga.C_DBL, [1])

# A single command-line argument fixes n and makes the loop run once.
one_time = len(sys.argv) == 2
if one_time:
    n = int(sys.argv[1])

while True:
    if not one_time:
        # Rank 0 supplies the value from get_n(); the other ranks pass a
        # placeholder and use whatever the broadcast returns.
        n = ga.brdcst(get_n() if myrank == 0 else 0)
        if n == 0:
            break
    ga.zero(g_pi)
    mypi = comp_pi(n, myrank, nprocs)
    ga.acc(g_pi, mypi)
    ga.sync()
    if myrank == 0:
        pi = ga.get(g_pi)[0]
        prn_pi(pi, PI)
    if one_time:
        break

ga.destroy(g_pi)
Ejemplo n.º 13
0
# Parallel pi driver: partial results from comp_pi are accumulated into a
# single-element global array; rank 0 fetches the total and reports it.
nprocs = ga.nnodes()
myrank = ga.nodeid()

g_pi = ga.create(ga.C_DBL, [1])

# With exactly one command-line argument, n comes from it and only one
# pass is made; otherwise n is requested again on every pass.
one_time = len(sys.argv) == 2
if one_time:
    n = int(sys.argv[1])

while True:
    if not one_time:
        if myrank == 0:
            n = ga.brdcst(get_n())
        else:
            n = ga.brdcst(0)
        # n == 0 ends the interactive loop on every rank
        if n == 0:
            break
    ga.zero(g_pi)
    mypi = comp_pi(n, myrank, nprocs)
    ga.acc(g_pi, mypi)
    ga.sync()
    if myrank == 0:
        pi = ga.get(g_pi)[0]
        prn_pi(pi, PI)
    if one_time:
        break

ga.destroy(g_pi)