Example #1
0
def srumma(g_a, g_b, g_c, chunk_size, multiplier):
    """Exercise skeleton for a pipelined block multiply of g_a and g_b.

    Lines starting with ``###`` are prompts for steps the reader is expected
    to fill in; the corresponding calls are absent from the code below.  As
    shown, ``a_prev``/``b_prev`` and their handles are only assigned at the
    bottom of the loop body, so the function cannot run until the prompts
    are replaced with real code.

    The visible snippet ends right after the final ``np.dot``; no accumulate
    into ``g_c`` follows it here.

    ``me`` and ``nproc`` are read from the enclosing module -- presumably this
    process's rank and the total process count (confirm against the caller).
    """
    # statically partition the task list among nprocs
    task_list = get_task_list(chunk_size, multiplier)
    # each process gets an equal share; the last one also takes the remainder
    ntasks = multiplier**3 // nproc
    start = me*ntasks
    stop = (me+1)*ntasks
    if me+1 == nproc:
        stop += multiplier**3 % nproc
    # the srumma algorithm, more or less
    task_prev = task_list[start]
    ### use a nonblocking get to request first block and nb handle from 'g_a'
    ###     and assign to 'a_prev' and 'a_nb_prev'
    ### use a nonblocking get to request first block and nb handle from 'g_b'
    ###     and assign to 'b_prev' and 'b_nb_prev'
    for i in range(start+1,stop):
        task_next = task_list[i]
        ### use a nonblocking get to request next block and nb handle from 'g_a'
        ###     and assign to 'a_next' and 'a_nb_next'
        ### use a nonblocking get to request next block and nb handle from 'g_b'
        ###     and assign to 'b_next' and 'b_nb_next'
        ### wait on the previous nb handle for 'g_a'
        ### wait on the previous nb handle for 'g_b'
        result = np.dot(a_prev,b_prev)
        ### accumulate the result into 'g_c' at the previous block location
        # shift the pipeline: the blocks just requested become the current ones
        task_prev = task_next
        a_prev,a_nb_prev = a_next,a_nb_next
        b_prev,b_nb_prev = b_next,b_nb_next
    ### wait on the previous nb handle for 'g_a'
    ### wait on the previous nb handle for 'g_b'
    ga.nbwait(a_nb_prev)
    ga.nbwait(b_nb_prev)
    # multiply the last pair of blocks left in the pipeline
    result = np.dot(a_prev,b_prev)
Example #2
0
def srumma(g_a, g_b, g_c, chunk_size, multiplier):
    """Multiply blocks of g_a and g_b and accumulate the products into g_c.

    The task list is partitioned statically: every process takes a contiguous
    run of ``multiplier**3 // nproc`` tasks and the last process additionally
    takes the remainder.  Communication is pipelined -- the blocks for the
    next task are requested with non-blocking gets before the current pair
    of blocks is multiplied.

    A process whose slice is empty (fewer tasks than processes) does no
    block work but still reaches the final ``ga.sync()``.

    Args:
        g_a: Global-array handle the left-hand blocks are fetched from.
        g_b: Global-array handle the right-hand blocks are fetched from.
        g_c: Global-array handle the block products are accumulated into.
        chunk_size: Forwarded to ``get_task_list``.
        multiplier: Forwarded to ``get_task_list``; ``multiplier**3`` is used
            as the total number of tasks.

    Note:
        ``me`` and ``nproc`` are read from the enclosing module -- presumably
        this process's rank and the total process count (confirm against the
        caller).
    """
    # statically partition the task list among nprocs
    task_list = get_task_list(chunk_size, multiplier)
    total = multiplier**3
    ntasks = total // nproc
    start = me * ntasks
    stop = start + ntasks
    if me + 1 == nproc:
        stop += total % nproc
    # An empty slice must not touch task_list[start]: that entry is either
    # owned by another process (accumulating it again would corrupt g_c) or
    # does not exist at all.
    if start < stop:
        # the srumma algorithm, more or less
        task_prev = task_list[start]
        a_prev, a_nb_prev = ga.nbget(g_a, task_prev.alo, task_prev.ahi)
        b_prev, b_nb_prev = ga.nbget(g_b, task_prev.blo, task_prev.bhi)
        for i in range(start + 1, stop):
            task_next = task_list[i]
            # request the next blocks before working on the current ones
            a_next, a_nb_next = ga.nbget(g_a, task_next.alo, task_next.ahi)
            b_next, b_nb_next = ga.nbget(g_b, task_next.blo, task_next.bhi)
            ga.nbwait(a_nb_prev)
            ga.nbwait(b_nb_prev)
            result = np.dot(a_prev, b_prev)
            ga.acc(g_c, result, task_prev.clo, task_prev.chi)
            # shift the pipeline
            task_prev = task_next
            a_prev, a_nb_prev = a_next, a_nb_next
            b_prev, b_nb_prev = b_next, b_nb_next
        # drain the last pair of blocks left in the pipeline
        ga.nbwait(a_nb_prev)
        ga.nbwait(b_nb_prev)
        result = np.dot(a_prev, b_prev)
        ga.acc(g_c, result, task_prev.clo, task_prev.chi)
    ga.sync()
Example #3
0
def srumma(g_a, g_b, g_c, chunk_size, multiplier, g_counter):
    """Exercise skeleton for a pipelined block multiply with a task counter.

    Lines starting with ``###`` are prompts for steps the reader is expected
    to fill in.  ``task_id`` is never assigned in the code as shown -- it only
    appears in those prompts -- so the function cannot run until they are
    replaced with real reads from ``g_counter``.

    Blocks of ``g_a`` and ``g_b`` are fetched with non-blocking gets, the
    next pair being requested before the current pair is multiplied, and each
    product is accumulated into ``g_c`` at the task's ``clo``/``chi`` range.
    """
    task_list = get_task_list(chunk_size, multiplier)
    ### get first integer from g_counter and assign to 'task_id'
    # the srumma algorithm, more or less
    task_prev = task_list[task_id]
    a_prev,a_nb_prev = ga.nbget(g_a, task_prev.alo, task_prev.ahi)
    b_prev,b_nb_prev = ga.nbget(g_b, task_prev.blo, task_prev.bhi)
    ### get next integer from g_counter and assign to 'task_id'
    # multiplier**3 is used as the total number of tasks
    while task_id < multiplier**3:
        task_next = task_list[task_id]
        # request the next blocks before working on the current ones
        a_next,a_nb_next = ga.nbget(g_a, task_next.alo, task_next.ahi)
        b_next,b_nb_next = ga.nbget(g_b, task_next.blo, task_next.bhi)
        ga.nbwait(a_nb_prev)
        ga.nbwait(b_nb_prev)
        result = np.dot(a_prev,b_prev)
        ga.acc(g_c, result, task_prev.clo, task_prev.chi)
        # shift the pipeline: the blocks just requested become the current ones
        task_prev = task_next
        a_prev,a_nb_prev = a_next,a_nb_next
        b_prev,b_nb_prev = b_next,b_nb_next
        ### get next integer from g_counter and assign to 'task_id'
    # drain the last pair of blocks left in the pipeline
    ga.nbwait(a_nb_prev)
    ga.nbwait(b_nb_prev)
    result = np.dot(a_prev,b_prev)
    ga.acc(g_c, result, task_prev.clo, task_prev.chi)
    ga.sync()
Example #4
0
def srumma(g_a, g_b, g_c, chunk_size, multiplier, g_counter):
    """Multiply blocks of g_a and g_b and accumulate the products into g_c.

    Tasks are claimed dynamically: each ``ga.read_inc(g_counter, 0)`` yields
    the index of the next task this process should handle, and the process
    stops once the index reaches ``multiplier**3``.  Communication is
    pipelined -- the blocks for the next task are requested with non-blocking
    gets before the current pair of blocks is multiplied.

    If the very first index drawn is already past the end of the task list
    (for example, more processes than tasks), the process does no block work
    but still reaches the final ``ga.sync()``.

    Args:
        g_a: Global-array handle the left-hand blocks are fetched from.
        g_b: Global-array handle the right-hand blocks are fetched from.
        g_c: Global-array handle the block products are accumulated into.
        chunk_size: Forwarded to ``get_task_list``.
        multiplier: Forwarded to ``get_task_list``; ``multiplier**3`` is used
            as the total number of tasks.
        g_counter: Global-array handle of the shared task counter; element 0
            is read and incremented.
    """
    task_list = get_task_list(chunk_size, multiplier)
    total = multiplier**3
    # dynamically claim the first task from the shared counter
    task_id = ga.read_inc(g_counter, 0)
    # Guard the first lookup: the counter may already be past the last task.
    if task_id < total:
        # the srumma algorithm, more or less
        task_prev = task_list[task_id]
        a_prev, a_nb_prev = ga.nbget(g_a, task_prev.alo, task_prev.ahi)
        b_prev, b_nb_prev = ga.nbget(g_b, task_prev.blo, task_prev.bhi)
        task_id = ga.read_inc(g_counter, 0)
        while task_id < total:
            task_next = task_list[task_id]
            # request the next blocks before working on the current ones
            a_next, a_nb_next = ga.nbget(g_a, task_next.alo, task_next.ahi)
            b_next, b_nb_next = ga.nbget(g_b, task_next.blo, task_next.bhi)
            ga.nbwait(a_nb_prev)
            ga.nbwait(b_nb_prev)
            result = np.dot(a_prev, b_prev)
            ga.acc(g_c, result, task_prev.clo, task_prev.chi)
            # shift the pipeline
            task_prev = task_next
            a_prev, a_nb_prev = a_next, a_nb_next
            b_prev, b_nb_prev = b_next, b_nb_next
            task_id = ga.read_inc(g_counter, 0)
        # drain the last pair of blocks left in the pipeline
        ga.nbwait(a_nb_prev)
        ga.nbwait(b_nb_prev)
        result = np.dot(a_prev, b_prev)
        ga.acc(g_c, result, task_prev.clo, task_prev.chi)
    ga.sync()
Example #5
0
def srumma(g_a, g_b, g_c, chunk_size, multiplier, g_counter):
    """Accumulate block products of g_a and g_b into g_c, claiming tasks
    from a shared counter.

    Every call to ``ga.read_inc(g_counter, 0)`` hands this process the index
    of its next task; work stops when that index reaches ``multiplier**3``.
    While one pair of blocks is being multiplied, the next pair is already in
    flight via non-blocking gets.

    A process that draws an out-of-range index on its first attempt has no
    work and goes straight to the closing ``ga.sync()``.

    Args:
        g_a: Global-array handle the left-hand blocks are fetched from.
        g_b: Global-array handle the right-hand blocks are fetched from.
        g_c: Global-array handle the block products are accumulated into.
        chunk_size: Forwarded to ``get_task_list``.
        multiplier: Forwarded to ``get_task_list``; ``multiplier**3`` is used
            as the total number of tasks.
        g_counter: Global-array handle of the shared task counter; element 0
            is read and incremented.
    """
    task_list = get_task_list(chunk_size, multiplier)
    task_count = multiplier**3

    # tasks are handed out dynamically through the shared counter
    task_id = ga.read_inc(g_counter, 0)
    if task_id >= task_count:
        # Nothing left for this process; indexing task_list here would fail.
        ga.sync()
        return

    # the srumma algorithm, more or less
    task_prev = task_list[task_id]
    a_prev, a_nb_prev = ga.nbget(g_a, task_prev.alo, task_prev.ahi)
    b_prev, b_nb_prev = ga.nbget(g_b, task_prev.blo, task_prev.bhi)
    task_id = ga.read_inc(g_counter, 0)
    while task_id < task_count:
        task_next = task_list[task_id]
        # start fetching the next blocks before consuming the current ones
        a_next, a_nb_next = ga.nbget(g_a, task_next.alo, task_next.ahi)
        b_next, b_nb_next = ga.nbget(g_b, task_next.blo, task_next.bhi)
        ga.nbwait(a_nb_prev)
        ga.nbwait(b_nb_prev)
        result = np.dot(a_prev, b_prev)
        ga.acc(g_c, result, task_prev.clo, task_prev.chi)
        # the in-flight blocks become the current ones
        task_prev = task_next
        a_prev, a_nb_prev = a_next, a_nb_next
        b_prev, b_nb_prev = b_next, b_nb_next
        task_id = ga.read_inc(g_counter, 0)
    # finish the final pair of blocks
    ga.nbwait(a_nb_prev)
    ga.nbwait(b_nb_prev)
    result = np.dot(a_prev, b_prev)
    ga.acc(g_c, result, task_prev.clo, task_prev.chi)
    ga.sync()
Example #6
0
def srumma(g_a, g_b, g_c, chunk_size, multiplier):
    """Accumulate block products of g_a and g_b into g_c over a static
    share of the task list.

    Each process handles the contiguous index range
    ``[me * ntasks, (me + 1) * ntasks)`` with
    ``ntasks = multiplier**3 // nproc``; the last process also picks up the
    ``multiplier**3 % nproc`` leftover tasks.  While one pair of blocks is
    being multiplied, the next pair is already in flight via non-blocking
    gets.

    A process whose range is empty skips the block work entirely and goes
    straight to the closing ``ga.sync()``.

    Args:
        g_a: Global-array handle the left-hand blocks are fetched from.
        g_b: Global-array handle the right-hand blocks are fetched from.
        g_c: Global-array handle the block products are accumulated into.
        chunk_size: Forwarded to ``get_task_list``.
        multiplier: Forwarded to ``get_task_list``; ``multiplier**3`` is used
            as the total number of tasks.

    Note:
        ``me`` and ``nproc`` are read from the enclosing module -- presumably
        this process's rank and the total process count (confirm against the
        caller).
    """
    # statically partition the task list among nprocs
    task_list = get_task_list(chunk_size, multiplier)
    task_count = multiplier**3
    ntasks = task_count // nproc
    start = me * ntasks
    stop = (me + 1) * ntasks
    if me + 1 == nproc:
        stop += task_count % nproc

    if start >= stop:
        # With fewer tasks than processes the range is empty.  Falling
        # through would process task_list[start], which belongs to another
        # process (or does not exist), so only take part in the sync.
        ga.sync()
        return

    # the srumma algorithm, more or less
    task_prev = task_list[start]
    a_prev, a_nb_prev = ga.nbget(g_a, task_prev.alo, task_prev.ahi)
    b_prev, b_nb_prev = ga.nbget(g_b, task_prev.blo, task_prev.bhi)
    for i in range(start + 1, stop):
        task_next = task_list[i]
        # start fetching the next blocks before consuming the current ones
        a_next, a_nb_next = ga.nbget(g_a, task_next.alo, task_next.ahi)
        b_next, b_nb_next = ga.nbget(g_b, task_next.blo, task_next.bhi)
        ga.nbwait(a_nb_prev)
        ga.nbwait(b_nb_prev)
        result = np.dot(a_prev, b_prev)
        ga.acc(g_c, result, task_prev.clo, task_prev.chi)
        # the in-flight blocks become the current ones
        task_prev = task_next
        a_prev, a_nb_prev = a_next, a_nb_next
        b_prev, b_nb_prev = b_next, b_nb_next
    # finish the final pair of blocks
    ga.nbwait(a_nb_prev)
    ga.nbwait(b_nb_prev)
    result = np.dot(a_prev, b_prev)
    ga.acc(g_c, result, task_prev.clo, task_prev.chi)
    ga.sync()