def srumma(g_a, g_b, g_c, chunk_size, multiplier, g_counter):
    """Accumulate the blocked product of ``g_a`` and ``g_b`` into ``g_c``.

    Work is handed out dynamically: each ``ga.read_inc(g_counter, 0)``
    returns the next task id from a shared counter, so a process keeps
    claiming ids until they run out.  The patches of the next task are
    requested with non-blocking gets before the current task is
    multiplied, so communication can overlap with ``np.dot``.

    Args:
        g_a: Handle of the left operand; patches are read with ``ga.nbget``.
        g_b: Handle of the right operand; patches are read with ``ga.nbget``.
        g_c: Handle of the result; partial products are added with ``ga.acc``.
        chunk_size: Forwarded unchanged to ``get_task_list``.
        multiplier: Forwarded to ``get_task_list``; ``multiplier**3`` is
            treated as the number of valid task ids.
        g_counter: Handle of the integer array whose element 0 serves as
            the shared task counter (presumably zeroed by the caller
            before the first call -- confirm).

    Returns:
        None.  The function ends with ``ga.sync()``, so all processes are
        expected to call it together.
    """
    task_list = get_task_list(chunk_size, multiplier)
    # NOTE(review): assumes get_task_list yields exactly multiplier**3
    # tasks, matching the loop bound below -- confirm.
    total_tasks = multiplier**3

    # Claim the first task.  If the counter is already exhausted (e.g. more
    # processes than tasks) there is nothing to do; the id must be checked
    # here exactly like every later id instead of indexing task_list blindly.
    task_id = ga.read_inc(g_counter, 0)
    if task_id < total_tasks:
        task_prev = task_list[task_id]
        a_prev, a_nb_prev = ga.nbget(g_a, task_prev.alo, task_prev.ahi)
        b_prev, b_nb_prev = ga.nbget(g_b, task_prev.blo, task_prev.bhi)

        task_id = ga.read_inc(g_counter, 0)
        while task_id < total_tasks:
            # Start fetching the next task's patches before computing.
            task_next = task_list[task_id]
            a_next, a_nb_next = ga.nbget(g_a, task_next.alo, task_next.ahi)
            b_next, b_nb_next = ga.nbget(g_b, task_next.blo, task_next.bhi)

            # Finish the task whose patches were requested earlier.
            ga.nbwait(a_nb_prev)
            ga.nbwait(b_nb_prev)
            result = np.dot(a_prev, b_prev)
            ga.acc(g_c, result, task_prev.clo, task_prev.chi)

            # The prefetched task becomes the current one.
            task_prev = task_next
            a_prev, a_nb_prev = a_next, a_nb_next
            b_prev, b_nb_prev = b_next, b_nb_next
            task_id = ga.read_inc(g_counter, 0)

        # Drain the pipeline: the last claimed task is still pending.
        ga.nbwait(a_nb_prev)
        ga.nbwait(b_nb_prev)
        result = np.dot(a_prev, b_prev)
        ga.acc(g_c, result, task_prev.clo, task_prev.chi)

    # Reached by every process, including those that got no task.
    ga.sync()
def srumma(g_a, g_b, g_c, chunk_size, multiplier):
    """Accumulate the blocked product of ``g_a`` and ``g_b`` into ``g_c``.

    The task list is split statically: each rank takes
    ``multiplier**3 // nproc`` consecutive tasks and the last rank also
    takes the remainder.  Within its slice a rank requests the patches of
    the next task with non-blocking gets before multiplying the current
    ones, so communication can overlap with ``np.dot``.

    Relies on the module-level names ``me`` and ``nproc`` (presumably this
    process's rank and the number of processes -- confirm where they are
    set).

    Args:
        g_a: Handle of the left operand; patches are read with ``ga.nbget``.
        g_b: Handle of the right operand; patches are read with ``ga.nbget``.
        g_c: Handle of the result; partial products are added with ``ga.acc``.
        chunk_size: Forwarded unchanged to ``get_task_list``.
        multiplier: Forwarded to ``get_task_list``; ``multiplier**3`` is
            treated as the total number of tasks.

    Returns:
        None.  The function ends with ``ga.sync()``, so all processes are
        expected to call it together.
    """
    task_list = get_task_list(chunk_size, multiplier)
    total_tasks = multiplier**3

    # Contiguous slice [start, stop) of the task list for this rank.
    ntasks = total_tasks // nproc
    start = me * ntasks
    stop = (me + 1) * ntasks
    if me + 1 == nproc:
        stop += total_tasks % nproc

    # When there are fewer tasks than ranks, ntasks is 0 and every rank but
    # the last owns an empty slice.  Such a rank must not process
    # task_list[start]: that task belongs to another rank, and accumulating
    # it again would add its product into g_c more than once.
    if start < stop:
        task_prev = task_list[start]
        a_prev, a_nb_prev = ga.nbget(g_a, task_prev.alo, task_prev.ahi)
        b_prev, b_nb_prev = ga.nbget(g_b, task_prev.blo, task_prev.bhi)

        for i in range(start + 1, stop):
            # Start fetching the next task's patches before computing.
            task_next = task_list[i]
            a_next, a_nb_next = ga.nbget(g_a, task_next.alo, task_next.ahi)
            b_next, b_nb_next = ga.nbget(g_b, task_next.blo, task_next.bhi)

            # Finish the task whose patches were requested earlier.
            ga.nbwait(a_nb_prev)
            ga.nbwait(b_nb_prev)
            result = np.dot(a_prev, b_prev)
            ga.acc(g_c, result, task_prev.clo, task_prev.chi)

            # The prefetched task becomes the current one.
            task_prev = task_next
            a_prev, a_nb_prev = a_next, a_nb_next
            b_prev, b_nb_prev = b_next, b_nb_next

        # Drain the pipeline: the last task of the slice is still pending.
        ga.nbwait(a_nb_prev)
        ga.nbwait(b_nb_prev)
        result = np.dot(a_prev, b_prev)
        ga.acc(g_c, result, task_prev.clo, task_prev.chi)

    # Reached by every rank, including those with an empty slice.
    ga.sync()
def srumma(g_a, g_b, g_c, chunk_size, multiplier, g_counter):
    """Accumulate the blocked product of ``g_a`` and ``g_b`` into ``g_c``.

    Task ids are drawn from the shared counter ``g_counter``: every
    ``ga.read_inc(g_counter, 0)`` returns the next integer, and a process
    keeps drawing until the id reaches ``multiplier**3``.  The patches of
    the next task are requested with non-blocking gets before the current
    task is multiplied.

    Args:
        g_a: Handle of the left operand; patches are read with ``ga.nbget``.
        g_b: Handle of the right operand; patches are read with ``ga.nbget``.
        g_c: Handle of the result; partial products are added with ``ga.acc``.
        chunk_size: Forwarded unchanged to ``get_task_list``.
        multiplier: Forwarded to ``get_task_list``; ``multiplier**3`` is
            treated as the number of valid task ids.
        g_counter: Handle of the integer array whose element 0 serves as
            the shared task counter (presumably zeroed by the caller
            before the first call -- confirm).

    Returns:
        None.  The function ends with ``ga.sync()``, so all processes are
        expected to call it together.
    """
    task_list = get_task_list(chunk_size, multiplier)
    # NOTE(review): assumes get_task_list yields exactly multiplier**3
    # tasks -- confirm.
    n_tasks = multiplier**3

    # Get the first integer from g_counter and assign it to task_id.
    task_id = ga.read_inc(g_counter, 0)

    # Only start the pipeline if that id names a real task; a process that
    # arrives after all ids were handed out has nothing to compute.
    if task_id < n_tasks:
        # the srumma algorithm, more or less
        task_prev = task_list[task_id]
        a_prev, a_nb_prev = ga.nbget(g_a, task_prev.alo, task_prev.ahi)
        b_prev, b_nb_prev = ga.nbget(g_b, task_prev.blo, task_prev.bhi)

        # Get the next integer from g_counter and assign it to task_id.
        task_id = ga.read_inc(g_counter, 0)
        while task_id < n_tasks:
            # Request the next task's patches first ...
            task_next = task_list[task_id]
            a_next, a_nb_next = ga.nbget(g_a, task_next.alo, task_next.ahi)
            b_next, b_nb_next = ga.nbget(g_b, task_next.blo, task_next.bhi)

            # ... then complete the task requested one step earlier.
            ga.nbwait(a_nb_prev)
            ga.nbwait(b_nb_prev)
            result = np.dot(a_prev, b_prev)
            ga.acc(g_c, result, task_prev.clo, task_prev.chi)

            task_prev = task_next
            a_prev, a_nb_prev = a_next, a_nb_next
            b_prev, b_nb_prev = b_next, b_nb_next

            # Get the next integer from g_counter and assign it to task_id.
            task_id = ga.read_inc(g_counter, 0)

        # The last claimed task has been fetched but not yet multiplied.
        ga.nbwait(a_nb_prev)
        ga.nbwait(b_nb_prev)
        result = np.dot(a_prev, b_prev)
        ga.acc(g_c, result, task_prev.clo, task_prev.chi)

    # Reached by every process, including those that got no task.
    ga.sync()
def srumma(g_a, g_b, g_c, chunk_size, multiplier, g_counter):
    """Accumulate the blocked product of ``g_a`` and ``g_b`` into ``g_c``.

    Tasks are distributed dynamically through a shared counter: each
    ``ga.read_inc(g_counter, 0)`` hands this process the next task id, and
    the process stops once the id reaches ``multiplier**3``.  Fetching the
    next task's patches (non-blocking) is started before the current
    task's patches are multiplied.

    Args:
        g_a: Handle of the left operand; patches are read with ``ga.nbget``.
        g_b: Handle of the right operand; patches are read with ``ga.nbget``.
        g_c: Handle of the result; partial products are added with ``ga.acc``.
        chunk_size: Forwarded unchanged to ``get_task_list``.
        multiplier: Forwarded to ``get_task_list``; ``multiplier**3`` is
            treated as the number of valid task ids.
        g_counter: Handle of the integer array whose element 0 serves as
            the shared task counter (presumably zeroed by the caller
            before the first call -- confirm).

    Returns:
        None.  Every exit path goes through ``ga.sync()``, so all
        processes are expected to call this function together.
    """
    task_list = get_task_list(chunk_size, multiplier)
    # NOTE(review): assumes get_task_list yields exactly multiplier**3
    # tasks -- confirm.
    task_count = multiplier**3

    task_id = ga.read_inc(g_counter, 0)
    if task_id >= task_count:
        # No task left for this process (e.g. more processes than tasks).
        # The first id needs the same bound check as all later ones;
        # still take part in the final synchronisation.
        ga.sync()
        return

    # the srumma algorithm, more or less
    task_prev = task_list[task_id]
    a_prev, a_nb_prev = ga.nbget(g_a, task_prev.alo, task_prev.ahi)
    b_prev, b_nb_prev = ga.nbget(g_b, task_prev.blo, task_prev.bhi)

    task_id = ga.read_inc(g_counter, 0)
    while task_id < task_count:
        # Prefetch the patches of the task just claimed.
        task_next = task_list[task_id]
        a_next, a_nb_next = ga.nbget(g_a, task_next.alo, task_next.ahi)
        b_next, b_nb_next = ga.nbget(g_b, task_next.blo, task_next.bhi)

        # Wait for the previously requested patches and multiply them.
        ga.nbwait(a_nb_prev)
        ga.nbwait(b_nb_prev)
        result = np.dot(a_prev, b_prev)
        ga.acc(g_c, result, task_prev.clo, task_prev.chi)

        # Rotate: the prefetched task is processed on the next pass.
        task_prev = task_next
        a_prev, a_nb_prev = a_next, a_nb_next
        b_prev, b_nb_prev = b_next, b_nb_next
        task_id = ga.read_inc(g_counter, 0)

    # One fetched task is always left over when the loop ends.
    ga.nbwait(a_nb_prev)
    ga.nbwait(b_nb_prev)
    result = np.dot(a_prev, b_prev)
    ga.acc(g_c, result, task_prev.clo, task_prev.chi)

    ga.sync()
def srumma(g_a, g_b, g_c, chunk_size, multiplier):
    """Accumulate the blocked product of ``g_a`` and ``g_b`` into ``g_c``.

    The task list is partitioned statically among the processes: rank
    ``me`` handles ``multiplier**3 // nproc`` consecutive tasks, and the
    last rank additionally handles the remainder.  Inside its slice a
    rank starts the non-blocking fetch of the next task's patches before
    multiplying the current ones.

    Relies on the module-level names ``me`` and ``nproc`` (presumably this
    process's rank and the number of processes -- confirm where they are
    set).

    Args:
        g_a: Handle of the left operand; patches are read with ``ga.nbget``.
        g_b: Handle of the right operand; patches are read with ``ga.nbget``.
        g_c: Handle of the result; partial products are added with ``ga.acc``.
        chunk_size: Forwarded unchanged to ``get_task_list``.
        multiplier: Forwarded to ``get_task_list``; ``multiplier**3`` is
            treated as the total number of tasks.

    Returns:
        None.  Every exit path goes through ``ga.sync()``, so all
        processes are expected to call this function together.
    """
    # statically partition the task list among nprocs
    task_list = get_task_list(chunk_size, multiplier)
    task_count = multiplier**3
    ntasks = task_count // nproc
    start = me * ntasks
    stop = (me + 1) * ntasks
    if me + 1 == nproc:
        # The last rank picks up whatever does not divide evenly.
        stop += task_count % nproc

    if start >= stop:
        # Empty slice: with fewer tasks than ranks, ntasks is 0 and all
        # ranks except the last start and stop at 0.  Processing
        # task_list[start] here would accumulate another rank's task a
        # second time, so just join the final synchronisation.
        ga.sync()
        return

    # the srumma algorithm, more or less
    task_prev = task_list[start]
    a_prev, a_nb_prev = ga.nbget(g_a, task_prev.alo, task_prev.ahi)
    b_prev, b_nb_prev = ga.nbget(g_b, task_prev.blo, task_prev.bhi)

    for i in range(start + 1, stop):
        # Prefetch the patches of the following task.
        task_next = task_list[i]
        a_next, a_nb_next = ga.nbget(g_a, task_next.alo, task_next.ahi)
        b_next, b_nb_next = ga.nbget(g_b, task_next.blo, task_next.bhi)

        # Wait for the previously requested patches and multiply them.
        ga.nbwait(a_nb_prev)
        ga.nbwait(b_nb_prev)
        result = np.dot(a_prev, b_prev)
        ga.acc(g_c, result, task_prev.clo, task_prev.chi)

        # Rotate: the prefetched task is processed on the next pass.
        task_prev = task_next
        a_prev, a_nb_prev = a_next, a_nb_next
        b_prev, b_nb_prev = b_next, b_nb_next

    # One fetched task is always left over when the loop ends.
    ga.nbwait(a_nb_prev)
    ga.nbwait(b_nb_prev)
    result = np.dot(a_prev, b_prev)
    ga.acc(g_c, result, task_prev.clo, task_prev.chi)

    ga.sync()