def task_daemon():
    # Background load balancer: runs on every rank and relies on the enclosing
    # scope for `tasks`, `comm`, `pool`, `rank` and `interval`.
    while True:
        time.sleep(interval)
        ntasks = len(tasks)
        loads = comm.allgather(ntasks)
        if all(n <= 1 for n in loads):
            tasks.insert(0, 'OUT_OF_TASKS')
            break
        elif any(n <= 1 for n in loads):
            loads = numpy.array(loads)
            ntasks_mean = int(loads.mean() + .9999)
            mpi_size = pool.size
            to_send = [tasks.pop() for i in range(ntasks - ntasks_mean)]
            to_distribute = comm.gather(to_send)
            if rank == 0:
                to_distribute = lib.flatten(to_distribute)
                to_append = [[] for i in range(mpi_size)]
                i = 1
                while to_distribute and i < mpi_size:
                    npop = ntasks_mean - loads[i]
                    to_append[i] = to_distribute[:npop]
                    to_distribute = to_distribute[npop:]
                    i += 1
                to_append[0] = to_distribute
            else:
                to_append = None
            to_append = comm.scatter(to_append)
            for task in to_append:
                tasks.insert(0, task)
def _vijk_indices(kpt_indices, orb_indices, transpose=(0, 1, 2)):
    '''Get indices needed for t3 construction for a given transpose of (a,b,c).'''
    kpt_indices = ([kpt_indices[x] for x in transpose] +
                   [kpt_indices[x + 3] for x in transpose])
    orb_indices = lib.flatten([[orb_indices[2 * x], orb_indices[2 * x + 1]]
                               for x in transpose])

    ki, kj, kk, ka, kb, kc = kpt_indices
    a0, a1, b0, b1, c0, c1 = orb_indices

    kf = kconserv[ka, ki, kb]
    km = kconserv[kc, kk, kb]
    sl00 = slice(None, None)

    vvop_idx = [ka, kb, ki, slice(a0, a1), slice(b0, b1), sl00, sl00]
    vooo_idx = [ka, ki, kj, slice(a0, a1), sl00, sl00, sl00]
    t2T_vvop_idx = [kc, kf, kj, slice(c0, c1), sl00, sl00, sl00]
    t2T_vooo_idx = [kc, kb, km, slice(c0, c1), sl00, sl00, sl00]
    return vvop_idx, vooo_idx, t2T_vvop_idx, t2T_vooo_idx
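# The sketch below (not part of the original module) only illustrates the
# index permutation performed at the top of _vijk_indices: the three occupied
# and three virtual k-point labels are permuted with the same pattern, and the
# (start, stop) orbital ranges follow the virtual permutation.  The helper
# name _permute_indices and the example values are hypothetical.
def _permute_indices(kpt_indices, orb_indices, transpose=(0, 1, 2)):
    kpt_perm = ([kpt_indices[x] for x in transpose] +
                [kpt_indices[x + 3] for x in transpose])
    orb_perm = [orb_indices[2 * x + y] for x in transpose for y in (0, 1)]
    return kpt_perm, orb_perm

# Swapping b and c, i.e. transpose=(0, 2, 1), swaps kb/kc and (b0,b1)/(c0,c1):
# _permute_indices([ki, kj, kk, ka, kb, kc], [a0, a1, b0, b1, c0, c1], (0, 2, 1))
# returns ([ki, kk, kj, ka, kc, kb], [a0, a1, c0, c1, b0, b1]).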
def task_daemon():
    while True:
        time.sleep(interval)
        ntasks = len(tasks)
        loads = comm.allgather(ntasks)
        if all(n <= 1 for n in loads):
            tasks.insert(0, 'OUT_OF_TASKS')
            break
        elif any(n <= 1 for n in loads) and any(n >= 3 for n in loads):
            loads = numpy.array(loads)
            ntasks_mean = int(loads.mean()) + 1
            mpi_size = pool.size
            # The number of tasks may have changed by the time execution
            # reaches this point, so len(tasks) can be smaller than
            # ntasks - ntasks_mean and tasks.pop() would raise "pop from
            # empty list".  Check the status of tasks before each pop.
            to_send = [tasks.pop() for i in range(len(tasks) - ntasks_mean)
                       if tasks]
            to_distribute = comm.gather(to_send)
            if rank == 0:
                to_distribute = lib.flatten(to_distribute)
                to_append = [[] for i in range(mpi_size)]
                i = 1
                while to_distribute and i < mpi_size:
                    npop = ntasks_mean - loads[i]
                    to_append[i] = to_distribute[:npop]
                    to_distribute = to_distribute[npop:]
                    i += 1
                to_append[0] = to_distribute
            else:
                to_append = None
            to_append = comm.scatter(to_append)
            for task in to_append:
                tasks.insert(0, task)
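# A minimal, MPI-free sketch (hypothetical, not part of the original module)
# of the redistribution arithmetic used by task_daemon: every rank keeps at
# most about the mean number of tasks, the surplus is pooled (comm.gather in
# the real code), and the pool is handed back to the under-loaded ranks
# (comm.scatter in the real code).
def rebalance(task_lists):
    loads = [len(t) for t in task_lists]
    ntasks_mean = int(sum(loads) / len(loads)) + 1
    # Collect the surplus from over-loaded ranks.
    pool = []
    for t in task_lists:
        while len(t) > ntasks_mean:
            pool.append(t.pop())
    # Hand the pooled tasks to under-loaded ranks; leftovers stay on rank 0.
    for i in range(1, len(task_lists)):
        npop = max(ntasks_mean - loads[i], 0)
        for task in pool[:npop]:
            task_lists[i].insert(0, task)
        pool = pool[npop:]
    for task in pool:
        task_lists[0].insert(0, task)
    return task_lists

# Example: rank 0 is over-loaded, rank 1 is almost idle.
# rebalance([[1, 2, 3, 4, 5, 6], [7]]) -> [[1, 2, 3, 4], [5, 6, 7]]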
def _eval_jk(mf, dm, hermi, gen_jobs):
    cpu0 = (logger.process_clock(), logger.perf_counter())
    mol = mf.mol
    ao_loc = mol.ao_loc_nr()
    nao = ao_loc[-1]

    bas_groups = _partition_bas(mol)
    jobs = gen_jobs(len(bas_groups), hermi)
    njobs = len(jobs)
    logger.debug1(mf, 'njobs %d', njobs)

    # Each job has multiple recipes.
    n_recipes = len(jobs[0][1:])
    dm = numpy.asarray(dm).reshape(-1, nao, nao)
    n_dm = dm.shape[0]
    vk = numpy.zeros((n_recipes, n_dm, nao, nao))

    if mf.opt is None:
        vhfopt = mf.init_direct_scf(mol)
    else:
        vhfopt = mf.opt
    # Assign the entire dm_cond to vhfopt.
    # The prescreen function CVHFnrs8_prescreen will index q_cond and dm_cond
    # over the entire basis.  "set_dm" in function jk.get_jk/direct_bindm only
    # creates a subblock of dm_cond which is not compatible with
    # CVHFnrs8_prescreen.
    vhfopt.set_dm(dm, mol._atm, mol._bas, mol._env)
    # Then skip the "set_dm" initialization in function jk.get_jk/direct_bindm.
    vhfopt._dmcondname = None
    logger.timer_debug1(mf, 'get_jk initialization', *cpu0)

    for job_id in mpi.work_stealing_partition(range(njobs)):
        group_ids = jobs[job_id][0]
        recipes = jobs[job_id][1:]

        shls_slice = lib.flatten([bas_groups[i] for i in group_ids])
        loc = ao_loc[shls_slice].reshape(4, 2)

        dm_blks = []
        for i_dm in range(n_dm):
            for ir, recipe in enumerate(recipes):
                for i, rec in enumerate(recipe):
                    p0, p1 = loc[rec[0]]
                    q0, q1 = loc[rec[1]]
                    dm_blks.append(dm[i_dm, p0:p1, q0:q1])
        scripts = ['ijkl,%s%s->%s%s' % tuple(['ijkl'[x] for x in rec])
                   for recipe in recipes for rec in recipe] * n_dm

        kparts = jk.get_jk(mol, dm_blks, scripts, shls_slice=shls_slice,
                           vhfopt=vhfopt)

        for i_dm in range(n_dm):
            for ir, recipe in enumerate(recipes):
                for i, rec in enumerate(recipe):
                    p0, p1 = loc[rec[2]]
                    q0, q1 = loc[rec[3]]
                    vk[ir, i_dm, p0:p1, q0:q1] += kparts[i]
                # Pop the results of one recipe
                kparts = kparts[i + 1:]

    vk = mpi.reduce(vk)
    if rank == 0:
        if hermi:
            for i in range(n_recipes):
                for j in range(n_dm):
                    lib.hermi_triu(vk[i, j], hermi, inplace=True)
    else:
        # Zero out vk on workers.  If reduce(get_jk()) is called twice,
        # non-zero vk on workers can cause error.
        vk[:] = 0
    logger.timer(mf, 'get_jk', *cpu0)
    return vk
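# A small standalone sketch (with hypothetical recipe values) of how the
# contraction scripts in _eval_jk are built: each four-index "rec" selects
# which two of the i,j,k,l labels address the density-matrix block and which
# two address the output block, producing the script strings passed to
# jk.get_jk above.
recipe = [(1, 0, 2, 3), (3, 2, 0, 1)]
scripts = ['ijkl,%s%s->%s%s' % tuple('ijkl'[x] for x in rec) for rec in recipe]
# scripts == ['ijkl,ji->kl', 'ijkl,lk->ij']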