def func_collect(eulers, positions, templates):
    """Run the euler2rotmat/collect pipeline for every body.

    The inputs are wrapped in ``TVArray`` objects, transposed with axes
    ``(1, 0, 2)`` so that the body axis comes first, copied into arrays
    created with ``gpu=True`` and split per body with ``rsplit()``.  For each
    body ``b`` the following chain is then expressed::

        euler2rotmat(g_ee[b]) > g_rotmats
        collect(g_pp[b], g_rotmats, g_templates[b]) > g_coors[b]
        copy(g_coors[b]) > o_coors[b]

    Args:
        eulers: array whose axis 0 is the structure index and axis 1 the body
            index (three axes are implied by the ``(1, 0, 2)`` transpose).
        positions: array with the same first two dimensions as ``eulers``.
        templates: sequence holding one template per body.

    Returns:
        A tuple with one ``o_coors[b]`` array (created without ``gpu=True``)
        per body.

    Raises:
        AssertionError: if the body or structure dimensions of the inputs
            disagree.
    """
    nbodies = len(templates)
    assert eulers.shape[1] == positions.shape[1] == nbodies
    assert eulers.shape[0] == positions.shape[0]

    e = TVArray("e", eulers)
    p = TVArray("p", positions)
    eT = TVArray("eT")
    pT = TVArray("pT")
    g_e = TVArray("g_e", gpu=True)
    g_p = TVArray("g_p", gpu=True)
    # A single rotation-matrix buffer is reused for every body.
    g_rotmats = TVArray("g_rotmats", gpu=True)

    g_coors = []
    o_coors = []
    g_templates = []
    i_templates = TVArrayList("i_templates", templates)
    for b in range(nbodies):
        g_coors.append(TVArray("g_coors[%d]" % b, gpu=True))
        o_coors.append(TVArray("o_coors[%d]" % b))
        g_templates.append(TVArray("g_templates[%d]" % b, gpu=True))
        copy(i_templates[b]) > g_templates[b]

    # Put the body axis first so that rsplit() yields one slice per body.
    transpose(e, (1, 0, 2)) > eT
    transpose(p, (1, 0, 2)) > pT
    copy(eT) > g_e
    copy(pT) > g_p
    g_ee = g_e.rsplit()
    g_pp = g_p.rsplit()

    for b in range(nbodies):
        euler2rotmat(g_ee[b]) > g_rotmats
        collect(g_pp[b], g_rotmats, g_templates[b]) > g_coors[b]
        copy(g_coors[b]) > o_coors[b]

    return tuple(o_coors)
def func_radist(eulers, positions, templates, saxs_factors, binsize, nbins):
    """Accumulate ``calc_radist`` over every pair of distinct bodies.

    Per-body arrays ``g_coors[b]`` are produced with the same
    transpose / rsplit / euler2rotmat / collect chain used elsewhere in this
    file.  They are then processed in chunks of ``CHUNKSIZE``: for each chunk
    ``g_radist`` is reset with ``fill(0)``, ``calc_radist`` is applied with
    ``>>`` for every body pair ``(b, bb)`` with ``b < bb``, and the result is
    copied into the matching write chunk of the output array.

    Args:
        eulers: array whose axis 0 is the structure index and axis 1 the body
            index (three axes are implied by the ``(1, 0, 2)`` transpose).
        positions: array with the same first two dimensions as ``eulers``.
        templates: sequence holding one template per body.
        saxs_factors: sequence indexed per body, passed to ``calc_radist``
            next to that body's coordinates.
        binsize: forwarded unchanged to ``calc_radist``.
        nbins: second dimension of the output; also forwarded to
            ``calc_radist``.

    Returns:
        The ``TVArray`` named ``"radist"`` with shape ``(nstruc, nbins)`` and
        dtype ``float32``, after ``join()`` has been called on it.

    Raises:
        AssertionError: if the body or structure dimensions of the inputs
            disagree.
    """
    nbodies = len(templates)
    assert eulers.shape[1] == positions.shape[1] == nbodies
    nstruc = eulers.shape[0]
    assert eulers.shape[0] == positions.shape[0]

    o_radist = TVArray("radist", shape=(nstruc, nbins), dtype="float32")
    e = TVArray("e", eulers)
    p = TVArray("p", positions)
    eT = TVArray("eT")
    pT = TVArray("pT")
    g_e = TVArray("g_e", gpu=True)
    g_p = TVArray("g_p", gpu=True)
    # A single rotation-matrix buffer is reused for every body.
    g_rotmats = TVArray("g_rotmats", gpu=True)

    g_coors = []
    g_templates = []
    i_templates = TVArrayList("i_templates", templates)
    g_saxs_factors = []
    i_saxs_factors = TVArrayList("i_saxs_factors", saxs_factors)
    for b in range(nbodies):
        g_coors.append(TVArray("g_coors[%d]" % b, gpu=True))
        g_templates.append(TVArray("g_templates[%d]" % b, gpu=True))
        copy(i_templates[b]) > g_templates[b]
        g_saxs_factors.append(TVArray("g_saxs_factors[%d]" % b, gpu=True))
        copy(i_saxs_factors[b]) > g_saxs_factors[b]

    # Put the body axis first so that rsplit() yields one slice per body.
    transpose(e, (1, 0, 2)) > eT
    transpose(p, (1, 0, 2)) > pT
    copy(eT) > g_e
    copy(pT) > g_p
    g_ee = g_e.rsplit()
    g_pp = g_p.rsplit()

    for b in range(nbodies):
        euler2rotmat(g_ee[b]) > g_rotmats
        collect(g_pp[b], g_rotmats, g_templates[b]) > g_coors[b]

    coors_chunks = [g.rchunks(CHUNKSIZE) for g in g_coors]
    o_radist_chunks = o_radist.wchunks(CHUNKSIZE)
    g_radist = TVArray("g_radist", gpu=True, shape=(CHUNKSIZE, nbins),
                       dtype="float32")

    for i in range(len(o_radist_chunks)):
        # Resize the accumulator to the length of chunk i.  The original code
        # indexed coors_chunks with a loop variable left over from an earlier
        # loop; body 0 is used explicitly instead.
        # NOTE(review): assumes every body yields chunks of equal length,
        # which the original also relied on -- confirm.
        g_radist.shape = (coors_chunks[0][i].shape[0], nbins)
        fill(0) > g_radist
        # Each unordered pair of distinct bodies is visited exactly once.
        for b in range(nbodies):
            for bb in range(b + 1, nbodies):
                calc_radist(coors_chunks[b][i], g_saxs_factors[b],
                            coors_chunks[bb][i], g_saxs_factors[bb],
                            binsize, nbins) >> g_radist
        copy(g_radist) > o_radist_chunks[i]

    o_radist.join()
    return o_radist
def gvm(refe, eulers, positions, templates, gridshape, origin, gridspacing,
        chunksize):
    """Compute chunked ``gvm_x`` / ``gvm_y`` / ``gvm_z`` sums for all structures.

    Per-body arrays ``g_coors[b]`` are produced with the same
    transpose / rsplit / euler2rotmat / collect chain used elsewhere in this
    file.  For each chunk of structures the ``maps`` array is reset with
    ``fill(0)``, every body is added to it with ``gridify(...) >> maps``, and
    the three ``gvm_*`` tasks each write a ``(sumx, sumxx, sumxy)`` triple
    that is copied into the matching chunk of the result arrays.

    Args:
        refe: sequence of three reference arrays; each of their first three
            dimensions must equal the corresponding ``gridshape`` entry
            minus 2.
        eulers: array whose axis 0 is the structure index and axis 1 the body
            index (three axes are implied by the ``(1, 0, 2)`` transpose).
        positions: array with the same first two dimensions as ``eulers``.
        templates: sequence holding one template per body.
        gridshape: three grid dimensions, appended to the chunk length to
            form the shape of ``maps``.
        origin: forwarded unchanged to ``gridify``.
        gridspacing: forwarded unchanged to ``gridify``.
        chunksize: requested number of structures per chunk; clamped to the
            number of structures.

    Returns:
        A 9-tuple ``(sumx[0], sumx[1], sumx[2], sumxx[0], sumxx[1], sumxx[2],
        sumxy[0], sumxy[1], sumxy[2])`` of ``float32`` arrays of shape
        ``(nstruc,)``.

    Raises:
        AssertionError: if the input dimensions disagree or ``refe`` does not
            match ``gridshape``.
    """
    chunklen = min(chunksize, len(eulers))
    nbodies = len(templates)
    assert eulers.shape[1] == positions.shape[1] == nbodies
    nstruc = eulers.shape[0]
    assert eulers.shape[0] == positions.shape[0]

    e = TVArray("e", eulers)
    p = TVArray("p", positions)
    eT = TVArray("eT")
    pT = TVArray("pT")
    g_e = TVArray("g_e", gpu=True)
    g_p = TVArray("g_p", gpu=True)
    # A single rotation-matrix buffer is reused for every body.
    g_rotmats = TVArray("g_rotmats", gpu=True)

    g_coors = []
    g_templates = []
    i_templates = TVArrayList("i_templates", templates)
    i_refe = TVArrayList("i_refe", refe)
    for b in range(nbodies):
        g_coors.append(TVArray("g_coors[%d]" % b, gpu=True))
        g_templates.append(TVArray("g_templates[%d]" % b, gpu=True))
        copy(i_templates[b]) > g_templates[b]

    # Put the body axis first so that rsplit() yields one slice per body.
    transpose(e, (1, 0, 2)) > eT
    transpose(p, (1, 0, 2)) > pT
    copy(eT) > g_e
    copy(pT) > g_p
    g_ee = g_e.rsplit()
    g_pp = g_p.rsplit()

    for b in range(nbodies):
        euler2rotmat(g_ee[b]) > g_rotmats
        collect(g_pp[b], g_rotmats, g_templates[b]) > g_coors[b]

    chunk_coors = [g.rchunks(chunklen) for g in g_coors]

    # NOTE(review): maps and the g_sum* buffers below are declared with
    # chunklen rows and never resized; a shorter final chunk presumably
    # relies on the framework to cope -- confirm.
    maps = TVArray("maps", gpu=True, dtype="float32",
                   shape=(chunklen, ) + tuple(gridshape))

    g_refe = []
    for n in range(3):
        # Each reference array is 2 smaller than the grid along every axis.
        assert gridshape[0] == refe[n].shape[0] + 2
        assert gridshape[1] == refe[n].shape[1] + 2
        assert gridshape[2] == refe[n].shape[2] + 2
        g_refe.append(TVArray("g_refe[%d]" % n, gpu=True))
        copy(i_refe[n]) > g_refe[n]

    g_sumx, g_sumxx, g_sumxy = [], [], []
    sumx, sumxx, sumxy = [], [], []
    chunk_sumx, chunk_sumxx, chunk_sumxy = [], [], []
    for n in range(3):
        g_sumx.append(
            TVArray("g_sumx[%d]" % n, gpu=True, shape=(chunklen, ),
                    dtype="float32"))
        g_sumxx.append(
            TVArray("g_sumxx[%d]" % n, gpu=True, shape=(chunklen, ),
                    dtype="float32"))
        g_sumxy.append(
            TVArray("g_sumxy[%d]" % n, gpu=True, shape=(chunklen, ),
                    dtype="float32"))
        sumx.append(
            TVArray("sumx[%d]" % n, shape=(nstruc, ), dtype="float32"))
        sumxx.append(
            TVArray("sumxx[%d]" % n, shape=(nstruc, ), dtype="float32"))
        sumxy.append(
            TVArray("sumxy[%d]" % n, shape=(nstruc, ), dtype="float32"))
        # Split the results with the same clamped chunk length as the
        # coordinates and GPU buffers; the original passed the raw chunksize
        # here, which only differs when chunksize exceeds the number of
        # structures.
        chunk_sumx.append(sumx[n].wchunks(chunklen))
        chunk_sumxx.append(sumxx[n].wchunks(chunklen))
        chunk_sumxy.append(sumxy[n].wchunks(chunklen))

    for i in range(len(chunk_sumx[0])):
        fill(0) > maps
        for b in range(nbodies):
            gridify(chunk_coors[b][i], origin, gridspacing) >> maps
        gvm_x(maps, g_refe[0]) > (g_sumx[0], g_sumxx[0], g_sumxy[0])
        gvm_y(maps, g_refe[1]) > (g_sumx[1], g_sumxx[1], g_sumxy[1])
        gvm_z(maps, g_refe[2]) > (g_sumx[2], g_sumxx[2], g_sumxy[2])
        for n in range(3):
            copy(g_sumx[n]) > chunk_sumx[n][i]
            copy(g_sumxx[n]) > chunk_sumxx[n][i]
            copy(g_sumxy[n]) > chunk_sumxy[n][i]

    for n in range(3):
        sumx[n].join()
        sumxx[n].join()
        sumxy[n].join()

    return (sumx[0], sumx[1], sumx[2],
            sumxx[0], sumxx[1], sumxx[2],
            sumxy[0], sumxy[1], sumxy[2])
def overlap(eulers, positions, templates, weights, origin, gridspacing,
            reps_emdata, hash_emdata, coor_chunksize, grid_chunksize,
            maxdensity):
    """Compute one overlap value per structure using two levels of chunking.

    Structures are processed in outer chunks of ``coor_chunksize``; within
    each outer chunk the per-body coordinates are split again into inner
    chunks of ``grid_chunksize``.  Inner chunk ``j`` uses grid slot
    ``k = j % grid_chunk_parallel`` (a module-level value): the slot is
    re-initialised from ``g_reps_emdata[k]``, every body is added with
    ``atomdensitymask1(...) >> g_grids[k]``, and then for every body the
    output of ``atomdensitymask2`` is summed over axis 1 and accumulated into
    the slot's result buffer.

    Args:
        eulers: array whose axis 0 is the structure index and axis 1 the body
            index (three axes are implied by the ``(1, 0, 2)`` transpose).
        positions: array with the same first two dimensions as ``eulers``.
        templates: sequence holding one template per body.
        weights: sequence with the same length as ``templates``.
        origin: forwarded unchanged to the ``atomdensitymask*`` tasks.
        gridspacing: forwarded unchanged to the ``atomdensitymask*`` tasks.
        reps_emdata: ``grid_chunk_parallel`` four-dimensional arrays whose
            first dimension is ``grid_chunksize`` and whose remaining three
            dimensions are equal.  Each element is the emdata multiplied by
            -maxdensity, replicated grid_chunksize times, and uploaded to
            the GPU.
        hash_emdata: hash passed (once per element) as ``hashes`` when
            wrapping ``reps_emdata``.
        coor_chunksize: number of structures per outer chunk.
        grid_chunksize: number of structures per inner chunk; must not
            exceed ``coor_chunksize``.
        maxdensity: forwarded unchanged to ``atomdensitymask2``.

    Returns:
        The result of ``join()`` on the ``float32`` ``"overlaps"`` array of
        shape ``(nstruc,)``.

    Raises:
        AssertionError: if any of the shape or length preconditions fail.
    """
    assert grid_chunksize <= coor_chunksize
    nbodies = len(templates)
    assert eulers.shape[1] == positions.shape[1] == nbodies
    nstruc = eulers.shape[0]
    assert eulers.shape[0] == positions.shape[0]
    assert len(reps_emdata) == grid_chunk_parallel
    for rep_emdata in reps_emdata:
        assert len(rep_emdata.shape) == 4
        assert rep_emdata.shape[0] == grid_chunksize
        assert rep_emdata.shape[1] == rep_emdata.shape[2] == rep_emdata.shape[3]

    g_reps_emdata = TVArrayList("g_reps_emdata", reps_emdata, gpu=True,
                                hashes=[hash_emdata] * grid_chunk_parallel)
    overlaps = TVArray("overlaps", dtype="float32", shape=(nstruc, ))
    e = TVArray("e", eulers)
    p = TVArray("p", positions)
    i_templates = TVArrayList("i_templates", templates)
    i_weights = TVArrayList("i_weights", weights)
    assert len(templates) == len(weights)

    # One working grid per parallel slot, shaped like its replicated emdata.
    g_grids = []
    for k in range(grid_chunk_parallel):
        g_grids.append(
            TVArray("g_grids{%d}" % k, shape=reps_emdata[k].shape,
                    dtype="float32", gpu=True))

    g_templates = []
    g_weights = []
    for t, w in zip(i_templates, i_weights):
        tt = TVArray("g_" + t.name()[0], gpu=True)
        copy(t) > tt
        g_templates.append(tt)
        ww = TVArray("g_" + w.name()[0], gpu=True)
        copy(w) > ww
        g_weights.append(ww)

    e_chunk_T = TVArray("e_chunk_T")
    p_chunk_T = TVArray("p_chunk_T")
    g_e_chunk = TVArray("g_e_chunk", gpu=True)
    g_p_chunk = TVArray("g_p_chunk", gpu=True)
    # A single rotation-matrix buffer is reused for every body.
    g_rotmats = TVArray("g_rotmats", gpu=True)

    g_coors = []
    for n in range(nbodies):
        g_coors.append(TVArray("g_coors{%d}" % n, gpu=True))

    e_chunks = e.rchunks(coor_chunksize)
    p_chunks = p.rchunks(coor_chunksize)
    overlaps_chunks = overlaps.wchunks(coor_chunksize)
    overlaps_chunk = TVArray("overlaps_chunk", dtype="float32")

    overlaps_chunk2 = []
    for k in range(grid_chunk_parallel):
        overlaps_chunk2.append(
            TVArray("overlaps_chunk2{%d}" % k, dtype="float32"))

    ov, g_ov = [], []
    for k in range(grid_chunk_parallel):
        ov0 = TVArray("ov{%d}" % k)
        g_ov0 = TVArray("g_ov{%d}" % k, gpu=True)
        ov.append(ov0)
        g_ov.append(g_ov0)

    for i in range(len(e_chunks)):
        # Progress message; sys.stderr.write emits the same text as the
        # former Python 2 print-chevron statement and also runs on Python 3.
        sys.stderr.write("CHUNK %d\n" % (i + 1))
        e_chunk, p_chunk = e_chunks[i], p_chunks[i]

        # Put the body axis first so that rsplit() yields one slice per body.
        transpose(e_chunk, (1, 0, 2)) > e_chunk_T
        transpose(p_chunk, (1, 0, 2)) > p_chunk_T
        copy(e_chunk_T) > g_e_chunk
        copy(p_chunk_T) > g_p_chunk
        g_ee = g_e_chunk.rsplit()
        g_pp = g_p_chunk.rsplit()
        for n in range(nbodies):
            euler2rotmat(g_ee[n]) > g_rotmats
            collect(g_pp[n], g_rotmats, g_templates[n]) > g_coors[n]
        g_coors_chunks = [a.rchunks(grid_chunksize) for a in g_coors]
        g_e_chunk = g_e_chunk.join()
        g_p_chunk = g_p_chunk.join()

        # "overlaps_chunk" represents overlaps::i, since we cannot shard a
        # shard.
        overlaps_chunk._current().shape = (len(e_chunk), )
        overlaps_chunks2 = overlaps_chunk.wchunks(grid_chunksize)

        # The inner chunk count is read from body 0.  The original indexed
        # with a loop variable left over from the per-body loop above.
        # NOTE(review): assumes every body yields the same number of inner
        # chunks, which the original also relied on -- confirm.
        for j in range(len(g_coors_chunks[0])):
            k = j % grid_chunk_parallel
            overlaps_chunk2[k]._current().shape = overlaps_chunks2[j].shape
            copy(g_reps_emdata[k]) > g_grids[k]
            for n in range(nbodies):
                atomdensitymask1(g_coors_chunks[n][j], g_weights[n], origin,
                                 gridspacing) >> g_grids[k]
            fill(0) > overlaps_chunk2[k]
            for n in range(nbodies):
                atomdensitymask2(g_coors_chunks[n][j], g_grids[k], origin,
                                 gridspacing, maxdensity) > g_ov[k]
                copy(g_ov[k]) > ov[k]
                nptask(ov[k], ".sum(axis=1)") >> overlaps_chunk2[k]
            # Cannot accumulate a shard, hence the separate buffer and copy.
            copy(overlaps_chunk2[k]) > overlaps_chunks2[j]

        g_coors = [a.join() for a in g_coors]
        overlaps_chunk = overlaps_chunk.join()
        # Cannot shard a shard, hence the copy into the outer chunk.
        copy(overlaps_chunk) > overlaps_chunks[i]

    overlaps = overlaps.join()
    return overlaps