def test_gpus_many_map():
    """Time a reconstruction that is distributed over several GPUs.

    A synthetic sine-based data set is built, an ``lpTransform`` object is
    created and precomputed, memory is initialised for every GPU in the GPU
    list, and the slice chunks are then handed to ``lpmultigpu`` through a
    thread pool with one worker per GPU.  The values returned by ``toc()``
    for the setup phase and for the reconstruction phase are printed.
    """
    # problem dimensions and transform settings
    size = 2048
    n_proj = size
    n_slices = 2048
    filter_name = 'None'
    center = size // 2
    interp_name = 'cubic'

    # synthetic input: sine ramp reshaped to [slices, projections, size]
    n_values = n_slices * n_proj * size
    tomo = np.float32(
        np.sin(np.arange(0, n_values) / float(n_slices * n_proj)))
    tomo = np.reshape(tomo, [n_slices, n_proj, size])

    # reconstruction settings
    reg_par = 0.001  # *np.max(tomo)
    num_iter = 100
    recon = np.zeros([n_slices, size, size], dtype="float32") + 1e-3
    method = "em"
    gpu_list = [0, 1, 2, 3]

    # available reconstruction methods, keyed by name
    solvers = {
        'fbp': lpmethods.fbp,
        'grad': lpmethods.grad,
        'cg': lpmethods.cg,
        'tv': lpmethods.tv,
        'em': lpmethods.em,
    }

    n_gpus = len(gpu_list)
    # number of slices processed simultaneously by one gpu
    # (depends on gpu memory size, chosen for gpus with >= 8GB memory)
    chunk = min(int(pow(2, 26) / float(size * size)),
                int(np.ceil(n_slices / float(n_gpus))))
    # every method except fbp also needs the forward transform
    needs_forward = method != 'fbp'

    tic()
    lp = lpTransform.lpTransform(
        size, n_proj, chunk, filter_name, center, interp_name)
    lp.precompute(needs_forward)
    print("Init time %f" % (toc()))

    # slice index ranges, one entry per chunk handed to a gpu
    n_chunks = int(np.ceil(n_slices / float(chunk)))
    ids_list = [range(i * chunk, min(n_slices, (i + 1) * chunk))
                for i in range(n_chunks)]

    tic()
    # initialise memory on each gpu
    for gpu in gpu_list:
        lp.initcmem(needs_forward, gpu)

    # run the reconstruction on all gpus and gather the chunks in order
    worker = partial(lpmultigpu, lp, solvers[method], recon, tomo,
                     num_iter, reg_par, gpu_list)
    with cf.ThreadPoolExecutor(n_gpus) as pool:
        offset = 0
        for part in pool.map(worker, ids_list):
            rows = part.shape[0]
            recon[np.arange(0, rows) + offset] = part
            offset += rows
    print("Rec time %f" % (toc()))
def task_gen_photo_imagepath(root, query):
    """Collect the image paths for ``query`` and write them to a text file.

    The photo ids are fetched with ``PhotoDao.getClassPhotoIds``, turned into
    image paths by ``get_photo_imagepath`` and written with
    ``FileIO.write_strings_to_file`` to
    ``<datasetDir>/<query>_imagepath.txt``, where ``datasetDir`` is read from
    the initialised ``DBHelper``.

    Args:
        root: passed to ``DBHelper.init`` and ``get_photo_imagepath``.
        query: class name; also used to build the output file name.
    """
    print('Get photo ids.')
    helper = DBHelper()
    helper.init(root)
    dao = PhotoDao(helper)

    # time the id lookup; the value returned by toc() is not used here
    tic()
    ids = dao.getClassPhotoIds(query, ''.join((query,)))
    toc()

    print('Get photo path.')
    paths = get_photo_imagepath(root, query, ids)

    name_parts = (helper.datasetDir, '/', query, '_imagepath.txt')
    target = ''.join(name_parts)
    writer = FileIO()
    writer.write_strings_to_file(paths, target)
# NOTE(review): this is a fragment of a longer script; psi1, psi2, flow, data,
# u, theta, the lamd*/rho* values, dslv and tslv are all defined outside the
# visible code, and the loop body continues past the last statement shown.

# Bind (not copy) the objects psi1/psi2 refer to before the iterations start.
h01 = psi1
h02 = psi2
for k in range(niter):
    # registration: recompute the flow from psi1 and data, passing a copy of
    # the current flow as input
    tic()
    flow = dslv.registration_flow_batch(
        psi1, data, mmin, mmax, flow.copy(), pars, nproc=42)
    t1 = toc()  # value returned by toc() for the registration step
    tic()
    # deformation subproblem
    psi1 = dslv.cg_deform(
        data, psi1, flow, 4,
        tslv.fwd_lam(u, theta) + lamd1 / rho1, rho1, nproc=42)
    t2 = toc()  # value returned by toc() for the deformation step
    psi2 = tslv.solve_reg(u, lamd2, rho2, alpha)
    # tomo subproblem (its statements lie beyond the visible fragment)
    tic()
# u, psi, lamd, flow = interpdense(u,psi,lamd,flow) # optical flow parameters pars = [0.5, 1, w[il], 4, 5, 1.1, 4] with tc.SolverTomo(theta, ntheta, nz, ne, pnz, center / pow(2, binning), ngpus) as tslv: with dc.SolverDeform(ntheta, nz, n, ptheta) as dslv: rho = 0.5 h0 = psi for k in range(niter[il]): # registration # print(np.linalg.norm(psi-data)) tic() flow = dslv.registration_flow_batch( psi, data, mmin, mmax, flow.copy(), pars, 40) print(toc()) # Tpsi = dslv.apply_flow_gpu_batch(psi, flow) # print(np.linalg.norm(Tpsi-data)) # deformation subproblem tic() psi = dslv.cg_deform_gpu_batch( data, psi, flow, 4, unpad(tslv.fwd_tomo_batch(u), ne, n) + lamd / rho, rho) print(toc()) # tomo subproblem tic() u = tslv.cg_tomo_batch(pad(psi - lamd / rho, ne, n), u, 4) print(toc()) h = unpad(tslv.fwd_tomo_batch(u), ne, n)
def streaming():
    """Reconstruct three orthogonal slices from the detector stream, forever.

    Main computational function: projections are taken from the detector PV
    ('2bmbSP1:Pva1:Image') by a monitor callback and stored in a circular
    buffer with one slot per angular step over 180 degrees.  In an endless
    loop the buffer is copied, three orthogonal slices are reconstructed with
    ``OrthoRec.rec_ortho`` and the concatenated result is written to the
    reconstruction PV served as 'AdImage'.

    The function does not return; it loops until the process is interrupted.
    """
    ##### init pvs ######
    # init ca pvs
    chscanDelta = pva.Channel('2bma:PSOFly2:scanDelta', pva.CA)
    chrotangle = pva.Channel('2bma:m82', pva.CA)
    chrotangleset = pva.Channel('2bma:m82.SET', pva.CA)
    chrotanglestop = pva.Channel('2bma:m82.STOP', pva.CA)
    chStreamX = pva.Channel('2bmS1:StreamX', pva.CA)
    chStreamY = pva.Channel('2bmS1:StreamY', pva.CA)
    chStreamZ = pva.Channel('2bmS1:StreamZ', pva.CA)
    # init pva streaming pv for the detector
    chdata = pva.Channel('2bmbSP1:Pva1:Image')
    pvdata = chdata.get('')
    # init pva streaming pv for reconstruction by copying the structure
    # dictionary from pvdata
    pvdict = pvdata.getStructureDict()
    pvrec = pva.PvObject(pvdict)
    # take dimensions
    n = pvdata['dimension'][0]['size']
    nz = pvdata['dimension'][1]['size']
    # set dimensions for reconstruction: three n-wide slices side by side
    pvrec['dimension'] = [{
        'size': 3 * n,
        'fullSize': 3 * n,
        'binning': 1
    }, {
        'size': n,
        'fullSize': n,
        'binning': 1
    }]

    ##### run server for reconstruction pv #####
    # NOTE(review): `s` is not used afterwards; presumably the reference keeps
    # the server alive - confirm before removing.
    s = pva.PvaServer('AdImage', pvrec)

    ##### procedures before running fly #######

    # 0) form circular buffer: whenever the angle goes higher than 180, the
    # corresponding projection replaces the first one
    scanDelta = chscanDelta.get('')['value']
    # builtin int instead of np.int: the np.int alias was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24 (it was the builtin int anyway)
    ntheta = int(180 / scanDelta + 0.5)
    databuffer = np.zeros([ntheta, nz * n], dtype='uint8')
    thetabuffer = np.zeros(ntheta, dtype='float32')

    # 1) stop rotation, replace rotation stage angle with a value in [0,360)
    chrotanglestop.put(1)
    time.sleep(3)
    rotangle = chrotangle.get('')['value']
    chrotangleset.put(1)
    chrotangle.put(rotangle - rotangle // 360 * 360)
    chrotangleset.put(0)

    # 2) take flat field
    flat = takeflat(chdata)
    firstid = chdata.get('')['uniqueId']

    # 3) create solver class on GPU, and copy flat field to gpu
    slv = OrthoRec(ntheta, n, nz)
    slv.set_flat(flat)

    # 4) allocate memory for result slices
    recall = np.zeros([n, 3 * n], dtype='float32')

    # 5) start monitoring the detector pv for data collection
    def addProjection(pv):
        """Monitor callback: store one projection and its angle in the buffers."""
        with mrwlock.w_locked():
            curid = pv['uniqueId']
            # circular buffer slot, computed once for both buffers
            slot = np.mod(curid, ntheta)
            databuffer[slot] = pv['value'][0]['ubyteValue']
            thetabuffer[slot] = (curid - firstid) * scanDelta

    chdata.monitor(addProjection, '')

    ##### start acquisition #######
    start_fly()

    ##### streaming reconstruction ######
    while True:
        # infinite loop over angular partitions
        with mrwlock.r_locked():  # lock buffer before reading
            datap = databuffer.copy()
            thetap = thetabuffer.copy()

        # take 3 ortho slices ids
        idx = chStreamX.get('')['value']
        idy = chStreamY.get('')['value']
        idz = chStreamZ.get('')['value']

        # reconstruct on GPU; angles are converted from degrees to radians
        tic()
        recx, recy, recz = slv.rec_ortho(datap, thetap * np.pi / 180, n // 2,
                                         idx, idy, idz)
        print('rec time:', toc())

        # concatenate (supposing nz<n)
        recall[:nz, :n] = recx
        recall[:nz, n:2 * n] = recy
        recall[:, 2 * n:] = recz

        # write to pv
        pvrec['value'] = ({'floatValue': recall.flatten()}, )
        # reconstruction rate limit
        time.sleep(0.1)