def _diffusion_child(comm, bm=None):
    """Run the random-walk segmentation on this MPI rank.

    Rank 0 splits the labeled-slice indices over all ranks, sends every other
    rank its input, runs its own random walks, sums the per-label walk maps
    of all ranks, optionally writes a smoothed result and an uncertainty map,
    and saves the final label image.  Every other rank receives its input
    from rank 0, runs its random walks and contributes its walk maps to the
    sum on rank 0.

    Parameters
    ----------
    comm
        MPI communicator (used through ``Get_rank``, ``Get_size``, ``recv``,
        ``Recv``, ``Barrier`` and ``Reduce``).
    bm
        Job description object; only accessed on rank 0.  If an optional GPU
        step fails, ``bm.label.smooth`` is set to 0 respectively
        ``bm.label.uncertainty`` to False.

    Returns
    -------
    None
        Results are written with ``save_data``.
    """
    rank = comm.Get_rank()
    ngpus = comm.Get_size()

    nodename = socket.gethostname()
    name = '%s %s' % (nodename, rank)
    print(name)

    if rank == 0:

        # split indices on GPUs
        indices_split = _split_indices(bm.indices, ngpus)
        print('Indices:', indices_split)

        # send data to GPUs
        for k in range(1, ngpus):
            sendToChild(comm, bm.indices, indices_split[k], k, bm.data, bm.labels,
                        bm.label.nbrw, bm.label.sorw, bm.label.allaxis)

        # init cuda device
        cuda.init()
        dev = cuda.Device(rank)
        ctx = dev.make_context()

        # the context must be popped even if one of the GPU steps raises
        try:
            # select the desired script
            if bm.label.allaxis:
                from pycuda_small_allx import walk
            else:
                from pycuda_small import walk

            # run random walks
            tic = time.time()
            walkmap = walk(bm.data, bm.labels, bm.indices, indices_split[0],
                           bm.label.nbrw, bm.label.sorw, name)
            tac = time.time()
            print('Walktime_%s: ' % (name) + str(int(tac - tic)) + ' ' + 'seconds')

            # gather data: sum the walk maps of all ranks, one label at a time
            zsh_tmp = bm.argmax_z - bm.argmin_z
            ysh_tmp = bm.argmax_y - bm.argmin_y
            xsh_tmp = bm.argmax_x - bm.argmin_x
            if ngpus > 1:
                final_zero = np.empty((bm.nol, zsh_tmp, ysh_tmp, xsh_tmp), dtype=np.float32)
                for k in range(bm.nol):
                    sendbuf = np.copy(walkmap[k])
                    recvbuf = np.empty((zsh_tmp, ysh_tmp, xsh_tmp), dtype=np.float32)
                    comm.Barrier()
                    comm.Reduce([sendbuf, MPI.FLOAT], [recvbuf, MPI.FLOAT], root=0, op=MPI.SUM)
                    final_zero[k] = recvbuf
            else:
                final_zero = walkmap

            # block and grid size
            block = (32, 32, 1)
            x_grid = (xsh_tmp // 32) + 1
            y_grid = (ysh_tmp // 32) + 1
            grid = (int(x_grid), int(y_grid), int(zsh_tmp))
            xsh_gpu = np.int32(xsh_tmp)
            ysh_gpu = np.int32(ysh_tmp)

            # smooth: build the kernels and probe the GPU memory first
            if bm.label.smooth:
                try:
                    update_gpu = _build_update_gpu()
                    curvature_gpu = _build_curvature_gpu()
                    a_gpu = gpuarray.empty((zsh_tmp, ysh_tmp, xsh_tmp), dtype=np.float32)
                    b_gpu = gpuarray.zeros((zsh_tmp, ysh_tmp, xsh_tmp), dtype=np.float32)
                except Exception as e:
                    print('Warning: GPU out of memory to allocate smooth array. Process starts without smoothing.')
                    # any failure ends up here, so also report the actual cause
                    print('Error:', e)
                    bm.label.smooth = 0

            if bm.label.smooth:
                final_smooth = np.copy(final_zero)
                for k in range(bm.nol):
                    a_gpu = gpuarray.to_gpu(final_smooth[k])
                    # bm.label.smooth is the number of smoothing iterations
                    for _ in range(bm.label.smooth):
                        curvature_gpu(a_gpu, b_gpu, xsh_gpu, ysh_gpu, block=block, grid=grid)
                        update_gpu(a_gpu, b_gpu, xsh_gpu, ysh_gpu, block=block, grid=grid)
                    final_smooth[k] = a_gpu.get()
                final_smooth = np.argmax(final_smooth, axis=0).astype(np.uint8)
                final_smooth = get_labels(final_smooth, bm.allLabels)
                final = np.zeros((bm.zsh, bm.ysh, bm.xsh), dtype=np.uint8)
                final[bm.argmin_z:bm.argmax_z, bm.argmin_y:bm.argmax_y, bm.argmin_x:bm.argmax_x] = final_smooth
                final = final[1:-1, 1:-1, 1:-1]
                save_data(bm.path_to_smooth, final, bm.header, bm.final_image_type, bm.label.compression)

            # uncertainty
            if bm.label.uncertainty:
                try:
                    max_gpu = gpuarray.zeros((3, zsh_tmp, ysh_tmp, xsh_tmp), dtype=np.float32)
                    a_gpu = gpuarray.zeros((zsh_tmp, ysh_tmp, xsh_tmp), dtype=np.float32)
                    kernel_uncertainty = _build_kernel_uncertainty()
                    kernel_max = _build_kernel_max()
                    for k in range(bm.nol):
                        a_gpu = gpuarray.to_gpu(final_zero[k])
                        kernel_max(max_gpu, a_gpu, xsh_gpu, ysh_gpu, block=block, grid=grid)
                    # the result is read back from a_gpu below
                    kernel_uncertainty(max_gpu, a_gpu, xsh_gpu, ysh_gpu, block=block, grid=grid)
                    uq = a_gpu.get()
                    uq *= 255
                    uq = uq.astype(np.uint8)
                    final = np.zeros((bm.zsh, bm.ysh, bm.xsh), dtype=np.uint8)
                    final[bm.argmin_z:bm.argmax_z, bm.argmin_y:bm.argmax_y, bm.argmin_x:bm.argmax_x] = uq
                    final = final[1:-1, 1:-1, 1:-1]
                    save_data(bm.path_to_uq, final, compress=bm.label.compression)
                except Exception as e:
                    print('Warning: GPU out of memory to allocate uncertainty array. Process starts without uncertainty.')
                    # any failure ends up here, so also report the actual cause
                    print('Error:', e)
                    bm.label.uncertainty = False

        finally:
            # free device
            ctx.pop()
            del ctx

        # argmax
        final_zero = np.argmax(final_zero, axis=0).astype(np.uint8)

        # save finals
        final_zero = get_labels(final_zero, bm.allLabels)
        final = np.zeros((bm.zsh, bm.ysh, bm.xsh), dtype=np.uint8)
        final[bm.argmin_z:bm.argmax_z, bm.argmin_y:bm.argmax_y, bm.argmin_x:bm.argmax_x] = final_zero
        final = final[1:-1, 1:-1, 1:-1]
        save_data(bm.path_to_final, final, bm.header, bm.final_image_type, bm.label.compression)

        # computation time
        t = int(time.time() - bm.TIC)
        if t < 60:
            time_str = str(t) + ' sec'
        elif t < 3600:
            time_str = str(t // 60) + ' min ' + str(t % 60) + ' sec'
        else:
            # includes t == 3600, which previously matched no branch
            time_str = str(t // 3600) + ' h ' + str((t % 3600) // 60) + ' min ' + str(t % 60) + ' sec'
        print('Computation time:', time_str)

    else:

        # receive the image data
        data_z, data_y, data_x, data_dtype = comm.recv(source=0, tag=0)
        data = np.empty((data_z, data_y, data_x), dtype=data_dtype)
        if data_dtype == 'uint8':
            comm.Recv([data, MPI.BYTE], source=0, tag=1)
        else:
            comm.Recv([data, MPI.FLOAT], source=0, tag=1)

        # receive the walk parameters and the labels
        allx, nbrw, sorw = comm.recv(source=0, tag=2)
        if allx:
            # one label array per axis
            labels = []
            for k in range(3):
                labels_z, labels_y, labels_x = comm.recv(source=0, tag=k + 3)
                labels_tmp = np.empty((labels_z, labels_y, labels_x), dtype=np.int32)
                comm.Recv([labels_tmp, MPI.INT], source=0, tag=k + 6)
                labels.append(labels_tmp)
        else:
            labels_z, labels_y, labels_x = comm.recv(source=0, tag=3)
            labels = np.empty((labels_z, labels_y, labels_x), dtype=np.int32)
            comm.Recv([labels, MPI.INT], source=0, tag=6)
        indices = comm.recv(source=0, tag=9)
        indices_child = comm.recv(source=0, tag=10)

        # init cuda device
        cuda.init()
        dev = cuda.Device(rank % cuda.Device.count())
        ctx = dev.make_context()

        try:
            # select the desired script
            if allx:
                from pycuda_small_allx import walk
            else:
                from pycuda_small import walk

            # run random walks
            tic = time.time()
            walkmap = walk(data, labels, indices, indices_child, nbrw, sorw, name)
            tac = time.time()
            print('Walktime_%s: ' % (name) + str(int(tac - tic)) + ' ' + 'seconds')
        finally:
            # free device
            ctx.pop()
            del ctx

        # send data: take part in the per-label reduction on rank 0
        for k in range(walkmap.shape[0]):
            sendbuf = np.copy(walkmap[k])
            comm.Barrier()
            comm.Reduce([sendbuf, MPI.FLOAT], None, root=0, op=MPI.SUM)
def predict_semantic_segmentation(img, position, path_to_model, path_to_final,
                                  z_patch, y_patch, x_patch, z_shape, y_shape, x_shape,
                                  compress, header, img_header, channels, stride_size,
                                  allLabels, batch_size, region_of_interest):
    """Segment ``img`` patch-wise with a stored model and save the label image.

    Patches of size ``(z_patch, y_patch, x_patch)`` are taken every
    ``stride_size`` voxels, predicted by the model loaded from
    ``path_to_model``, and their class probabilities are summed per voxel.
    The per-voxel argmax is resized to ``(z_shape, y_shape, x_shape)``,
    optionally placed back into the uncropped volume described by
    ``region_of_interest``, mapped through ``get_labels`` and written to
    ``path_to_final``.

    Parameters
    ----------
    img : 3-D array (unpacked as z, y, x).
    position : passed through to ``PredictDataGenerator``.
    region_of_interest : sequence of nine values
        ``min_z, max_z, min_y, max_y, min_x, max_x, z_shape, y_shape,
        x_shape``; ignored when all entries are falsy.
    header, img_header : optional headers; ``header`` is updated for the
        result and, if ``img_header`` is given, for its physical size.

    Returns
    -------
    None
    """
    # img shape
    zsh, ysh, xsh = img.shape

    # origins of all patches, in the order in which they are predicted
    origins = [(k, l, m)
               for k in range(0, zsh-z_patch+1, stride_size)
               for l in range(0, ysh-y_patch+1, stride_size)
               for m in range(0, xsh-x_patch+1, stride_size)]

    # list of IDs: flat voxel index of every patch origin
    list_IDs = [k*ysh*xsh + l*xsh + m for k, l, m in origins]

    # make length of list divisible by batch size; the padding repeats
    # existing IDs cyclically, so it also works when there are fewer patches
    # than missing entries and adds nothing when the length already fits
    n_patches = len(list_IDs)
    rest = -n_patches % batch_size
    list_IDs = list_IDs + [list_IDs[i % n_patches] for i in range(rest)]

    # parameters
    params = {'dim': (z_patch, y_patch, x_patch),
              'dim_img': (zsh, ysh, xsh),
              'batch_size': batch_size,
              'n_channels': channels}

    # data generator
    predict_generator = PredictDataGenerator(img, position, list_IDs, **params)

    # create a MirroredStrategy
    if os.name == 'nt':
        cdo = tf.distribute.HierarchicalCopyAllReduce()
    else:
        cdo = tf.distribute.NcclAllReduce()
    strategy = tf.distribute.MirroredStrategy(cross_device_ops=cdo)

    # load model
    with strategy.scope():
        model = load_model(str(path_to_model))

    # predict
    probabilities = model.predict(predict_generator, verbose=0, steps=None)

    # create final: sum the class probabilities of overlapping patches;
    # only the first n_patches predictions are used, the padding is ignored
    final = np.zeros((zsh, ysh, xsh, probabilities.shape[4]), dtype=np.float32)
    for nb, (k, l, m) in enumerate(origins):
        final[k:k+z_patch, l:l+y_patch, m:m+x_patch] += probabilities[nb]

    # get final
    out = np.argmax(final, axis=3)
    out = out.astype(np.uint8)

    # rescale final to input size, one binary mask per occurring class
    label = np.zeros((z_shape, y_shape, x_shape), dtype=out.dtype)
    for k in np.unique(out):
        tmp = np.zeros_like(out)
        tmp[out==k] = 1
        tmp = img_resize(tmp, z_shape, y_shape, x_shape)
        label[tmp==1] = k

    # revert automatic cropping
    if np.any(region_of_interest):
        min_z,max_z,min_y,max_y,min_x,max_x,z_shape,y_shape,x_shape = region_of_interest[:]
        tmp = np.zeros((z_shape, y_shape, x_shape), dtype=out.dtype)
        tmp[min_z:max_z,min_y:max_y,min_x:max_x] = label
        label = tmp

    # save final
    label = label.astype(np.uint8)
    label = get_labels(label, allLabels)
    if header is not None:
        header = get_image_dimensions(header, label)
        # the physical size is only transferred when there is a header to update
        if img_header is not None:
            header = get_physical_size(header, img_header)
    save_data(path_to_final, label, header=header, compress=compress)
def refine_semantic_segmentation(path_to_img, path_to_final, path_to_model, patch_size,
                                 compress, header, img_header, normalize, stride_size, allLabels,
                                 mu, sig, batch_size):
    """Refine an existing segmentation patch-wise and overwrite ``path_to_final``.

    The image, its label volume and the second network input channel are
    obtained from ``load_refine_data``.  Cubic patches of edge length
    ``patch_size`` are taken every ``stride_size`` voxels; only patches whose
    labels are not uniform (see the note at the selection test) are predicted
    with the model from ``path_to_model``.  Inside those patches the labels
    are replaced by the argmax of the summed class probabilities, everywhere
    else the loaded labels are kept.  The result is mapped through
    ``get_labels``, cut to ``(z_shape, y_shape, x_shape)`` and saved.

    If no patch is selected, no model is loaded and the loaded labels are
    saved unchanged.

    Returns
    -------
    None
    """
    # load refine data
    img, label, prior, z_shape, y_shape, x_shape = load_refine_data(
        path_to_img, path_to_final, patch_size, normalize, allLabels, mu, sig)

    zsh, ysh, xsh = img.shape

    # select the 3D-patches once; ``label`` is never modified below, so the
    # same selection is valid for building, accumulating and writing back
    origins = []
    for k in range(0, zsh-patch_size+1, stride_size):
        for l in range(0, ysh-patch_size+1, stride_size):
            for m in range(0, xsh-patch_size+1, stride_size):
                tmp = label[k:k+patch_size, l:l+patch_size, m:m+patch_size]
                # NOTE(review): tmp[1:] skips the first z-slice of the patch,
                # so a patch that differs only there is not selected - confirm
                # that this is intended
                if np.any(tmp[1:]!=tmp[0,0,0]):
                    origins.append((k, l, m))

    out = np.copy(label)

    if origins:

        # create prediction set: channel 0 is the image, channel 1 the
        # second volume returned by load_refine_data
        x_test = np.empty((len(origins), patch_size, patch_size, patch_size, 2), dtype=img.dtype)
        for nb, (k, l, m) in enumerate(origins):
            x_test[nb,:,:,:,0] = img[k:k+patch_size, l:l+patch_size, m:m+patch_size]
            x_test[nb,:,:,:,1] = prior[k:k+patch_size, l:l+patch_size, m:m+patch_size]

        # create a MirroredStrategy
        if os.name == 'nt':
            cdo = tf.distribute.HierarchicalCopyAllReduce()
        else:
            cdo = tf.distribute.NcclAllReduce()
        strategy = tf.distribute.MirroredStrategy(cross_device_ops=cdo)

        # load model
        with strategy.scope():
            model = load_model(str(path_to_model))

        # predict
        prob = model.predict(x_test, batch_size=batch_size, verbose=0, steps=None)

        # create final: sum the class probabilities of overlapping patches
        final = np.zeros((zsh, ysh, xsh, prob.shape[4]), dtype=np.float32)
        for nb, (k, l, m) in enumerate(origins):
            final[k:k+patch_size, l:l+patch_size, m:m+patch_size] += prob[nb]
        final = np.argmax(final, axis=3)
        final = final.astype(np.uint8)

        # overwrite the labels inside the predicted patches only
        for k, l, m in origins:
            out[k:k+patch_size, l:l+patch_size, m:m+patch_size] = \
                final[k:k+patch_size, l:l+patch_size, m:m+patch_size]

    # save final
    out = out.astype(np.uint8)
    out = get_labels(out, allLabels)
    out = out[:z_shape, :y_shape, :x_shape]
    if header is not None:
        header = get_image_dimensions(header, out)
        # the physical size is only transferred when there is a header to update
        if img_header is not None:
            header = get_physical_size(header, img_header)
    save_data(path_to_final, out, header=header, compress=compress)
# determine y subvolume blockmin_y = int(sub_y_i * sub_size_y) blockmax_y = int((sub_y_i + 1) * sub_size_y) datamin_y = max(blockmin_y - overlap, 0) datamax_y = min(blockmax_y + overlap, ysh) # determine x subvolume blockmin_x = int(sub_x_i * sub_size_x) blockmax_x = int((sub_x_i + 1) * sub_size_x) datamin_x = max(blockmin_x - overlap, 0) datamax_x = min(blockmax_x + overlap, xsh) # extract image subvolume data, _ = load_data(path_to_data, 'split_volume') save_data( BASE_DIR + f'/tmp/sub_volume_{subvolume}.tif', data[datamin_z:datamax_z, datamin_y:datamax_y, datamin_x:datamax_x], False) del data # extract label subvolume labelData, header, final_image_type = load_data( path_to_labels, 'split_volume', True) save_data( BASE_DIR + '/tmp/labels.sub_volume.tif', labelData[datamin_z:datamax_z, datamin_y:datamax_y, datamin_x:datamax_x]) del labelData # configure command cmd = [ 'mpiexec', '-np', f'{nump}', 'python3',
def _format_computation_time(seconds):
    """Return ``seconds`` (an int) as 'S sec', 'M min S sec' or 'H h M min S sec'."""
    if seconds < 60:
        return str(seconds) + ' sec'
    if seconds < 3600:
        return str(seconds // 60) + ' min ' + str(seconds % 60) + ' sec'
    return (str(seconds // 3600) + ' h ' + str((seconds % 3600) // 60) + ' min '
            + str(seconds % 60) + ' sec')


def _stack_chunks(chunks):
    """Join arrays along axis 0; a single chunk is returned as is, without a copy."""
    if len(chunks) == 1:
        return chunks[0]
    return np.concatenate(chunks, axis=0)


def _gather_from_children(comm, ngpus, root_part):
    """Receive the uint8 result chunks of ranks 1..ngpus-1 and append them,
    in rank order, to ``root_part`` along axis 0."""
    parts = [root_part]
    for source in range(1, ngpus):
        lendataListe = comm.recv(source=source, tag=0)
        for l in range(lendataListe):
            data_z, data_y, data_x = comm.recv(source=source, tag=10+(2*l))
            receivedata = np.empty((data_z, data_y, data_x), dtype=np.uint8)
            comm.Recv([receivedata, MPI.BYTE], source=source, tag=10+(2*l+1))
            parts.append(receivedata)
    return _stack_chunks(parts)


def _send_to_root(comm, volume):
    """Send ``volume`` to rank 0 in the chunks produced by ``splitlargedata``."""
    dataListe = splitlargedata(volume)
    comm.send(len(dataListe), dest=0, tag=0)
    for k, dataTemp in enumerate(dataListe):
        dataTemp = dataTemp.copy(order='C')
        comm.send([dataTemp.shape[0], dataTemp.shape[1], dataTemp.shape[2]], dest=0, tag=10+(2*k))
        comm.Send([dataTemp, MPI.BYTE], dest=0, tag=10+(2*k+1))


def _embed_result(bm, result):
    """Write ``result`` into the cropped region of a zero uint8 volume of
    shape ``(bm.zsh, bm.ysh, bm.xsh)`` and strip the one-voxel border."""
    full = np.zeros((bm.zsh, bm.ysh, bm.xsh), dtype=np.uint8)
    full[bm.argmin_z:bm.argmax_z, bm.argmin_y:bm.argmax_y, bm.argmin_x:bm.argmax_x] = result
    return full[1:-1, 1:-1, 1:-1]


def _diffusion_child(comm, bm=None):
    """Run the domain-decomposed random-walk segmentation on this MPI rank.

    Rank 0 crops the volume to the bounding box stored in ``bm``, splits it
    along z into one block per rank (each extended by up to 100 ghost
    slices on both sides), sends every other rank its block and labels, runs
    ``walk`` on block 0, collects the results of the other ranks and saves
    the final image and, if ``walk`` returned them, the uncertainty and the
    smoothed result.  Every other rank receives its block from rank 0, runs
    ``walk`` and sends its results back.

    Parameters
    ----------
    comm
        MPI communicator (used through ``Get_rank``, ``Get_size``, ``send``,
        ``Send``, ``recv`` and ``Recv``; it is also handed to ``walk``).
    bm
        Job description object; only accessed on rank 0, where ``bm.data``
        and ``bm.labelData`` are replaced by their cropped copies.

    Returns
    -------
    None
        Results are written with ``save_data``.
    """
    rank = comm.Get_rank()
    ngpus = comm.Get_size()

    nodename = socket.gethostname()
    name = '%s %s' %(nodename, rank)
    print(name)

    if rank == 0:

        # reduce blocksize
        bm.data = np.copy(bm.data[bm.argmin_z:bm.argmax_z, bm.argmin_y:bm.argmax_y, bm.argmin_x:bm.argmax_x], order='C')
        bm.labelData = np.copy(bm.labelData[bm.argmin_z:bm.argmax_z, bm.argmin_y:bm.argmax_y, bm.argmin_x:bm.argmax_x], order='C')

        # domain decomposition: z-boundaries of the blocks, the last block
        # takes the remainder
        depth = bm.argmax_z - bm.argmin_z
        sizeofblocks = depth // ngpus
        blocks = [0]
        for k in range(ngpus-1):
            blocks.append(blocks[-1] + sizeofblocks)
        blocks.append(depth)
        print('blocks =', blocks)

        # read labeled slices along the other two axes of the whole volume
        if bm.label.allaxis:
            tmp = np.swapaxes(bm.labelData, 0, 1)
            tmp = np.ascontiguousarray(tmp)
            indices_01, _ = read_labeled_slices_allx(tmp)
            tmp = np.swapaxes(tmp, 0, 2)
            tmp = np.ascontiguousarray(tmp)
            indices_02, _ = read_labeled_slices_allx(tmp)

        # send data to childs; destination 0 comes last, so after the loop
        # the block variables describe the block of rank 0 itself
        for destination in range(ngpus-1,-1,-1):

            # ghost blocks
            blockmin = blocks[destination]
            blockmax = blocks[destination+1]
            datablockmin = max(blockmin - 100, 0)
            datablockmax = min(blockmax + 100, depth)
            datablock = np.copy(bm.data[datablockmin:datablockmax], order='C')
            labelblock = np.copy(bm.labelData[datablockmin:datablockmax], order='C')

            # read labeled slices
            if bm.label.allaxis:
                # mask the ghost slices with -1
                labelblock = labelblock.astype(np.int32)
                labelblock[:blockmin - datablockmin] = -1
                labelblock[blockmax - datablockmin:] = -1
                indices_00, labels_00 = read_labeled_slices_allx(labelblock)
                # pick the labeled slices along the other axes in one step
                # instead of appending them one by one
                tmp = np.swapaxes(labelblock, 0, 1)
                tmp = np.ascontiguousarray(tmp)
                labels_01 = tmp[np.asarray(indices_01, dtype=np.intp)]
                tmp = np.swapaxes(tmp, 0, 2)
                tmp = np.ascontiguousarray(tmp)
                labels_02 = tmp[np.asarray(indices_02, dtype=np.intp)]
                indices_child = [indices_00, indices_01, indices_02]
                labels_child = [labels_00, labels_01, labels_02]
            else:
                # clear the labels of the ghost slices
                labelblock[:blockmin - datablockmin] = 0
                labelblock[blockmax - datablockmin:] = 0
                indices_child, labels_child = read_labeled_slices(labelblock)

            # print indices of labels
            print('indices child %s:' %(destination), indices_child)

            if destination > 0:
                # block limits relative to the data block that is sent
                blocks_temp = blocks[:]
                blocks_temp[destination] = blockmin - datablockmin
                blocks_temp[destination+1] = blockmax - datablockmin
                dataListe = splitlargedata(datablock)
                sendToChild(comm, indices_child, destination, dataListe, labels_child,
                            bm.label.nbrw, bm.label.sorw, blocks_temp, bm.label.allaxis,
                            bm.allLabels, bm.label.smooth, bm.label.uncertainty, bm.platform)

        # select platform
        if bm.platform == 'cuda':
            import pycuda.driver as cuda
            cuda.init()
            dev = cuda.Device(rank)
            ctx, queue = dev.make_context(), None
            if bm.label.allaxis:
                from biomedisa_features.random_walk.pycuda_large_allx import walk
            else:
                from biomedisa_features.random_walk.pycuda_large import walk
        else:
            ctx, queue = _get_device(bm.platform, rank)
            from biomedisa_features.random_walk.pyopencl_large import walk

        # run random walks
        try:
            tic = time.time()
            memory_error, final, final_uncertainty, final_smooth = walk(
                comm, datablock, labels_child, indices_child, bm.label.nbrw, bm.label.sorw,
                blockmin-datablockmin, blockmax-datablockmin, name,
                bm.allLabels, bm.label.smooth, bm.label.uncertainty, ctx, queue)
            tac = time.time()
            print('Walktime_%s: ' %(name) + str(int(tac - tic)) + ' ' + 'seconds')
        finally:
            # free device, also when walk raises
            if bm.platform == 'cuda':
                ctx.pop()
            del ctx

        if memory_error:
            print('GPU out of memory. Image too large.')

        else:

            # gather data and save finals
            final = _gather_from_children(comm, ngpus, final)
            save_data(bm.path_to_final, _embed_result(bm, final), bm.header,
                      bm.final_image_type, bm.label.compression)

            # uncertainty
            if final_uncertainty is not None:
                final_uncertainty *= 255
                final_uncertainty = final_uncertainty.astype(np.uint8)
                final_uncertainty = _gather_from_children(comm, ngpus, final_uncertainty)
                save_data(bm.path_to_uq, _embed_result(bm, final_uncertainty),
                          compress=bm.label.compression)

            # smooth
            if final_smooth is not None:
                final_smooth = _gather_from_children(comm, ngpus, final_smooth)
                save_data(bm.path_to_smooth, _embed_result(bm, final_smooth), bm.header,
                          bm.final_image_type, bm.label.compression)

            # print computation time
            t = int(time.time() - bm.TIC)
            print('Computation time:', _format_computation_time(t))

    else:

        # receive the image block in chunks
        lendataListe = comm.recv(source=0, tag=0)
        data_parts = []
        for k in range(lendataListe):
            data_z, data_y, data_x, data_dtype = comm.recv(source=0, tag=10+(2*k))
            data_temp = np.empty((data_z, data_y, data_x), dtype=data_dtype)
            if data_dtype == 'uint8':
                comm.Recv([data_temp, MPI.BYTE], source=0, tag=10+(2*k+1))
            else:
                comm.Recv([data_temp, MPI.FLOAT], source=0, tag=10+(2*k+1))
            data_parts.append(data_temp)
        data = _stack_chunks(data_parts)

        # get parameters
        nbrw, sorw, allx, smooth, uncertainty, platform = comm.recv(source=0, tag=1)

        # receive the labels in chunks; the tags must match those used by
        # the sender and are therefore kept exactly as they were
        if allx:
            labels = []
            for k in range(3):
                lenlabelsListe = comm.recv(source=0, tag=2+k)
                label_parts = []
                for l in range(lenlabelsListe):
                    labels_z, labels_y, labels_x = comm.recv(source=0, tag=100+(2*k))
                    tmp = np.empty((labels_z, labels_y, labels_x), dtype=np.int32)
                    comm.Recv([tmp, MPI.INT], source=0, tag=100+(2*k+1))
                    label_parts.append(tmp)
                labels.append(_stack_chunks(label_parts))
        else:
            lenlabelsListe = comm.recv(source=0, tag=2)
            label_parts = []
            for k in range(lenlabelsListe):
                labels_z, labels_y, labels_x = comm.recv(source=0, tag=100+(2*k))
                tmp = np.empty((labels_z, labels_y, labels_x), dtype=np.int32)
                comm.Recv([tmp, MPI.INT], source=0, tag=100+(2*k+1))
                label_parts.append(tmp)
            labels = _stack_chunks(label_parts)

        allLabels = comm.recv(source=0, tag=99)
        indices = comm.recv(source=0, tag=8)
        blocks = comm.recv(source=0, tag=9)

        blockmin = blocks[rank]
        blockmax = blocks[rank+1]

        # select platform
        if platform == 'cuda':
            import pycuda.driver as cuda
            cuda.init()
            # wrap around so that ranks beyond the local device count still
            # get a valid device
            dev = cuda.Device(rank % cuda.Device.count())
            ctx, queue = dev.make_context(), None
            if allx:
                from biomedisa_features.random_walk.pycuda_large_allx import walk
            else:
                from biomedisa_features.random_walk.pycuda_large import walk
        else:
            ctx, queue = _get_device(platform, rank)
            from biomedisa_features.random_walk.pyopencl_large import walk

        # run random walks
        try:
            tic = time.time()
            memory_error, final, final_uncertainty, final_smooth = walk(
                comm, data, labels, indices, nbrw, sorw, blockmin, blockmax, name,
                allLabels, smooth, uncertainty, ctx, queue)
            tac = time.time()
            print('Walktime_%s: ' %(name) + str(int(tac - tic)) + ' ' + 'seconds')
        finally:
            # free device, also when walk raises
            if platform == 'cuda':
                ctx.pop()
            del ctx

        # send finals in the order in which rank 0 collects them
        if not memory_error:
            _send_to_root(comm, final)

            if final_uncertainty is not None:
                final_uncertainty *= 255
                final_uncertainty = final_uncertainty.astype(np.uint8)
                _send_to_root(comm, final_uncertainty)

            if final_smooth is not None:
                _send_to_root(comm, final_smooth)
def crop_volume(img, path_to_volume, path_to_model, z_shape, y_shape, x_shape, batch_size,
        debug_cropping, x_puffer=25,y_puffer=25,z_puffer=25):
    """Predict the crop range of a volume along z, y and x.

    Every slice of ``img`` is classified with a DenseNet whose weights are
    read from the ``cropping_weights`` group of the HDF5 file
    ``path_to_model``.  The resulting per-slice values are thresholded at
    0.5, runs of one to seven positive slices enclosed by zeros are removed,
    and the first ``z_shape`` values, the next ``y_shape`` values and the
    remaining values are evaluated as the z-, y- and x-axis respectively.
    For every axis the range from the first to the last positive slice,
    widened by the puffer, is returned.

    Parameters
    ----------
    img : 4-D array (unpacked as slices, height, width, channels).
        Presumably the z-, y- and x-slices stacked along axis 0 - TODO confirm
        against the caller.
    path_to_volume : path of the volume; only used to derive the output
        names and, with ``debug_cropping``, to load the volume.
    debug_cropping : if truthy, a plot of the per-slice values
        (``*_cropped.png``) and the cropped volume (``*_cropped.tif``) are
        written next to ``path_to_volume``.
    x_puffer, y_puffer, z_puffer : number of slices added on both sides.

    Returns
    -------
    tuple
        ``(z_upper, z_lower, y_upper, y_lower, x_upper, x_lower)``.
    """
    # path to cropped image: swap only the trailing file name, a directory
    # that happens to contain the same text must stay untouched
    basename = os.path.basename(path_to_volume)
    filename = os.path.splitext(basename)[0]
    if filename.endswith('.nii'):
        filename = filename[:-4]
    filename = filename + '_cropped.tif'
    path_to_final = path_to_volume[:len(path_to_volume) - len(basename)] + filename

    # img shape
    zsh, ysh, xsh, channels = img.shape

    # list of IDs
    list_IDs = list(range(zsh))

    # make length of list divisible by batch size; the padding repeats
    # existing IDs cyclically, so it also works when there are fewer slices
    # than missing entries and adds nothing when the length already fits
    rest = -zsh % batch_size
    list_IDs = list_IDs + [list_IDs[i % zsh] for i in range(rest)]

    # parameters
    params = {'dim': (ysh,xsh),
              'dim_img': (zsh, ysh, xsh),
              'batch_size': batch_size,
              'n_channels': channels}

    # data generator
    predict_generator = PredictDataGeneratorCrop(img, list_IDs, **params)

    # create a MirroredStrategy
    if os.name == 'nt':
        cdo = tf.distribute.HierarchicalCopyAllReduce()
    else:
        cdo = tf.distribute.NcclAllReduce()
    strategy = tf.distribute.MirroredStrategy(cross_device_ops=cdo)

    # input shape
    input_shape = (ysh, xsh, channels)

    # load model
    with strategy.scope():
        model = make_densenet(input_shape)

        # load weights: the arrays are stored under consecutive integer
        # names in the order of the layers' weight lists
        with h5py.File(path_to_model, 'r') as hf:
            cropping_weights = hf.get('cropping_weights')
            iterator = 0
            for layer in model.layers:
                n_arrays = len(layer.get_weights())
                if n_arrays:
                    new_weights = [cropping_weights.get(str(iterator + j))
                                   for j in range(n_arrays)]
                    iterator += n_arrays
                    layer.set_weights(new_weights)

    # predict
    probabilities = model.predict(predict_generator, verbose=0, steps=None)
    probabilities = probabilities[:zsh]
    probabilities = np.ravel(probabilities)

    # plot prediction
    if debug_cropping:
        import matplotlib.pyplot as plt
        import matplotlib
        x = range(len(probabilities))
        y = list(probabilities)
        plt.plot(x, y)

    # create mask
    probabilities[probabilities > 0.5] = 1
    probabilities[probabilities <= 0.5] = 0

    # remove outliers: clear runs of 1..7 ones that are enclosed by zeros;
    # for a run of length n the window around slice k is n+2 values long
    patterns = [np.array([0] + [1] * run + [0]) for run in range(1, 8)]
    for k in range(4,zsh-4):
        for pattern in patterns:
            start = k - len(pattern) // 2
            stop = start + len(pattern)
            if np.all(probabilities[start:stop] == pattern):
                probabilities[start:stop] = 0
                break

    # plot cleaned result
    if debug_cropping:
        y = list(probabilities)
        plt.plot(x, y, '--')
        plt.tight_layout()  # To prevent overlapping of subplots
        # NOTE(review): the backend is switched after plotting and GTK3Agg
        # is an interactive backend - confirm that this is intended
        matplotlib.use("GTK3Agg")
        plt.savefig(path_to_final.replace('.tif','.png'), dpi=300)

    # create final: first and last positive slice per axis, plus puffer
    z_upper = max(0,np.argmax(probabilities[:z_shape]) - z_puffer)
    z_lower = min(z_shape,z_shape - np.argmax(np.flip(probabilities[:z_shape])) + z_puffer +1)
    y_upper = max(0,np.argmax(probabilities[z_shape:z_shape+y_shape]) - y_puffer)
    y_lower = min(y_shape,y_shape - np.argmax(np.flip(probabilities[z_shape:z_shape+y_shape])) + y_puffer +1)
    x_upper = max(0,np.argmax(probabilities[z_shape+y_shape:]) - x_puffer)
    x_lower = min(x_shape,x_shape - np.argmax(np.flip(probabilities[z_shape+y_shape:])) + x_puffer +1)

    # crop image data
    if debug_cropping:
        volume, _ = load_data(path_to_volume)
        final = volume[z_upper:z_lower,y_upper:y_lower,x_upper:x_lower]
        save_data(path_to_final, final, compress=False)

    return z_upper, z_lower, y_upper, y_lower, x_upper, x_lower