def get_content(self):
    """Extract title, main content and images for the page at ``self.url``.

    The page is parsed into basic text nodes, the nodes are grouped into
    blocks, and a ``Judge`` picks the block holding the main content.
    The parser, partitioner and judge instances, as well as the title,
    are stored on ``self`` as a side effect.

    Returns:
        A 7-tuple ``(flag, title, content, content_with_format, srcs,
        confidence, detail)``. ``flag``, ``confidence`` and ``detail`` are
        taken unchanged from the judge's result; ``title`` is the stripped
        title string; ``srcs`` is the first value returned by
        ``self.get_images``.
    """
    # 1. Extract the basic text blocks.
    self.parser = Parser(self.url)
    text_nodes = self.parser.ns()
    self.title = self.parser.get_title()

    # 2. Partition the text strings into blocks.
    self.partitioner = Partitioner()
    candidates = self.partitioner.partition(text_nodes)

    # 3. Select the main-content block; analysis info is a by-product.
    self.judge = Judge(self.title.string, text_nodes)
    verdict = self.judge.select(candidates, text_nodes)
    flag, chosen, confidence, detail = (
        verdict[key] for key in ('flag', 'block', 'confidence', 'detail')
    )

    # NOTE: content and images are extracted regardless of ``flag``; the
    # caller receives ``flag`` and decides whether to trust the result.
    content = chosen.to_str()
    srcs, images = self.get_images(chosen)
    chosen = self.insert_images(chosen, images)
    content_with_format = chosen.to_str_with_format()

    return (
        flag,
        self.title.string.strip(),
        content,
        content_with_format,
        srcs,
        confidence,
        detail,
    )
def __init__(self, context, partition_set):
    """Initialise aggregation state, the partitioner and the counters.

    Args:
        context: Mapping read for ``c.KEY_PARTITIONS`` and
            ``c.KEY_SEPERATOR_PARTITION``; also kept on the instance.
        partition_set: Mapping kept as the aggregation sets. It is
            modified in place: its ``c.KEY_TABLES`` entry is reset to an
            empty dict.
    """
    self.__aggregation_sets = partition_set
    partition_set[c.KEY_TABLES] = {}

    self.__partitioner = Partitioner(
        context[c.KEY_PARTITIONS],
        context[c.KEY_SEPERATOR_PARTITION],
    )
    self.__context = context

    # Running totals, all starting at zero.
    self.__info = {
        c.INFO_TOTAL_BYTES: 0,
        c.INFO_TOTAL_ROWS: 0,
        c.INFO_TOTAL_MESSAGES: 0,
    }

    # NOTE(review): getLogger() without a name returns the root logger, so
    # the ERROR threshold set here applies process-wide, not only to this
    # object.
    root_logger = logging.getLogger()
    self.__logger = root_logger
    root_logger.setLevel(logging.ERROR)
def __init__(self, table_type, storage_provider):
    """Set up empty partition bookkeeping and the partitioner.

    Args:
        table_type: Partition table type; forwarded to ``Partitioner``
            and stored on the instance.
        storage_provider: Instance of the class providing the underlying
            block device (e.g. loop), if present.
    """
    # Bind the underlying block device provider to this object. This is
    # done to guarantee the correct destructor order when the device
    # should be released.
    self.storage_provider = storage_provider

    # Nothing is mapped yet: all lookup tables start out empty.
    self.partition_map = {}
    self.partition_id_map = {}
    self.partition_id = {}
    self.is_mapped = False

    self.partitioner = Partitioner(table_type, storage_provider)
    self.table_type = table_type
# Load the data array from disk.
data_orig = xr.open_dataarray(filepath)

# Let's first try only one variable: index 0 along the first dimension.
data = data_orig[0, :, :, :].copy()

# Make the shape parameters available everywhere by broadcasting the three
# extents from rank 0 (in the order nx, ny, nz).
shape = np.shape(data)
nx, ny, nz = (comm.bcast(extent, root=0) for extent in shape[:3])
print(nx, ny, nz)

# Set up the partitioner. The field dimensions passed in need to be the
# real ones minus the halo points: 2 halo points on each side of the last
# two dimensions, hence "- 2 * 2".
p = Partitioner(comm, [nx, ny - 2 * 2, nz - 2 * 2], num_halo=2)

# Distribute the work onto the ranks.
data_work = p.scatter(data)

# Disabled: subset more for speedup of first tests.
# print(f'subset even more because very large dataset')
# data = data[:,::10,:,:]

# Create a mask of NaNs: NaN values have zero weight (i.e. are False).
mask = ~np.isnan(data_work)

# Gap-fill the missing values with the spatiotemporal mean.
print('gapfilling missing values with spatiotemporal mean')
tic = datetime.now()
def main(nx, ny, nz, num_iter, num_halo=2, plot_result=False):
    """Driver for apply_diffusion that sets up fields and does timings.

    Rank 0 builds the global initial field (zeros with a region of ones),
    which is scattered to all ranks through a ``Partitioner``. The
    diffusion is run once as a warm-up and once timed; rank 0 prints the
    elapsed time it measured. The initial and final global fields are
    gathered and saved by rank 0 with ``np.save`` under the names
    ``'in_field'`` and ``'out_field'``.

    Args:
        nx: Number of points in x, ``0 < nx <= 1024 * 1024``.
        ny: Number of points in y, ``0 < ny <= 1024 * 1024``.
        nz: Number of points in z, ``0 < nz <= 1024``.
        num_iter: Number of iterations of the timed run,
            ``0 < num_iter <= 1024 * 1024``.
        num_halo: Number of halo points, ``0 < num_halo <= 256``.
        plot_result: If true, rank 0 also writes ``in_field.png`` and
            ``out_field.png`` showing the middle z-level.

    Raises:
        AssertionError: If an argument is outside its accepted range
            (note that assertions are skipped under ``python -O``).
    """
    assert 0 < nx <= 1024 * 1024, 'You have to specify a reasonable value for nx'
    assert 0 < ny <= 1024 * 1024, 'You have to specify a reasonable value for ny'
    assert 0 < nz <= 1024, 'You have to specify a reasonable value for nz'
    assert 0 < num_iter <= 1024 * 1024, 'You have to specify a reasonable value for num_iter'
    assert 0 < num_halo <= 256, 'You have to specify a reasonable number of halo points'

    alpha = 1. / 32.

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    p = Partitioner(comm, [nz, ny, nx], num_halo)

    if rank == 0:
        # Global field including the halo frame in y and x.
        f = np.zeros((nz, ny + 2 * num_halo, nx + 2 * num_halo))
        # Alternative initial conditions, kept for reference:
        # Option 1: Original stencil2d-mpi during HPC4WC course:
        # f[nz // 4:3 * nz // 4, num_halo + ny // 4:num_halo + 3 * ny // 4, num_halo + nx // 4:num_halo + 3 * nx // 4] = 1.0
        # Option 2: Similar to option 1, but positive region extended towards tile edges:
        # f[nz // 10:9 * nz // 10, num_halo + ny // 10:num_halo + 9 * ny // 10, num_halo + nx // 10:num_halo + 9 * nx // 10] = 1.0
        # Option 3: One positive region in bottom-left (0-0) corner, one positive region in top-right (ny-nx) corner
        # f[nz // 4:3 * nz // 4, num_halo:num_halo + ny // 4, num_halo:num_halo + nx // 4] = 1.0
        # f[nz // 4:3 * nz // 4, num_halo + 3 * ny // 4:-num_halo, num_halo + 3 * nx // 4:-num_halo] = 1.0
        # Option 4: Positive region line prime number fraction off-center across tile:
        f[nz // 4:3 * nz // 4, num_halo + ny // 7:num_halo + 2 * ny // 7, num_halo:-num_halo] = 1.0
    else:
        # Placeholder on non-root ranks; only rank 0 builds the global field.
        f = np.empty(1)

    in_field = p.scatter(f)
    out_field = np.copy(in_field)

    # Gather the initial state back so rank 0 can store (and plot) it.
    f = p.gather(in_field)
    if rank == 0:
        np.save('in_field', f)
        if plot_result:
            plt.ioff()
            plt.imshow(f[in_field.shape[0] // 2, :, :], origin='lower')
            plt.colorbar()
            plt.savefig('in_field.png')
            plt.close()

    # warmup caches
    apply_diffusion(in_field, out_field, alpha, num_halo, p=p)

    comm.Barrier()

    # Time the actual work. perf_counter() is monotonic, so the interval
    # cannot be distorted by system clock adjustments.
    tic = time.perf_counter()
    apply_diffusion(in_field, out_field, alpha, num_halo, num_iter=num_iter, p=p)
    toc = time.perf_counter()

    comm.Barrier()

    if rank == 0:
        print("Elapsed time for work = {} s".format(toc - tic))

    update_halo(out_field, num_halo, p)

    f = p.gather(out_field)
    if rank == 0:
        np.save('out_field', f)
        if plot_result:
            plt.imshow(f[out_field.shape[0] // 2, :, :], origin='lower')
            plt.colorbar()
            plt.savefig('out_field.png')
            plt.close()
def main(nx, ny, nz, num_iter, num_halo=2, plot_result=False):
    """Driver for apply_diffusion that sets up fields and does timings.

    Rank 0 builds the global initial field (zeros with a centred box of
    ones), which is scattered to all ranks through a ``Partitioner``. The
    diffusion is run once as a warm-up and once timed; rank 0 prints the
    elapsed time it measured. The initial and final global fields are
    gathered and saved by rank 0 with ``np.save`` under the names
    ``'in_field'`` and ``'out_field'``.

    Args:
        nx: Number of points in x, ``0 < nx <= 1024 * 1024``.
        ny: Number of points in y, ``0 < ny <= 1024 * 1024``.
        nz: Number of points in z, ``0 < nz <= 1024``.
        num_iter: Number of iterations of the timed run,
            ``0 < num_iter <= 1024 * 1024``.
        num_halo: Number of halo points, ``0 < num_halo <= 256``.
        plot_result: If true, rank 0 also writes ``in_field.png`` and
            ``out_field.png`` showing the middle z-level.

    Raises:
        AssertionError: If an argument is outside its accepted range
            (note that assertions are skipped under ``python -O``).
    """
    assert 0 < nx <= 1024 * 1024, 'You have to specify a reasonable value for nx'
    assert 0 < ny <= 1024 * 1024, 'You have to specify a reasonable value for ny'
    assert 0 < nz <= 1024, 'You have to specify a reasonable value for nz'
    assert 0 < num_iter <= 1024 * 1024, 'You have to specify a reasonable value for num_iter'
    assert 0 < num_halo <= 256, 'You have to specify a reasonable number of halo points'

    alpha = 1. / 32.

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    p = Partitioner(comm, [nz, ny, nx], num_halo)

    if rank == 0:
        # Global field including the halo frame in y and x, with ones in
        # the middle half of every dimension.
        f = np.zeros((nz, ny + 2 * num_halo, nx + 2 * num_halo))
        f[nz // 4:3 * nz // 4,
          num_halo + ny // 4:num_halo + 3 * ny // 4,
          num_halo + nx // 4:num_halo + 3 * nx // 4] = 1.0
    else:
        # Placeholder on non-root ranks; only rank 0 builds the global field.
        f = np.empty(1)

    in_field = p.scatter(f)
    out_field = np.copy(in_field)

    # Gather the initial state back so rank 0 can store (and plot) it.
    f = p.gather(in_field)
    if rank == 0:
        np.save('in_field', f)
        if plot_result:
            plt.ioff()
            plt.imshow(f[in_field.shape[0] // 2, :, :], origin='lower')
            plt.colorbar()
            plt.savefig('in_field.png')
            plt.close()

    # warmup caches
    apply_diffusion(in_field, out_field, alpha, num_halo, p=p)

    comm.Barrier()

    # Time the actual work. perf_counter() is monotonic, so the interval
    # cannot be distorted by system clock adjustments.
    tic = time.perf_counter()
    apply_diffusion(in_field, out_field, alpha, num_halo, num_iter=num_iter, p=p)
    toc = time.perf_counter()

    comm.Barrier()

    if rank == 0:
        print("Elapsed time for work = {} s".format(toc - tic))

    update_halo(out_field, num_halo, p)

    f = p.gather(out_field)
    if rank == 0:
        np.save('out_field', f)
        if plot_result:
            plt.imshow(f[out_field.shape[0] // 2, :, :], origin='lower')
            plt.colorbar()
            plt.savefig('out_field.png')
            plt.close()