def forward(Q, cfg, args, timeout):
    """To be executed by a local thread. Pops items from the queue and forwards them."""
    global comm, rank
    logger = logging.getLogger("middleman")
    logger.info(f"Worker: Creating writer_gen: engine={cfg[args.transport_tx]['engine']}")

    ch_name = gen_channel_name(cfg["diagnostic"])
    writer = writer_gen(cfg[args.transport_tx], ch_name)
    logger.info(f"Worker: Streaming channel name = {ch_name}")

    tx_list = []
    is_first = True
    while True:
        try:
            msg = Q.get(timeout=timeout)
            logger.info(f"Worker: Receiving from Queue: {msg} - {msg.data.shape}, {msg.data.dtype}")
            if is_first:
                # The variable and attributes can only be defined once the first
                # chunk has arrived, since its shape and dtype are needed.
                writer.DefineVariable(gen_var_name(cfg)[rank], msg.data.shape, msg.data.dtype)
                writer.DefineAttributes("stream_attrs", msg.attrs)
                logger.info(f"Worker: Defining stream_attrs for forwarded stream: {msg.attrs}")
                writer.Open()
                logger.info("Worker: Starting forwarding process")
                is_first = False
        except queue.Empty:
            logger.info("Worker: Empty queue after waiting until time-out. Exiting")
            break

        logger.info(f"Worker: Forwarding chunk {msg.tstep_idx}. Data = {msg.data.shape}")
        writer.BeginStep()
        writer.put_data(msg)
        writer.EndStep()
        logger.info(f"Worker: Done writing chunk {msg.tstep_idx}.")
        tx_list.append(msg.tstep_idx)

        Q.task_done()
        logger.info(f"Consumed tidx={msg.tstep_idx}")

    writer.Close()
    logger.info(f"Worker: Exiting send loop. Transmitted {len(tx_list)} time chunks: {tx_list}")
    logger.info(writer.transfer_stats())
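# A minimal sketch of the message container that forward() consumes. In the
# project, AdiosMessage is imported from elsewhere; the fields below are only
# inferred from how forward() and the middleman's main() use the object, so
# treat this as an illustration rather than the actual definition.
from dataclasses import dataclass

import numpy as np


@dataclass
class AdiosMessage:
    """Bundles one time-step chunk with its metadata."""
    tstep_idx: int       # time-step index reported by reader.CurrentStep()
    data: np.ndarray     # raw chunk; shape/dtype feed writer.DefineVariable()
    attrs: dict          # stream attributes written as "stream_attrs"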
def main():
    """Processes a stream of data chunks on an executor."""
    comm = MPI.COMM_WORLD

    # Parse command line arguments and read the configuration file
    parser = argparse.ArgumentParser(
        description="Receive data and dispatch analysis tasks to an MPI queue")
    parser.add_argument('--config', type=str,
                        help='Lists the configuration file',
                        default='configs/config_null.json')
    parser.add_argument("--num_ranks_preprocess", type=int,
                        help="Number of processes used in the preprocessing executor",
                        default=4)
    parser.add_argument("--num_ranks_analysis", type=int,
                        help="Number of processes used in the analysis executor",
                        default=4)
    parser.add_argument("--num_queue_threads", type=int,
                        help="Number of worker threads that consume items from the queue",
                        default=4)
    parser.add_argument("--transport", type=str,
                        help="Specifies the transport section used to configure the reader",
                        default="transport_rx")
    parser.add_argument("--run_id", type=str,
                        help="Name of the database collection to store analysis results in",
                        required=True)
    args = parser.parse_args()

    with open(args.config, "r") as df:
        cfg = json.load(df)

    # Load the logger configuration from file:
    # http://zetcode.com/python/logging/
    with open("configs/logger.yaml", "r") as f:
        log_cfg = yaml.safe_load(f.read())
    logging.config.dictConfig(log_cfg)
    logger = logging.getLogger('simple')

    # PoolExecutor for pre-processing, on-node.
    executor_pre = ThreadPoolExecutor(max_workers=args.num_ranks_preprocess)
    # PoolExecutor for data analysis, off-node.
    executor_anl = MPIPoolExecutor(max_workers=args.num_ranks_analysis)

    stream_varname = gen_var_name(cfg)[0]

    cfg["run_id"] = args.run_id
    cfg["storage"]["run_id"] = cfg["run_id"]
    logger.info(f"Starting run {cfg['run_id']}")

    # Instantiate a storage backend and store the run configuration and task configuration
    store_type = get_storage_object(cfg["storage"])
    store_backend = store_type(cfg["storage"])
    store_backend.store_one({"run_id": cfg['run_id'], "run_config": cfg})

    # TODO: (RMC) Should this be moved to where cfg is updated?
    # (would allow updating channels to process remotely)
    reader = reader_gen(cfg[args.transport], gen_channel_name(cfg["diagnostic"]))
    reader.Open()

    dq = queue.Queue()

    # In a streaming setting (SST, dataman), attributes can only be accessed after
    # reading the first time step of a variable.
    # Initialize stream_attrs with None and load it in the main loop below.
    stream_attrs = None
    data_model_gen = data_model_generator(cfg["diagnostic"])
    my_preprocessor = preprocessor(executor_pre, cfg)
    my_task_list = tasklist(executor_anl, cfg)

    worker_thread_list = []
    for _ in range(args.num_queue_threads):
        new_worker = threading.Thread(target=consume,
                                      args=(dq, my_task_list, my_preprocessor))
        new_worker.start()
        worker_thread_list.append(new_worker)

    logger.info("Starting main loop")
    tic_main = time.perf_counter()
    rx_list = []
    while True:
        stepStatus = reader.BeginStep(timeoutSeconds=5.0)
        if stepStatus:
            # Load attributes on the first successful step
            if stream_attrs is None:
                logger.info("Waiting for attributes")
                stream_attrs = reader.get_attrs("stream_attrs")
                logger.info(f"Got attributes: {stream_attrs}")

            # Read data
            stream_data = reader.Get(stream_varname, save=False)
            rx_list.append(reader.CurrentStep())

            # Create a data model instance from the raw data and push it into the queue
            msg = data_model_gen.new_chunk(stream_data, stream_attrs, reader.CurrentStep())
            dq.put_nowait(msg)
            logger.info(f"Published tidx {reader.CurrentStep()}")
            reader.EndStep()
        else:
            logger.info(f"Exiting: StepStatus={stepStatus}")
            break

        if reader.CurrentStep() > 100:
            break

    dq.join()
    logger.info("Queue joined")
    logger.info("Exiting main loop")

    for thr in worker_thread_list:
        thr.join()
    logger.info("Workers have joined")

    # Shut down the executors
    executor_anl.shutdown(wait=True)
    executor_pre.shutdown(wait=True)

    toc_main = time.perf_counter()
    logger.info(f"Run {cfg['run_id']} finished in {(toc_main - tic_main):6.4f}s")
    logger.info(f"Processed {len(rx_list)} time_chunks: {rx_list}")
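# Hypothetical sketch of the consume() worker targeted by the threads above.
# The real implementation lives elsewhere in the project; the submit_chunk()
# and submit() method names on the preprocessor and tasklist objects are
# assumptions made purely for illustration.
import queue


def consume(dq, my_task_list, my_preprocessor):
    """Pops chunks from the queue, preprocesses them, and dispatches analysis tasks."""
    while True:
        try:
            msg = dq.get(timeout=60.0)
        except queue.Empty:
            break
        try:
            processed = my_preprocessor.submit_chunk(msg)   # assumed method name
            my_task_list.submit(processed)                  # assumed method name
        finally:
            dq.task_done()   # required so that dq.join() in main() can return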
def main():
    """Reads items from an ADIOS2 connection and forwards them."""
    global comm, rank, args

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    parser = argparse.ArgumentParser(
        description="Receive data and dispatch analysis tasks to an MPI queue")
    parser.add_argument('--config', type=str,
                        help='Lists the configuration file',
                        default='configs/config-middle.json')
    parser.add_argument("--transport_rx",
                        help="Specifies the name of the transport section that is used to configure the reader",
                        default="transport_rx")
    parser.add_argument("--transport_tx",
                        help="Specifies the name of the transport section that is used to configure the writer",
                        default="transport_tx")
    args = parser.parse_args()

    with open(args.config, "r") as df:
        cfg = json.load(df)
    timeout = 5

    # The middleman uses both a reader and a writer. Each is configured using its
    # respective section of the config file. Therefore some keys are duplicated,
    # such as channel_range. Make sure that these items are the same in both sections.
    with open("configs/logger.yaml", "r") as f:
        log_cfg = yaml.safe_load(f.read())
    logging.config.dictConfig(log_cfg)
    logger = logging.getLogger('middleman')

    # Create the ADIOS reader object
    reader = reader_gen(cfg[args.transport_rx], gen_channel_name(cfg["diagnostic"]))
    reader.Open()

    stream_attrs = None
    stream_varname = gen_var_name(cfg)[rank]
    logger.info(f"Main: Stream varname: {stream_varname}")

    dq = queue.Queue()
    worker = threading.Thread(target=forward, args=(dq, cfg, args, timeout))
    worker.start()

    rx_list = []
    while True:
        stepStatus = reader.BeginStep()
        logger.info(f"Main: stepStatus = {stepStatus}, currentStep = {reader.CurrentStep()}")
        if stepStatus:
            # Read the stream attributes once, then the data for the current step
            if stream_attrs is None:
                if reader.InquireAttribute("stream_attrs"):
                    stream_attrs = reader.get_attrs("stream_attrs")
            stream_data = reader.Get(stream_varname, save=False)
            rx_list.append(reader.CurrentStep())

            # Generate a message and publish it to the worker queue
            msg = AdiosMessage(tstep_idx=reader.CurrentStep(),
                               data=stream_data,
                               attrs=stream_attrs)
            dq.put_nowait(msg)
            logger.info(f"Main: Published message {msg}")
            reader.EndStep()
        else:
            logger.info(f"Main: Exiting: StepStatus={stepStatus}")
            break

    logger.info("Main: Exiting main loop")
    worker.join()
    logger.info("Main: Workers have joined")
    dq.join()
    logger.info("Main: Queue joined")
    logger.info("Main: Finished")
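# A minimal sketch of the transport layout that configs/config-middle.json is
# assumed to follow. Only the section names and the keys "engine" and
# "channel_range" appear in the code and comments above; every value below is a
# placeholder, and the real file carries additional engine-specific settings.
example_middleman_cfg = {
    "diagnostic": {},                  # consumed by gen_channel_name() / gen_var_name()
    "transport_rx": {
        "engine": "SST",               # placeholder ADIOS2 engine name
        "channel_range": ["ch0000"]    # must match the value in transport_tx
    },
    "transport_tx": {
        "engine": "SST",               # placeholder ADIOS2 engine name
        "channel_range": ["ch0000"]    # must match the value in transport_rx
    }
}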
with open(args.config, "r") as df:
    cfg = json.load(df)

# Set up the logger
with open('configs/logger.yaml', 'r') as f:
    log_cfg = yaml.safe_load(f.read())
logging.config.dictConfig(log_cfg)
logger = logging.getLogger("generator")
logger.info("Starting up...")

# Instantiate a dataloader
dataloader = get_loader(cfg)
sectionname = "transport_tx" if not args.kstar else "transport_rx"
logger.info(f"Creating writer_gen: engine={cfg[sectionname]['engine']}")

writer = writer_gen(cfg[sectionname], gen_channel_name(cfg["diagnostic"]))
logger.info(f"Streaming channel name = {gen_channel_name(cfg['diagnostic'])}")

# Give the writer hints on what kind of data to transfer
writer.DefineVariable(gen_var_name(cfg)[rank],
                      dataloader.get_chunk_shape(),
                      dataloader.dtype)
# TODO: Clean up naming conventions for stream attributes
logger.info(f"Writing attributes: {dataloader.attrs}")
writer.Open()
writer.DefineAttributes("stream_attrs", dataloader.attrs)

logger.info("Start sending on channel:")
batch_gen = dataloader.batch_generator()
for nstep, chunk in enumerate(batch_gen):
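    # Hypothetical continuation of the send loop, mirroring the
    # BeginStep/put_data/EndStep pattern used by forward() in the middleman
    # above. Whether put_data() takes the raw chunk directly or a wrapped
    # message object is an assumption made here for illustration.
    writer.BeginStep()
    writer.put_data(chunk)
    writer.EndStep()
    logger.info(f"Sent time step {nstep}")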