def main():
    """Receives data chunks from an ADIOS2 stream and dispatches analysis tasks."""
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    # Parse command line arguments and read configuration file
    parser = argparse.ArgumentParser(
        description="Receive data and dispatch analysis tasks to an MPI queue")
    parser.add_argument('--config', type=str,
                        help='Lists the configuration file',
                        default='configs/config_null.json')
    parser.add_argument('--benchmark', action="store_true")
    args = parser.parse_args()

    global cfg
    with open(args.config, "r") as df:
        cfg = json.load(df)

    # Load logger configuration from file:
    # http://zetcode.com/python/logging/
    with open("configs/logger.yaml", "r") as f:
        log_cfg = yaml.safe_load(f.read())
    logging.config.dictConfig(log_cfg)
    logger = logging.getLogger('simple')

    # Create the executors: one pool for the FFTs, one for the analysis tasks
    executor_fft = MPIPoolExecutor(max_workers=16)
    executor_anl = MPIPoolExecutor(max_workers=16)

    adios2_varname = channel_range.from_str(
        cfg["transport_nersc"]["channel_range"][0])

    # Generate a random run_id; it is overridden with a fixed id for debugging
    cfg["run_id"] = ''.join(
        random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
    cfg["run_id"] = "ABC128"
    cfg["storage"]["run_id"] = cfg["run_id"]
    logger.info(f"Starting run {cfg['run_id']}")

    # Instantiate a storage backend and store the run configuration and task configuration
    if cfg['storage']['backend'] == "numpy":
        store_backend = backends.backend_numpy(cfg['storage'])
    elif cfg['storage']['backend'] == "mongo":
        store_backend = backends.backend_mongodb(cfg["storage"])
    elif cfg['storage']['backend'] == "null":
        store_backend = backends.backend_null(cfg['storage'])
    else:
        raise NameError(
            f"Unknown storage backend requested: {cfg['storage']['backend']}")

    store_backend.store_one({"run_id": cfg['run_id'], "run_config": cfg})
    logger.info("Stored run configuration")

    # Create ADIOS reader object
    reader = reader_gen(cfg["transport_nersc"])

    task_list = task_list_spectral(executor_anl, executor_fft, cfg["task_list"],
                                   cfg["fft_params"], cfg["ECEI_cfg"], cfg["storage"])

    dq = queue.Queue()

    tic_main = timeit.default_timer()
    workers = []
    for _ in range(4):
        worker = threading.Thread(target=consume, args=(dq, task_list))
        worker.start()
        workers.append(worker)

    # reader.Open() blocks until it opens the data file or receives the
    # data stream. Put this right before entering the main loop.
    logger.info(f"{rank} Waiting for generator")
    reader.Open()
    logger.info("Starting main loop")

    rx_list = []
    while True:
        stepStatus = reader.BeginStep()
        if stepStatus:
            # Read data
            stream_data = reader.Get(adios2_varname, save=False)
            rx_list.append(reader.CurrentStep())
            # Generate a message from the time chunk and publish it
            msg = AdiosMessage(tstep_idx=reader.CurrentStep(), data=stream_data)
            dq.put_nowait(msg)
            logger.info(f"Published tidx {msg.tstep_idx}")
            reader.EndStep()
        else:
            logger.info(f"Exiting: StepStatus={stepStatus}")
            break
        if reader.CurrentStep() > 100:
            break

    dq.join()
    logger.info("Queue joined")
    logger.info("Exiting main loop")

    # Put one sentinel message per worker thread so that all consumers exit
    for _ in workers:
        dq.put(AdiosMessage(tstep_idx=None, data=None))
    for thr in workers:
        thr.join()
    logger.info("Workers have joined")

    # Shut down the executors
    executor_anl.shutdown(wait=True)
    executor_fft.shutdown(wait=True)

    toc_main = timeit.default_timer()
    logger.info(
        f"Run {cfg['run_id']} finished in {(toc_main - tic_main):6.4f}s")
    logger.info(f"Processed {len(rx_list)} time_chunks: {rx_list}")
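# The `consume` function targeted by the worker threads above is defined
# elsewhere. Below is a minimal sketch of the pattern, assuming that messages
# are AdiosMessage tuples, that tstep_idx == None is the shutdown sentinel,
# and that task_list exposes a submit() method; none of these names are
# verified against the actual implementation.
def consume(Q, task_list):
    """Dequeues time chunks and dispatches them to the analysis task list."""
    while True:
        msg = Q.get()
        if msg.tstep_idx is None:
            # Sentinel message: mark it done and terminate the worker thread
            Q.task_done()
            break
        task_list.submit(msg)
        Q.task_done()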
def main(): """Procesess a stream of data chunks on an executor.""" comm = MPI.COMM_WORLD # Parse command line arguments and read configuration file parser = argparse.ArgumentParser( description="Receive data and dispatch analysis" + "tasks to a mpi queue") parser.add_argument('--config', type=str, help='Lists the configuration file', default='configs/config_null.json') parser.add_argument( "--num_ranks_preprocess", type=int, help="Number of processes used in preprocessing executor", default=4) parser.add_argument("--num_ranks_analysis", type=int, help="Number of processes used in analysis executor", default=4) parser.add_argument( "--num_queue_threads", type=int, help="Number of worker threads that consume item from the queue", default=4) parser.add_argument( "--transport", type=str, help="Specifies the transport section used to configure the reader", default="transport_rx") parser.add_argument( "--run_id", type=str, help="Name of database collection to store analysis results in", required=True) args = parser.parse_args() with open(args.config, "r") as df: cfg = json.load(df) df.close() # Load logger configuration from file: # http://zetcode.com/python/logging/ with open("configs/logger.yaml", "r") as f: log_cfg = yaml.safe_load(f.read()) logging.config.dictConfig(log_cfg) logger = logging.getLogger('simple') # PoolExecutor for pre-processing, on-node. executor_pre = ThreadPoolExecutor(max_workers=args.num_ranks_preprocess) # PoolExecutor for data analysis. off-node executor_anl = MPIPoolExecutor(max_workers=args.num_ranks_analysis) stream_varname = gen_var_name(cfg)[0] cfg["run_id"] = args.run_id cfg["storage"]["run_id"] = cfg["run_id"] logger.info(f"Starting run {cfg['run_id']}") # Instantiate a storage backend and store the run configuration and task configuration store_type = get_storage_object(cfg["storage"]) store_backend = store_type(cfg["storage"]) store_backend.store_one({"run_id": cfg['run_id'], "run_config": cfg}) # TODO: (RMC) Should this be moved to where cfg updated? # (would allow updating channels to process remotely) reader = reader_gen(cfg[args.transport], gen_channel_name(cfg["diagnostic"])) reader.Open() dq = queue.Queue() # In a streaming setting, (SST, dataman) attributes can only be accessed after # reading the first time step of a variable. # Initialize stream_attrs with None and load it in the main loop below. 
    stream_attrs = None
    data_model_gen = data_model_generator(cfg["diagnostic"])
    my_preprocessor = preprocessor(executor_pre, cfg)
    my_task_list = tasklist(executor_anl, cfg)

    worker_thread_list = []
    for _ in range(args.num_queue_threads):
        new_worker = threading.Thread(target=consume,
                                      args=(dq, my_task_list, my_preprocessor))
        new_worker.start()
        worker_thread_list.append(new_worker)

    logger.info("Starting main loop")
    tic_main = time.perf_counter()
    rx_list = []
    while True:
        stepStatus = reader.BeginStep(timeoutSeconds=5.0)
        if stepStatus:
            # Load the stream attributes on the first step
            if stream_attrs is None:
                logger.info("Waiting for attributes")
                stream_attrs = reader.get_attrs("stream_attrs")
                logger.info(f"Got attributes: {stream_attrs}")
            # Read data
            stream_data = reader.Get(stream_varname, save=False)
            rx_list.append(reader.CurrentStep())
            # Create a datamodel instance from the raw data and push it into the queue
            msg = data_model_gen.new_chunk(stream_data, stream_attrs, reader.CurrentStep())
            dq.put_nowait(msg)
            logger.info(f"Published tidx {reader.CurrentStep()}")
            reader.EndStep()
        else:
            logger.info(f"Exiting: StepStatus={stepStatus}")
            break
        if reader.CurrentStep() > 100:
            break

    dq.join()
    logger.info("Queue joined")
    logger.info("Exiting main loop")

    for thr in worker_thread_list:
        thr.join()
    logger.info("Workers have joined")

    # Shut down the executors
    executor_anl.shutdown(wait=True)
    executor_pre.shutdown(wait=True)

    toc_main = time.perf_counter()
    logger.info(
        f"Run {cfg['run_id']} finished in {(toc_main - tic_main):6.4f}s")
    logger.info(f"Processed {len(rx_list)} time_chunks: {rx_list}")
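# As in the variant above, `consume` is defined elsewhere. A minimal sketch
# under the assumption that each chunk passes through the preprocessor before
# the task list executes on it; the submit()/execute() method names and the
# timeout-based shutdown are assumptions, not the verified implementation.
def consume(Q, my_task_list, my_preprocessor):
    """Dequeues a time chunk, preprocesses it, and executes the analysis tasks."""
    while True:
        try:
            msg = Q.get(timeout=30.0)
        except queue.Empty:
            # No chunk arrived within the timeout: assume the stream has ended
            break
        msg_pre = my_preprocessor.submit(msg)
        my_task_list.execute(msg_pre)
        Q.task_done()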
def main():
    """Receives data chunks and executes analysis tasks on an MPICommExecutor."""
    # Parse command line arguments and read configuration file
    parser = argparse.ArgumentParser(
        description="Receive data and dispatch analysis tasks to an MPI queue")
    parser.add_argument('--config', type=str,
                        help='Lists the configuration file',
                        default='configs/config_null.json')
    parser.add_argument('--benchmark', action="store_true")
    args = parser.parse_args()

    global cfg
    with open(args.config, "r") as df:
        cfg = json.load(df)

    # Load logger configuration from file:
    # http://zetcode.com/python/logging/
    with open("configs/logger.yaml", "r") as f:
        log_cfg = yaml.safe_load(f.read())
    logging.config.dictConfig(log_cfg)

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    adios2_varname = channel_range.from_str(
        cfg["transport"]["channel_range"][0])

    # MPICommExecutor yields a working executor only on rank 0;
    # all other ranks get None and serve as workers.
    with MPICommExecutor(MPI.COMM_WORLD) as executor:
        if executor is not None:
            logger = logging.getLogger('simple')

            # Generate a random run_id; it is overridden with a fixed id for debugging
            cfg["run_id"] = ''.join(
                random.choice(string.ascii_uppercase + string.digits) for _ in range(6))
            cfg["run_id"] = "ABC125"
            cfg["storage"]["run_id"] = cfg["run_id"]
            logger.info(f"Starting run {cfg['run_id']}")

            # Instantiate a storage backend and store the run configuration and task configuration
            if cfg['storage']['backend'] == "numpy":
                store_backend = backends.backend_numpy(cfg['storage'])
            elif cfg['storage']['backend'] == "mongo":
                store_backend = backends.backend_mongodb(cfg)
            elif cfg['storage']['backend'] == "null":
                store_backend = backends.backend_null(cfg['storage'])
            else:
                raise NameError(
                    f"Unknown storage backend requested: {cfg['storage']['backend']}")

            logger.info("Storing one")
            store_backend.store_one({"run_id": cfg['run_id'], "run_config": cfg})
            logger.info("Done storing. Continuing:")

            # Create the FFT task
            cfg["fft_params"]["fsample"] = cfg["ECEI_cfg"]["SampleRate"] * 1e3
            my_fft = task_fft_scipy(10_000, cfg["fft_params"],
                                    normalize=True, detrend=True)
            fft_params = my_fft.get_fft_params()

            # Create ADIOS reader object
            reader = reader_gen(cfg["transport"])

            # Create a list of individual spectral tasks
            task_list = []
            # for task_config in cfg["task_list"]:
            #     task_list.append(task_spectral(task_config, fft_params, cfg["ECEI_cfg"]))
            #     store_backend.store_metadata(task_config, task_list[-1].get_dispatch_sequence())

            dq = queue.Queue()

            tic_main = timeit.default_timer()
            workers = []
            for _ in range(16):
                worker = threading.Thread(target=consume,
                                          args=(dq, executor, my_fft, task_list))
                worker.start()
                workers.append(worker)

            # reader.Open() blocks until it opens the data file or receives the
            # data stream.
            # Put this right before entering the main loop.
            logger.info(f"{rank} Waiting for generator")
            reader.Open()
            last_step = 0
            logger.info("Starting main loop")

            rx_list = []
            while True:
                stepStatus = reader.BeginStep()
                logger.info(f"currentStep = {reader.CurrentStep()}")
                if stepStatus:
                    # Read data
                    stream_data = reader.Get(adios2_varname, save=False)
                    rx_list.append(reader.CurrentStep())
                    # Generate a message from the time chunk and publish it
                    msg = AdiosMessage(tstep_idx=reader.CurrentStep(), data=stream_data)
                    dq.put_nowait(msg)
                    logger.info(f"Published message {msg}")
                    reader.EndStep()
                else:
                    logger.info(f"Exiting: StepStatus={stepStatus}")
                    break
                # Early stopping for debug
                if reader.CurrentStep() > 100:
                    logger.info(
                        f"Exiting: CurrentStep={reader.CurrentStep()}, StepStatus={stepStatus}")
                    break
                last_step = reader.CurrentStep()

            dq.join()
            logger.info("Queue joined")
            logger.info("Exiting main loop")

            # Put one sentinel message per worker thread so that all 16 consumers
            # exit, not just the one that happens to receive it first
            for _ in workers:
                dq.put(AdiosMessage(tstep_idx=None, data=None))
            for thr in workers:
                thr.join()
            logger.info("Workers have joined")

            # Shut down the executor
            executor.shutdown(wait=True)

            toc_main = timeit.default_timer()
            logger.info(
                f"Run {cfg['run_id']} finished in {(toc_main - tic_main):6.4f}s")
            logger.info(f"Processed time_chunks {rx_list}")
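# AdiosMessage is used as a plain record throughout these receivers. A minimal
# compatible definition, assuming it is a namedtuple; the attrs field (used by
# the middleman below) defaults to None so that both call signatures work.
from collections import namedtuple

AdiosMessage = namedtuple("AdiosMessage", ["tstep_idx", "data", "attrs"],
                          defaults=(None,))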
def main(): """Reads items from a ADIOS2 connection and forwards them.""" global comm, rank, args comm = MPI.COMM_WORLD rank = comm.Get_rank() parser = argparse.ArgumentParser(description="Receive data and dispatch" + "analysis tasks to a mpi queue") parser.add_argument('--config', type=str, help='Lists the configuration file', default='configs/config-middle.json') parser.add_argument( "--transport_rx", help= "Specifies the name of the transport section that is used to configure the reader", default="transport_rx") parser.add_argument( "--transport_tx", help= "Specifies the name of the transport section that is used to configure the writer", default="transport_tx") args = parser.parse_args() with open(args.config, "r") as df: cfg = json.load(df) timeout = 5 # The middleman uses both a reader and a writer. Each is configured with using # their respective section of the config file. Therefore some keys are duplicated, # such as channel_range. Make sure that these items are the same in both sections with open("configs/logger.yaml", "r") as f: log_cfg = yaml.safe_load(f.read()) logging.config.dictConfig(log_cfg) logger = logging.getLogger('middleman') # Create ADIOS reader object reader = reader_gen(cfg[args.transport_rx], gen_channel_name(cfg["diagnostic"])) reader.Open() stream_attrs = None stream_varname = gen_var_name(cfg)[rank] logger.info(f"Main: Stream varname: {stream_varname}") dq = queue.Queue() msg = None worker = threading.Thread(target=forward, args=(dq, cfg, args, timeout)) worker.start() rx_list = [] stream_data = None while True: stepStatus = reader.BeginStep() logger.info( f"Main: stepStatus = {stepStatus}, currentStep = {reader.CurrentStep()}" ) if stepStatus: # Read data stream_data = reader.Get(stream_varname, save=False) if stream_attrs is None: if reader.InquireAttribute("stream_attrs"): stream_attrs = reader.get_attrs("stream_attrs") stream_data = reader.Get(stream_varname, save=False) rx_list.append(reader.CurrentStep()) # Generate message id and publish is msg = AdiosMessage(tstep_idx=reader.CurrentStep(), data=stream_data, attrs=stream_attrs) dq.put_nowait(msg) logger.info(f"Main: Published message {msg}") reader.EndStep() else: logger.info(f"Main: Exiting: StepStatus={stepStatus}") break # last_step = reader.CurrentStep() logger.info("Main: Exiting main loop") worker.join() logger.info("Main: Workers have joined") dq.join() logger.info("Main: Queue joined") logger.info("Main: Finished")
{ "IPAddress": "128.55.205.18", "Timeout": "120", "Port": "50001", "TransportMode": "reliable" } } # KSTAR DTN ip: 203.230.120.125 channel_name = gen_channel_name(2408, rank) if rank == 0: logging.info("==================I am test_reader_multichannel===================") logging.info(f"rank {rank:d} / size {size:d}. Channel_name = {channel_name}") r = reader_gen(cfg_transport, channel_name) r.Open() stats = stream_stats() tstep = 0 while True: stepStatus = r.BeginStep(timeoutSeconds=5.0) if stepStatus: tic = time.time() stream_data = r.Get("dummy", save=False) tstep_data = r.Get("tstep", save=False) logging.info(f"rank {rank:d}, tstep = {tstep_data[0,0]}, mean = {stream_data.mean()}") r.EndStep() toc = time.time() rx_bytes = stream_data.size * 8 stats.add_transfer(rx_bytes, toc-tic, tstep_data[0, 0])