Example #1
def forward(Q, cfg, args, timeout):
    """To be executed by a local thread. Pops items from the queue and forwards them."""
    global comm, rank
    logger = logging.getLogger("middleman")
    logger.info(
        f"Worker: Creating writer_gen: engine={cfg[args.transport_tx]['engine']}"
    )

    # suffix = ""  # if not args.debug else '-MM'
    ch_name = gen_channel_name(cfg["diagnostic"])
    writer = writer_gen(cfg[args.transport_tx], ch_name)
    logger.info(f"Worker: Streaming channel name = {ch_name}")

    tx_list = []
    is_first = True
    while True:
        # msg = None
        try:
            msg = Q.get(timeout=timeout)
            logger.info(
                f"Worker: Receiving from Queue: {msg} - {msg.data.shape}, {msg.data.dtype}"
            )
            if is_first:
                writer.DefineVariable(
                    gen_var_name(cfg)[rank], msg.data.shape, msg.data.dtype)
                #if msg.attrs is not None:
                writer.DefineAttributes("stream_attrs", msg.attrs)
                logger.info(
                    f"Worker: Defining stream_attrs for forwarded stream: {msg.attrs}"
                )
                writer.Open()
                logger.info("Worker: Starting forwarding process")
                is_first = False
        except queue.Empty:
            logger.info(
                "Worker: Empty queue after waiting until time-out. Exiting")
            break

        logger.info(
            f"Worker Forwarding chunk {msg.tstep_idx}. Data = {msg.data.shape}"
        )
        writer.BeginStep()
        writer.put_data(msg)
        writer.EndStep()
        logger.info(f"Worker: Done writing chunk {msg.tstep_idx}.")
        tx_list.append(msg.tstep_idx)

        Q.task_done()
        logger.info(f"Consumed tidx={msg.tstep_idx}")

    writer.Close()
    logger.info(
        f"Worker: Exiting send loop. Transmitted {len(tx_list)} time chunks: {tx_list}"
    )
    logger.info(writer.transfer_stats())
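
The forward() worker above relies on the standard queue.Queue contract: the producer put()s items and finally join()s the queue, while the worker get()s with a timeout and acknowledges every item with task_done(). A minimal, self-contained sketch of that contract (the worker body is a stand-in for forward(), not repository code):

import queue
import threading


def toy_forward(q, timeout=2.0):
    """Stand-in for forward(): drain the queue until it stays empty for `timeout` seconds."""
    while True:
        try:
            item = q.get(timeout=timeout)
        except queue.Empty:
            break
        print(f"forwarding item {item}")
        q.task_done()   # lets q.join() in the producer return


q = queue.Queue()
worker = threading.Thread(target=toy_forward, args=(q,))
worker.start()

for tidx in range(3):
    q.put_nowait(tidx)

q.join()       # blocks until task_done() has been called for every item
worker.join()  # toy_forward() exits once the queue has been empty for `timeout` seconds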
Example #2
def main():
    """Procesess a stream of data chunks on an executor."""
    comm = MPI.COMM_WORLD

    # Parse command line arguments and read configuration file
    parser = argparse.ArgumentParser(
        description="Receive data and dispatch analysis tasks to an MPI queue")
    parser.add_argument('--config',
                        type=str,
                        help='Path to the configuration file',
                        default='configs/config_null.json')
    parser.add_argument(
        "--num_ranks_preprocess",
        type=int,
        help="Number of processes used in preprocessing executor",
        default=4)
    parser.add_argument("--num_ranks_analysis",
                        type=int,
                        help="Number of processes used in analysis executor",
                        default=4)
    parser.add_argument(
        "--num_queue_threads",
        type=int,
        help="Number of worker threads that consume item from the queue",
        default=4)
    parser.add_argument(
        "--transport",
        type=str,
        help="Specifies the transport section used to configure the reader",
        default="transport_rx")
    parser.add_argument(
        "--run_id",
        type=str,
        help="Name of database collection to store analysis results in",
        required=True)

    args = parser.parse_args()

    with open(args.config, "r") as df:
        cfg = json.load(df)

    # Load logger configuration from file:
    # http://zetcode.com/python/logging/
    with open("configs/logger.yaml", "r") as f:
        log_cfg = yaml.safe_load(f.read())
    logging.config.dictConfig(log_cfg)
    logger = logging.getLogger('simple')

    # PoolExecutor for pre-processing, on-node.
    executor_pre = ThreadPoolExecutor(max_workers=args.num_ranks_preprocess)
    # PoolExecutor for data analysis, off-node.
    executor_anl = MPIPoolExecutor(max_workers=args.num_ranks_analysis)

    stream_varname = gen_var_name(cfg)[0]

    cfg["run_id"] = args.run_id
    cfg["storage"]["run_id"] = cfg["run_id"]
    logger.info(f"Starting run {cfg['run_id']}")

    # Instantiate a storage backend and store the run configuration and task configuration
    store_type = get_storage_object(cfg["storage"])
    store_backend = store_type(cfg["storage"])
    store_backend.store_one({"run_id": cfg['run_id'], "run_config": cfg})

    # TODO: (RMC)  Should this be moved to where cfg updated?
    # (would allow updating channels to process remotely)
    reader = reader_gen(cfg[args.transport],
                        gen_channel_name(cfg["diagnostic"]))
    reader.Open()

    dq = queue.Queue()

    # In a streaming setting, (SST, dataman) attributes can only be accessed after
    # reading the first time step of a variable.
    # Initialize stream_attrs with None and load it in the main loop below.
    stream_attrs = None

    data_model_gen = data_model_generator(cfg["diagnostic"])
    my_preprocessor = preprocessor(executor_pre, cfg)
    my_task_list = tasklist(executor_anl, cfg)

    worker_thread_list = []
    for _ in range(args.num_queue_threads):
        new_worker = threading.Thread(target=consume,
                                      args=(dq, my_task_list, my_preprocessor))
        new_worker.start()
        worker_thread_list.append(new_worker)

    logger.info("Starting main loop")
    tic_main = time.perf_counter()
    rx_list = []
    while True:
        stepStatus = reader.BeginStep(timeoutSeconds=5.0)
        if stepStatus:
            # Load attributes
            if stream_attrs is None:
                logger.info("Waiting for attributes")
                stream_attrs = reader.get_attrs("stream_attrs")
                logger.info(f"Got attributes: {stream_attrs}")
            # Read data
            stream_data = reader.Get(stream_varname, save=False)
            # if reader.CurrentStep() in [0, 140]:
            rx_list.append(reader.CurrentStep())

            # Create a datamodel instance from the raw data and push into the queue
            msg = data_model_gen.new_chunk(stream_data, stream_attrs,
                                           reader.CurrentStep())
            dq.put_nowait(msg)
            logger.info(f"Published tidx {reader.CurrentStep()}")
            reader.EndStep()
        else:
            logger.info(f"Exiting: StepStatus={stepStatus}")
            break

        if reader.CurrentStep() > 100:
            break

    dq.join()
    logger.info("Queue joined")

    logger.info("Exiting main loop")
    for thr in worker_thread_list:
        thr.join()

    logger.info("Workers have joined")

    # Shut down the executors
    executor_anl.shutdown(wait=True)
    executor_pre.shutdown(wait=True)
    # executor.shutdown(wait=True)

    toc_main = time.perf_counter()
    logger.info(
        f"Run {cfg['run_id']} finished in {(toc_main - tic_main):6.4f}s")
    logger.info(f"Processed {len(rx_list)} time_chunks: {rx_list}")
Example #3
def main():
    """Reads items from a ADIOS2 connection and forwards them."""
    global comm, rank, args
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    parser = argparse.ArgumentParser(description="Receive data and dispatch" +
                                     "analysis tasks to a mpi queue")
    parser.add_argument('--config',
                        type=str,
                        help='Path to the configuration file',
                        default='configs/config-middle.json')
    parser.add_argument(
        "--transport_rx",
        help="Name of the transport section used to configure the reader",
        default="transport_rx")
    parser.add_argument(
        "--transport_tx",
        help="Name of the transport section used to configure the writer",
        default="transport_tx")
    args = parser.parse_args()

    with open(args.config, "r") as df:
        cfg = json.load(df)
    timeout = 5

    # The middleman uses both a reader and a writer. Each is configured using its
    # respective section of the config file, so some keys, such as channel_range,
    # are duplicated. Make sure that these items are the same in both sections.
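    # For illustration, the two sections might look like this (key names other
    # than "engine" are assumptions, and "dataman" is just one possible engine):
    #   "transport_rx": {"engine": "dataman", "channel_range": [...], ...}
    #   "transport_tx": {"engine": "dataman", "channel_range": [...], ...}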

    with open("configs/logger.yaml", "r") as f:
        log_cfg = yaml.safe_load(f.read())
    logging.config.dictConfig(log_cfg)
    logger = logging.getLogger('middleman')

    # Create ADIOS reader object
    reader = reader_gen(cfg[args.transport_rx],
                        gen_channel_name(cfg["diagnostic"]))
    reader.Open()
    stream_attrs = None
    stream_varname = gen_var_name(cfg)[rank]
    logger.info(f"Main: Stream varname: {stream_varname}")

    dq = queue.Queue()
    msg = None
    worker = threading.Thread(target=forward, args=(dq, cfg, args, timeout))
    worker.start()

    rx_list = []
    stream_data = None
    while True:
        stepStatus = reader.BeginStep()
        logger.info(
            f"Main: stepStatus = {stepStatus}, currentStep = {reader.CurrentStep()}"
        )
        if stepStatus:
            # Load the stream attributes on the first step, then read the data
            if stream_attrs is None:
                if reader.InquireAttribute("stream_attrs"):
                    stream_attrs = reader.get_attrs("stream_attrs")
            stream_data = reader.Get(stream_varname, save=False)
            rx_list.append(reader.CurrentStep())

            # Generate a message and publish it
            msg = AdiosMessage(tstep_idx=reader.CurrentStep(),
                               data=stream_data,
                               attrs=stream_attrs)
            dq.put_nowait(msg)
            logger.info(f"Main: Published message {msg}")
            reader.EndStep()
        else:
            logger.info(f"Main: Exiting: StepStatus={stepStatus}")
            break

        # last_step = reader.CurrentStep()

    logger.info("Main: Exiting main loop")
    worker.join()
    logger.info("Main: Workers have joined")
    dq.join()
    logger.info("Main: Queue joined")
    logger.info("Main: Finished")
Example #4
with open(args.config, "r") as df:
    cfg = json.load(df)

# Set up the logger
with open('configs/logger.yaml', 'r') as f:
    log_cfg = yaml.safe_load(f.read())
logging.config.dictConfig(log_cfg)
logger = logging.getLogger("generator")
logger.info("Starting up...")

# Instantiate a dataloader
dataloader = get_loader(cfg)
configname = "transport_tx" if not args.kstar else "transport_rx"
logger.info(f"Creating writer_gen: engine={cfg[configname]['engine']}")

writer = writer_gen(cfg[configname], gen_channel_name(cfg["diagnostic"]))
logger.info(f"Streaming channel name = {gen_channel_name(cfg['diagnostic'])}")
# Give the writer hints on what kind of data to transfer

writer.DefineVariable(
    gen_var_name(cfg)[rank], dataloader.get_chunk_shape(), dataloader.dtype)
# TODO: Clean up naming conventions for stream attributes
logger.info(f"Writing attributes: {dataloader.attrs}")

writer.Open()
writer.DefineAttributes("stream_attrs", dataloader.attrs)

logger.info("Start sending on channel:")
batch_gen = dataloader.batch_generator()
for nstep, chunk in enumerate(batch_gen):
    # TODO: Do we want to place filtering in the generator? This would allow us to