Ejemplo n.º 1
0
class SmdReaderManager(object):
    """Manage reading and batching of smalldata (smd) through an ``SmdReader``.

    The manager wraps one ``SmdReader`` built over ``smd_fds`` and offers
    three ways to consume it:

    * ``get_next_dgrams()`` -- one dgram per stream file (used to retrieve
      Configure and BeginRun),
    * iteration (``__next__``) -- one ``BatchIterator`` per batch of events,
    * ``chunks()`` -- a generator that advances the reader's view one batch
      at a time.

    Environment variables read at construction time:

    * ``PS_SMD_N_EVENTS`` (default 1000): number of events per view batch.
    * ``PS_SMD_CHUNKSIZE`` (default 0x1000000): buffer size handed to
      ``SmdReader`` for each stream file.
    """

    def __init__(self, smd_fds, dsparms, configs=None):
        """Create the underlying ``SmdReader`` and reset the event counters.

        Args:
            smd_fds: Sized collection handed to ``SmdReader``; must not be
                empty (presumably open smd file descriptors -- confirm with
                the caller).
            dsparms: Parameter object. This class reads ``max_events``,
                ``max_retries``, ``prom_man``, ``smd_inprogress_converted``,
                ``intg_stream_id``, ``batch_size``, ``filter``,
                ``destination`` and ``timestamps`` from it.
            configs: Optional list of already known config dgrams, one per
                stream file. When ``None`` the first call to
                ``get_next_dgrams()`` fills it in.

        Raises:
            AssertionError: If ``smd_fds`` is empty.
        """
        self.n_files = len(smd_fds)
        self.dsparms = dsparms
        self.configs = configs
        assert self.n_files > 0

        # Sets no. of events Smd0 sends to each EventBuilder core. This gets
        # overridden by max_events set by DataSource if max_events is smaller.
        self.smd0_n_events = int(os.environ.get('PS_SMD_N_EVENTS', 1000))
        max_events = self.dsparms.max_events
        if max_events and max_events < self.smd0_n_events:
            self.smd0_n_events = max_events

        # Sets the memory size for smalldata buffer for each stream file.
        self.chunksize = int(os.environ.get('PS_SMD_CHUNKSIZE', 0x1000000))

        self.smdr = SmdReader(smd_fds, self.chunksize,
                              self.dsparms.max_retries)
        self.processed_events = 0
        # -1 marks "no batch has been viewed yet".
        self.got_events = -1
        self._run = None

        # Collecting Smd0 performance using prometheus
        self.c_read = self.dsparms.prom_man.get_metric('psana_smd0_read')

    def _get(self):
        """Read more data into the SmdReader and record read statistics.

        The amount read (MB) and the elapsed time (seconds) are added to the
        ``psana_smd0_read`` metric.

        Raises:
            ValueError: If the reader reports (``chunk_overflown > 0``) a
                dgram larger than the configured chunk size.
        """
        st = time.monotonic()
        self.smdr.get(self.dsparms.smd_inprogress_converted)
        en = time.monotonic()
        elapsed = en - st

        # The f-string below is evaluated even when debug logging is off, so
        # a zero elapsed time (coarse clock, very fast read) must not be
        # allowed to raise ZeroDivisionError here.
        if elapsed > 0:
            rate = self.smdr.got / (1e6 * elapsed)
        else:
            rate = float('nan')
        logger.debug(
            f'read {self.smdr.got/1e6:.3f} MB took {elapsed}s. rate: {rate} MB/s'
        )
        self.c_read.labels('MB', 'None').inc(self.smdr.got / 1e6)
        self.c_read.labels('seconds', 'None').inc(elapsed)

        if self.smdr.chunk_overflown > 0:
            msg = f"SmdReader found dgram ({self.smdr.chunk_overflown} MB) larger than chunksize ({self.chunksize/1e6} MB)"
            raise ValueError(msg)

    def get_next_dgrams(self):
        """ Returns list of dgrams as appeared in the current offset of the smd chunks.

        Currently used to retrieve Configure and BeginRun. This allows read with wait
        for these two types of dgram.

        Returns:
            A list with one ``dgram.Dgram`` per stream file, or ``None`` when
            ``max_events`` has been reached or the reader is still not
            complete after one read attempt.
        """
        # max_events may be None/0 (meaning "no limit"); test truthiness
        # first so that a None value is never compared with an integer.
        max_events = self.dsparms.max_events
        if max_events and 0 < max_events <= self.processed_events:
            logger.debug(f'max_events={self.dsparms.max_events} reached')
            return None

        dgrams = None
        if not self.smdr.is_complete():
            self._get()

        if self.smdr.is_complete():
            # Get chunks with only one dgram each. There's no need to set
            # integrating stream id here since Configure and BeginRun
            # must exist in this stream too.
            self.smdr.view(batch_size=1)

            # For configs, we need to copy data from smdreader's buffers
            # This prevents it from getting overwritten by other dgrams.
            bytearray_bufs = [
                bytearray(self.smdr.show(i)) for i in range(self.n_files)
            ]

            if self.configs is None:
                # First call: these dgrams are the configs themselves.
                dgrams = [
                    dgram.Dgram(view=ba_buf, offset=0)
                    for ba_buf in bytearray_bufs
                ]
                self.configs = dgrams
                self.smdr.set_configs(self.configs)
            else:
                dgrams = [
                    dgram.Dgram(view=ba_buf, config=config, offset=0)
                    for ba_buf, config in zip(bytearray_bufs, self.configs)
                ]
        return dgrams

    def __iter__(self):
        """Return ``self``; the manager is its own iterator."""
        return self

    def __next__(self):
        """
        Returns a batch of events as an iterator object.
        This is used by non-parallel run. Parallel run uses chunks
        generator that yields chunks of raw smd data and steps (no
        event building).

        The iterator stops reading under two conditions. Either there's
        no more data or max_events reached.
        """
        intg_stream_id = self.dsparms.intg_stream_id

        if self.dsparms.max_events and self.processed_events >= self.dsparms.max_events:
            raise StopIteration

        if not self.smdr.is_complete():
            self._get()
            # Still incomplete after a read attempt: treat as end of data.
            if not self.smdr.is_complete():
                raise StopIteration

        self.smdr.view(batch_size=self.smd0_n_events,
                       intg_stream_id=intg_stream_id)
        mmrv_bufs = [self.smdr.show(i) for i in range(self.n_files)]
        batch_iter = BatchIterator(mmrv_bufs,
                                   self.configs,
                                   self._run,
                                   batch_size=self.dsparms.batch_size,
                                   filter_fn=self.dsparms.filter,
                                   destination=self.dsparms.destination,
                                   timestamps=self.dsparms.timestamps,
                                   intg_stream_id=intg_stream_id)
        self.got_events = self.smdr.view_size
        self.processed_events += self.got_events
        return batch_iter

    def chunks(self):
        """Advance the reader batch by batch, yielding a running chunk count.

        Each time a non-empty batch has been selected with
        ``self.smdr.view(...)`` the generator yields the number of non-empty
        batches produced so far (1, 2, 3, ...). The batch data itself is not
        part of the yielded value; presumably the consumer fetches it from
        ``self.smdr`` -- confirm against the caller.

        Generation ends when ``max_events`` is reached or when the reader is
        still not complete after a read attempt.
        """
        cn_chunks = 0
        while True:
            logger.debug(f'SMD0 1. STARTCHUNK {time.monotonic()}')

            if not self.smdr.is_complete():
                # Nothing buffered to view: read more, stop if still nothing.
                self._get()
                logger.debug(f'SMD0 3. DONEREAD {time.monotonic()}')
                if not self.smdr.is_complete():
                    break
                continue

            # Gets the next batch of already read-in data.
            self.smdr.view(batch_size=self.smd0_n_events,
                           intg_stream_id=self.dsparms.intg_stream_id)
            self.got_events = self.smdr.view_size
            self.processed_events += self.got_events

            logger.debug('got %d events', self.got_events)

            is_done = bool(self.dsparms.max_events
                           and self.processed_events >= self.dsparms.max_events)
            if is_done:
                logger.debug(
                    f'max_events={self.dsparms.max_events} reached')

            logger.debug(f'SMD0 2. DONECREATEVIEW {time.monotonic()}')

            if self.got_events:
                cn_chunks += 1
                yield cn_chunks

            # The last batch is still yielded above before stopping.
            if is_done:
                break

    @property
    def min_ts(self):
        """``min_ts`` as reported by the underlying SmdReader."""
        return self.smdr.min_ts

    @property
    def max_ts(self):
        """``max_ts`` as reported by the underlying SmdReader."""
        return self.smdr.max_ts

    def set_run(self, run):
        """Store the run object that is passed on to ``BatchIterator``."""
        self._run = run

    def get_run(self):
        """Return the run object stored with ``set_run`` (``None`` if unset)."""
        return self._run
Ejemplo n.º 2
0
class SmdReaderManager(object):
    """Manage reading and batching of smalldata (smd) through an ``SmdReader``.

    The manager wraps one ``SmdReader`` built over ``smd_fds`` and offers
    three ways to consume it:

    * ``get_next_dgrams()`` -- one dgram per stream file,
    * iteration (``__next__``) -- one ``BatchIterator`` per batch of events,
    * ``chunks()`` -- a generator of packed ``(smd_view, step_view)``
      bytearrays.

    Environment variables read at construction time:

    * ``PS_SMD_N_EVENTS`` (default 1000): number of events per view batch.
    * ``PS_SMD_CHUNKSIZE`` (default 0x1000000): size handed to ``SmdReader``.
    """

    def __init__(self, smd_fds, dsparms, configs=None):
        """Create the underlying ``SmdReader`` and reset the event counters.

        Args:
            smd_fds: Sized collection handed to ``SmdReader``; must not be
                empty (presumably open smd file descriptors -- confirm with
                the caller).
            dsparms: Parameter object. This class reads ``max_events``,
                ``max_retries``, ``prom_man``, ``batch_size``, ``filter``
                and ``destination`` from it.
            configs: Optional list of already known config dgrams, one per
                stream file. When ``None`` the first call to
                ``get_next_dgrams()`` fills it in.

        Raises:
            AssertionError: If ``smd_fds`` is empty.
        """
        self.n_files = len(smd_fds)
        self.dsparms = dsparms
        self.configs = configs
        assert self.n_files > 0

        # Batch size per view; capped by max_events when that is smaller.
        self.smd0_n_events = int(os.environ.get('PS_SMD_N_EVENTS', 1000))
        max_events = self.dsparms.max_events
        if max_events and max_events < self.smd0_n_events:
            self.smd0_n_events = max_events

        self.chunksize = int(os.environ.get('PS_SMD_CHUNKSIZE', 0x1000000))
        self.smdr = SmdReader(smd_fds, self.chunksize,
                              self.dsparms.max_retries)
        self.processed_events = 0
        # -1 marks "no batch has been viewed yet".
        self.got_events = -1
        self._run = None

        # Collecting Smd0 performance using prometheus
        self.c_read = self.dsparms.prom_man.get_metric('psana_smd0_read')

    def _get(self):
        """Read more data into the SmdReader and record read statistics.

        The amount read (MB) and the elapsed time (seconds) are added to the
        ``psana_smd0_read`` metric.

        Raises:
            ValueError: If the reader reports (``chunk_overflown > 0``) a
                dgram larger than the configured chunk size.
        """
        # A monotonic clock is used for the duration: the wall clock can be
        # stepped backwards, which would give a negative elapsed time (and a
        # negative increment, which a prometheus Counter rejects -- assuming
        # c_read is one).
        st = time.monotonic()
        self.smdr.get()
        en = time.monotonic()
        elapsed = en - st

        # The f-string below is evaluated even when debug logging is off, so
        # a zero elapsed time must not raise ZeroDivisionError here.
        if elapsed > 0:
            rate = self.smdr.got / (1e6 * elapsed)
        else:
            rate = float('nan')
        logger.debug(
            f'smdreader_manager: read {self.smdr.got/1e6:.5f} MB took {elapsed}s. rate: {rate} MB/s'
        )
        self.c_read.labels('MB', 'None').inc(self.smdr.got / 1e6)
        self.c_read.labels('seconds', 'None').inc(elapsed)

        if self.smdr.chunk_overflown > 0:
            msg = f"SmdReader found dgram ({self.smdr.chunk_overflown} MB) larger than chunksize ({self.chunksize/1e6} MB)"
            raise ValueError(msg)

    def get_next_dgrams(self):
        """Return the next dgram of every stream file.

        Returns:
            A list with one ``dgram.Dgram`` per buffer returned by the
            reader, or ``None`` when ``max_events`` has been reached or the
            reader is still not complete after one read attempt. On the
            first successful call with no configs known, the returned
            dgrams are also stored as ``self.configs``.
        """
        # max_events may be None/0 (meaning "no limit"); test truthiness
        # first so that a None value is never compared with an integer.
        max_events = self.dsparms.max_events
        if max_events and 0 < max_events <= self.processed_events:
            logger.debug(
                f'smdreader_manager: get_next_dgrams max_events={self.dsparms.max_events} reached'
            )
            return None

        dgrams = None
        if not self.smdr.is_complete():
            self._get()

        if self.smdr.is_complete():
            mmrv_bufs, _ = self.smdr.view(batch_size=1)

            # For configs, we need to copy data from smdreader's buffers
            # This prevents it from getting overwritten by other dgrams.
            bytearray_bufs = [bytearray(mmrv_buf) for mmrv_buf in mmrv_bufs]

            if self.configs is None:
                # First call: these dgrams are the configs themselves.
                dgrams = [
                    dgram.Dgram(view=ba_buf, offset=0)
                    for ba_buf in bytearray_bufs
                ]
                self.configs = dgrams
            else:
                dgrams = [
                    dgram.Dgram(view=ba_buf, config=config, offset=0)
                    for ba_buf, config in zip(bytearray_bufs, self.configs)
                ]
        return dgrams

    def __iter__(self):
        """Return ``self``; the manager is its own iterator."""
        return self

    def __next__(self):
        """
        Returns a batch of events as an iterator object.
        This is used by non-parallel run. Parallel run uses chunks
        generator that yields chunks of raw smd data and steps (no
        event building).

        The iterator stops reading under two conditions. Either there's
        no more data or max_events reached.
        """
        if self.dsparms.max_events and self.processed_events >= self.dsparms.max_events:
            raise StopIteration

        if not self.smdr.is_complete():
            self._get()
            # Still incomplete after a read attempt: treat as end of data.
            if not self.smdr.is_complete():
                raise StopIteration

        mmrv_bufs, _ = self.smdr.view(batch_size=self.smd0_n_events)
        batch_iter = BatchIterator(mmrv_bufs,
                                   self.configs,
                                   self._run,
                                   batch_size=self.dsparms.batch_size,
                                   filter_fn=self.dsparms.filter,
                                   destination=self.dsparms.destination)
        self.got_events = self.smdr.view_size
        self.processed_events += self.got_events

        return batch_iter

    def chunks(self):
        """Generate ``(smd_view, step_view)`` tuples of packed raw data.

        For every batch, the per-file smd buffers are concatenated into one
        bytearray and the per-file step buffers into another. Each non-empty
        bytearray gets a ``PacketFooter`` appended that records the size
        contributed by each file. Batches where both are empty are skipped.

        Generation ends when ``max_events`` is reached or when the reader is
        still not complete after a read attempt.
        """
        while True:
            if not self.smdr.is_complete():
                # Nothing buffered to view: read more, stop if still nothing.
                self._get()
                if not self.smdr.is_complete():
                    break
                continue

            mmrv_bufs, mmrv_step_bufs = self.smdr.view(
                batch_size=self.smd0_n_events)
            self.got_events = self.smdr.view_size
            self.processed_events += self.got_events

            logger.debug('smdreader_manager: smd0 got %d events',
                         self.got_events)

            is_done = bool(self.dsparms.max_events
                           and self.processed_events >= self.dsparms.max_events)
            if is_done:
                logger.debug(
                    f'smdreader_manager: max_events={self.dsparms.max_events} reached'
                )

            smd_view = bytearray()
            smd_pf = PacketFooter(n_packets=self.n_files)
            step_view = bytearray()
            step_pf = PacketFooter(n_packets=self.n_files)

            for i, (mmrv_buf, mmrv_step_buf) in enumerate(
                    zip(mmrv_bufs, mmrv_step_bufs)):
                # view() appears to use 0 as the placeholder for a file with
                # no data in this batch -- TODO confirm against SmdReader.
                if mmrv_buf != 0:
                    smd_view.extend(mmrv_buf)
                    smd_pf.set_size(i, memoryview(mmrv_buf).nbytes)

                if mmrv_step_buf != 0:
                    step_view.extend(mmrv_step_buf)
                    step_pf.set_size(i, memoryview(mmrv_step_buf).nbytes)

            if smd_view or step_view:
                if smd_view:
                    smd_view.extend(smd_pf.footer)
                if step_view:
                    step_view.extend(step_pf.footer)
                yield (smd_view, step_view)

            # The last batch is still yielded above before stopping.
            if is_done:
                break

    @property
    def min_ts(self):
        """``min_ts`` as reported by the underlying SmdReader."""
        return self.smdr.min_ts

    @property
    def max_ts(self):
        """``max_ts`` as reported by the underlying SmdReader."""
        return self.smdr.max_ts

    def set_run(self, run):
        """Store the run object that is passed on to ``BatchIterator``."""
        self._run = run

    def get_run(self):
        """Return the run object stored with ``set_run`` (``None`` if unset)."""
        return self._run
Ejemplo n.º 3
0
class SmdReaderManager(object):
    """Manage reading and batching of smalldata (smd) through an ``SmdReader``.

    The manager wraps one ``SmdReader`` built over ``run.smd_fds`` and
    offers three ways to consume it:

    * ``get_next_dgrams()`` -- one dgram per stream file,
    * iteration (``__next__``) -- one ``BatchIterator`` per batch of events,
    * ``chunks()`` -- a generator of packed ``(smd_view, step_view)``
      bytearrays.

    Environment variables read at construction time:

    * ``PS_SMD_N_EVENTS`` (default 1000): number of events per view batch.
    * ``PS_SMD_CHUNKSIZE`` (default 0x100000): size handed to ``SmdReader``.
    """

    def __init__(self, run):
        """Create the underlying ``SmdReader`` and reset the event counters.

        Args:
            run: Run object. This class reads ``smd_fds``, ``max_events``,
                ``prom_man``, ``batch_size``, ``filter_callback`` and
                ``destination`` from it.

        Raises:
            AssertionError: If ``run.smd_fds`` is empty.
        """
        self.n_files = len(run.smd_fds)
        assert self.n_files > 0
        self.run = run

        # Batch size per view; capped by max_events when that is smaller.
        self.batch_size = int(os.environ.get('PS_SMD_N_EVENTS', 1000))
        max_events = self.run.max_events
        if max_events and max_events < self.batch_size:
            self.batch_size = max_events

        self.chunksize = int(os.environ.get('PS_SMD_CHUNKSIZE', 0x100000))
        self.smdr = SmdReader(run.smd_fds, self.chunksize)
        self.processed_events = 0
        # -1 marks "no batch has been viewed yet".
        self.got_events = -1

        # Collecting Smd0 performance using prometheus. The attribute is
        # always defined so that it exists even when prometheus is disabled.
        self.c_read = None
        if self.run.prom_man:
            self.c_read = self.run.prom_man.get_counter('psana_smd0_read')

    def get_next_dgrams(self, configs=None):
        """Return the next dgram of every stream file.

        Args:
            configs: Optional list of config dgrams, one per stream file,
                used to build the new dgrams. When ``None`` the dgrams are
                created without a config.

        Returns:
            A list with one ``dgram.Dgram`` per buffer returned by the
            reader, or ``None`` when the reader is still not complete after
            one read attempt.
        """
        dgrams = None
        if not self.smdr.is_complete():
            self.smdr.get()

        if self.smdr.is_complete():
            mmrv_bufs, _ = self.smdr.view(batch_size=1)

            # For configs, we need to copy data from smdreader's buffers
            # This prevents it from getting overwritten by other dgrams.
            bytearray_bufs = [bytearray(mmrv_buf) for mmrv_buf in mmrv_bufs]

            if configs is None:
                dgrams = [
                    dgram.Dgram(view=ba_buf, offset=0)
                    for ba_buf in bytearray_bufs
                ]
            else:
                dgrams = [
                    dgram.Dgram(view=ba_buf, config=config, offset=0)
                    for ba_buf, config in zip(bytearray_bufs, configs)
                ]
        return dgrams

    def __iter__(self):
        """Return ``self``; the manager is its own iterator."""
        return self

    def __next__(self):
        """
        Returns a batch of events as an iterator object.
        This is used by non-parallel run. Parallel run uses chunks
        generator that yields chunks of raw smd data and steps (no
        event building).

        The iterator stops reading under two conditions. Either there's
        no more data or max_events reached.
        """
        if self.run.max_events and self.processed_events >= self.run.max_events:
            raise StopIteration

        if not self.smdr.is_complete():
            self.smdr.get()
            if self.run.prom_man:
                self.c_read.labels('MB', 'None').inc(self.smdr.got/1e6)
            # Still incomplete after a read attempt: treat as end of data.
            if not self.smdr.is_complete():
                raise StopIteration

        mmrv_bufs, _ = self.smdr.view(batch_size=self.batch_size)
        batch_iter = BatchIterator(mmrv_bufs,
                                   batch_size=self.run.batch_size,
                                   filter_fn=self.run.filter_callback,
                                   destination=self.run.destination)
        self.got_events = self.smdr.view_size
        self.processed_events += self.got_events

        # sending data to prometheus
        if self.run.prom_man:
            self.c_read.labels('evts', 'None').inc(self.got_events)
            self.c_read.labels('batches', 'None').inc()

        return batch_iter

    def chunks(self):
        """Generate ``(smd_view, step_view)`` tuples of packed raw data.

        For every batch, the per-file smd buffers are concatenated into one
        bytearray and the per-file step buffers into another. Each non-empty
        bytearray gets a ``PacketFooter`` appended that records the size
        contributed by each file. Batches where both are empty are skipped.

        Generation ends when ``max_events`` is reached or when the reader is
        still not complete after a read attempt.
        """
        while True:
            if not self.smdr.is_complete():
                # Nothing buffered to view: read more, stop if still nothing.
                self.smdr.get()
                if self.run.prom_man:
                    logging.debug('Smd0 read %.2f MB', self.smdr.got/1e6)
                    self.c_read.labels('MB', 'None').inc(self.smdr.got/1e6)
                if not self.smdr.is_complete():
                    break
                continue

            mmrv_bufs, mmrv_step_bufs = self.smdr.view(batch_size=self.batch_size)
            self.got_events = self.smdr.view_size
            self.processed_events += self.got_events

            # sending data to prometheus
            if self.run.prom_man:
                logging.debug('Smd0 got %d events', self.got_events)
                self.c_read.labels('evts', 'None').inc(self.got_events)
                self.c_read.labels('batches', 'None').inc()

            is_done = bool(self.run.max_events
                           and self.processed_events >= self.run.max_events)

            smd_view = bytearray()
            smd_pf = PacketFooter(n_packets=self.n_files)
            step_view = bytearray()
            step_pf = PacketFooter(n_packets=self.n_files)

            for i, (mmrv_buf, mmrv_step_buf) in enumerate(zip(mmrv_bufs, mmrv_step_bufs)):
                # view() appears to use 0 as the placeholder for a file with
                # no data in this batch -- TODO confirm against SmdReader.
                if mmrv_buf != 0:
                    smd_view.extend(mmrv_buf)
                    smd_pf.set_size(i, memoryview(mmrv_buf).nbytes)

                if mmrv_step_buf != 0:
                    step_view.extend(mmrv_step_buf)
                    step_pf.set_size(i, memoryview(mmrv_step_buf).nbytes)

            if smd_view or step_view:
                if smd_view:
                    smd_view.extend(smd_pf.footer)
                if step_view:
                    step_view.extend(step_pf.footer)
                yield (smd_view, step_view)

            # The last batch is still yielded above before stopping.
            if is_done:
                break

    @property
    def min_ts(self):
        """``min_ts`` as reported by the underlying SmdReader."""
        return self.smdr.min_ts

    @property
    def max_ts(self):
        """``max_ts`` as reported by the underlying SmdReader."""
        return self.smdr.max_ts