Example #1
    def process_file(self, filename, flush_after=True):
        """Process the file specified by ``filename`` using a G3IndexedReader.
        Each frame from the file is passed to self.Process, with the
        optional index_info argument set to a dictionary containing
        the filename and byte_offset of the frame.

        Internal data grouping will be somewhat cleaner if the
        multiple files from a single aggregator "session" are passed
        to this function in acquisition order.  In that case, call
        with flush_after=False.

        """
        reader = so3g.G3IndexedReader(filename)
        while True:
            info = {'filename': filename,
                    'byte_offset': reader.Tell()}
            frames = reader.Process(None)
            assert len(frames) <= 1
            if len(frames) == 0:
                break
            self(frames[0], info)
        # Calling flush() here protects us against the odd case where
        # we process files from a single session in non-consecutive
        # order.  In that case the 'start' and 'end' times will get
        # messed up because we can't tell the stream has been
        # re-initialized.
        if flush_after:
            self.flush()
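
The loop above calls Tell() before each Process(None) so that every frame can be tagged with its byte offset. A minimal standalone sketch of the same pattern, assuming only so3g and a hypothetical user-supplied handler:

import so3g

def index_frames(filename, handle_frame):
    """Sketch: pass each frame and its byte offset to a handler callable."""
    reader = so3g.G3IndexedReader(filename)
    while True:
        offset = reader.Tell()            # position of the next frame
        frames = reader.Process(None)     # returns an empty list at end of file
        if not frames:
            break
        handle_frame(frames[0], {'filename': filename, 'byte_offset': offset})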
Example #2
    def load_status(self, time, show_pb=False):
        """
        Returns the status dict at the specified unix timestamp.
        Loads all status frames between the session start frame and the specified time.

        Args:
            time (timestamp): Time at which you want the rogue status

        Returns:
            status (dict): Dictionary of rogue variables at specified time.
        """
        session = self.Session()
        session_start, = session.query(Frame.time).filter(
            Frame.type_name == 'Observation',
            Frame.time <= dt.datetime.fromtimestamp(time)).order_by(
                Frame.time.desc()).first()

        status_frames = session.query(Frame).filter(
            Frame.type_name == 'Wiring', Frame.time >= session_start,
            Frame.time <= dt.datetime.fromtimestamp(time)).order_by(Frame.time)

        status = {}
        cur_file = None
        for frame_info in tqdm(status_frames.all(), disable=(not show_pb)):
            file = frame_info.file.path
            if file != cur_file:
                reader = so3g.G3IndexedReader(file)
                cur_file = file
            reader.Seek(frame_info.offset)
            frame = reader.Process(None)[0]
            status.update(yaml.safe_load(frame['status']))

        return SmurfStatus(status)
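
A brief usage sketch; the `archive` instance name is hypothetical, and only the call signature shown above is assumed:

import time

# `archive` is assumed to be an instance of the class defining load_status.
status = archive.load_status(time.time() - 3600, show_pb=True)
# `status` is a SmurfStatus assembled from every Wiring frame between the
# most recent Observation frame and the requested timestamp.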
Example #3
    def add_file(self, path, session):
        """
        Indexes a single file and adds it to the sqlite database.

        Args
        ----
            path (str): Path of the file to index
            session: Database (SQLAlchemy) session used to add the file
                and its frames
        """

        frame_types = {
            ft.type_name: ft
            for ft in session.query(FrameType).all()
        }

        db_file = Files(path=path)
        session.add(db_file)

        reader = so3g.G3IndexedReader(path)

        total_channels = 0
        file_start, file_stop = None, None
        frame_idx = 0
        while True:
            db_frame = Frame(frame_idx=frame_idx, file=db_file)
            db_frame.offset = reader.Tell()

            frames = reader.Process(None)
            if not frames:
                break
            frame = frames[0]
            frame_idx += 1

            if str(frame.type) not in type_key:
                continue

            db_frame.frame_type = frame_types[str(frame.type)]

            timestamp = frame['time'].time / core.G3Units.s
            db_frame.time = dt.datetime.fromtimestamp(timestamp)

            data = frame.get('data')
            if data is not None:
                db_frame.samples = data.n_samples
                db_frame.channels = len(data)
                db_frame.start = dt.datetime.fromtimestamp(data.start.time /
                                                           core.G3Units.s)
                db_frame.stop = dt.datetime.fromtimestamp(data.stop.time /
                                                          core.G3Units.s)

                if file_start is None:
                    file_start = db_frame.start
                file_stop = db_frame.stop
                total_channels = max(total_channels, db_frame.channels)

            session.add(db_frame)

        db_file.start = file_start
        db_file.stop = file_stop
        db_file.channels = total_channels
        db_file.frames = frame_idx
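
A hypothetical driver for add_file, assuming the class also exposes a Session factory as in the other examples; the file list and commit placement are assumptions:

# `archive` and `new_g3_files` are hypothetical; only add_file's signature
# is taken from the code above.
session = archive.Session()
for path in new_g3_files:
    archive.add_file(path, session)
session.commit()
session.close()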
Example #4
def unpack_frames(filename, field_request, streams):
    """Read frames from the specified file and expand the data by stream.
    Only the requested fields, specified through the ``field_request``
    argument, are expanded.

    Arguments:
      filename (str): Full path to the file to load.
      field_request: Instructions for what fields to load.
      streams: Structure to which to append the
        streams from this file (perhaps obtained from running
        unpack_frames on a preceding file).

    Returns:
      streams (structure containing lists of numpy arrays).

    """
    if streams is None:
        streams = field_request.empty()

    reader = so3g.G3IndexedReader(filename)
    while True:
        frames = reader.Process(None)
        if len(frames) == 0:
            break
        frame = frames[0]
        if frame.type == g3core.G3FrameType.Scan:
            unpack_frame_object(frame, field_request, streams)
    return streams
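
As the docstring notes, the `streams` structure returned by one call can be fed back in for the next file. A sketch of chaining consecutive files, with hypothetical filenames and a `request` object assumed to provide `.empty()`:

streams = None
for fn in ['obs_000.g3', 'obs_001.g3']:   # hypothetical, in acquisition order
    streams = unpack_frames(fn, request, streams)
# `streams` now holds the concatenated per-field data from both files.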
Example #5
def unpack_frames(filename, field_request, streams, samples=None):
    """Read frames from the specified file and expand the data by stream.
    Only the requested fields, specified through the ``field_request``
    argument, are expanded.

    Arguments:
      filename (str): Full path to the file to load.
      field_request: Instructions for what fields to load.
      streams: Structure to which to append the
        streams from this file (perhaps obtained from running
        unpack_frames on a preceding file).
      samples (int, int): Start and end of sample range to unpack
        *from this file*.  First argument must be non-negative.  Second
        argument may be None, indicating to read forever.

    Returns:
      streams (structure containing lists of numpy arrays).

    """
    if streams is None:
        streams = field_request.empty()
    if samples is None:
        offset = 0
        to_read = None
    else:
        offset, to_read = samples
        if to_read is not None:
            to_read -= offset

    reader = so3g.G3IndexedReader(filename)
    while to_read is None or to_read > 0:
        frames = reader.Process(None)
        if len(frames) == 0:
            break
        frame = frames[0]
        if frame.type != g3core.G3FrameType.Scan:
            continue
        _consumed = unpack_frame_object(frame,
                                        field_request,
                                        streams,
                                        offset=offset,
                                        max_count=to_read)
        offset -= _consumed
        if offset < 0:
            if to_read is not None:
                to_read += offset
            offset = 0

    return streams
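
A usage sketch of the `samples` window; per the docstring the range applies to this file only, and the second element may be None to read to the end of the file. The `request` object and filename are hypothetical:

# Unpack only samples [1000, 3000) from a single file.
streams = unpack_frames('obs_000.g3', request, None, samples=(1000, 3000))

# Read from sample 500 to the end of the file.
streams = unpack_frames('obs_000.g3', request, None, samples=(500, None))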
Example #6
    def test_50_compression(self):
        test_file = 'test_g3super.g3'

        # Entropy?
        sigma_bits = 8
        sigma = 2**sigma_bits
        _get_ts = lambda dtype: self._get_ts(
            100, 10000, sigma=sigma, dtype=dtype, seed=12345)

        w = core.G3Writer(test_file)

        sizes = {d: [] for d in ALL_DTYPES}
        for dtype in ALL_DTYPES:
            # No compression
            f = core.G3Frame()
            ts = _get_ts(dtype)
            sizes[dtype].append(ts.data.nbytes)
            ts.options(enable=0)
            f['ts_%s' % dtype] = ts
            w.Process(f)

            # Yes compression
            f = core.G3Frame()
            ts = _get_ts(dtype)
            f['ts_%s' % dtype] = ts
            w.Process(f)
        del w

        # Readback
        r = so3g.G3IndexedReader(test_file)
        last = 0
        for dtype in ALL_DTYPES:
            for i in range(2):
                r.Process(None)[0]
                here = r.Tell()
                sizes[dtype].append(here - last)
                last = here

        # Process the results...
        for dtype in ALL_DTYPES:
            err_msg = f'Failed for dtype={dtype}'
            n, s_uncomp, s_comp = sizes[dtype]
            comp_ratio = 1. - (s_uncomp - s_comp) / n
            # But really what matters is the bits-per-word, compressed.
            bits_per_word = comp_ratio * 8 * np.dtype(dtype).itemsize
            #print(dtype, bits_per_word / sigma_bits)
            # I think the theoretical limit is 1.3 or so...
            self.assertLess(bits_per_word, sigma_bits * 1.4, err_msg)
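
The readback half of the test measures each frame's on-disk size from successive Tell() values. The same idea as a small standalone helper, a sketch assuming only so3g:

import so3g

def frame_sizes(path):
    """Sketch: return (frame_type, bytes_on_disk) for each frame in a file."""
    reader = so3g.G3IndexedReader(path)
    sizes, last = [], 0
    while True:
        frames = reader.Process(None)
        if not frames:
            break
        here = reader.Tell()              # end of the frame just read
        sizes.append((str(frames[0].type), here - last))
        last = here
    return sizes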
Example #7
    def test_seek(self):
        """Test the Seek/Tell functionality of the G3IndexedReader. We read the
        first four frames, recording the position of the only Wiring frame in the file
        with Tell(). Then we Seek to that location and start reading again, expecting
        the first frame after Seek() to be the wiring frame.

        """
        print("Testing Seek/Tell in G3IndexedReader")
        r = so3g.G3IndexedReader(self._file)
        # Limit the number of Process calls; if we hit the end of the file,
        # then Seek won't work...
        for i in range(4):
            pos = r.Tell()
            f = r.Process(None)[0]
            print("  " + str(f.type))
            if f.type == core.G3FrameType.Wiring:
                w_pos = pos
                print('  Saved wiring frame position: {}'.format(w_pos))

        r.Seek(w_pos)

        # Now that we've seeked, our next frame should be Wiring
        assert r.Process(None)[0].type == core.G3FrameType.Wiring
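
The same Seek/Tell idea supports simple random access: record the position of every frame on a first pass, then reopen and jump straight to the one you want. A sketch with a hypothetical file path; the spt3g `core` import matches the usage in the tests above:

import so3g
from spt3g import core

path = 'example.g3'                        # hypothetical
reader = so3g.G3IndexedReader(path)
positions = []
while True:
    pos = reader.Tell()
    frames = reader.Process(None)
    if not frames:
        break
    positions.append((pos, frames[0].type))

wiring_positions = [p for p, t in positions if t == core.G3FrameType.Wiring]
if wiring_positions:
    # Reopen before seeking, since the first reader has already hit end of file.
    reader = so3g.G3IndexedReader(path)
    reader.Seek(wiring_positions[0])
    frame = reader.Process(None)[0]        # the first Wiring frame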
Example #8
    def get_data(self, field=None, start=None, end=None, min_stride=None,
                 raw=False, short_match=False):
        """Load data from specified field(s) between specified times.

        Arguments ``field``, ``start``, ``end``, ``short_match`` are
        as described in _get_groups.

        Arguments:
            min_stride (float): Specifies the minimum sample spacing,
                in seconds.  Ignored in this implementation.
            raw (bool): If true, return G3 blocks instead of numpy
                arrays.

        Returns:
            Pair of dictionaries, (data, timelines).  The ``data``
            dictionary is a simple map from field name to a numpy
            array of readings.  The ``timelines`` dictionary is a map
            from field group name to a dictionary of timeline
            information, which has entries:

            - ``'t'``: numpy array of timestamps
            - ``'fields'``: list of fields belonging to this group.
            - ``'finalized_until'``: in cases where the data are still
              in flux, this field provides a timestamp that may be
              taken as the earliest time that needs to be requeried.
              This is part of the interface in order to support data
              streams that are being updated in real time.

            If user requested raw=True, then return value is a list
            of tuples of the form (group_name, block) where block is a
            single G3TimesampleMap carrying all the data for that
            co-sampled group.

        """
        grouped = self._get_groups(field, start, end, short_match=short_match)
        hk_logger.debug('get_data: _get_groups returns %i groups.' % len(grouped))

        # Pass through the metadata.  Collect information on what
        # field groups are present in what frames of what files; sort
        # that info by file and offset so we make a single monotonic
        # pass through the frames.
        group_info = {
            #  group_name: {'types': [dtype, ...],
            #               'fields': [(full_name, short_name), ...],
            #               'count': n},
            #  ...
        }
        files = {
            # filename: {
            #   offset: [(block_index, group_name, output_offset), ...],
            #   ...
            #   },
            # ...
        }
        for group_name, fields, fgrps in grouped:
            # This is a group of co-sampled fields.  The fields share
            # a sample count and a frame-index map.
            all_frame_refs = []
            for fg in fgrps:
                all_frame_refs.extend(
                    [(b['timestamp'], b['count'], b['filename'], b['byte_offset'], b['block_index'])
                     for b in fg.index_info])
            all_frame_refs.sort()
            vector_offset = 0
            for _, n, filename, byte_offset, block_index in all_frame_refs:
                if filename not in files:
                    files[filename] = {}
                if byte_offset not in files[filename]:
                    files[filename][byte_offset] = []
                files[filename][byte_offset].append((block_index, group_name, vector_offset))
                vector_offset += n
            group_info[group_name] = {
                'count': vector_offset,
                'fields': [(f, f.split('.')[-1]) for f in fields],
                'types': [],
            }

        # Pass through the data.  Should read the files in order,
        # jumping monotonically through the needed frames.  The data
        # type of output arrays is determined from whatever
        # np.array(G3Object) returns on the first block.  Note strings
        # ('U') have to be handled differently because we can't know
        # the maximum string length from the first block.
        data = {}
        timelines = {}
        for filename, file_map in sorted(files.items()):
            hk_logger.info('get_data: reading %s' % filename)
            reader = so3g.G3IndexedReader(filename)
            for byte_offset, frame_info in sorted(file_map.items()):
                # Seek and decode.
                hk_logger.debug('get_data: seeking to %i for %i block extractions' %
                                (byte_offset, len(frame_info)))
                reader.Seek(byte_offset)
                frames = reader.Process(None)
                assert(len(frames) == 1)
                frames = self.translator(frames[0])
                frame = frames[0]
                # Now expand all blocks.
                for block_index, group_name, offset in frame_info:
                    block = frame['blocks'][block_index]
                    gi = group_info[group_name]
                    if raw:
                        # Short-circuit if in raw mode, just collect all blocks.
                        if group_name not in data:
                            data[group_name] = []
                        data[group_name].append(block)
                        continue
                    if group_name not in timelines:
                        # This block is init that happens only once per group.
                        timelines[group_name] = {
                            't': np.zeros(gi['count']),
                            'fields': [f for f,s in gi['fields']],
                        }
                        hk_logger.debug('get_data: creating group "%s" with %i fields'
                                        % (group_name, len(gi['fields'])))
                        # Determine data type of each field and create output arrays.
                        for field, f_short in gi['fields']:
                            dtype = np.array(block[f_short]).dtype
                            gi['types'].append(dtype)
                            if dtype.char == 'U':
                                data[field] = []
                            else:
                                data[field] = np.empty(gi['count'], dtype)
                            hk_logger.debug('get_data: field "%s" has type %s' % (
                                f_short, dtype))
                    # Copy in block data.
                    n = len(block.times)
                    # Note this is in G3 time units for now... fixed later.
                    timelines[group_name]['t'][offset:offset+n] = [_t.time for _t in block.times]
                    for (field, f_short), dtype in zip(gi['fields'], gi['types']):
                        if dtype.char == 'U':
                            data[field].append((offset, list(map(str, block[f_short]))))
                        else:
                            # This is a relatively quick copy because
                            # of buffer pass-through from G3... don't
                            # hit the RHS with np.array!
                            data[field][offset:offset+n] = block[f_short]

        if raw:
            return [(group_name, _concat_hk_stream(data[group_name]))
                    for group_name, _, _ in grouped]

        # Finalize string fields.
        for group_name, fields, fgrps in grouped:
            gi = group_info[group_name]
            for (field, f_short), dtype in zip(gi['fields'], gi['types']):
                if dtype.char == 'U':
                    data[field] = np.array(list(itertools.chain(
                        *[x for i, x in sorted(data[field])])))
                    assert(len(data[field]) == gi['count'])

        # Scale out time units and mark last time.
        for timeline in timelines.values():
            timeline['t'] /= core.G3Units.seconds
            timeline['finalized_until'] = timeline['t'][-1]

        return (data, timelines)
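
A sketch of consuming the (data, timelines) return value described in the docstring; the `hk` instance, field name, and time bounds are hypothetical:

data, timelines = hk.get_data(field=['observatory.LS240.temps'],   # hypothetical field
                              start=t0, end=t1)
for group_name, tl in timelines.items():
    t = tl['t']                    # unix timestamps, seconds
    for field_name in tl['fields']:
        y = data[field_name]       # numpy array, co-sampled with t
    # tl['finalized_until'] is the earliest time worth re-querying later.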
Example #9
    def load_data(self, start, end, show_pb=True, load_biases=True):
        """
        Loads smurf G3 data for a given time range. The returned chunk of
        data will cover at least the specified time range.

        Args
        -----
            start (timestamp): start timestamp
            end   (timestamp): end timestamp
            show_pb (bool, optional): If True, will show progress bar.
            load_biases (bool, optional): If True, will return biases.

        Returns
        --------
            Returns a tuple ``SmurfData(times, data, primary, status, biases, timing_paradigm)``
            with the following fields:

                times (np.ndarray[samples]):
                    Array of unix timestamps for loaded data
                data (np.ndarray[channels, samples]):
                    Array of the squid phase in units of radians for each
                    channel with data in the specified time range. The index of
                    the array is the readout channel number.
                primary (Dict[np.ndarray]):
                    Dict of numpy arrays holding the "primary" data included in
                    the packet headers, including 'AveragingResetBits',
                    'Counter0', 'Counter1', 'Counter2', 'FluxRampIncrement',
                    'FluxRampOffset', 'FrameCounter', 'TESRelaySetting',
                    'UnixTime'
                status (SmurfStatus):
                    SmurfStatus object containing metadata info at the time of
                    the first Scan frame in the requested interval. If there
                    are no Scan frames in the interval, this will be None.
                biases (optional, np.ndarray[NTES, samples]):
                    An array containing the TES bias values.
                    If ``load_biases`` is False, this will be None.
                timing_paradigm (TimingParadigm):
                    Tells you the method used to extract timestamps from the
                    frame data.
        """
        session = self.Session()

        frames = session.query(Frame).filter(
            Frame.type_name == 'Scan',
            Frame.stop >= dt.datetime.fromtimestamp(start),
            Frame.start < dt.datetime.fromtimestamp(end)).order_by(Frame.time)
        session.close()

        samples, channels = 0, 0
        num_frames = 0
        for f in frames:
            num_frames += 1
            samples += f.samples
            channels = max(f.channels, channels)

        timestamps = np.full((samples, ), np.nan, dtype=np.float64)
        data = np.full((channels, samples), 0, dtype=np.int32)
        if load_biases:
            biases = np.full((num_bias_lines, samples), 0, dtype=np.int32)
        else:
            biases = None

        primary = {}

        cur_sample = 0
        cur_file = None
        timing_paradigm = None
        for frame_info in tqdm(frames, total=num_frames,
                               disable=(not show_pb)):
            file = frame_info.file.path
            if file != cur_file:
                reader = so3g.G3IndexedReader(file)
                cur_file = file

            reader.Seek(frame_info.offset)
            frame = reader.Process(None)[0]
            nsamp = frame['data'].n_samples

            key_order = [int(k[1:]) for k in frame['data'].keys()]
            data[key_order, cur_sample:cur_sample + nsamp] = frame['data']

            if load_biases:
                bias_order = [int(k[-2:]) for k in frame['tes_biases'].keys()]
                biases[bias_order,
                       cur_sample:cur_sample + nsamp] = frame['tes_biases']

            # Loads primary data
            if 'primary' in frame.keys():
                for k, v in frame['primary'].items():
                    if k not in primary:
                        primary[k] = np.zeros(samples, dtype=np.int64)
                    primary[k][cur_sample:cur_sample + nsamp] = v

            ts, paradigm = get_sample_timestamps(frame)
            if timing_paradigm is None:
                timing_paradigm = paradigm
            elif timing_paradigm != paradigm:
                timing_paradigm = TimingParadigm.Mixed

            timestamps[cur_sample:cur_sample + nsamp] = ts

            cur_sample += nsamp

        # Conversion from DAC counts to squid phase
        rad_per_count = np.pi / 2**15
        data = data * rad_per_count

        if len(timestamps) > 0:
            status = self.load_status(timestamps[0])
        else:
            status = None

        SmurfData = namedtuple(
            'SmurfData', 'times data primary status biases timing_paradigm')
        if load_biases:
            return SmurfData(timestamps, data, primary, status, biases,
                             timing_paradigm)
        else:
            return SmurfData(timestamps, data, primary, status, None,
                             timing_paradigm)
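
A usage sketch of load_data and the returned namedtuple; the `archive` instance and the time values are hypothetical:

# Load a ten-minute chunk starting at `start` (a unix timestamp).
seg = archive.load_data(start, start + 600, show_pb=False)

print(seg.times.shape)             # (samples,)
print(seg.data.shape)              # (channels, samples), squid phase in radians
if seg.status is not None:
    # Metadata captured at the first Scan frame in the interval.
    pass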
Example #10
    def get_data(self,
                 field=None,
                 start=None,
                 end=None,
                 min_stride=None,
                 raw=False,
                 short_match=False):
        """Load data from specified field(s) between specified times.

        Arguments ``field``, ``start``, ``end``, ``short_match`` are
        as described in _get_groups.

        Returns:
            Pair of dictionaries, (data, timelines).  The ``data``
            dictionary is a simple map from field name to a numpy
            array of readings.  The ``timelines`` dictionary is a map
            from field group name to a dictionary of timeline
            information, which has entries:

            - ``'t'``: numpy array of timestamps
            - ``'fields'``: list of fields belonging to this group.
            - ``'finalized_until'``: in cases where the data are still
              in flux, this field provides a timestamp that may be
              taken as the earliest time that needs to be requeried.
              This is part of the interface in order to support data
              streams that are being updated in real time.

        """
        grouped = self._get_groups(field, start, end, short_match=short_match)
        handles = {}  # filename -> G3IndexedReader map.
        blocks_out = []
        for group_name, fields, fgrps in grouped:
            blocks_in = []
            for fg in fgrps:
                for r in fg.index_info:
                    fn, off = r['filename'], r['byte_offset']
                    if fn not in handles:
                        handles[fn] = so3g.G3IndexedReader(fn)
                    handles[fn].Seek(off)
                    frames = handles[fn].Process(None)
                    assert (len(frames) == 1)
                    # Find the right block.
                    for blk in frames[0]['blocks']:
                        test_f = fields[0].split('.')[-1]  # drop the prefix.
                        if test_f in blk.data.keys():
                            blocks_in.append(blk)
                            break
            # Sort those blocks by timestamp. (Otherwise they'll stay sorted by object id :)
            blocks_in.sort(key=lambda b: b.t[0])
            # Create a new Block for this group.
            blk = so3g.IrregBlockDouble()
            blk.t = np.hstack([b.t for b in blocks_in])
            for f in fields:
                blk.data[f] = np.hstack(
                    [b.data[f.split('.')[-1]] for b in blocks_in])
            blocks_out.append((group_name, blk))
        if raw:
            return blocks_out
        # Reformat for sisock.
        data = {}
        timelines = {}
        for group_name, block in blocks_out:
            timelines[group_name] = {
                't': np.array(block.t),
                'finalized_until': block.t[-1],
                'fields': list(block.data.keys()),
            }
            for k, v in block.data.items():
                data[k] = np.array(v)

        return (data, timelines)
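
With raw=True this older implementation returns the list of (group_name, block) pairs directly, where each block is an IrregBlockDouble. A short consumption sketch; the `hk` instance and query arguments are hypothetical:

import numpy as np

for group_name, blk in hk.get_data(field, start, end, raw=True):
    t = np.array(blk.t)                  # timestamps for the co-sampled group
    for name, vec in blk.data.items():
        values = np.array(vec)           # readings for one field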