def open_gwf(filename, mode='r'):
    """Open a GWF-format file as a frameCPP stream

    Parameters
    ----------
    filename : `str`
        the path to read from, or write to; it is passed through
        `file_path` and converted to `str` before the stream is created

    mode : `str`, optional
        either ``'r'`` (read) or ``'w'`` (write)

    Returns
    -------
    `LDAStools.frameCPP.IFrameFStream`
        the input frame stream (if `mode='r'`), or
    `LDAStools.frameCPP.OFrameFStream`
        the output frame stream (if `mode='w'`)

    Raises
    ------
    ValueError
        if ``mode`` is anything other than ``'r'`` or ``'w'``
    """
    # validate before importing, so a bad mode fails even without frameCPP
    if mode not in ('r', 'w'):
        raise ValueError("mode must be either 'r' or 'w'")

    from LDAStools import frameCPP

    path = str(file_path(filename))
    stream_class = (frameCPP.IFrameFStream if mode == 'r'
                    else frameCPP.OFrameFStream)
    return stream_class(path)
def table_from_gwf(filename, name, columns=None, selection=None):
    """Build a Table from the FrEvent structures stored in a GWF file

    Parameters
    ----------
    filename : `str`
        path of GWF file to read; a file-like object is also accepted,
        in which case its ``name`` attribute is used as the path

    name : `str`
        name associated with the `FrEvent` structures

    columns : `list` of `str`
        list of column names to read; if not given, the columns are
        taken from the first event that is read

    selection : `str`, `list` of `str`
        one or more column selection strings to apply, e.g. ``'snr>6'``

    Returns
    -------
    `Table`
        one row per event for which `row_from_frevent` returned a row
    """
    from LDAStools import frameCPP

    # the stream is opened from a path, so swap a file object for its name
    path = filename.name if isinstance(filename, FILE_LIKE) else filename
    stream = frameCPP.IFrameFStream(path)

    # normalise the selection into parsed column filters
    filters = parse_column_filters([] if selection is None else selection)

    # walk the events by index until the stream raises IndexError
    rows = []
    index = 0
    while True:
        try:
            event = stream.ReadFrEvent(index, name)
        except IndexError:
            break
        index += 1

        # the first event supplies the column names when none were given
        if columns is None:
            columns = get_columns_from_frevent(event)

        parsed = row_from_frevent(event, columns=columns, selection=filters)
        if parsed is None:  # event did not pass the selection
            continue
        rows.append(parsed)

    return Table(rows=rows, names=columns)
def _read_framefile(framefile, channels, start=None, end=None, ctype=None,
                    series_class=TimeSeries):
    """Internal function to read data from a single frame file.

    Parameters
    ----------
    framefile : `str`
        path of the frame file, handed to `frameCPP.IFrameFStream` and
        to `file_segment`

    channels : iterable
        channels to read; ``str(channel)`` is used as the name to look
        up in the file, and the channel object itself keys the output

    start : optional
        start of the requested span; a false value (`None`, ``0``)
        means no lower bound

    end : optional
        end of the requested span; a false value (`None`, ``0``)
        means no upper bound

    ctype : `dict`, optional
        mapping of channel to its type string (``'sim'``, ``'proc'`` or
        ``'adc'``); missing entries are discovered from the file's
        table-of-contents and written back into this dict. If not given,
        a new empty dict is used.

    series_class : `type`, optional
        class used to build each series; its ``DictClass`` attribute is
        used to build the returned container

    Returns
    -------
    ``series_class.DictClass``
        container mapping each entry of ``channels`` to the series read

    Raises
    ------
    ValueError
        if a channel is not listed in the table-of-contents, if the
        requested span cannot be satisfied by a single-frame file, or if
        no data could be read for a channel
    """
    # a ``None`` default would crash on ``ctype.get`` below, so fall back
    # to an empty mapping; a caller-supplied dict is still filled in place
    if ctype is None:
        ctype = {}
    if not start:
        start = 0
    if not end:
        end = 0

    # open file
    stream = frameCPP.IFrameFStream(framefile)

    # get number of frames in file
    try:
        nframe = int(stream.GetNumberOfFrames())
    except (AttributeError, ValueError):
        nframe = None

    # if single frame, trust filename to provide GPS epoch of data
    # as required by the file-naming convention
    epochs = None
    try:
        if nframe == 1:
            epochs = [file_segment(framefile)[0]]
    except ValueError:
        pass
    # otherwise fall back to the per-frame GPS times in the table-of-contents
    if epochs is None:
        toc = stream.GetTOC()
        epochs = [LIGOTimeGPS(s, n) for s, n in zip(toc.GTimeS, toc.GTimeN)]

    toclist = {}  # only get names once
    for channel in channels:
        # if ctype not declared, find it from the table-of-contents
        if not ctype.get(channel, None):
            toc = stream.GetTOC()
            for typename in ['Sim', 'Proc', 'ADC']:
                if typename not in toclist:
                    get_ = getattr(toc, 'Get%s' % typename)
                    # the getter may return a mapping or a plain
                    # collection of names
                    try:
                        toclist[typename] = get_().keys()
                    except AttributeError:
                        toclist[typename] = get_()
                if str(channel) in toclist[typename]:
                    ctype[channel] = typename.lower()
                    break
        # if still not found, channel isn't in the frame
        if not ctype.get(channel, None):
            raise ValueError(
                "Channel %s not found in frame table of contents"
                % str(channel))

    # find channels
    out = series_class.DictClass()
    for channel in channels:
        name = str(channel)
        # pick the reader matching the channel type, e.g. ReadFrProcData
        read_ = getattr(stream, 'ReadFr%sData' % ctype[channel].title())
        ts = None
        i = 0
        while True:
            try:
                data = read_(i, name)
            except IndexError as e:
                if 'exceeds the range' in str(e):  # no more frames
                    break
                else:  # some other problem (likely channel not present)
                    raise
            offset = data.GetTimeOffset()
            datastart = epochs[i] + offset
            i += 1  # increment frame index before any 'continue'

            # check overlap with user-requested span
            if end and datastart >= end and nframe == 1:
                raise ValueError("Cannot read %s from FrVect in %s "
                                 "ending at %s" % (name, framefile, end))
            elif end and datastart >= end:  # don't need this frame
                continue
            try:
                dataend = datastart + data.GetTRange()
            except AttributeError:  # not proc channel
                pass
            else:
                if datastart == dataend:  # tRange not set
                    # tRange is not required, so if it is 0, it may have been
                    # omitted, rather than actually representing an empty
                    # data set
                    pass
                elif start and dataend < start:  # don't need this frame
                    continue

            for vect in data.data:  # loop hopefully over single vector
                # only read FrVect with matching name (or no name set)
                # frame spec allows for arbitrary other FrVects
                # to hold other information
                if vect.GetName() and vect.GetName() != name:
                    continue

                # decompress data
                arr = vect.GetDataArray()
                dim = vect.GetDim(0)
                dx = dim.dx
                sx = dim.startX
                # NOTE(review): ``buffer`` is a Python 2 builtin; presumably
                # a compat alias exists elsewhere for Python 3 - confirm
                if isinstance(arr, buffer):
                    arr = numpy.frombuffer(
                        arr, dtype=NUMPY_TYPE_FROM_FRVECT[vect.GetType()])

                # crop to required subset: ``a`` and ``b`` are the sample
                # indices bounding the requested span within this vector
                dimstart = datastart + sx
                dimend = dimstart + arr.size * dx
                a = int(max(0., float(start - dimstart)) / dx)
                if end:
                    b = arr.size - int(max(0., float(dimend - end)) / dx)
                else:
                    b = None

                # if file only has one frame, error on overlap problems
                if a >= arr.size and nframe == 1:  # start too large
                    raise ValueError("Cannot read %s from FrVect in %s "
                                     "starting at %s"
                                     % (name, framefile, start))
                # otherwise just skip to the next frame
                if a >= arr.size:  # skip frame
                    continue
                if a or b:
                    arr = arr[a:b]

                # cast as series or append
                if ts is None:
                    # get unit
                    unit = vect.GetUnitY() or None
                    # create array - need require() to prevent segfault
                    ts = numpy.require(
                        series_class(arr, t0=dimstart + a * dx, dt=dx,
                                     name=name, channel=name, unit=unit,
                                     copy=False),
                        requirements=['O'])
                    # add information to channel
                    ts.channel.sample_rate = ts.sample_rate.value
                    ts.channel.unit = unit
                    ts.channel.dtype = ts.dtype
                else:
                    ts.append(arr)

        if ts is None:
            raise ValueError("Failed to read '%s' from file '%s'"
                             % (str(channel), framefile))
        else:
            out[channel] = ts

    return out
def _read_framefile(framefile, channels, start=None, end=None, ctype=None,
                    series_class=TimeSeries):
    """Internal function to read data from a single frame file.

    Parameters
    ----------
    framefile : `str` or `CacheEntry`
        the frame file to read; for a `CacheEntry` its ``path``
        attribute is used

    channels : iterable
        channels to read; ``str(channel)`` is used as the name to look
        up in the file, and the channel object itself keys the output

    start : optional
        start of the requested span; `None` or ``0`` means no lower bound

    end : optional
        end of the requested span; `None` or ``0`` means no upper bound

    ctype : `dict`, optional
        mapping of channel to its type string (``'sim'``, ``'proc'`` or
        ``'adc'``); missing entries are discovered from the file's
        table-of-contents and written back into this dict. If not given,
        a new empty dict is used.

    series_class : `type`, optional
        class used to build each series; its ``DictClass`` attribute is
        used to build the returned container

    Returns
    -------
    ``series_class.DictClass``
        container mapping each entry of ``channels`` to the series read

    Raises
    ------
    ValueError
        if a channel is not listed in the table-of-contents, if the
        requested span is not covered by a vector that was read, or if
        no data could be read for a channel
    """
    # a ``None`` default would crash on ``ctype.get`` below, so fall back
    # to an empty mapping; a caller-supplied dict is still filled in place
    if ctype is None:
        ctype = {}

    # construct span segment (a false bound collapses to 0 == "unbounded")
    span = Segment(start or 0, end or 0)

    # open file
    if isinstance(framefile, CacheEntry):
        fp = framefile.path
    else:
        fp = framefile
    stream = frameCPP.IFrameFStream(fp)

    # get number of frames in file
    try:
        nframe = int(stream.GetNumberOfFrames())
    except (AttributeError, ValueError):
        nframe = None

    # if single frame, trust filename to provide GPS epoch of data
    # as required by the file-naming convention
    epochs = None
    try:
        ce = CacheEntry.from_T050017(fp)
    except ValueError:
        pass
    else:
        if nframe == 1:
            epochs = [float(ce.segment[0])]
    # otherwise fall back to the per-frame GPS times in the table-of-contents
    if epochs is None:
        toc = stream.GetTOC()
        epochs = [LIGOTimeGPS(s, n) for s, n in zip(toc.GTimeS, toc.GTimeN)]

    toclist = {}  # only get names once
    for channel in channels:
        # if ctype not declared, find it from the table-of-contents
        if not ctype.get(channel, None):
            toc = stream.GetTOC()
            for typename in ['Sim', 'Proc', 'ADC']:
                if typename not in toclist:
                    get_ = getattr(toc, 'Get%s' % typename)
                    # the getter may return a mapping or a plain
                    # collection of names
                    try:
                        toclist[typename] = get_().keys()
                    except AttributeError:
                        toclist[typename] = get_()
                if str(channel) in toclist[typename]:
                    ctype[channel] = typename.lower()
                    break
        # if still not found, channel isn't in the frame
        if not ctype.get(channel, None):
            raise ValueError("Channel %s not found in frame table of contents"
                             % str(channel))

    # find channels
    out = series_class.DictClass()
    for channel in channels:
        name = str(channel)
        # pick the reader matching the channel type, e.g. ReadFrProcData
        read_ = getattr(stream, 'ReadFr%sData' % ctype[channel].title())
        ts = None
        i = 0
        while True:
            try:
                data = read_(i, name)
            except IndexError as e:
                if 'exceeds the range' in str(e):  # no more frames
                    break
                else:  # some other problem (likely channel not present)
                    raise
            offset = data.GetTimeOffset()
            datastart = epochs[i] + offset

            for vect in data.data:  # loop hopefully over single vector
                # decompress data
                arr = vect.GetDataArray()
                dim = vect.GetDim(0)
                dx = dim.dx
                sx = dim.startX
                # NOTE(review): ``buffer`` is a Python 2 builtin; presumably
                # a compat alias exists elsewhere for Python 3 - confirm
                if isinstance(arr, buffer):
                    arr = numpy.frombuffer(
                        arr, dtype=NUMPY_TYPE_FROM_FRVECT[vect.GetType()])

                # crop to required subset: ``a`` and ``b`` are the sample
                # indices bounding the requested span within this vector
                dimstart = datastart + sx
                dimend = dimstart + arr.size * dx
                a = int(max(0., float(span[0] - dimstart)) / dx)
                if span[1]:
                    b = arr.size - int(max(0., float(dimend - span[1])) / dx)
                else:
                    b = None
                if a >= arr.size or (b is not None and b <= a):
                    raise ValueError("Span %s not covered by FrVect for %r "
                                     "in %s" % (span, name, fp))
                if a or b:
                    arr = arr[a:b]

                # cast as series or append
                if ts is None:
                    # get unit
                    unit = vect.GetUnitY() or None
                    # create array - need require() to prevent segfault
                    ts = numpy.require(
                        series_class(arr, t0=dimstart + a * dx, dt=dx,
                                     name=name, channel=channel, unit=unit,
                                     copy=False),
                        requirements=['O'])
                    # add information to channel
                    ts.channel.sample_rate = ts.sample_rate.value
                    ts.channel.unit = unit
                else:
                    ts.append(arr)

            i += 1  # move on to the next frame in the file

        if ts is None:
            raise ValueError("Channel '%s' not found in frame '%s'"
                             % (str(channel), fp))
        else:
            out[channel] = ts

    return out