def _find_frames_datafind(obs, frametype, start, end, **kwargs):
    """Query the datafind server for frames, then extend past its cache

    Returns the list of local file paths for ``frametype`` in
    ``[start, end)``, walking forward on disk past the newest file the
    datafind server knows about.
    """
    kwargs.setdefault('urltype', 'file')
    urls = gwdatafind.find_urls(obs[0], frametype, start, end, **kwargs)
    cache = [path_from_file_url(url) for url in urls]

    # use the latest frame to find more recent frames that aren't in
    # datafind yet; this is quite hacky, and isn't guaranteed to work
    # at any point, but it shouldn't break anything
    if cache:
        cache.extend(_find_more_files(cache[-1]))
        latestgps = file_segment(cache[-1])[1]
    else:  # no frames found at all
        latestgps = start

    # if we're searching for aggregated h(t), find more files for the
    # equivalent short h(t) type to fill out the remaining interval
    if frametype in AGGREGATED_HOFT and latestgps < end:
        cache.extend(_find_frames_datafind(
            obs,
            AGGREGATED_HOFT[frametype],
            latestgps,
            end,
            **kwargs
        ))
    return cache
def _find_frames_datafind(obs, frametype, start, end, **kwargs):
    """Find frame paths via datafind, extending beyond the server cache

    Paths newer than the datafind server's view are discovered by
    walking forward on disk from the newest known file; aggregated h(t)
    searches are topped up from the matching short h(t) type.
    """
    kwargs.setdefault('urltype', 'file')
    found = gwdatafind.find_urls(obs[0], frametype, start, end, **kwargs)
    cache = list(map(path_from_file_url, found))

    # use the latest frame to find more recent frames that aren't in
    # datafind yet - hacky and not guaranteed to work, but harmless
    try:
        newest = cache[-1]
    except IndexError:  # datafind returned nothing
        gpsend = start
    else:
        cache.extend(_find_more_files(newest))
        gpsend = file_segment(cache[-1])[1]

    # for aggregated h(t), fill the remaining tail of the interval from
    # the equivalent short h(t) type
    if frametype in AGGREGATED_HOFT and gpsend < end:
        shorttype = AGGREGATED_HOFT[frametype]
        cache.extend(
            _find_frames_datafind(obs, shorttype, gpsend, end, **kwargs),
        )
    return cache
def find_frames(obs, frametype, start, end, on_gaps='warn', **kwargs):
    """Find all frames for the given frametype in the GPS interval

    Parameters
    ----------
    obs : `str`
        the initial for the observatory

    frametype : `str`
        the name of the frame type for which to search

    start : `int`
        the GPS start time of this search

    end : `int`
        the GPS end time of this search

    on_gaps : `str`, optional
        what to do if the found files don't cover the full interval,
        one of 'warn' (default), 'raise', or 'ignore'

    **kwargs
        all other keyword arguments are passed directly to
        :func:`~gwdatafind.find_urls`

    Returns
    -------
    paths : `list` of `str`
        a list of GWF file paths
    """
    # separate out the keywords that belong to find_ll_frames()
    ll_kw = {}
    for key in ('tmpdir', 'root'):
        if key in kwargs:
            ll_kw[key] = kwargs.pop(key)

    cache = _find_frames_datafind(obs, frametype, start, end,
                                  on_gaps='ignore', **kwargs)

    # for low-latency types, also search the shared-memory buffer for
    # files newer than anything datafind returned
    if re_ll.search(frametype):
        if cache:
            latest = file_segment(cache[-1])[1]
        else:
            latest = start
        if latest < end:
            cache.extend(find_ll_frames(obs, frametype, latest, end, **ll_kw))

    # warn or raise if the found files don't cover [start, end)
    if on_gaps != 'ignore':
        covered = SegmentList(map(file_segment, cache)).coalesce()
        gaps = (SegmentList([Segment(start, end)]) - covered).coalesce()
        if gaps:
            msg = "Missing segments:\n%s" % '\n'.join(map(str, gaps))
            if on_gaps == 'warn':
                warnings.warn(msg)
            else:
                raise RuntimeError(msg)
    return cache
def find_ll_frames(ifo, frametype, start, end, root='/dev/shm', tmpdir=None):
    """Find all buffered low-latency frames in the given interval

    Parameters
    ----------
    ifo : `str`
        the IFO prefix, e.g. 'L1'

    frametype : `str`
        the frame type identifier, e.g. 'llhoft'

    start : `int`
        the GPS start time of this search

    end : `int`
        the GPS end time of this search

    root : `str`, optional
        the base root for the buffer, defaults to `/dev/shm`

    tmpdir : `str`, optional
        temporary directory into which to copy files from /dev/shm

        .. note::

           Caller is responsible for deleting the directory and its
           contents when done with it.

    Returns
    -------
    paths : `list` of `str`
        a list of GWF file paths

    .. warning::

       This method is not safe, given that the frames may disappear from
       the buffer before you have had a chance to read them
    """
    seg = Segment(start, end)
    # keep only the buffered files whose GPS span overlaps the request
    cache = [path for path in _find_ll_frames(ifo, frametype, root=root)
             if file_segment(path).intersects(seg)]
    if tmpdir:
        # copy the files out of the volatile buffer so that the caller
        # can read them at leisure
        # exist_ok avoids the isdir()/makedirs() race
        os.makedirs(tmpdir, exist_ok=True)
        out = []
        for path in cache:
            new = os.path.join(tmpdir, os.path.basename(path))
            shutil.copyfile(path, new)
            out.append(new)
        return out
    return cache
def find_ll_frames(ifo, frametype, start, end, root='/dev/shm', tmpdir=None):
    """Find all buffered low-latency frames in the given interval

    Parameters
    ----------
    ifo : `str`
        the IFO prefix, e.g. 'L1'

    frametype : `str`
        the frame type identifier, e.g. 'llhoft'

    start : `int`
        the GPS start time of this search

    end : `int`
        the GPS end time of this search

    root : `str`, optional
        the base root for the buffer, defaults to `/dev/shm`

    tmpdir : `str`, optional
        temporary directory into which to copy files from /dev/shm

        .. note::

           Caller is responsible for deleting the directory and its
           contents when done with it.

    Returns
    -------
    paths : `list` of `str`
        a list of GWF file paths

    .. warning::

       This method is not safe, given that the frames may disappear from
       the buffer before you have had a chance to read them
    """
    span = Segment(start, end)
    cache = list(filter(
        lambda path: file_segment(path).intersects(span),
        _find_ll_frames(ifo, frametype, root=root),
    ))
    if not tmpdir:
        return cache
    # copy out of the volatile buffer so the caller can read safely;
    # exist_ok avoids the isdir()/makedirs() race
    os.makedirs(tmpdir, exist_ok=True)
    out = []
    for path in cache:
        target = os.path.join(tmpdir, os.path.basename(path))
        shutil.copyfile(path, target)
        out.append(target)
    return out
def find_frames(obs, frametype, start, end, on_gaps='warn', **kwargs):
    """Find all frames for the given frametype in the GPS interval

    Parameters
    ----------
    obs : `str`
        the initial for the observatory

    frametype : `str`
        the name of the frame type for which to search

    start : `int`
        the GPS start time of this search

    end : `int`
        the GPS end time of this search

    on_gaps : `str`, optional
        response when the discovered files don't cover the whole
        interval: 'warn' (default), 'raise', or 'ignore'

    **kwargs
        all other keyword arguments are passed directly to
        :func:`~gwdatafind.find_urls`

    Returns
    -------
    paths : `list` of `str`
        a list of GWF file paths
    """
    # pull out the keywords reserved for find_ll_frames()
    ll_kw = {
        key: kwargs.pop(key)
        for key in ('tmpdir', 'root')
        if key in kwargs
    }

    cache = _find_frames_datafind(
        obs, frametype, start, end, on_gaps='ignore', **kwargs)

    # low-latency types may have newer files in the shared-memory buffer
    if re_ll.search(frametype):
        try:
            tail = file_segment(cache[-1])[1]
        except IndexError:  # nothing found by datafind
            tail = start
        if tail < end:
            cache.extend(find_ll_frames(obs, frametype, tail, end, **ll_kw))

    # check coverage of [start, end) and warn/raise as requested
    if on_gaps != 'ignore':
        seglist = SegmentList(map(file_segment, cache)).coalesce()
        missing = (SegmentList([Segment(start, end)]) - seglist).coalesce()
        msg = "Missing segments:\n%s" % '\n'.join(map(str, missing))
        if missing and on_gaps == 'warn':
            warnings.warn(msg)
        elif missing:
            raise RuntimeError(msg)
    return cache
def _find_more_files(path):
    """Find more files similar to ``path`` by incrementing the GPS times

    This is mainly to find more files for a given (ifo, type) that aren't
    (yet) in the datafind server cache.

    Parameters
    ----------
    path : `str`
        the path of a file to use as the base, must conform to LIGO-T050017

    Returns
    -------
    morepaths : `list` of `str`
        the list of all files found by walking forward in time
    """
    # work out how many leading GPS digits the directory layout uses
    match = re_gwf_gps_epoch.search(os.path.dirname(path))
    if match is None:  # path doesn't follow the expected layout
        return []
    ngps = len(match.groupdict()['gpsepoch'])

    found = []
    while True:
        s, e = file_segment(path)
        # shift the filename forward by one file duration, and update
        # the truncated-GPS directory component to match
        candidate = path.replace(
            '-{}-'.format(s), '-{}-'.format(e),
        ).replace(
            '{}/'.format(str(s)[:ngps]), '{}/'.format(str(e)[:ngps]),
        )
        # if the next file doesn't exist, the walk is finished
        if not os.path.isfile(candidate):
            return found
        found.append(candidate)
        path = candidate
def get_latest_data_gps(obs, frametype):
    """Get the end GPS time of the latest available frame file

    Parameters
    ----------
    obs : `str`
        the initial for the observatory

    frametype : `str`
        the name of the frame type for which to search

    Returns
    -------
    gpstime : `int`
        the GPS time marking the end of the latest frame
    """
    try:
        newest = _find_latest_file(obs, frametype)
    except IndexError as exc:
        # rewrite the error to name the (observatory, type) searched
        exc.args = ('No {}-{} frames found'.format(obs[0], frametype),)
        raise
    # the filename encodes the [start, stop) interval of the file
    _, stop = file_segment(newest)
    return int(stop)
def _find_more_files(path):
    """Find more files similar to ``path`` by incrementing the GPS times

    This is mainly to find more files for a given (ifo, type) that aren't
    (yet) in the datafind server cache.

    Parameters
    ----------
    path : `str`
        the path of a file to use as the base, must conform to LIGO-T050017

    Returns
    -------
    morepaths : `list` of `str`
        the list of all files found by walking forward in time
    """
    # parse the GPS epoch width from the directory name; if the path
    # doesn't match the expected layout, we can't do anything
    try:
        ngps = len(re_gwf_gps_epoch.search(
            os.path.dirname(path),
        ).groupdict()['gpsepoch'])
    except AttributeError:
        return []

    found = []
    current = path
    while True:
        gps0, gps1 = file_segment(current)
        # replace the start time with the end time in the filename, and
        # the truncated-GPS epoch directory to match
        nxt = current.replace(
            '-{start}-'.format(start=gps0),
            '-{end}-'.format(end=gps1),
        ).replace(
            '{start5}/'.format(start5=str(gps0)[:ngps]),
            '{end5}/'.format(end5=str(gps1)[:ngps]),
        )
        # stop at the first file that doesn't exist on disk
        if not os.path.isfile(nxt):
            break
        found.append(nxt)
        current = nxt
    return found
def get_latest_data_gps(obs, frametype):
    """Get the end GPS time of the latest available frame file

    Parameters
    ----------
    obs : `str`
        the initial for the observatory

    frametype : `str`
        the name of the frame type for which to search

    Returns
    -------
    gpstime : `int`
        the GPS time marking the end of the latest frame
    """
    try:
        latest = _find_latest_file(obs, frametype)
    except IndexError as exc:
        # attach a friendlier message before re-raising
        exc.args = ('No {0[0]}-{1} frames found'.format(obs, frametype),)
        raise
    # return the end time of the file as indicated by its filename
    return int(file_segment(latest)[1])
skip_missing_dependency, TEST_GWF_FILE, ) from ...timeseries import TimeSeries # import optional dependencies lal_utils = pytest.importorskip("lal.utils") lalframe = pytest.importorskip("lalframe") gwpy_lalframe = pytest.importorskip("gwpy.timeseries.io.gwf.lalframe") # get URI to test against TEST_GWF_PATH = Path(TEST_GWF_FILE).absolute() TEST_GWF_URL = TEST_GWF_PATH.as_uri() # get epoch corresponding to this file TEST_GWF_SEGMENT = file_segment(TEST_GWF_FILE) # channels to read CHANNELS = ["H1:LDAS-STRAIN", "L1:LDAS-STRAIN", "V1:h_16384Hz"] @pytest.fixture def stream(): return lalframe.FrStreamOpen(str(TEST_GWF_PATH.parent), TEST_GWF_PATH.name) def _test_open_data_source(source): """This function actually performs the test """ stream = gwpy_lalframe.open_data_source(source) assert stream.epoch == TEST_GWF_SEGMENT[0]