def test_slice(self, epochs, epochs_expected, params):
    stream = Stream(network="GR", station="BFO", location="", channel="LHZ")
    stream_epoch = StreamEpoch(stream=stream, **epochs)

    expected = [
        StreamEpoch(stream=stream, **epoch) for epoch in epochs_expected
    ]

    assert sorted(stream_epoch.slice(**params)) == expected
def reduce_to_extent(routes):
    grouped = group_routes_by(routes, key="network.station.location.channel")

    reduced = []
    for group_key, routes in grouped.items():
        urls = set()
        _stream = None
        ts = set()
        for r in routes:
            assert len(r.stream_epochs) == 1, "granular routes required"

            urls.add(r.url)
            se_orig = r.stream_epochs[0]
            _stream = se_orig.stream
            with none_as_max(se_orig.endtime) as end:
                ts.add(se_orig.starttime)
                ts.add(end)

        with max_as_none(max(ts)) as end:
            se = StreamEpoch(_stream, starttime=min(ts), endtime=end)
            reduced.append(Route(url=r.url, stream_epochs=[se]))

        # do not allow distributed stream epochs; would require
        # on-the-fly de-/serialization
        if len(urls) != 1:
            raise ValueError("Distributed stream epochs not allowed.")

    return reduced
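
# Sketch (assumptions: Stream/StreamEpoch live in eidaws.utils.sncl and Route
# in eidaws.utils.misc, as the docstrings elsewhere suggest; none_as_max /
# max_as_none map an open endtime to datetime.max and back; URL and dates are
# made up): two granular routes for the same channel collapse into a single
# route spanning their joint extent, keeping the open end.
import datetime

from eidaws.utils.misc import Route
from eidaws.utils.sncl import Stream, StreamEpoch

stream = Stream(network="GR", station="BFO", location="", channel="LHZ")
url = "http://eida.example.org/fdsnws/dataselect/1/query"  # hypothetical

granular = [
    Route(
        url=url,
        stream_epochs=[
            StreamEpoch(
                stream,
                starttime=datetime.datetime(2020, 1, 1),
                endtime=datetime.datetime(2020, 6, 1),
            )
        ],
    ),
    Route(
        url=url,
        stream_epochs=[
            # open-ended epoch, i.e. the channel is still acquiring data
            StreamEpoch(
                stream, starttime=datetime.datetime(2020, 6, 1), endtime=None
            )
        ],
    ),
]

# expected: a single route covering 2020-01-01 up to an open end (endtime=None)
extent = reduce_to_extent(granular)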
def make_stream_epoch(self, data, **kwargs):
    """
    Factory method generating
    :py:class:`eidaws.utils.sncl.StreamEpoch` objects.
    """
    if data["location"] == "--":
        data["location"] = ""

    return StreamEpoch.from_sncl(**data)
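
# Sketch (hypothetical input record; field values are made up): the FDSN
# placeholder "--" for an empty location code is normalized before the
# StreamEpoch is built; `schema` stands for an instance of the class that
# owns this factory method.
import datetime

data = {
    "network": "GR",
    "station": "BFO",
    "location": "--",  # placeholder for an empty location code
    "channel": "LHZ",
    "starttime": datetime.datetime(2020, 1, 1),
    "endtime": datetime.datetime(2020, 1, 2),
}

se = schema.make_stream_epoch(data)  # delegates to StreamEpoch.from_sncl(**data)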
def _parse_stream_epochs(text, domains=None, excluded_domains=None):
    # compute domains to be crawled
    _excluded_domains = None
    if excluded_domains:
        _excluded_domains = excluded_domains[:]

    if excluded_domains and domains:
        explicitly_included = [d for d in excluded_domains if d in domains]
        for d in explicitly_included:
            _excluded_domains.remove(d)

    stream_epochs = []

    url = None
    skip_url = False
    for line in text.split("\n"):
        if not url:
            url = line.strip()

            if excluded_domains or domains:
                parsed = urlparse(url)
                if (
                    _excluded_domains
                    and parsed.netloc in _excluded_domains
                    or domains
                    and parsed.netloc not in domains
                ):
                    skip_url = True

        elif not line.strip():
            url = None
            skip_url = False

        else:
            if skip_url:
                continue

            se = StreamEpoch.from_snclline(line)
            stream_epochs.append(se)

    return stream_epochs
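
# Sketch (hypothetical URLs and epochs; stream epoch lines follow the usual
# FDSN POST convention with "--" denoting an empty location code): the parser
# expects the routing service's plain-text layout, i.e. a service URL followed
# by one stream epoch line per stream, with an empty line separating the
# blocks of different URLs.
text = "\n".join(
    [
        "http://eida.example.org/fdsnws/dataselect/1/query",
        "GR BFO -- LHZ 2020-01-01T00:00:00 2020-01-02T00:00:00",
        "",
        "http://other.example.org/fdsnws/dataselect/1/query",
        "CH DAVOX -- HHZ 2020-01-01T00:00:00 2020-01-02T00:00:00",
    ]
)

# keep only stream epochs served from eida.example.org
stream_epochs = _parse_stream_epochs(text, domains=["eida.example.org"])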
async def _emerge_routes(
    self,
    text,
    post,
    default_endtime,
):
    """
    Default implementation parsing the routing service's output stream and
    creating fully demultiplexed routes. Note that routes with an exceeded
    per-client retry budget are dropped.
    """

    def validate_stream_durations(stream_duration, total_stream_duration):
        if (
            self.max_stream_epoch_duration is not None
            and stream_duration > self.max_stream_epoch_duration
        ) or (
            self.max_total_stream_epoch_duration is not None
            and total_stream_duration > self.max_total_stream_epoch_duration
        ):
            self.logger.debug(
                "Exceeded configured limits: {}{}".format(
                    "stream_duration="
                    f"{stream_duration.total_seconds()}s (configured="
                    f"{self.max_stream_epoch_duration.total_seconds()}s), "
                    if self.max_stream_epoch_duration
                    else "",
                    "total_stream_duration: "
                    f"{total_stream_duration.total_seconds()}s "
                    "(configured="
                    f"{self.max_total_stream_epoch_duration.total_seconds()}s"
                    ")"
                    if self.max_total_stream_epoch_duration
                    else "",
                )
            )
            raise FDSNHTTPError.create(
                413,
                self.request,
                request_submitted=self.request_submitted,
                service_version=__version__,
                error_desc_long=(
                    "Exceeded configured stream epoch limits: "
                    "({}{})".format(
                        "limit per requested stream epoch="
                        f"{self.max_stream_epoch_duration.days} days, "
                        if self.max_stream_epoch_duration
                        else "",
                        f"total={self.max_total_stream_epoch_duration.days}"
                        " days"
                        if self.max_total_stream_epoch_duration
                        else "",
                    )
                ),
            )

    url = None
    skip_url = False

    urls = set()
    routes = []

    total_stream_duration = datetime.timedelta()

    for line in text.split("\n"):
        if not url:
            url = line.strip()

            try:
                e_ratio = await self.get_cretry_budget_error_ratio(url)
            except Exception:
                pass
            else:
                if e_ratio > self.client_retry_budget_threshold:
                    self.logger.warning(
                        f"Exceeded per client retry-budget for {url}: "
                        f"(e_ratio={e_ratio})."
                    )
                    skip_url = True

        elif not line.strip():
            urls.add(url)
            url = None
            skip_url = False

        else:
            if skip_url:
                continue

            # XXX(damb): Do not substitute an empty endtime when
            # performing HTTP GET requests in order to guarantee
            # more cache hits (if eida-federator is coupled with
            # HTTP caching proxy).
            se = StreamEpoch.from_snclline(
                line,
                default_endtime=default_endtime if post else None,
            )

            stream_duration = se.duration
            try:
                total_stream_duration += stream_duration
            except OverflowError:
                total_stream_duration = datetime.timedelta.max

            validate_stream_durations(stream_duration, total_stream_duration)

            routes.append(Route(url=url, stream_epochs=[se]))

    return urls, routes
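
# Sketch (illustrative, stdlib only): the OverflowError guard above exists
# because summing per-stream durations can exceed datetime.timedelta.max
# (e.g. for wildcard requests with open-ended epochs); the total is then
# clamped instead of aborting the accumulation.
import datetime

total = datetime.timedelta.max - datetime.timedelta(days=1)
try:
    total += datetime.timedelta(days=2)  # result would exceed timedelta.max
except OverflowError:
    total = datetime.timedelta.max  # clamp to the maximum representable duration

assert total == datetime.timedelta.max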
def _process_request(self, args, stream_epochs):
    # resolve virtual network stream epochs
    vnet_stream_epochs_found = []
    vnet_stream_epochs_resolved = []
    for stream_epoch in stream_epochs:
        self.logger.debug(f"Resolving {stream_epoch!r} regarding VNET.")
        resolved = resolve_vnetwork(db.session, stream_epoch)
        if resolved:
            vnet_stream_epochs_resolved.extend(resolved)
            vnet_stream_epochs_found.append(stream_epoch)

    self.logger.debug(
        f"Stream epochs from VNETs: {vnet_stream_epochs_resolved!r}"
    )

    for vnet_stream_epoch in vnet_stream_epochs_found:
        stream_epochs.remove(vnet_stream_epoch)

    stream_epochs.extend(vnet_stream_epochs_resolved)

    # NOTE(damb): Do neither merge epochs nor trim to query epoch if
    # service == "station"
    merge_epochs = trim_to_stream_epoch = args["service"] != "station"
    canonicalize_epochs = args["service"] == "station"

    # collect results for each stream epoch
    routes = []
    for stream_epoch in stream_epochs:
        self.logger.debug(f"Processing request for {stream_epoch!r}")
        # query
        _routes = query_routes(
            db.session,
            stream_epoch,
            args["service"],
            level=args["level"],
            access=args["access"],
            method=args["method"],
            minlat=args["minlatitude"],
            maxlat=args["maxlatitude"],
            minlon=args["minlongitude"],
            maxlon=args["maxlongitude"],
            trim_to_stream_epoch=trim_to_stream_epoch,
        )

        if trim_to_stream_epoch:
            # adjust stream epochs regarding time constraints
            for url, stream_epochs_handler in _routes:
                stream_epochs_handler.modify_with_temporal_constraints(
                    start=stream_epoch.starttime, end=stream_epoch.endtime
                )
        elif canonicalize_epochs:
            # canonicalize epochs
            for url, stream_epochs_handler in _routes:
                stream_epochs_handler.canonicalize_epochs(
                    start=stream_epoch.starttime, end=stream_epoch.endtime
                )

        routes.extend(_routes)

    self.logger.debug(f"Routes: {routes}")

    # merge routes
    processed_routes = collections.defaultdict(StreamEpochsHandler)
    for url, stream_epochs_handler in routes:
        for stream_epochs in generate_stream_epochs(
            stream_epochs_handler, merge_epochs=merge_epochs
        ):
            for se in stream_epochs:
                processed_routes[url].add(se)

    self.logger.debug(f"Routes (processed): {processed_routes}")

    # demux
    for url, stream_epochs_handler in processed_routes.items():
        if args["level"] in ("network", "station"):
            processed_routes[url] = [
                StreamEpoch.from_streamepochs(stream_epochs)
                for stream_epochs in stream_epochs_handler
            ]
        else:
            processed_routes[url] = [
                stream_epoch
                for stream_epochs in generate_stream_epochs(
                    stream_epochs_handler, merge_epochs=merge_epochs
                )
                for stream_epoch in stream_epochs
            ]

    # sort response
    routes = [
        Route(url=url, stream_epochs=sorted(stream_epochs))
        for url, stream_epochs in processed_routes.items()
    ]
    # sort additionally by URL
    routes.sort()

    ostream = OutputStream.create(
        args["format"],
        routes=routes,
    )
    return str(ostream)
def query_routes(
    session,
    stream_epoch,
    service,
    level="channel",
    access="any",
    method=None,
    minlat=-90.0,
    maxlat=90.0,
    minlon=-180.0,
    maxlon=180.0,
    like_escape="/",
    trim_to_stream_epoch=True,
):
    """
    Return routes for a given stream epoch.

    :param session: SQLAlchemy session
    :type session: :py:class:`sqlalchemy.orm.session.Session`
    :param stream_epoch: StreamEpoch the database query is performed with
    :type stream_epoch: :py:class:`~eidaws.utils.sncl.StreamEpoch`
    :param str service: String specifying the webservice
    :param str level: Optional `fdsnws-station` *level* parameter
    :param str access: Optional access parameter
    :param method: Optional list of FDSNWS method tokens to be filtered for
    :type method: List of str or None
    :param float minlat: Latitude larger than or equal to the specified
        minimum
    :param float maxlat: Latitude smaller than or equal to the specified
        maximum
    :param float minlon: Longitude larger than or equal to the specified
        minimum
    :param float maxlon: Longitude smaller than or equal to the specified
        maximum
    :param str like_escape: Character used for the SQL ``ESCAPE`` statement
    :param bool trim_to_stream_epoch: Indicates if resulting stream epochs
        should be trimmed to the `stream_epoch`'s epoch (if possible)

    :return: List of :py:class:`~eidaws.utils.misc.Route` objects
    :rtype: list
    """
    sql_stream_epoch = stream_epoch.fdsnws_to_sql_wildcards()

    logger.debug(f"Processing request for (SQL) {sql_stream_epoch!r}")

    sta = sql_stream_epoch.station
    loc = sql_stream_epoch.location
    cha = sql_stream_epoch.channel

    query = _create_route_query(
        session,
        service,
        **sql_stream_epoch._asdict(short_keys=True),
        level=level,
        access=access,
        method=method,
        minlat=minlat,
        maxlat=maxlat,
        minlon=minlon,
        maxlon=maxlon,
        like_escape=like_escape,
    )

    routes = collections.defaultdict(StreamEpochsHandler)
    for row in query.all():
        # NOTE(damb): Adjust epoch in case the orm.Epoch is smaller/larger
        # than the RoutingEpoch (regarding time constraints); at least one
        # starttime is mandatory to be configured
        starttimes = [row[4], row[6]]
        endtimes = [row[5], row[7]]

        if trim_to_stream_epoch:
            starttimes.append(sql_stream_epoch.starttime)
            endtimes.append(sql_stream_epoch.endtime)

        starttime = max(t for t in starttimes if t is not None)
        try:
            endtime = min(t for t in endtimes if t is not None)
        except ValueError:
            endtime = None

        if endtime is not None and endtime <= starttime:
            continue

        sta = row[2]
        loc = row[1]
        cha = row[0]
        if level == "network":
            sta = loc = cha = "*"
        elif level == "station":
            loc = cha = "*"

        # NOTE(damb): Set endtime to 'max' if undefined (i.e. device
        # currently acquiring data).
        with none_as_max(endtime) as end:
            stream_epoch = StreamEpoch.from_sncl(
                network=row[3],
                station=sta,
                location=loc,
                channel=cha,
                starttime=starttime,
                endtime=end,
            )

            routes[row[8]].add(stream_epoch)

    return [
        Route(url=url, stream_epochs=streams)
        for url, streams in routes.items()
    ]
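
# Sketch (illustrative, stdlib only, made-up dates): the trimming above
# intersects the orm.Epoch, the routing epoch and, if trim_to_stream_epoch is
# enabled, the query epoch. The effective starttime is the latest defined
# starttime, the effective endtime the earliest defined endtime; empty
# intersections are skipped and an entirely undefined endtime keeps the epoch
# open.
import datetime

starttimes = [
    datetime.datetime(2019, 1, 1),  # orm.Epoch starttime
    datetime.datetime(2018, 6, 1),  # routing epoch starttime
    datetime.datetime(2020, 1, 1),  # query stream epoch starttime
]
endtimes = [
    None,                           # orm.Epoch still open
    datetime.datetime(2021, 1, 1),  # routing epoch endtime
    datetime.datetime(2020, 2, 1),  # query stream epoch endtime
]

starttime = max(t for t in starttimes if t is not None)
try:
    endtime = min(t for t in endtimes if t is not None)
except ValueError:
    endtime = None  # all endtimes undefined -> epoch remains open

assert (starttime, endtime) == (
    datetime.datetime(2020, 1, 1),
    datetime.datetime(2020, 2, 1),
)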
def _prepare_history(history, stream_epoch_dict):
    # synchronize history with eidaws-stationlite stream epochs
    idx = {
        level: set(stream_epochs)
        for level, stream_epochs in stream_epoch_dict.items()
    }

    seen = defaultdict(set)
    total = 0
    from_history = []
    try:
        for entry in history:
            stream_epoch = entry["stream"]
            query_params = entry["params"]
            if not stream_epoch or not query_params:
                continue

            l = query_params["level"]
            f = query_params["format"]
            if not l or not f:
                continue

            if (
                self.config["history_by_status"]
                and entry["status"] not in self.config["history_by_status"]
            ):
                continue

            stream_epoch = StreamEpoch.from_snclline(stream_epoch)
            if l in idx and stream_epoch in idx[l]:
                from_history.append(
                    (stream_epoch, {"format": f, "level": l})
                )
                seen[l].add(stream_epoch)
                total += 1
    except (KeyError, TypeError) as err:
        raise InvalidHistory(err)

    supplementary = {}
    if self.config["history_include_stl"]:
        self.logger.debug("Checking for supplementary stream epochs")

        for level, stream_epochs in stream_epoch_dict.items():
            supplementary[level] = list(
                set(stream_epochs) - seen.get(level, set())
            )

        self.logger.debug(
            "Found {} supplementary stream epochs to be crawled".format(
                sum(len(se_lst) for l, se_lst in supplementary.items())
            )
        )

        total += _total(stream_epoch_dict)

    return from_history, supplementary, total
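
# Sketch (hypothetical history entry; values are made up): the keys accessed
# above imply entries of roughly this shape, i.e. a stream epoch line, the
# query parameters the epoch was previously crawled with, and the HTTP status
# of that crawl.
entry = {
    "stream": "GR BFO -- LHZ 2020-01-01T00:00:00 2020-01-02T00:00:00",
    "params": {"level": "channel", "format": "text"},
    "status": 200,
}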