Example #1
    def test_slice(self, epochs, epochs_expected, params):

        stream = Stream(network="GR",
                        station="BFO",
                        location="",
                        channel="LHZ")
        stream_epoch = StreamEpoch(stream=stream, **epochs)

        expected = [
            StreamEpoch(stream=stream, **epoch) for epoch in epochs_expected
        ]
        assert sorted(stream_epoch.slice(**params)) == expected
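A minimal usage sketch of the behavior this test exercises, assuming the import path from the docstring in Example #3 and a hypothetical ``num`` parameter for ``slice`` (the real arguments are supplied via the ``params`` fixture):

    import datetime

    from eidaws.utils.sncl import Stream, StreamEpoch  # assumed import path

    stream = Stream(network="GR", station="BFO", location="", channel="LHZ")
    se = StreamEpoch(
        stream=stream,
        starttime=datetime.datetime(2020, 1, 1),
        endtime=datetime.datetime(2020, 1, 3),
    )

    # hypothetical: split the two-day epoch into two one-day sub-epochs
    for sub in sorted(se.slice(num=2)):
        print(sub.starttime, sub.endtime)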
Example #2
        def reduce_to_extent(routes):

            grouped = group_routes_by(routes,
                                      key="network.station.location.channel")

            reduced = []
            for group_key, grouped_routes in grouped.items():

                urls = set()
                _stream = None
                ts = set()
                for r in grouped_routes:
                    assert (
                        len(r.stream_epochs) == 1
                    ), "granular routes required"

                    urls.add(r.url)
                    se_orig = r.stream_epochs[0]
                    _stream = se_orig.stream
                    # treat an open-ended (None) endtime as datetime.max so
                    # that epoch boundaries remain comparable
                    with none_as_max(se_orig.endtime) as end:
                        ts.add(se_orig.starttime)
                        ts.add(end)

                # do not allow distributed stream epochs; would require
                # on-the-fly de-/serialization
                if len(urls) != 1:
                    raise ValueError("Distributed stream epochs not allowed.")

                # map datetime.max back to None when rebuilding the overall
                # extent
                with max_as_none(max(ts)) as end:
                    se = StreamEpoch(_stream, starttime=min(ts), endtime=end)
                    reduced.append(Route(url=r.url, stream_epochs=[se]))

            return reduced
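The reduction leans on the ``none_as_max``/``max_as_none`` pair to make open-ended endtimes comparable. A minimal sketch of how such helpers could behave (hypothetical stand-ins, not the eidaws implementations):

    import datetime
    from contextlib import contextmanager

    # ``none_as_max`` yields datetime.max for an open-ended (None) endtime;
    # ``max_as_none`` maps datetime.max back to None afterwards.
    @contextmanager
    def none_as_max(end):
        yield datetime.datetime.max if end is None else end

    @contextmanager
    def max_as_none(end):
        yield None if end == datetime.datetime.max else end

    ts = {datetime.datetime(2020, 1, 1)}
    with none_as_max(None) as end:  # open-ended epoch
        ts.add(end)
    with max_as_none(max(ts)) as end:
        assert end is None  # restored to "still acquiring"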
Example #3
    def make_stream_epoch(self, data, **kwargs):
        """
        Factory method generating
        :py:class:`eidaws.utils.sncl.StreamEpoch` objects.
        """
        if data["location"] == "--":
            data["location"] = ""
        return StreamEpoch.from_sncl(**data)
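A hypothetical call illustrating the "--" normalization, assuming ``from_sncl`` accepts SNCL keyword arguments plus ``starttime``/``endtime`` as in Example #7 (``factory`` stands in for whatever object defines the method):

    import datetime

    data = {
        "network": "GR", "station": "BFO", "location": "--", "channel": "LHZ",
        "starttime": datetime.datetime(2020, 1, 1), "endtime": None,
    }
    # the empty location code "--" is normalized to "" before construction
    se = factory.make_stream_epoch(data)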
Example #4
            def _parse_stream_epochs(
                text, domains=None, excluded_domains=None
            ):
                # compute domains to be crawled
                _excluded_domains = None
                if excluded_domains:
                    _excluded_domains = excluded_domains[:]
                    # domains listed explicitly in ``domains`` take precedence
                    # over their exclusion
                    if domains:
                        explicitly_included = [
                            d for d in excluded_domains if d in domains
                        ]
                        for d in explicitly_included:
                            _excluded_domains.remove(d)

                stream_epochs = []
                url = None
                skip_url = False
                for line in text.split("\n"):
                    if not url:
                        url = line.strip()
                        if excluded_domains or domains:
                            parsed = urlparse(url)
                            if (
                                (
                                    _excluded_domains
                                    and parsed.netloc in _excluded_domains
                                )
                                or (
                                    domains
                                    and parsed.netloc not in domains
                                )
                            ):
                                skip_url = True

                    elif not line.strip():
                        url = None
                        skip_url = False

                    else:
                        if skip_url:
                            continue

                        se = StreamEpoch.from_snclline(line)
                        stream_epochs.append(se)

                return stream_epochs
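The payload shape the parser expects can be read off the loop above: a URL line, one SNCL line per stream epoch, and a blank line terminating each block. A hypothetical invocation (URL and SNCL line are purely illustrative):

    text = (
        "http://example.org/fdsnws/dataselect/1/query\n"
        "GR BFO -- LHZ 2020-01-01T00:00:00 2020-01-02T00:00:00\n"
        "\n"
    )
    # keep only stream epochs routed to the example.org domain
    epochs = _parse_stream_epochs(text, domains=["example.org"])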
Example #5
    async def _emerge_routes(
        self,
        text,
        post,
        default_endtime,
    ):
        """
        Default implementation parsing the routing service's output stream and
        create fully demultiplexed routes. Note that routes with an exceeded
        per client retry-budget are dropped.
        """
        def validate_stream_durations(stream_duration, total_stream_duration):
            exceeds_stream_limit = (
                self.max_stream_epoch_duration is not None
                and stream_duration > self.max_stream_epoch_duration
            )
            exceeds_total_limit = (
                self.max_total_stream_epoch_duration is not None
                and total_stream_duration
                > self.max_total_stream_epoch_duration
            )
            if exceeds_stream_limit or exceeds_total_limit:
                self.logger.debug(
                    "Exceeded configured limits: {}{}".format(
                        "stream_duration="
                        f"{stream_duration.total_seconds()}s (configured="
                        f"{self.max_stream_epoch_duration.total_seconds()}s), "
                        if self.max_stream_epoch_duration
                        else "",
                        "total_stream_duration="
                        f"{total_stream_duration.total_seconds()}s "
                        "(configured="
                        f"{self.max_total_stream_epoch_duration.total_seconds()}s)"
                        if self.max_total_stream_epoch_duration
                        else "",
                    )
                )
                raise FDSNHTTPError.create(
                    413,
                    self.request,
                    request_submitted=self.request_submitted,
                    service_version=__version__,
                    error_desc_long=(
                        "Exceeded configured stream epoch limits: "
                        "({}{})".format(
                            "limit per requested stream epoch="
                            f"{self.max_stream_epoch_duration.days} days, "
                            if self.max_stream_epoch_duration else "",
                            f"total={self.max_total_stream_epoch_duration.days}"
                            " days"
                            if self.max_total_stream_epoch_duration else "",
                        )),
                )

        url = None
        skip_url = False

        urls = set()
        routes = []
        total_stream_duration = datetime.timedelta()

        for line in text.split("\n"):
            if not url:
                url = line.strip()

                try:
                    e_ratio = await self.get_cretry_budget_error_ratio(url)
                except Exception:
                    # no retry-budget information available; do not skip the
                    # URL
                    pass
                else:
                    if e_ratio > self.client_retry_budget_threshold:
                        self.logger.warning(
                            f"Exceeded per client retry-budget for {url} "
                            f"(e_ratio={e_ratio}).")
                        skip_url = True

            elif not line.strip():
                urls.add(url)

                url = None
                skip_url = False

            else:
                if skip_url:
                    continue

                # XXX(damb): Do not substitute an empty endtime when
                # performing HTTP GET requests in order to guarantee
                # more cache hits (if eida-federator is coupled with
                # HTTP caching proxy).
                se = StreamEpoch.from_snclline(
                    line,
                    default_endtime=default_endtime if post else None,
                )

                stream_duration = se.duration
                try:
                    total_stream_duration += stream_duration
                except OverflowError:
                    total_stream_duration = datetime.timedelta.max

                validate_stream_durations(stream_duration,
                                          total_stream_duration)

                routes.append(Route(url=url, stream_epochs=[se]))

        return urls, routes
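One detail worth isolating is the overflow guard around ``total_stream_duration``: instead of propagating the ``OverflowError`` raised by ``datetime.timedelta`` arithmetic, the total is clamped to ``timedelta.max``. A minimal self-contained sketch:

    import datetime

    def accumulate(total, duration):
        # clamp to timedelta.max instead of raising, mirroring the
        # try/except in _emerge_routes
        try:
            return total + duration
        except OverflowError:
            return datetime.timedelta.max

    total = datetime.timedelta.max - datetime.timedelta(days=1)
    total = accumulate(total, datetime.timedelta(days=2))
    assert total == datetime.timedelta.max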
Example #6
    def _process_request(self, args, stream_epochs):
        # resolve virtual network stream epochs
        vnet_stream_epochs_found = []
        vnet_stream_epochs_resolved = []
        for stream_epoch in stream_epochs:
            self.logger.debug(f"Resolving {stream_epoch!r} regarding VNET.")
            resolved = resolve_vnetwork(db.session, stream_epoch)
            if resolved:
                vnet_stream_epochs_resolved.extend(resolved)
                vnet_stream_epochs_found.append(stream_epoch)

        self.logger.debug(
            f"Stream epochs from VNETs: {vnet_stream_epochs_resolved!r}")

        for vnet_stream_epoch in vnet_stream_epochs_found:
            stream_epochs.remove(vnet_stream_epoch)

        stream_epochs.extend(vnet_stream_epochs_resolved)

        # NOTE(damb): Neither merge epochs nor trim them to the query epoch if
        # service == "station"
        merge_epochs = trim_to_stream_epoch = args["service"] != "station"
        canonicalize_epochs = args["service"] == "station"

        # collect results for each stream epoch
        routes = []
        for stream_epoch in stream_epochs:
            self.logger.debug(f"Processing request for {stream_epoch!r}")
            # query
            _routes = query_routes(
                db.session,
                stream_epoch,
                args["service"],
                level=args["level"],
                access=args["access"],
                method=args["method"],
                minlat=args["minlatitude"],
                maxlat=args["maxlatitude"],
                minlon=args["minlongitude"],
                maxlon=args["maxlongitude"],
                trim_to_stream_epoch=trim_to_stream_epoch,
            )

            if trim_to_stream_epoch:
                # adjust stream epochs regarding time constraints
                for url, stream_epochs_handler in _routes:
                    stream_epochs_handler.modify_with_temporal_constraints(
                        start=stream_epoch.starttime, end=stream_epoch.endtime)
            elif canonicalize_epochs:
                # canonicalize epochs
                for url, stream_epochs_handler in _routes:
                    stream_epochs_handler.canonicalize_epochs(
                        start=stream_epoch.starttime, end=stream_epoch.endtime)

            routes.extend(_routes)

        self.logger.debug(f"Routes: {routes}")

        # merge routes
        processed_routes = collections.defaultdict(StreamEpochsHandler)
        for url, stream_epochs_handler in routes:
            for se_lst in generate_stream_epochs(
                    stream_epochs_handler, merge_epochs=merge_epochs):
                for se in se_lst:
                    processed_routes[url].add(se)

        self.logger.debug(f"Routes (processed): {processed_routes}")
        # demux
        for url, stream_epochs_handler in processed_routes.items():
            if args["level"] in ("network", "station"):
                processed_routes[url] = [
                    StreamEpoch.from_streamepochs(stream_epochs)
                    for stream_epochs in stream_epochs_handler
                ]
            else:
                processed_routes[url] = [
                    se
                    for se_lst in generate_stream_epochs(
                        stream_epochs_handler, merge_epochs=merge_epochs)
                    for se in se_lst
                ]

        # sort response
        routes = [
            Route(url=url, stream_epochs=sorted(stream_epochs))
            for url, stream_epochs in processed_routes.items()
        ]

        # sort additionally by URL
        routes.sort()

        ostream = OutputStream.create(
            args["format"],
            routes=routes,
        )
        return str(ostream)
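The flag derivation near the top is easy to misread: for the "station" service epochs are canonicalized but neither merged nor trimmed, while every other service gets both merging and trimming to the query epoch. A minimal sketch of the resulting truth table:

    for service in ("station", "dataselect"):
        merge_epochs = trim_to_stream_epoch = service != "station"
        canonicalize_epochs = service == "station"
        print(service, merge_epochs, trim_to_stream_epoch,
              canonicalize_epochs)
    # station False False True
    # dataselect True True False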
Example #7
def query_routes(
    session,
    stream_epoch,
    service,
    level="channel",
    access="any",
    method=None,
    minlat=-90.0,
    maxlat=90.0,
    minlon=-180.0,
    maxlon=180.0,
    like_escape="/",
    trim_to_stream_epoch=True,
):
    """
    Return routes for a given stream epoch.

    :param session: SQLAlchemy session
    :type session: :py:class:`sqlalchemy.orm.session.Session`
    :param stream_epoch: StreamEpoch the database query is performed with
    :type stream_epoch: :py:class:`~eidaws.utils.sncl.StreamEpoch`
    :param str service: String specifying the webservice
    :param str level: Optional `fdsnws-station` *level* parameter
    :param str access: Optional access parameter
    :param method: Optional list of FDSNWS method tokens to filter for
    :type method: List of str or None
    :param float minlat: Minimum latitude; matching epochs have a latitude
        greater than or equal to this value
    :param float maxlat: Maximum latitude; matching epochs have a latitude
        less than or equal to this value
    :param float minlon: Minimum longitude; matching epochs have a longitude
        greater than or equal to this value
    :param float maxlon: Maximum longitude; matching epochs have a longitude
        less than or equal to this value
    :param str like_escape: Character used for the SQL ``ESCAPE`` clause
    :param bool trim_to_stream_epoch: Indicates whether resulting stream
        epochs should be trimmed to the epoch of `stream_epoch` (if possible)
    :return: List of :py:class:`~eidaws.utils.misc.Route` objects
    :rtype: list
    """
    sql_stream_epoch = stream_epoch.fdsnws_to_sql_wildcards()
    logger.debug(f"Processing request for (SQL) {sql_stream_epoch!r}")

    query = _create_route_query(
        session,
        service,
        **sql_stream_epoch._asdict(short_keys=True),
        level=level,
        access=access,
        method=method,
        minlat=minlat,
        maxlat=maxlat,
        minlon=minlon,
        maxlon=maxlon,
        like_escape=like_escape,
    )
    routes = collections.defaultdict(StreamEpochsHandler)
    for row in query.all():
        # NOTE(damb): Adjust the epoch in case the orm.Epoch is smaller/larger
        # than the RoutingEpoch (with regard to time constraints); at least
        # one starttime must be configured
        starttimes = [row[4], row[6]]
        endtimes = [row[5], row[7]]

        if trim_to_stream_epoch:
            starttimes.append(sql_stream_epoch.starttime)
            endtimes.append(sql_stream_epoch.endtime)

        starttime = max(t for t in starttimes if t is not None)
        try:
            endtime = min(t for t in endtimes if t is not None)
        except ValueError:
            endtime = None

        if endtime is not None and endtime <= starttime:
            continue

        sta = row[2]
        loc = row[1]
        cha = row[0]
        if level == "network":
            sta = loc = cha = "*"
        elif level == "station":
            loc = cha = "*"

        # NOTE(damb): Set endtime to 'max' if undefined (i.e. device currently
        # acquiring data).
        with none_as_max(endtime) as end:
            stream_epoch = StreamEpoch.from_sncl(
                network=row[3],
                station=sta,
                location=loc,
                channel=cha,
                starttime=starttime,
                endtime=end,
            )

            routes[row[8]].add(stream_epoch)

    return [
        Route(url=url, stream_epochs=streams)
        for url, streams in routes.items()
    ]
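The epoch-intersection rule inside the row loop is worth isolating: the effective starttime is the latest configured starttime, the effective endtime the earliest non-None endtime (None meaning open-ended), and empty intersections are skipped. A self-contained sketch:

    import datetime

    def intersect(starttimes, endtimes):
        starttime = max(t for t in starttimes if t is not None)
        try:
            endtime = min(t for t in endtimes if t is not None)
        except ValueError:      # all endtimes are None -> open-ended
            endtime = None
        if endtime is not None and endtime <= starttime:
            return None         # empty intersection, skip
        return starttime, endtime

    print(intersect(
        [datetime.datetime(2020, 1, 1), datetime.datetime(2020, 6, 1)],
        [None, datetime.datetime(2021, 1, 1)],
    ))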
Example #8
            def _prepare_history(history, stream_epoch_dict):
                # synchronize history with eidaws-stationlite stream epochs
                idx = {
                    level: set(stream_epochs)
                    for level, stream_epochs in stream_epoch_dict.items()
                }

                seen = defaultdict(set)
                total = 0
                from_history = []
                try:
                    for entry in history:
                        stream_epoch = entry["stream"]
                        query_params = entry["params"]
                        if not stream_epoch or not query_params:
                            continue
                        level = query_params["level"]
                        format_ = query_params["format"]
                        if not level or not format_:
                            continue

                        if (
                            self.config["history_by_status"]
                            and entry["status"]
                            not in self.config["history_by_status"]
                        ):
                            continue

                        stream_epoch = StreamEpoch.from_snclline(stream_epoch)
                        if level in idx and stream_epoch in idx[level]:
                            from_history.append(
                                (
                                    stream_epoch,
                                    {"format": format_, "level": level},
                                )
                            )

                            seen[level].add(stream_epoch)
                            total += 1

                except (KeyError, TypeError) as err:
                    raise InvalidHistory(err)

                supplementary = {}
                if self.config["history_include_stl"]:
                    self.logger.debug(
                        "Checking for supplementary stream epochs"
                    )
                    for level, stream_epochs in stream_epoch_dict.items():
                        supplementary[level] = list(
                            set(stream_epochs) - seen.get(level, set())
                        )

                    self.logger.debug(
                        "Found {} supplementary stream epochs to be "
                        "crawled".format(
                            sum(
                                len(se_lst)
                                for se_lst in supplementary.values()
                            )
                        )
                    )
                    total += _total(stream_epoch_dict)

                return from_history, supplementary, total
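The supplementary computation boils down to a set difference: stream epochs known to eidaws-stationlite but absent from the crawl history are queued in addition. A minimal sketch with strings standing in for ``StreamEpoch`` objects:

    stl = {"SE1", "SE2", "SE3"}          # epochs known to stationlite
    seen_from_history = {"SE1"}          # epochs already covered by history
    supplementary = list(stl - seen_from_history)
    assert sorted(supplementary) == ["SE2", "SE3"]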