Ejemplo n.º 1
0
    def __init__(self, config):
        """File store that can pre-seed its cache from an archive fetched over HTTP.

        Example configuration:

            file = {
                driver = "seeded-file",
                path = "/path/to/cache",
                url = "https://example.com/cache.tar.gz"
            }

        Configuration keys:

            path: Local file system location where the cache is stored.
            url: Location to download the preseed data from.
            archive_relpath: Path inside the archive pointing at the root of
                             the cache data.
            reseed_interval: Minutes after which the data is seeded again
                             (0, the default, disables reseeding).

        Supported archive formats include `.zip`, `.tar`, `.tar.gz`, `.tar.bz2` or `.tar.zst`.
        """
        # Seeding configuration.
        self._reseed_interval = config.get('reseed_interval', 0)
        self._url = config['url']
        self._archive_relpath = config.get('archive_relpath')

        # HTTP session with retry behaviour, used to fetch the seed archive.
        self._session = requests_retry_session()

        # Forward 'hash_type' to the parent store only when explicitly set.
        parent_kwargs = {'directory': config['path']}
        if 'hash_type' in config:
            parent_kwargs['hash_type'] = config['hash_type']

        super(SeededFileStore, self).__init__(**parent_kwargs)
Ejemplo n.º 2
0
Archivo: query.py Proyecto: armenzg/adr
def query_activedata(query, url):
    """Runs the provided query against the ActiveData endpoint.

    :param dict query: yaml-formatted query to be run.
    :param str url: url to run query
    :returns str: json-formatted string.
    """
    t0 = time.time()
    resp = requests_retry_session().post(url, data=query, stream=True)
    elapsed_ms = (time.time() - t0) * 1000.0
    logger.debug("Query execution time: " + "{:.3f} ms".format(elapsed_ms))

    # On a non-success status, surface whatever diagnostic body the server
    # sent (pretty-printed JSON when possible, raw text otherwise), then
    # raise the HTTP error for the caller.
    if resp.status_code != 200:
        try:
            print(json.dumps(resp.json(), indent=2))
        except ValueError:
            # Body was not valid JSON; show it verbatim.
            print(resp.text)
        resp.raise_for_status()

    return resp.json()
Ejemplo n.º 3
0
Archivo: query.py Proyecto: armenzg/adr
def run_query(name, args, cache=True):
    """Loads and runs the specified query, yielding the result.

    Given name of a query, this method will first read the query
    from a .query file corresponding to the name.

    After queries are loaded, each query to be run is inspected
    and overridden if the provided context has values for limit.

    The actual call to the ActiveData endpoint is encapsulated
    inside the query_activedata method.

    :param str name: name of the query file to be loaded.
    :param Namespace args: namespace of ActiveData configs.
    :param bool cache: Defaults to True. It controls whether to cache the results.
    :return str: json-formatted string.
    :raises MissingDataError: if the query errors, times out while polling,
        or returns no data.
    """
    context = vars(args)
    formatted_context = ", ".join([f"{k}={v}" for k, v in context.items()])
    logger.debug(f"Running query '{name}' with context: {formatted_context}")
    query = load_query(name)

    # Context values only fill in 'limit'/'format' when the query itself
    # does not already define them.
    if "limit" not in query and "limit" in context:
        query["limit"] = context["limit"]
    if "format" not in query and "format" in context:
        query["format"] = context["format"]
    if config.debug:
        # Ask ActiveData to save the query server-side for later inspection.
        query["meta"] = {"save": True}

    # Substitute context values into the query template (JSON-e rendering).
    query = jsone.render(query, context)
    query_str = json.dumps(query, indent=2, separators=(",", ":"))

    # translate "all" to a null value (which ActiveData will treat as all)
    query_str = query_str.replace('"all"', "null")
    query_hash = config.cache._hash(query_str)

    # Cache key derives from the fully rendered query, so any change in
    # name or context produces a distinct cache entry.
    key = f"run_query.{name}.{query_hash}"
    if config.cache.has(key):
        logger.debug(f"Loading results from cache")
        return config.cache.get(key)

    logger.trace(f"JSON representation of query:\n{query_str}")
    result = query_activedata(query_str, config.url)

    if result.get('url'):
        # Deferred response: poll the 'status' URL every 2 seconds until the
        # data is ready, then fetch the final payload from 'url'. The loop
        # gives up after 3 consecutive JSON decode failures; note that in
        # that case it falls through with the deferred result unchanged and
        # the no-data check below raises.
        problem = 0  # consecutive JSONDecodeError count while polling
        i = 0        # seconds waited so far
        timeout = 300
        while problem < 3:
            time.sleep(2)
            i += 2
            try:
                monitor = requests_retry_session().get(result['status']).json()
                logger.debug(f"waiting: {json.dumps(monitor)}")
                # A successful decode resets the consecutive-failure counter.
                problem = 0
                if monitor['status'] == 'done':
                    result = requests_retry_session().get(result['url']).json()
                    break
                elif monitor['status'] == 'error':
                    raise MissingDataError("Problem with query " + json.dumps(monitor['error']))
                elif i > timeout:
                    raise MissingDataError(f"Timed out after {timeout} seconds waiting "
                                           "for 'done' status")
                else:
                    logger.debug(f"status=\"{monitor['status']}\", waiting for \"done\"")
            except JSONDecodeError:
                # HAPPENS WHEN ASKING FOR status TOO SOON
                # (DELAY BETWEEN TIME WRITTEN TO S3 AND TIME AVAILABLE FROM S3)
                problem += 1

    if not result.get("data"):
        logger.warning(f"Query '{name}' returned no data with context: {formatted_context}")
        logger.debug("JSON Response:\n{response}", response=json.dumps(result, indent=2))
        raise MissingDataError("ActiveData didn't return any data.")

    if cache:
        config.cache.put(key, result, config["cache"]["retention"])
    return result