Esempio n. 1
0
    def query(self, query, common_prefixes=True, timeout=None, log_failure=True, preferred_media_types=None, defer=False):
        """Execute a SPARQL query against the endpoint at ``self._url``.

        :param query: the SPARQL query string.
        :param common_prefixes: if True, let normalize_query prepend the
            common prefix declarations.
        :param timeout: optional timeout, sent to the server as a
            ``Timeout`` request header.
        :param log_failure: log failed queries at ERROR level (otherwise
            at DEBUG).
        :param preferred_media_types: media types to request via the
            ``Accept`` header; only honoured when ``defer`` is True.
        :param defer: if False (default), fully materialize the parsed
            result before returning; if True, return the streaming parser.
        :returns: the parsed result (streaming when ``defer`` is True).
        :raises QueryError: when the server responds with an HTTP error.
        """
        original_query = query
        query = self.normalize_query(query, common_prefixes)

        request = urllib2.Request(self._url, urllib.urlencode({
            'query': query.encode('utf-8'),
        }))

        if not defer:
            # Pick the quickest to parse, as it will never be passed through
            # verbatim.
            preferred_media_types = self._supported_media_types
        if preferred_media_types:
            request.add_header('Accept', self._get_accept_header(preferred_media_types))
        else:
            request.add_header('Accept', self._accept_header)
        request.add_header('User-Agent', USER_AGENTS['agent'])
        if timeout:
            request.add_header('Timeout', str(timeout))

        start_time = time.time()

        try:
            # Use the module logger (as the rest of this method does), not
            # the root logging module.
            logger.debug("Querying %r", self._url)
            try:
                response = urllib2.urlopen(request)
            except urllib2.HTTPError as e:
                error_content = e.read()
                raise QueryError(error_content, e.code)

            time_to_start = time.time() - start_time

            # Split the Content-Type header into the bare media type and its
            # parameters, e.g. "text/plain; charset=utf-8".
            content_type, params = response.headers.get('Content-Type', 'application/rdf+xml'), {}
            if ';' in content_type:
                content_type, params_ = content_type.split(';', 1)
                content_type = content_type.strip()
                for param in params_.split(';'):
                    if '=' in param:
                        # BUG FIX: strip whitespace around the parameter name
                        # and value. "text/plain; charset=iso-8859-1"
                        # previously produced the key ' charset' (leading
                        # space), so the charset lookup below always fell
                        # back to UTF-8 regardless of the server's header.
                        key, _, value = param.partition('=')
                        params[key.strip()] = value.strip()
            encoding = params.get('charset', 'UTF-8')
            if content_type in streaming.parsers:
                result = streaming.parsers[content_type](response, encoding)
            else:
                raise AssertionError("Unexpected content-type: %s" % content_type)
            result.query = query
            result.duration = time.time() - start_time
            if not defer:
                result = result.get()
            logger.debug("SPARQL query: %r; took %.2f (%.2f) seconds\n", original_query, time.time() - start_time, time_to_start)
            statsd.timing('humfrey.sparql-query.duration', (time.time() - start_time)*1000)
            statsd.incr('humfrey.sparql-query.success')
            return result
        except Exception:
            try:
                (logger.error if log_failure else logger.debug)(
                    "Failed query: %r; took %.2f seconds", original_query, time.time() - start_time,
                    exc_info=1)
            except UnboundLocalError:
                pass
            statsd.incr('humfrey.sparql-query.fail')
            raise
Esempio n. 2
0
    def __iter__(self):
        """Iterate over the wrapped results in this serializer's media type.

        A StreamingParser that already produces our media type is passed
        through verbatim; anything else is normalized into one of the
        supported SPARQL result kinds and handed off to self._iter.
        """
        results = self._results

        # Fast path: identical media type means we can stream straight through.
        if isinstance(results, StreamingParser) and results.media_type == self.media_type:
            statsd.incr('humfrey.streaming.pass-through.yes')
            return iter(results)
        statsd.incr('humfrey.streaming.pass-through.no')

        fields = bindings = boolean = triples = None

        if isinstance(results, bool):
            sparql_results_type = 'boolean'
            boolean = results
        elif isinstance(results, SparqlResultList):
            sparql_results_type = 'resultset'
            fields, bindings = results.fields, results
        elif isinstance(results, StreamingParser):
            # A parser for a different media type: pull out whatever its
            # declared result kind provides.
            sparql_results_type = results.get_sparql_results_type()
            if sparql_results_type == 'resultset':
                fields = results.get_fields()
                bindings = results.get_bindings()
            elif sparql_results_type == 'boolean':
                boolean = results.get_boolean()
            elif sparql_results_type == 'graph':
                triples = results.get_triples()
        elif isinstance(results, (list, types.GeneratorType, rdflib.ConjunctiveGraph)):
            # Assume iterable-ish things are graphs / lists of triples
            sparql_results_type = 'graph'
            triples = results
        elif hasattr(results, '__iter__'):
            sparql_results_type = 'graph'
            triples = results
        else:
            raise TypeError("{0} object cannot be serialized".format(type(results)))

        if sparql_results_type not in self.supported_results_types:
            raise TypeError("Unexpected results type: {0}".format(sparql_results_type))

        return self._iter(sparql_results_type, fields, bindings, boolean, triples)
Esempio n. 3
0
    def __iter__(self):
        """Return an iterator producing this serializer's media type.

        When the underlying results object is a StreamingParser whose media
        type matches ours, its byte stream is reused directly; otherwise the
        results are classified as boolean / resultset / graph and passed to
        self._iter for serialization.
        """
        results = self._results

        can_pass_through = (isinstance(results, StreamingParser)
                            and results.media_type == self.media_type)
        if can_pass_through:
            statsd.incr('humfrey.streaming.pass-through.yes')
            return iter(results)
        statsd.incr('humfrey.streaming.pass-through.no')

        sparql_results_type = fields = bindings = boolean = triples = None

        if isinstance(results, bool):
            sparql_results_type, boolean = 'boolean', results
        elif isinstance(results, SparqlResultList):
            sparql_results_type = 'resultset'
            fields, bindings = results.fields, results
        elif isinstance(results, StreamingParser):
            # Different media type: extract the data for its result kind.
            sparql_results_type = results.get_sparql_results_type()
            if sparql_results_type == 'resultset':
                fields, bindings = results.get_fields(), results.get_bindings()
            elif sparql_results_type == 'boolean':
                boolean = results.get_boolean()
            elif sparql_results_type == 'graph':
                triples = results.get_triples()
        elif (isinstance(results, (list, types.GeneratorType, rdflib.ConjunctiveGraph))
              or hasattr(results, '__iter__')):
            # Assume iterable-ish things are graphs / lists of triples.
            sparql_results_type, triples = 'graph', results
        else:
            raise TypeError("{0} object cannot be serialized".format(type(results)))

        if sparql_results_type not in self.supported_results_types:
            raise TypeError("Unexpected results type: {0}".format(sparql_results_type))

        return self._iter(sparql_results_type, fields, bindings, boolean, triples)
Esempio n. 4
0
    def query(self,
              query,
              common_prefixes=True,
              timeout=None,
              log_failure=True,
              preferred_media_types=None,
              defer=False):
        """Execute a SPARQL query against the endpoint at ``self._url``.

        :param query: the SPARQL query string.
        :param common_prefixes: if True, let normalize_query prepend the
            common prefix declarations.
        :param timeout: optional timeout, sent to the server as a
            ``Timeout`` request header.
        :param log_failure: log failed queries at ERROR level (otherwise
            at DEBUG).
        :param preferred_media_types: media types to request via the
            ``Accept`` header; only honoured when ``defer`` is True.
        :param defer: if False (default), fully materialize the parsed
            result before returning; if True, return the streaming parser.
        :returns: the parsed result (streaming when ``defer`` is True).
        :raises QueryError: when the server responds with an HTTP error.
        """
        original_query = query
        query = self.normalize_query(query, common_prefixes)

        request = urllib2.Request(
            self._url, urllib.urlencode({
                'query': query.encode('utf-8'),
            }))

        if not defer:
            # Pick the quickest to parse, as it will never be passed through
            # verbatim.
            preferred_media_types = self._supported_media_types
        if preferred_media_types:
            request.add_header('Accept',
                               self._get_accept_header(preferred_media_types))
        else:
            request.add_header('Accept', self._accept_header)
        request.add_header('User-Agent', USER_AGENTS['agent'])
        if timeout:
            request.add_header('Timeout', str(timeout))

        start_time = time.time()

        try:
            # Use the module logger (as the rest of this method does), not
            # the root logging module.
            logger.debug("Querying %r", self._url)
            try:
                response = urllib2.urlopen(request)
            except urllib2.HTTPError as e:
                error_content = e.read()
                raise QueryError(error_content, e.code)

            time_to_start = time.time() - start_time

            # Split the Content-Type header into the bare media type and its
            # parameters, e.g. "text/plain; charset=utf-8".
            content_type, params = response.headers.get(
                'Content-Type', 'application/rdf+xml'), {}
            if ';' in content_type:
                content_type, params_ = content_type.split(';', 1)
                content_type = content_type.strip()
                for param in params_.split(';'):
                    if '=' in param:
                        # BUG FIX: strip whitespace around the parameter name
                        # and value. "text/plain; charset=iso-8859-1"
                        # previously produced the key ' charset' (leading
                        # space), so the charset lookup below always fell
                        # back to UTF-8 regardless of the server's header.
                        key, _, value = param.partition('=')
                        params[key.strip()] = value.strip()
            encoding = params.get('charset', 'UTF-8')
            if content_type in streaming.parsers:
                result = streaming.parsers[content_type](response, encoding)
            else:
                raise AssertionError("Unexpected content-type: %s" %
                                     content_type)
            result.query = query
            result.duration = time.time() - start_time
            if not defer:
                result = result.get()
            logger.debug("SPARQL query: %r; took %.2f (%.2f) seconds\n",
                         original_query,
                         time.time() - start_time, time_to_start)
            statsd.timing('humfrey.sparql-query.duration',
                          (time.time() - start_time) * 1000)
            statsd.incr('humfrey.sparql-query.success')
            return result
        except Exception:
            try:
                (logger.error if log_failure else logger.debug)(
                    "Failed query: %r; took %.2f seconds",
                    original_query,
                    time.time() - start_time,
                    exc_info=1)
            except UnboundLocalError:
                pass
            statsd.incr('humfrey.sparql-query.fail')
            raise