def query(self, query, common_prefixes=True, timeout=None, log_failure=True, preferred_media_types=None, defer=False):
    """Execute a SPARQL query against the endpoint at ``self._url``.

    :param query: the SPARQL query string.
    :param common_prefixes: if true, prepend common prefix declarations
        (delegated to ``normalize_query``).
    :param timeout: optional timeout, forwarded as a ``Timeout`` header.
    :param log_failure: log failed queries at ERROR level (DEBUG otherwise).
    :param preferred_media_types: media types for the ``Accept`` header;
        overridden when ``defer`` is false (see comment below).
    :param defer: if false, eagerly parse and return concrete results;
        if true, return the streaming parser for the caller to consume.
    :returns: a streaming parser (``defer=True``) or its parsed result.
    :raises QueryError: when the endpoint responds with an HTTP error.
    """
    original_query = query
    query = self.normalize_query(query, common_prefixes)
    request = urllib2.Request(self._url, urllib.urlencode({
        'query': query.encode('utf-8'),
    }))

    if not defer:
        # Pick the quickest to parse, as it will never be passed through
        # verbatim.
        preferred_media_types = self._supported_media_types
    if preferred_media_types:
        request.add_header('Accept', self._get_accept_header(preferred_media_types))
    else:
        request.add_header('Accept', self._accept_header)
    request.add_header('User-Agent', USER_AGENTS['agent'])
    if timeout:
        request.add_header('Timeout', str(timeout))

    start_time = time.time()
    try:
        # BUG FIX: was logging.debug (root logger); use the module-level
        # logger for consistency with the other log calls in this method.
        logger.debug("Querying %r", self._url)
        try:
            response = urllib2.urlopen(request)
        except urllib2.HTTPError as e:
            error_content = e.read()
            raise QueryError(error_content, e.code)
        time_to_start = time.time() - start_time

        content_type, params = response.headers.get('Content-Type', 'application/rdf+xml'), {}
        if ';' in content_type:
            content_type, params_ = content_type.split(';', 1)
            for param in params_.split(';'):
                if '=' in param:
                    # BUG FIX: strip whitespace around names and values.
                    # A header like "a/b; charset=utf-8" previously produced
                    # the key " charset" (leading space), so the charset
                    # lookup below always fell back to the default.
                    name, value = param.split('=', 1)
                    params[name.strip()] = value.strip()
        encoding = params.get('charset', 'UTF-8')

        if content_type in streaming.parsers:
            result = streaming.parsers[content_type](response, encoding)
        else:
            raise AssertionError("Unexpected content-type: %s" % content_type)

        result.query = query
        result.duration = time.time() - start_time
        if not defer:
            result = result.get()

        logger.debug("SPARQL query: %r; took %.2f (%.2f) seconds\n",
                     original_query, time.time() - start_time, time_to_start)
        statsd.timing('humfrey.sparql-query.duration', (time.time() - start_time) * 1000)
        statsd.incr('humfrey.sparql-query.success')
        return result
    except Exception:
        try:
            # Guard against locals (e.g. start_time) being unbound if the
            # failure happened before they were assigned; best-effort logging
            # must not mask the original exception.
            (logger.error if log_failure else logger.debug)(
                "Failed query: %r; took %.2f seconds",
                original_query, time.time() - start_time, exc_info=1)
        except UnboundLocalError:
            pass
        statsd.incr('humfrey.sparql-query.fail')
        raise
def __iter__(self):
    """Yield the serialized form of ``self._results``.

    When the underlying streaming parser already produces this
    serializer's media type, its output is passed through verbatim;
    otherwise the results are classified and handed to ``self._iter``.
    """
    source = self._results

    # Fast path: the parser already emits our media type — stream it
    # through unchanged.
    if isinstance(source, StreamingParser) and source.media_type == self.media_type:
        statsd.incr('humfrey.streaming.pass-through.yes')
        return iter(source)

    statsd.incr('humfrey.streaming.pass-through.no')
    fields = bindings = boolean = triples = None

    if isinstance(source, bool):
        sparql_results_type, boolean = 'boolean', source
    elif isinstance(source, SparqlResultList):
        sparql_results_type, fields, bindings = 'resultset', source.fields, source
    elif isinstance(source, StreamingParser):
        sparql_results_type = source.get_sparql_results_type()
        if sparql_results_type == 'resultset':
            fields, bindings = source.get_fields(), source.get_bindings()
        elif sparql_results_type == 'boolean':
            boolean = source.get_boolean()
        elif sparql_results_type == 'graph':
            triples = source.get_triples()
    elif isinstance(source, (list, types.GeneratorType, rdflib.ConjunctiveGraph)):
        sparql_results_type, triples = 'graph', source
    elif hasattr(source, '__iter__'):
        # Assume iterable-ish things are graphs / lists of triples
        sparql_results_type, triples = 'graph', source
    else:
        raise TypeError("{0} object cannot be serialized".format(type(source)))

    if sparql_results_type not in self.supported_results_types:
        raise TypeError("Unexpected results type: {0}".format(sparql_results_type))

    return self._iter(sparql_results_type, fields, bindings, boolean, triples)
def __iter__(self):
    """Serialize ``self._results``, streaming the parser's output
    verbatim when it already matches this serializer's media type.
    """
    results = self._results
    if isinstance(results, StreamingParser) and results.media_type == self.media_type:
        # No re-serialization needed; hand the raw stream straight through.
        statsd.incr('humfrey.streaming.pass-through.yes')
        return iter(results)
    statsd.incr('humfrey.streaming.pass-through.no')

    def classify(value):
        # Map a results object to (type, fields, bindings, boolean, triples).
        if isinstance(value, bool):
            return 'boolean', None, None, value, None
        if isinstance(value, SparqlResultList):
            return 'resultset', value.fields, value, None, None
        if isinstance(value, StreamingParser):
            kind = value.get_sparql_results_type()
            if kind == 'resultset':
                return kind, value.get_fields(), value.get_bindings(), None, None
            if kind == 'boolean':
                return kind, None, None, value.get_boolean(), None
            if kind == 'graph':
                return kind, None, None, None, value.get_triples()
            return kind, None, None, None, None
        # Assume iterable-ish things are graphs / lists of triples
        if isinstance(value, (list, types.GeneratorType, rdflib.ConjunctiveGraph)):
            return 'graph', None, None, None, value
        if hasattr(value, '__iter__'):
            return 'graph', None, None, None, value
        raise TypeError("{0} object cannot be serialized".format(type(value)))

    sparql_results_type, fields, bindings, boolean, triples = classify(results)

    if sparql_results_type not in self.supported_results_types:
        raise TypeError("Unexpected results type: {0}".format(sparql_results_type))
    return self._iter(sparql_results_type, fields, bindings, boolean, triples)
def query(self, query, common_prefixes=True, timeout=None, log_failure=True, preferred_media_types=None, defer=False):
    """Execute a SPARQL query against the endpoint at ``self._url``.

    :param query: the SPARQL query string.
    :param common_prefixes: if true, prepend common prefix declarations
        (delegated to ``normalize_query``).
    :param timeout: optional timeout, forwarded as a ``Timeout`` header.
    :param log_failure: log failed queries at ERROR level (DEBUG otherwise).
    :param preferred_media_types: media types for the ``Accept`` header;
        overridden when ``defer`` is false (see comment below).
    :param defer: if false, eagerly parse and return concrete results;
        if true, return the streaming parser for the caller to consume.
    :returns: a streaming parser (``defer=True``) or its parsed result.
    :raises QueryError: when the endpoint responds with an HTTP error.
    """
    original_query = query
    query = self.normalize_query(query, common_prefixes)
    request = urllib2.Request(
        self._url,
        urllib.urlencode({
            'query': query.encode('utf-8'),
        }))

    if not defer:
        # Pick the quickest to parse, as it will never be passed through
        # verbatim.
        preferred_media_types = self._supported_media_types
    if preferred_media_types:
        request.add_header('Accept', self._get_accept_header(preferred_media_types))
    else:
        request.add_header('Accept', self._accept_header)
    request.add_header('User-Agent', USER_AGENTS['agent'])
    if timeout:
        request.add_header('Timeout', str(timeout))

    start_time = time.time()
    try:
        # BUG FIX: was logging.debug (root logger); use the module-level
        # logger for consistency with the other log calls in this method.
        logger.debug("Querying %r", self._url)
        try:
            response = urllib2.urlopen(request)
        except urllib2.HTTPError as e:
            error_content = e.read()
            raise QueryError(error_content, e.code)
        time_to_start = time.time() - start_time

        content_type, params = response.headers.get(
            'Content-Type', 'application/rdf+xml'), {}
        if ';' in content_type:
            content_type, params_ = content_type.split(';', 1)
            for param in params_.split(';'):
                if '=' in param:
                    # BUG FIX: strip whitespace around names and values.
                    # A header like "a/b; charset=utf-8" previously produced
                    # the key " charset" (leading space), so the charset
                    # lookup below always fell back to the default.
                    name, value = param.split('=', 1)
                    params[name.strip()] = value.strip()
        encoding = params.get('charset', 'UTF-8')

        if content_type in streaming.parsers:
            result = streaming.parsers[content_type](response, encoding)
        else:
            raise AssertionError("Unexpected content-type: %s" % content_type)

        result.query = query
        result.duration = time.time() - start_time
        if not defer:
            result = result.get()

        logger.debug("SPARQL query: %r; took %.2f (%.2f) seconds\n",
                     original_query, time.time() - start_time, time_to_start)
        statsd.timing('humfrey.sparql-query.duration',
                      (time.time() - start_time) * 1000)
        statsd.incr('humfrey.sparql-query.success')
        return result
    except Exception:
        try:
            # Guard against locals (e.g. start_time) being unbound if the
            # failure happened before they were assigned; best-effort logging
            # must not mask the original exception.
            (logger.error if log_failure else logger.debug)(
                "Failed query: %r; took %.2f seconds",
                original_query, time.time() - start_time, exc_info=1)
        except UnboundLocalError:
            pass
        statsd.incr('humfrey.sparql-query.fail')
        raise