def serialize_results(self, hash_key, results):
    """Yield bulk action lines for new, changed and vanished results.

    Each result is identified by the first 8 hex characters of the SHA-1 of
    its ``'uri'``.  The hash returned by ``self.hash_result(result)`` is
    compared with the value stored under that id in the ``hash_key`` hash of
    ``self.client``; when they are equal the result is skipped, otherwise the
    stored hash is updated and a two-line ``index`` action is yielded.

    After ``results`` is exhausted, every id still stored under ``hash_key``
    that was not seen in this run is removed from the hash and a one-line
    ``delete`` action is yielded for it.

    :param hash_key: name of the hash in ``self.client`` that maps result ids
        to their last stored hash.
    :param results: iterable of JSON-serializable dicts, each with a ``'uri'``
        text value.
    :returns: generator of newline-terminated strings.
    """
    client = self.client
    next_status = time.time() + 60
    seen_ids = set()
    result_count = 0

    for result in results:
        result_count += 1
        result_id = sha1(result['uri'].encode('utf-8')).hexdigest()[:8]
        seen_ids.add(result_id)

        result_hash = self.hash_result(result)
        # NOTE(review): if the client hands stored values back as strings
        # while hash_result returns an int, this equality can never hold and
        # every result is re-indexed -- confirm against the client in use.
        if client.hget(hash_key, result_id) == result_hash:
            continue

        client.hset(hash_key, result_id, result_hash)
        yield '{0}\n{1}\n'.format(json.dumps({'index': {'_id': result_id}}),
                                  json.dumps(result))

        # Progress is reported at most once a minute, and only after a
        # result that was actually emitted.
        if time.time() > next_status:
            logger.info("Received %d results in SPARQL resultset", result_count)
            next_status = time.time() + 60

    stale_ids = set(client.hkeys(hash_key)) - seen_ids
    logger.info("Processed %d results from SPARQL resultset", result_count)
    logger.info("Deleting %d items", len(stale_ids))
    for result_id in stale_ids:
        # Forget the cached hash as well.  Otherwise the same delete is
        # emitted again on every later run, and a result that reappears with
        # unchanged content would match its stale hash and never be
        # re-indexed.
        client.hdel(hash_key, result_id)
        yield '{0}\n'.format(json.dumps({'delete': {'_id': result_id}}))
def render_autocomplete(self, request, context, template_name):
    """Render the search hits in ``context`` as a JSON or JSONP response.

    Every entry of ``context['hits']['hits']`` becomes a dict with ``value``
    (the hit's ``uri``), ``label`` and ``altNames`` (its ``altLabel`` and
    ``hiddenLabel`` lists joined with tabs).  When the query string carries a
    ``callback`` parameter, the JSON is wrapped as ``callback(...);`` and
    served as ``application/javascript``.

    :param request: request object; only ``request.GET`` is read.
    :param context: dict with a ``'hits'`` entry holding a nested ``'hits'``
        list of dicts that each carry a ``'_source'`` dict.
    :param template_name: not used by this renderer.
    :raises self.MissingQuery: if ``context`` has no truthy ``'hits'`` entry.
    :returns: an ``HttpResponse``.
    """
    if not context.get('hits'):
        raise self.MissingQuery()

    suggestions = []
    for hit in context['hits']['hits']:
        source = hit['_source']
        alt_names = source.get('altLabel', []) + source.get('hiddenLabel', [])
        suggestions.append({
            'value': source['uri'],
            'altNames': '\t'.join(alt_names),
            'label': source['label'],
        })

    content, content_type = json.dumps(suggestions), 'application/json'
    if 'callback' in request.GET:
        # NOTE(review): the callback name is echoed into the script body
        # without validation; consider restricting it to a JavaScript
        # identifier before shipping this to browsers.
        content = [request.GET['callback'], '(', content, ');']
        content_type = 'application/javascript'

    # ``content_type`` rather than ``mimetype``: Django deprecated the
    # ``mimetype`` keyword in 1.5 and removed it in 1.7.
    return HttpResponse(content, content_type=content_type)
def hash_result(cls, value):
    """Return a hash of ``value`` that ignores list order and dict key order.

    Every list nested anywhere inside ``value`` is sorted in place, so the
    caller's object is mutated.  The structure is then serialized with
    ``json.dumps(..., sort_keys=True)`` and the resulting text is passed to
    the built-in ``hash``.  ``cls`` is not used by the body.

    :param value: JSON-serializable structure of dicts, lists and scalars.
    :returns: the integer ``hash`` of the canonical JSON text.
    """

    def recursive_sort(node):
        if isinstance(node, dict):
            # .values() instead of .itervalues() so this also runs on
            # Python 3.
            for child in node.values():
                recursive_sort(child)
        elif isinstance(node, list):
            for child in node:
                recursive_sort(child)
            try:
                node.sort()
            except TypeError:
                # Python 3 cannot order dicts or mixed types; fall back to
                # ordering the items by their canonical JSON text.
                node.sort(key=lambda item: json.dumps(item, sort_keys=True))

    # recursive_sort works in place and returns None, so the sorted value
    # itself has to be serialized.  Serializing the helper's return value
    # would hash the text "null" for every input.
    recursive_sort(value)

    # NOTE(review): hash() of a str is randomized per process on Python 3
    # unless PYTHONHASHSEED is fixed; if this value is persisted and compared
    # across processes, a hashlib digest would be the stable choice.
    return hash(json.dumps(value, sort_keys=True))
def serialize_results(self, hash_key, results):
    """Yield bulk action lines for new, changed and vanished results.

    Each result is identified by the first 8 hex characters of the SHA-1 of
    its ``'uri'``.  The hash returned by ``self.hash_result(result)`` is
    compared with the value stored under that id in the ``hash_key`` hash of
    ``self.client``; when they are equal the result is skipped, otherwise the
    stored hash is updated and a two-line ``index`` action is yielded.

    After ``results`` is exhausted, every id still stored under ``hash_key``
    that was not seen in this run is removed from the hash and a one-line
    ``delete`` action is yielded for it.

    :param hash_key: name of the hash in ``self.client`` that maps result ids
        to their last stored hash.
    :param results: iterable of JSON-serializable dicts, each with a ``'uri'``
        text value.
    :returns: generator of newline-terminated strings.
    """
    client = self.client
    next_status = time.time() + 60
    seen_ids = set()
    result_count = 0

    for result in results:
        result_count += 1
        result_id = sha1(result['uri'].encode('utf-8')).hexdigest()[:8]
        seen_ids.add(result_id)

        result_hash = self.hash_result(result)
        # NOTE(review): if the client hands stored values back as strings
        # while hash_result returns an int, this equality can never hold and
        # every result is re-indexed -- confirm against the client in use.
        if client.hget(hash_key, result_id) == result_hash:
            continue

        client.hset(hash_key, result_id, result_hash)
        yield '{0}\n{1}\n'.format(json.dumps({'index': {'_id': result_id}}),
                                  json.dumps(result))

        # Progress is reported at most once a minute, and only after a
        # result that was actually emitted.
        if time.time() > next_status:
            logger.info("Received %d results in SPARQL resultset", result_count)
            next_status = time.time() + 60

    stale_ids = set(client.hkeys(hash_key)) - seen_ids
    logger.info("Processed %d results from SPARQL resultset", result_count)
    logger.info("Deleting %d items", len(stale_ids))
    for result_id in stale_ids:
        # Forget the cached hash as well.  Otherwise the same delete is
        # emitted again on every later run, and a result that reappears with
        # unchanged content would match its stale hash and never be
        # re-indexed.
        client.hdel(hash_key, result_id)
        yield '{0}\n'.format(json.dumps({'delete': {'_id': result_id}}))
def render_autocomplete(self, request, context, template_name):
    """Render the search hits in ``context`` as a JSON or JSONP response.

    Every entry of ``context['hits']['hits']`` becomes a dict with ``value``
    (the hit's ``uri``), ``label`` and ``altNames`` (its ``altLabel`` and
    ``hiddenLabel`` lists joined with tabs).  When the query string carries a
    ``callback`` parameter, the JSON is wrapped as ``callback(...);`` and
    served as ``application/javascript``.

    :param request: request object; only ``request.GET`` is read.
    :param context: dict with a ``'hits'`` entry holding a nested ``'hits'``
        list of dicts that each carry a ``'_source'`` dict.
    :param template_name: not used by this renderer.
    :raises self.MissingQuery: if ``context`` has no truthy ``'hits'`` entry.
    :returns: an ``HttpResponse``.
    """
    if not context.get('hits'):
        raise self.MissingQuery()

    suggestions = []
    for hit in context['hits']['hits']:
        source = hit['_source']
        alt_names = source.get('altLabel', []) + source.get('hiddenLabel', [])
        suggestions.append({
            'value': source['uri'],
            'altNames': '\t'.join(alt_names),
            'label': source['label'],
        })

    content, content_type = json.dumps(suggestions), 'application/json'
    if 'callback' in request.GET:
        # NOTE(review): the callback name is echoed into the script body
        # without validation; consider restricting it to a JavaScript
        # identifier before shipping this to browsers.
        content = [request.GET['callback'], '(', content, ');']
        content_type = 'application/javascript'

    # ``content_type`` rather than ``mimetype``: Django deprecated the
    # ``mimetype`` keyword in 1.5 and removed it in 1.7.
    return HttpResponse(content, content_type=content_type)