def request_id(self):
    if self._request_id is not None:
        return self._request_id
    if self.request is not None:
        parts = [self.request.method, normalize_url(self.url)]
        if self.request.data:
            parts.append(hash_data(self.request.data))
        if self.request.json:
            parts.append(hash_data(self.request.json))
        return make_key(*parts)

def request_id(self):
    if self._request_id is not None:
        return self._request_id
    if self.request is not None:
        parts = [self.request.method, self.url]
        if self.request.data:
            parts.append(hash_data(self.request.data))
        if self.request.json:
            parts.append(hash_data(self.request.json))
        return ' '.join(parts)

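# Every snippet in this listing keys its results through hash_data(). Below is
# a minimal sketch of such a helper, assuming it derives a stable hex digest
# from a JSON-serializable value; the actual implementation in the codebase
# may use a different serialization or hash function.
import json
from hashlib import sha1


def hash_data(data):
    """Return a stable SHA-1 hex digest of a JSON-serializable value (sketch)."""
    encoded = json.dumps(data, sort_keys=True, default=str)
    return sha1(encoded.encode("utf-8")).hexdigest()
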
def save(cls, activity, session_id, role_id, timestamp, data, keys):
    keys = [data.get(k) for k in keys]
    key = hash_data([activity, session_id, keys])
    db.session.begin_nested()
    for attempt in range(10):
        try:
            obj = cls.all().filter_by(id=key).first()
            if obj is None:
                obj = cls()
                obj.id = key
                obj.activity = activity
                obj.session_id = session_id
                obj.created_at = timestamp
                obj.data = data
                obj.count = 0
            obj.count += 1
            obj.role_id = role_id
            obj.updated_at = timestamp
            db.session.add(obj)
            db.session.commit()
            return
        except IntegrityError:
            db.session.rollback()
            time.sleep(0.001)

def request(self, method, url, headers={}, auth=None, data=None, params=None,
            json=None, allow_redirects=True, lazy=False):
    if is_mapping(params):
        params = params.items()
    url = normalize_url(url, extra_query_args=params)
    method = method.upper().strip()
    request = Request(method, url, data=data, headers=headers,
                      json=json, auth=auth)
    request_id = hash_data((url, method, request.data, request.json))
    response = ContextHttpResponse(self, request=request,
                                   request_id=request_id,
                                   allow_redirects=allow_redirects)
    if not lazy:
        response._stream_content()
    return response

def index_matches(collection, matches, sync=False):
    """Index cross-referencing matches."""
    actions = []
    for (score, entity, match_collection_id, match) in matches:
        xref_id = hash_data((entity.id, collection.id, match.id))
        text = ensure_list(entity.get_type_values(registry.name))
        text.extend(match.get_type_values(registry.name))
        actions.append({
            '_id': xref_id,
            '_index': xref_index(),
            '_source': {
                'score': score,
                'entity_id': entity.id,
                'collection_id': collection.id,
                'match_id': match.id,
                'match_collection_id': match_collection_id,
                'countries': match.get_type_values(registry.country),
                'schema': match.schema.name,
                'text': text,
                'created_at': datetime.utcnow(),
            }
        })
    if len(actions):
        log.info("Indexing %d xref matches...", len(actions))
        bulk_actions(actions, sync=sync)

def index_matches(collection, matches, sync=False):
    """Index cross-referencing matches."""
    actions = []
    for (score, entity, match_collection_id, match) in matches:
        xref_id = hash_data((entity.id, collection.id, match.id))
        text = ensure_list(entity.get_type_values(registry.name))
        text.extend(match.get_type_values(registry.name))
        actions.append({
            "_id": xref_id,
            "_index": xref_index(),
            "_source": {
                "score": score,
                "entity_id": entity.id,
                "collection_id": collection.id,
                "match_id": match.id,
                "match_collection_id": match_collection_id,
                "countries": match.get_type_values(registry.country),
                "schema": match.schema.name,
                "text": text,
                "created_at": datetime.utcnow(),
            },
        })
    if len(actions):
        log.info("Indexing %d xref matches...", len(actions))
        bulk_actions(actions, sync=sync)

def post_match(self, url, proxy):
    data = proxy.to_dict()
    key = proxy.id or hash_data(data)
    key = hash_data((url, key))
    if self.cache.has(key):
        # log.info("Cached [%s]: %s", self.host, proxy)
        return self.cache.get(key)
    log.info("Enrich [%s]: %s", self.host, proxy)
    try:
        res = self.session.post(url, json=data)
    except RequestException:
        log.exception("Error calling Aleph matcher")
        return {}
    if res.status_code != 200:
        return {}
    data = res.json()
    self.cache.store(key, data)
    return data

def bulk_load(queue, collection, config):
    """Bulk load entities from a CSV file or SQL database.

    This is done by mapping the rows in the source data to entities and links
    which can be understood by the entity index.
    """
    queries = keys_values(config, 'queries', 'query')
    for query in queries:
        bulk_load_query(queue, collection, hash_data(query), query)
    queue_task(collection, OP_INDEX)
    queue.remove()

def setup_caching():
    """Set some request attributes at the beginning of the request.

    By default, caching will be disabled."""
    locale = get_locale()
    request._app_locale = str(locale)
    request.session_id = request.headers.get('X-Aleph-Session')
    if request.session_id is None:
        request.session_id = hash_data([
            request.remote_addr,
            request.accept_languages,
            request.user_agent
        ])
    request._http_cache = False
    request._http_private = False
    request._http_etag = None

def index_notification(event, actor_id, params, channels, sync=False):
    """Index a notification."""
    params = params or {}
    params = {n: get_entity_id(params.get(n)) for n in event.params.keys()}
    channels = list(set([c for c in channels if c is not None]))
    data = {
        'actor_id': actor_id,
        'params': params,
        'event': event.name,
        'channels': channels,
        'created_at': datetime.utcnow(),
    }
    index = notifications_index()
    id_ = hash_data((actor_id, event.name, channels, params))
    return index_safe(index, id_, data, refresh=refresh_sync(sync))

def save(cls, activity, session_id, role_id, timestamp, data, keys):
    keys = [data.get(k) for k in keys]
    key = hash_data([activity, session_id, keys])
    obj = cls.all().filter_by(id=key).first()
    if obj is None:
        obj = cls()
        obj.id = key
        obj.activity = activity
        obj.session_id = session_id
        obj.created_at = timestamp
        obj.data = data
        obj.count = 0
    obj.count += 1
    obj.role_id = role_id
    obj.updated_at = timestamp
    db.session.add(obj)

def index_notification(event, actor_id, params, channels, sync=False):
    """Index a notification."""
    params = params or {}
    data = {}
    for param, value in params.items():
        value = get_entity_id(value)
        if value is not None:
            data[param] = str(value)
    channels = list(set([c for c in channels if c is not None]))
    data = {
        "actor_id": actor_id,
        "params": data,
        "event": event.name,
        "channels": channels,
        "created_at": datetime.utcnow(),
    }
    index = notifications_index()
    id_ = hash_data((actor_id, event.name, channels, params))
    return index_safe(index, id_, data, sync=sync)

def enable_cache(vary_user=True, vary=None, server_side=False):
    """Enable caching in the context of a view.

    If desired, instructions on the cache parameters can be included, such as
    if the data is fit for public caches (default: no, vary_user) and what
    values to include in the generation of an etag.
    """
    args = sorted(set(request.args.items()))
    # jquery where is your god now?!?
    args = filter(lambda (k, v): k != '_', args)
    cache_parts = [args, vary]
    if vary_user:
        cache_parts.extend((request.authz.roles))
        request._http_private = True
    request._http_cache = settings.CACHE
    request._http_etag = hash_data(cache_parts)
    if request.if_none_match == request._http_etag:
        raise NotModified()

def _index_form(collection, matches):
    now = datetime.utcnow().isoformat()
    for (score, entity, match_collection_id, match) in matches:
        xref_id = hash_data((entity.id, collection.id, match.id))
        text = ensure_list(entity.get_type_values(registry.name))
        text.extend(match.get_type_values(registry.name))
        yield {
            "_id": xref_id,
            "_index": xref_index(),
            "_source": {
                "score": score,
                "entity_id": entity.id,
                "collection_id": collection.id,
                "match_id": match.id,
                "match_collection_id": match_collection_id,
                "countries": match.get_type_values(registry.country),
                "schema": match.schema.name,
                "text": text,
                "created_at": now,
            },
        }

def enable_cache(vary_user=True, vary=None):
    """Enable caching in the context of a view.

    If desired, instructions on the cache parameters can be included, such as
    if the data is fit for public caches (default: no, vary_user) and what
    values to include in the generation of an etag.
    """
    if not settings.CACHE:
        return
    request._http_cache = True
    request._http_revalidate = vary is not None
    args = sorted(set(request.args.items()))
    cache_parts = [args, vary, request._app_locale]
    if vary_user and request.authz.logged_in:
        cache_parts.extend((request.authz.roles))
        request._http_private = True
    request._http_etag = hash_data(cache_parts)
    if request._http_etag in request.if_none_match:
        raise NotModified()

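# A minimal usage sketch for enable_cache() inside a Flask view, assuming a
# blueprint and serializer similar to the ones in this codebase; the route,
# blueprint, and helper names below are hypothetical.
@blueprint.route("/api/2/collections/<collection_id>", methods=["GET"])
def view(collection_id):
    collection = get_index_collection(collection_id)
    # Opt in to etag-based caching; vary_user=True keeps the response private
    # because the payload depends on the caller's roles, and the collection_id
    # is folded into the etag via the vary argument.
    enable_cache(vary_user=True, vary=[collection_id])
    return jsonify(collection)
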
def _index_form(collection, matches):
    now = datetime.utcnow().isoformat()
    for (score, entity, match_collection_id, match) in matches:
        xref_id = hash_data((entity.id, collection.id, match.id))
        text = set([entity.caption, match.caption])
        text.update(entity.get_type_values(registry.name)[:MAX_NAMES])
        text.update(match.get_type_values(registry.name)[:MAX_NAMES])
        countries = set(entity.get_type_values(registry.country))
        countries.update(match.get_type_values(registry.country))
        yield {
            "_id": xref_id,
            "_index": xref_index(),
            "_source": {
                "score": score,
                "entity_id": entity.id,
                "schema": match.schema.name,
                "collection_id": collection.id,
                "match_id": match.id,
                "match_collection_id": match_collection_id,
                "countries": list(countries),
                "text": list(text),
                "created_at": now,
            },
        }

def cache_key(self):
    """Generate a key for the current result."""
    if not self.cache:
        return uuid4().hex
    return hash_data(self.to_dict())

def cache_key(self):
    """Generate a key for the current result."""
    results = [result_key(r) for r in self.results]
    return hash_data((self.parser.cache_key, self.total, results))

def cache_key(self):
    """Generate a key for the current result."""
    if not self.cache:
        return uuid4().hex
    return hash_data((self.args.items(), self.limit, self.offset))