def delete_index(namespace_id, namespace_public_id):
    """Drop the search index backing the given namespace."""
    engine = NamespaceSearchEngine(namespace_public_id)
    engine.delete_index()
    log.info('Deleted namespace index',
             namespace_id=namespace_id,
             namespace_public_id=namespace_public_id)
def delete_index(namespace_id, namespace_public_id):
    """
    Irreversibly remove the search index for a namespace.
    USE WITH CAUTION.
    """
    engine = NamespaceSearchEngine(namespace_public_id)
    # TODO[k]: Error handling
    engine.delete_index()
    log.info('Deleted namespace index',
             namespace_id=namespace_id,
             namespace_public_id=namespace_public_id)
def message_search_api():
    """
    Search a namespace's messages.

    GET uses the simple 'q' query-string parameter; POST accepts a
    structured JSON body with 'query' and optional 'sort'.
    """
    g.parser.add_argument('q', type=bounded_str, location='args')
    args = strict_parse_args(g.parser, request.args)

    if request.method == 'GET':
        q = args['q']
        if not q:
            err_string = ('GET HTTP method must include query'
                          ' url parameter')
            g.log.error(err_string)
            return err(400, err_string)
        client = get_search_client(g.namespace.account)
        results = client.search_messages(g.db_session, q)
        return g.encoder.jsonify(results)

    # POST: structured query body.
    payload = request.get_json(force=True)
    query = payload.get('query')
    validate_search_query(query)
    sort = payload.get('sort')
    validate_search_sort(sort)
    try:
        engine = NamespaceSearchEngine(g.namespace_public_id)
        results = engine.messages.search(query=query, sort=sort,
                                         max_results=args.limit,
                                         offset=args.offset)
    except SearchEngineError as e:
        g.log.error('Search error: {0}'.format(e))
        return err(501, 'Search error')
    return g.encoder.jsonify(results)
def index(self, objects):
    """
    Translate database operations to Elasticsearch index operations
    and perform them, grouped per namespace and object type.
    """
    # namespace_id -> object type -> [(operation, api_repr), ...]
    by_namespace = defaultdict(lambda: defaultdict(list))
    for obj in objects:
        op = (obj['operation'], obj['attributes'])
        by_namespace[obj['namespace_id']][obj['object']].append(op)

    self.log.info('namespaces to index count', count=len(by_namespace))

    for ns_id, ops_by_type in by_namespace.items():
        engine = NamespaceSearchEngine(ns_id)
        message_ops = ops_by_type['message']
        message_count = (engine.messages.bulk_index(message_ops)
                         if message_ops else 0)
        thread_ops = ops_by_type['thread']
        thread_count = (engine.threads.bulk_index(thread_ops)
                        if thread_ops else 0)
        self.log.info('per-namespace index counts',
                      namespace_id=ns_id,
                      message_count=message_count,
                      thread_count=thread_count)
def test_index_creation(db, default_namespace):
    """Indexing a namespace covers every row and installs the expected
    Elasticsearch mappings."""
    ns_id = default_namespace.id
    ns_public_id = default_namespace.public_id

    # Indexing should touch exactly as many documents as there are rows.
    indexed_messages = index_messages(ns_id, ns_public_id)
    stored_messages = db.session.query(Message).filter(
        Message.namespace_id == ns_id).count()
    indexed_threads = index_threads(ns_id, ns_public_id)
    stored_threads = db.session.query(Thread).filter(
        Thread.namespace_id == ns_id).count()
    assert indexed_messages == stored_messages
    assert indexed_threads == stored_threads

    # The live index mappings should match the declared mapping constants.
    engine = NamespaceSearchEngine(default_namespace.public_id,
                                   create_index=False)
    thread_mapping = engine.threads.get_mapping()
    assert (thread_mapping[ns_public_id]['mappings']['thread']['properties']
            == THREAD_MAPPING['properties'])
    message_mapping = engine.messages.get_mapping()
    message_props = message_mapping[ns_public_id]['mappings'][
        'message']['properties']
    assert all(item in message_props
               for item in MESSAGE_MAPPING['properties'])
def index_messages(namespace, updated_since=None):
    """
    Index the messages of a namespace.

    Parameters
    ----------
    namespace: (namespace_id, namespace_public_id) pair.
    updated_since: optional date string; only messages updated strictly
        after this time are indexed.

    Returns the number of messages indexed.
    """
    namespace_id, namespace_public_id = namespace
    if updated_since is not None:
        updated_since = dateutil.parser.parse(updated_since)
    indexed_count = 0
    search_engine = NamespaceSearchEngine(namespace_public_id)
    with session_scope() as db_session:
        query = db_session.query(Message).filter(
            Message.namespace_id == namespace_id)
        if updated_since is not None:
            query = query.filter(Message.updated_at > updated_since)
        query = query.options(
            joinedload(Message.parts).load_only('content_disposition'))

        encoded = []
        for obj in safer_yield_per(query, Message.id, 0, CHUNK_SIZE):
            if len(encoded) >= INDEX_CHUNK_SIZE:
                # Flush periodically so memory stays bounded on large
                # namespaces, matching index_threads and the chunked
                # index_messages implementation.
                indexed_count += search_engine.messages.bulk_index(encoded)
                encoded = []
            encoded_obj = encode(obj, namespace_public_id=namespace_public_id)
            encoded.append(('index', encoded_obj))
        if encoded:
            # Flush the final partial batch; skip the call entirely when
            # there is nothing to index.
            indexed_count += search_engine.messages.bulk_index(encoded)
    log.info('Indexed messages',
             namespace_id=namespace_id,
             namespace_public_id=namespace_public_id,
             message_count=indexed_count)
    return indexed_count
def search_engine(db, default_namespace):
    """Yield a refreshed NamespaceSearchEngine over an indexed default
    namespace; the index is deleted at teardown."""
    index_namespaces([default_namespace.id])
    ns_engine = NamespaceSearchEngine(default_namespace.public_id)
    ns_engine.refresh_index()
    yield ns_engine
    # Teardown: drop the per-test index.
    ns_engine.delete_index()
def message_search_api():
    """Run a structured message search (JSON body with 'query') against
    the namespace's message index and return the JSON-encoded results."""
    args = strict_parse_args(g.parser, request.args)
    payload = request.get_json(force=True)
    search_query = payload.get('query')
    validate_search_query(search_query)
    try:
        engine = NamespaceSearchEngine(g.namespace_public_id)
        results = engine.messages.search(query=search_query,
                                         max_results=args.limit,
                                         offset=args.offset)
    except SearchEngineError as e:
        g.log.error('Search error: {0}'.format(e))
        return err(501, 'Search error')
    return g.encoder.jsonify(results)
def index_threads(namespace_id, namespace_public_id, created_before=None):
    """
    Index the threads of a namespace, flushing to the search engine in
    INDEX_CHUNK_SIZE batches.

    Returns the number of threads indexed.
    """
    cutoff = None
    if created_before is not None:
        cutoff = dateutil.parser.parse(created_before)
    search_engine = NamespaceSearchEngine(namespace_public_id,
                                          create_index=True)
    total = 0
    with session_scope() as db_session:
        query = db_session.query(Thread).filter(
            Thread.namespace_id == namespace_id)
        if cutoff is not None:
            query = query.filter(Thread.created_at <= cutoff)
        # Eager-load just the columns the encoder needs.
        query = query.options(
            subqueryload(Thread.messages).load_only(
                'public_id', 'is_draft', 'from_addr', 'to_addr',
                'cc_addr', 'bcc_addr'),
            subqueryload('tagitems').joinedload('tag').load_only(
                'public_id', 'name'))

        batch = []
        for thread in safer_yield_per(query, Thread.id, 0, CHUNK_SIZE):
            if len(batch) >= INDEX_CHUNK_SIZE:
                # Flush a full batch before accumulating more.
                total += search_engine.threads.bulk_index(batch)
                batch = []
            batch.append(
                ('index',
                 encode(thread, namespace_public_id=namespace_public_id)))
        if batch:
            total += search_engine.threads.bulk_index(batch)

    log.info('Indexed threads',
             namespace_id=namespace_id,
             namespace_public_id=namespace_public_id,
             thread_count=total)
    return total
def index(self, transactions, db_session):
    """
    Translate transaction-log entries into Elasticsearch bulk operations
    and execute them, one pass per namespace.
    """
    # namespace public id -> object type -> [(operation, api_repr), ...]
    by_namespace = defaultdict(lambda: defaultdict(list))
    for trx in transactions:
        ns_public_id = trx.namespace.public_id
        if trx.command == 'delete':
            op = 'delete'
            doc = {'id': trx.object_public_id}
        else:
            model = transaction_objects()[trx.object_type]
            record = db_session.query(model).get(trx.record_id)
            if record is None:
                # Row vanished since the transaction was logged; nothing
                # to index.
                continue
            op = 'index'
            doc = encode(record, namespace_public_id=ns_public_id)
        by_namespace[ns_public_id][trx.object_type].append((op, doc))

    self.log.info('namespaces to index count', count=len(by_namespace))

    for ns_public_id, ops_by_type in by_namespace.items():
        engine = NamespaceSearchEngine(ns_public_id, create_index=True)
        message_ops = ops_by_type['message']
        message_count = (engine.messages.bulk_index(message_ops)
                         if message_ops else 0)
        thread_ops = ops_by_type['thread']
        thread_count = (engine.threads.bulk_index(thread_ops)
                        if thread_ops else 0)
        self.log.info('per-namespace index counts',
                      namespace_id=ns_public_id,
                      message_count=message_count,
                      thread_count=thread_count)
def index_messages(namespace_id, namespace_public_id, created_before=None):
    """
    Index the messages of a namespace, flushing to the search engine in
    INDEX_CHUNK_SIZE batches.

    Returns the number of messages indexed.
    """
    cutoff = (dateutil.parser.parse(created_before)
              if created_before is not None else None)
    engine = NamespaceSearchEngine(namespace_public_id, create_index=True)
    total = 0
    with session_scope() as db_session:
        query = db_session.query(Message).filter(
            Message.namespace_id == namespace_id)
        if cutoff is not None:
            query = query.filter(Message.created_at <= cutoff)
        # Only the part disposition is needed by the encoder.
        query = query.options(
            joinedload(Message.parts).load_only('content_disposition'))

        batch = []
        for message in safer_yield_per(query, Message.id, 0, CHUNK_SIZE):
            if len(batch) >= INDEX_CHUNK_SIZE:
                # Flush a full batch before accumulating more.
                total += engine.messages.bulk_index(batch)
                batch = []
            batch.append(
                ('index',
                 encode(message, namespace_public_id=namespace_public_id)))
        if batch:
            total += engine.messages.bulk_index(batch)

    log.info('Indexed messages',
             namespace_id=namespace_id,
             namespace_public_id=namespace_public_id,
             message_count=total)
    return total