import bson.json_util from elasticsearch import Elasticsearch, exceptions as es_exceptions from elasticsearch.helpers import scan, streaming_bulk from mongo_connector import errors from mongo_connector.constants import (DEFAULT_COMMIT_INTERVAL, DEFAULT_MAX_BULK) from mongo_connector.util import retry_until_ok from mongo_connector.doc_managers import DocManagerBase, exception_wrapper from mongo_connector.doc_managers.formatters import DefaultDocumentFormatter wrap_exceptions = exception_wrapper({ es_exceptions.ConnectionError: errors.ConnectionFailed, es_exceptions.TransportError: errors.OperationFailed, es_exceptions.RequestError: errors.OperationFailed }) class DocManager(DocManagerBase): """Elasticsearch implementation of the DocManager interface. Receives documents from an OplogThread and takes the appropriate actions on Elasticsearch. """ def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL, unique_key='_id', chunk_size=DEFAULT_MAX_BULK, meta_index_name="mongodb_meta", meta_type="mongodb_meta", **kwargs):
""" import re import json from pysolr import Solr, SolrError from mongo_connector import errors from mongo_connector.constants import (DEFAULT_COMMIT_INTERVAL, DEFAULT_MAX_BULK) from mongo_connector.util import retry_until_ok from mongo_connector.doc_managers import DocManagerBase, exception_wrapper from mongo_connector.doc_managers.formatters import DocumentFlattener # pysolr only has 1 exception: SolrError wrap_exceptions = exception_wrapper({ SolrError: errors.OperationFailed}) ADMIN_URL = 'admin/luke?show=schema&wt=json' decoder = json.JSONDecoder() class DocManager(DocManagerBase): """The DocManager class creates a connection to the backend engine and adds/removes documents, and in the case of rollback, searches for them. The reason for storing id/doc pairs as opposed to doc's is so that multiple updates to the same doc reflect the most up to date version as opposed to multiple, slightly different versions of a doc. """
from elasticsearch import Elasticsearch, exceptions as es_exceptions from elasticsearch.helpers import scan, streaming_bulk from mongo_connector import errors from mongo_connector.compat import u from mongo_connector.constants import (DEFAULT_COMMIT_INTERVAL, DEFAULT_MAX_BULK) from mongo_connector.util import retry_until_ok from mongo_connector.doc_managers import DocManagerBase, exception_wrapper from mongo_connector.doc_managers.formatters import DefaultDocumentFormatter wrap_exceptions = exception_wrapper({ es_exceptions.ConnectionError: errors.ConnectionFailed, es_exceptions.TransportError: errors.OperationFailed, es_exceptions.RequestError: errors.OperationFailed }) class DocManager(DocManagerBase): """Elasticsearch implementation of the DocManager interface. Receives documents from an OplogThread and takes the appropriate actions on Elasticsearch. """ def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL, unique_key='_id',
This file is a document manager for MongoDB, but the intent is that this file can be used as an example to add on different backends. To extend this to other systems, simply implement the exact same class and replace the method definitions with API calls for the desired backend. """ import logging import pymongo from mongo_connector import errors from mongo_connector.doc_managers import DocManagerBase, exception_wrapper wrap_exceptions = exception_wrapper({ pymongo.errors.ConnectionFailure: errors.ConnectionFailed, pymongo.errors.OperationFailure: errors.OperationFailed}) class DocManager(DocManagerBase): """The DocManager class creates a connection to the backend engine and adds/removes documents, and in the case of rollback, searches for them. The reason for storing id/doc pairs as opposed to docs is so that multiple updates to the same doc reflect the most up-to-date version as opposed to multiple, slightly different versions of a doc. We are using MongoDB native fields for _id and ns, but we also store them as fields in the document, due to compatibility issues. """