replace the method definitions with API calls for the desired backend. """ import logging import pymongo from bson import SON from gridfs import GridFS from mongo_connector import errors, constants from mongo_connector.util import exception_wrapper from mongo_connector.doc_managers.doc_manager_base import DocManagerBase wrap_exceptions = exception_wrapper({ pymongo.errors.ConnectionFailure: errors.ConnectionFailed, pymongo.errors.OperationFailure: errors.OperationFailed, }) LOG = logging.getLogger(__name__) __version__ = constants.__version__ """MongoDB DocManager version information This is packaged with mongo-connector so it shares the same version. Downstream DocManager implementations should add their package __version__ string here, for example: __version__ = '0.1.0' """
import gridfs

from mongo_connector import errors, util

# Built from a mapping of gridfs.errors.CorruptGridFile to
# errors.OperationFailed and applied below as a decorator.
# NOTE(review): presumably re-raises the mapped exception type; confirm
# against mongo_connector.util.exception_wrapper.
wrap_exceptions = util.exception_wrapper({
    gridfs.errors.CorruptGridFile: errors.OperationFailed
})


class GridFSFile(object):
    """Snapshot of a GridFS file document plus a handle on its contents.

    The constructor opens a ``gridfs.GridOut`` for the given file document
    and copies the fields that ``get_metadata`` reports onto the instance.
    """

    @wrap_exceptions
    def __init__(self, collection, doc):
        """Open the GridFS file described by ``doc``.

        :param collection: passed straight through to ``gridfs.GridOut``.
        :param doc: the file document; must contain ``'_id'`` and is handed
            to ``gridfs.GridOut`` as ``file_document``.
        :raises KeyError: if ``doc`` has no ``'_id'`` key.
        """
        self._id = doc['_id']
        self.f = gridfs.GridOut(collection, file_document=doc)
        self.filename = self.f.filename
        self.length = self.f.length
        self.upload_date = self.f.upload_date
        self.md5 = self.f.md5
        # 'parentId' is an optional, non-standard field. getattr() needs an
        # explicit default here: without one it raises AttributeError for
        # every file document that lacks the field, instead of falling back
        # to None. Present values are kept as-is (even falsy ones), matching
        # the ``is not None`` test in get_metadata().
        self.parentId = getattr(self.f, 'parentId', None)

    def get_metadata(self):
        """Return the file's metadata as a plain dict.

        ``'_id'``, ``'upload_date'`` and ``'md5'`` are always present;
        ``'filename'`` and ``'parentId'`` are included only when they are
        not ``None``.
        """
        result = {
            '_id': self._id,
            'upload_date': self.upload_date,
            'md5': self.md5,
        }
        if self.filename is not None:
            result['filename'] = self.filename
        if self.parentId is not None:
            result['parentId'] = self.parentId
        return result
This file is a document manager for MongoDB, but the intent is that this file can be used as an example to add on different backends. To extend this to other systems, simply implement the exact same class and replace the method definitions with API calls for the desired backend. """ import logging import pymongo from gridfs import GridFS from mongo_connector import errors, constants from mongo_connector.util import exception_wrapper from mongo_connector.doc_managers.doc_manager_base import DocManagerBase wrap_exceptions = exception_wrapper({ pymongo.errors.ConnectionFailure: errors.ConnectionFailed, pymongo.errors.OperationFailure: errors.OperationFailed}) LOG = logging.getLogger(__name__) class DocManager(DocManagerBase): """The DocManager class creates a connection to the backend engine and adds/removes documents, and in the case of rollback, searches for them. The reason for storing id/doc pairs as opposed to doc's is so that multiple updates to the same doc reflect the most up to date version as opposed to multiple, slightly different versions of a doc. We are using MongoDB native fields for _id and ns, but we also store them as fields in the document, due to compatibility issues.
from pysolr import Solr, SolrError from mongo_connector import errors from mongo_connector.compat import u from mongo_connector.constants import (DEFAULT_COMMIT_INTERVAL, DEFAULT_MAX_BULK) from mongo_connector.compat import (Request, urlopen, urlencode, URLError, HTTPError) from mongo_connector.util import exception_wrapper, retry_until_ok from mongo_connector.doc_managers.doc_manager_base import DocManagerBase from mongo_connector.doc_managers.formatters import DocumentFlattener wrap_exceptions = exception_wrapper({ SolrError: errors.OperationFailed, URLError: errors.ConnectionFailed, HTTPError: errors.ConnectionFailed }) ADMIN_URL = 'admin/luke?show=schema&wt=json' decoder = json.JSONDecoder() class DocManager(DocManagerBase): """The DocManager class creates a connection to the backend engine and adds/removes documents, and in the case of rollback, searches for them. The reason for storing id/doc pairs as opposed to doc's is so that multiple updates to the same doc reflect the most up to date version as opposed to multiple, slightly different versions of a doc.
import bson.json_util from elasticsearch import Elasticsearch, exceptions as es_exceptions from elasticsearch.helpers import scan, streaming_bulk from mongo_connector import errors from mongo_connector.compat import u from mongo_connector.constants import (DEFAULT_COMMIT_INTERVAL, DEFAULT_MAX_BULK) from mongo_connector.util import exception_wrapper, retry_until_ok from mongo_connector.doc_managers.doc_manager_base import DocManagerBase from mongo_connector.doc_managers.formatters import DefaultDocumentFormatter wrap_exceptions = exception_wrapper({ es_exceptions.ConnectionError: errors.ConnectionFailed, es_exceptions.TransportError: errors.OperationFailed, es_exceptions.NotFoundError: errors.OperationFailed, es_exceptions.RequestError: errors.OperationFailed}) LOG = logging.getLogger(__name__) class DocManager(DocManagerBase): """Elasticsearch implementation of the DocManager interface. Receives documents from an OplogThread and takes the appropriate actions on Elasticsearch. """ def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL, unique_key='_id', chunk_size=DEFAULT_MAX_BULK,
from elasticsearch import Elasticsearch, exceptions as es_exceptions from elasticsearch.helpers import scan, streaming_bulk from mongo_connector import errors from mongo_connector.compat import u from mongo_connector.constants import (DEFAULT_COMMIT_INTERVAL, DEFAULT_MAX_BULK) from mongo_connector.util import exception_wrapper, retry_until_ok from mongo_connector.doc_managers.doc_manager_base import DocManagerBase from mongo_connector.doc_managers.formatters import DefaultDocumentFormatter wrap_exceptions = exception_wrapper({ es_exceptions.ConnectionError: errors.ConnectionFailed, es_exceptions.TransportError: errors.OperationFailed, es_exceptions.NotFoundError: errors.OperationFailed, es_exceptions.RequestError: errors.OperationFailed }) LOG = logging.getLogger(__name__) class DocManager(DocManagerBase): """Elasticsearch implementation of the DocManager interface. Receives documents from an OplogThread and takes the appropriate actions on Elasticsearch. """ def __init__(self,
from mongo_connector.doc_managers.nodes_and_relationships_builder import NodesAndRelationshipsBuilder from mongo_connector.doc_managers.nodes_and_relationships_updater import NodesAndRelationshipsUpdater from mongo_connector.doc_managers.error_handler import ErrorHandler from py2neo import Graph, authenticate from mongo_connector import errors from mongo_connector.compat import u from mongo_connector.constants import DEFAULT_COMMIT_INTERVAL, DEFAULT_MAX_BULK from mongo_connector.util import exception_wrapper, retry_until_ok from mongo_connector.doc_managers.doc_manager_base import DocManagerBase from mongo_connector.doc_managers.formatters import DefaultDocumentFormatter errors_handler = ErrorHandler() wrap_exceptions = exception_wrapper(errors_handler.error_hash) LOG = logging.getLogger(__name__) class DocManager(DocManagerBase): """ Neo4j implementation for the DocManager. Receives documents and communicates with Neo4j Server. """ def __init__( self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL, unique_key="_id", chunk_size=DEFAULT_MAX_BULK, **kwargs ): self.graph = Graph(url)
import bson.json_util from bson.json_util import dumps from mongo_connector.doc_managers.error_handler import ErrorHandler from py2neo import Graph, authenticate from mongo_connector import errors from mongo_connector.compat import u from mongo_connector.constants import (DEFAULT_COMMIT_INTERVAL, DEFAULT_MAX_BULK) from mongo_connector.util import exception_wrapper, retry_until_ok from mongo_connector.doc_managers.doc_manager_base import DocManagerBase errors_handler = ErrorHandler() wrap_exceptions = exception_wrapper(errors_handler.error_hash) LOG = logging.getLogger(__name__) class DocManager(DocManagerBase): """ Neo4j implementation for the DocManager. Receives documents and communicates with Neo4j Server using doc2graph cypher api. """ def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL, unique_key='_id', chunk_size=DEFAULT_MAX_BULK, **kwargs):
from pysolr import Solr, SolrError from mongo_connector import errors from mongo_connector.compat import u from mongo_connector.constants import (DEFAULT_COMMIT_INTERVAL, DEFAULT_MAX_BULK) from mongo_connector.compat import ( Request, urlopen, urlencode, URLError, HTTPError) from mongo_connector.util import exception_wrapper, retry_until_ok from mongo_connector.doc_managers.doc_manager_base import DocManagerBase from mongo_connector.doc_managers.formatters import DocumentFlattener wrap_exceptions = exception_wrapper({ SolrError: errors.OperationFailed, URLError: errors.ConnectionFailed, HTTPError: errors.ConnectionFailed }) ADMIN_URL = 'admin/luke?show=schema&wt=json' # From the documentation of Solr 4.0 "classic" query parser. ESCAPE_CHARACTERS = set('+-&|!(){}[]^"~*?:\\/') decoder = json.JSONDecoder() class DocManager(DocManagerBase): """The DocManager class creates a connection to the backend engine and adds/removes documents, and in the case of rollback, searches for them. The reason for storing id/doc pairs as opposed to doc's is so that multiple
Company: Innoplexus, Pune """ import os import logging import arango from arango import ArangoClient from arango.exceptions import DocumentInsertError, DatabaseCreateError, ServerConnectionError from mongo_connector import errors, constants from mongo_connector.util import exception_wrapper from mongo_connector.doc_managers.doc_manager_base import DocManagerBase from mongo_connector.command_helper import CommandHelper wrap_exceptions = exception_wrapper({ DatabaseCreateError: errors.OperationFailed, DocumentInsertError: errors.OperationFailed }) LOG = logging.getLogger(__name__) __version__ = constants.__version__ __version__ = '0.1.4' """ArangoDB 3.X DocManager version.""" class DocManager(DocManagerBase): """ArangoDB implementation of the DocManager interface. Receives documents from an OplogThread and takes the appropriate actions on ArangoDB.
from mongo_connector.util import exception_wrapper, retry_until_ok
from mongo_connector.doc_managers.doc_manager_base import DocManagerBase
from mongo_connector.doc_managers.formatters import DefaultDocumentFormatter

# Optional AWS support: _HAS_AWS stays True only if both boto3 and
# requests_aws_sign can be imported; an ImportError from either flips it
# to False instead of aborting the module import.
_HAS_AWS = True
try:
    from boto3 import session
    from requests_aws_sign import AWSV4Sign
except ImportError:
    _HAS_AWS = False

# Mapping of Elasticsearch client exceptions to mongo_connector error
# types: connection problems and timeouts map to ConnectionFailed,
# everything else listed here maps to OperationFailed.
# NOTE(review): presumably exception_wrapper returns a decorator that
# re-raises the mapped type; confirm in mongo_connector.util.
wrap_exceptions = exception_wrapper(
    {
        BulkIndexError: errors.OperationFailed,
        es_exceptions.ConnectionError: errors.ConnectionFailed,
        es_exceptions.ConnectionTimeout: errors.ConnectionFailed,
        es_exceptions.TransportError: errors.OperationFailed,
        es_exceptions.NotFoundError: errors.OperationFailed,
        es_exceptions.RequestError: errors.OperationFailed,
    }
)

# Module-level logger named after this module.
LOG = logging.getLogger(__name__)

DEFAULT_SEND_INTERVAL = 5
"""The default interval in seconds to send buffered operations."""

# NOTE(review): looks like the fallback region for AWS request signing when
# none is configured; confirm where this constant is consumed.
DEFAULT_AWS_REGION = "us-east-1"

# Version string looked up from the "elastic2_doc_manager" distribution.
# NOTE(review): assumes importlib_metadata is bound earlier in the file
# (outside this excerpt); verify which module or backport it refers to.
__version__ = importlib_metadata.version("elastic2_doc_manager")
import gridfs from mongo_connector import errors, util wrap_exceptions = util.exception_wrapper( {gridfs.errors.CorruptGridFile: errors.OperationFailed}) class GridFSFile(object): @wrap_exceptions def __init__(self, collection, doc): self._id = doc['_id'] self.f = gridfs.GridOut(collection, file_document=doc) self.filename = self.f.filename self.length = self.f.length self.upload_date = self.f.upload_date self.md5 = self.f.md5 def get_metadata(self): result = { '_id': self._id, 'upload_date': self.upload_date, 'md5': self.md5, } if self.filename is not None: result['filename'] = self.filename return result def __len__(self): return self.length @wrap_exceptions