replace the method definitions with API calls for the desired backend.
    """

import logging
import pymongo

from bson import SON
from gridfs import GridFS

from mongo_connector import errors, constants
from mongo_connector.util import exception_wrapper
from mongo_connector.doc_managers.doc_manager_base import DocManagerBase

# Pairs each pymongo driver exception with the mongo_connector error class
# it corresponds to. exception_wrapper presumably turns this mapping into a
# decorator that re-raises the former as the latter -- confirm in
# mongo_connector.util.
wrap_exceptions = exception_wrapper({
    pymongo.errors.ConnectionFailure:
    errors.ConnectionFailed,
    pymongo.errors.OperationFailure:
    errors.OperationFailed,
})

# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)

# This DocManager reuses the version string from mongo_connector.constants.
__version__ = constants.__version__
"""MongoDB DocManager version information

This is packaged with mongo-connector so it shares the same version.
Downstream DocManager implementations should add their package __version__
string here, for example:

__version__ = '0.1.0'
"""
import gridfs

from mongo_connector import errors, util

# Pairs gridfs.errors.CorruptGridFile with errors.OperationFailed. The
# resulting object is applied as the @wrap_exceptions decorator on
# GridFSFile.__init__ below.
wrap_exceptions = util.exception_wrapper({
    gridfs.errors.CorruptGridFile: errors.OperationFailed
})


class GridFSFile(object):
    """Wrapper around a GridFS file document opened through ``gridfs.GridOut``.

    The constructor copies the commonly used fields (filename, length,
    upload date, md5 and the optional custom ``parentId``) onto the instance
    so that ``get_metadata`` can build a plain dict from them.
    """

    @wrap_exceptions
    def __init__(self, collection, doc):
        """Open ``doc`` with ``gridfs.GridOut`` and cache its metadata.

        :param collection: forwarded unchanged as the first argument of
            ``gridfs.GridOut``.
        :param doc: the GridFS file document; it must contain ``'_id'``
            (a ``KeyError`` is raised otherwise).
        """
        self._id = doc['_id']
        self.f = gridfs.GridOut(collection, file_document=doc)
        self.filename = self.f.filename
        self.length = self.f.length
        self.upload_date = self.f.upload_date
        self.md5 = self.f.md5
        # 'parentId' is a custom, optional field. GridOut raises
        # AttributeError for fields missing from the file document, so a
        # default is required here; without it the "else None" fallback was
        # unreachable for the missing case. Falsy values still become None,
        # exactly as before.
        self.parentId = getattr(self.f, 'parentId', None) or None

    def get_metadata(self):
        """Return the cached metadata as a dict.

        ``'_id'``, ``'upload_date'`` and ``'md5'`` are always present;
        ``'filename'`` and ``'parentId'`` are included only when they are
        not ``None``.
        """
        result = {
            '_id': self._id,
            'upload_date': self.upload_date,
            'md5': self.md5,
        }
        if self.filename is not None:
            result['filename'] = self.filename
        if self.parentId is not None:
            result['parentId'] = self.parentId
        return result
    This file is a document manager for MongoDB, but the intent
    is that this file can be used as an example to add on different backends.
    To extend this to other systems, simply implement the exact same class and
    replace the method definitions with API calls for the desired backend.
    """

import logging
import pymongo

from gridfs import GridFS
from mongo_connector import errors, constants
from mongo_connector.util import exception_wrapper
from mongo_connector.doc_managers.doc_manager_base import DocManagerBase

# Pairs pymongo connection/operation failures with the matching
# mongo_connector error classes; the mapping is handed to exception_wrapper
# (presumably yielding a translating decorator -- confirm in
# mongo_connector.util).
wrap_exceptions = exception_wrapper({
    pymongo.errors.ConnectionFailure: errors.ConnectionFailed,
    pymongo.errors.OperationFailure: errors.OperationFailed})

# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)


class DocManager(DocManagerBase):
    """The DocManager class creates a connection to the backend engine and
        adds/removes documents, and in the case of rollback, searches for them.

        The reason for storing id/doc pairs as opposed to doc's is so that
        multiple updates to the same doc reflect the most up to date version as
        opposed to multiple, slightly different versions of a doc.

        We are using MongoDB native fields for _id and ns, but we also store
        them as fields in the document, due to compatibility issues.
from pysolr import Solr, SolrError

from mongo_connector import errors
from mongo_connector.compat import u
from mongo_connector.constants import (DEFAULT_COMMIT_INTERVAL,
                                       DEFAULT_MAX_BULK)
from mongo_connector.compat import (Request, urlopen, urlencode, URLError,
                                    HTTPError)
from mongo_connector.util import exception_wrapper, retry_until_ok
from mongo_connector.doc_managers.doc_manager_base import DocManagerBase
from mongo_connector.doc_managers.formatters import DocumentFlattener

# Pairs Solr client errors with errors.OperationFailed and URL/HTTP errors
# with errors.ConnectionFailed; the mapping is handed to exception_wrapper.
wrap_exceptions = exception_wrapper({
    SolrError: errors.OperationFailed,
    URLError: errors.ConnectionFailed,
    HTTPError: errors.ConnectionFailed
})

# Relative URL (path plus query string) requesting the schema in JSON form.
ADMIN_URL = 'admin/luke?show=schema&wt=json'

# Single JSONDecoder instance created once at import time.
decoder = json.JSONDecoder()


class DocManager(DocManagerBase):
    """The DocManager class creates a connection to the backend engine and
    adds/removes documents, and in the case of rollback, searches for them.

    The reason for storing id/doc pairs as opposed to doc's is so that multiple
    updates to the same doc reflect the most up to date version as opposed to
    multiple, slightly different versions of a doc.
import bson.json_util

from elasticsearch import Elasticsearch, exceptions as es_exceptions
from elasticsearch.helpers import scan, streaming_bulk

from mongo_connector import errors
from mongo_connector.compat import u
from mongo_connector.constants import (DEFAULT_COMMIT_INTERVAL,
                                       DEFAULT_MAX_BULK)
from mongo_connector.util import exception_wrapper, retry_until_ok
from mongo_connector.doc_managers.doc_manager_base import DocManagerBase
from mongo_connector.doc_managers.formatters import DefaultDocumentFormatter

# Pairs Elasticsearch client exceptions with mongo_connector error classes:
# connection problems -> ConnectionFailed, everything else -> OperationFailed.
# NOTE(review): in elasticsearch-py, ConnectionError, NotFoundError and
# RequestError all derive from TransportError, so whether the more specific
# entries take effect depends on how exception_wrapper matches -- confirm.
wrap_exceptions = exception_wrapper({
    es_exceptions.ConnectionError: errors.ConnectionFailed,
    es_exceptions.TransportError: errors.OperationFailed,
    es_exceptions.NotFoundError: errors.OperationFailed,
    es_exceptions.RequestError: errors.OperationFailed})

# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)


class DocManager(DocManagerBase):
    """Elasticsearch implementation of the DocManager interface.

    Receives documents from an OplogThread and takes the appropriate actions on
    Elasticsearch.
    """

    def __init__(self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
                 unique_key='_id', chunk_size=DEFAULT_MAX_BULK,
Exemple #6
0
from elasticsearch import Elasticsearch, exceptions as es_exceptions
from elasticsearch.helpers import scan, streaming_bulk

from mongo_connector import errors
from mongo_connector.compat import u
from mongo_connector.constants import (DEFAULT_COMMIT_INTERVAL,
                                       DEFAULT_MAX_BULK)
from mongo_connector.util import exception_wrapper, retry_until_ok
from mongo_connector.doc_managers.doc_manager_base import DocManagerBase
from mongo_connector.doc_managers.formatters import DefaultDocumentFormatter

# Pairs Elasticsearch client exceptions with mongo_connector error classes:
# connection problems -> ConnectionFailed, everything else -> OperationFailed.
# NOTE(review): in elasticsearch-py, ConnectionError, NotFoundError and
# RequestError all derive from TransportError, so whether the more specific
# entries take effect depends on how exception_wrapper matches -- confirm.
wrap_exceptions = exception_wrapper({
    es_exceptions.ConnectionError:
    errors.ConnectionFailed,
    es_exceptions.TransportError:
    errors.OperationFailed,
    es_exceptions.NotFoundError:
    errors.OperationFailed,
    es_exceptions.RequestError:
    errors.OperationFailed
})

# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)


class DocManager(DocManagerBase):
    """Elasticsearch implementation of the DocManager interface.

    Receives documents from an OplogThread and takes the appropriate actions on
    Elasticsearch.
    """
    def __init__(self,
from mongo_connector.doc_managers.nodes_and_relationships_builder import NodesAndRelationshipsBuilder
from mongo_connector.doc_managers.nodes_and_relationships_updater import NodesAndRelationshipsUpdater
from mongo_connector.doc_managers.error_handler import ErrorHandler

from py2neo import Graph, authenticate


from mongo_connector import errors
from mongo_connector.compat import u
from mongo_connector.constants import DEFAULT_COMMIT_INTERVAL, DEFAULT_MAX_BULK
from mongo_connector.util import exception_wrapper, retry_until_ok
from mongo_connector.doc_managers.doc_manager_base import DocManagerBase
from mongo_connector.doc_managers.formatters import DefaultDocumentFormatter

# The exception mapping is not written inline here; it is read from the
# error_hash attribute of an ErrorHandler instance created at import time.
# (Presumably error_hash is a dict of exception class -> mongo_connector
# error class, like the literal mappings in other doc managers -- confirm.)
errors_handler = ErrorHandler()
wrap_exceptions = exception_wrapper(errors_handler.error_hash)

# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)


class DocManager(DocManagerBase):
    """
  Neo4j implementation for the DocManager. Receives documents and 
  communicates with Neo4j Server.
  """

    def __init__(
        self, url, auto_commit_interval=DEFAULT_COMMIT_INTERVAL, unique_key="_id", chunk_size=DEFAULT_MAX_BULK, **kwargs
    ):

        self.graph = Graph(url)
import bson.json_util
from bson.json_util import dumps

from mongo_connector.doc_managers.error_handler import ErrorHandler

from py2neo import Graph, authenticate

from mongo_connector import errors
from mongo_connector.compat import u
from mongo_connector.constants import (DEFAULT_COMMIT_INTERVAL,
                                       DEFAULT_MAX_BULK)
from mongo_connector.util import exception_wrapper, retry_until_ok
from mongo_connector.doc_managers.doc_manager_base import DocManagerBase

# The exception mapping is not written inline here; it is read from the
# error_hash attribute of an ErrorHandler instance created at import time.
# (Presumably error_hash is a dict of exception class -> mongo_connector
# error class, like the literal mappings in other doc managers -- confirm.)
errors_handler = ErrorHandler()
wrap_exceptions = exception_wrapper(errors_handler.error_hash)

# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)


class DocManager(DocManagerBase):
    """
  Neo4j implementation for the DocManager. Receives documents and 
  communicates with Neo4j Server using doc2graph cypher api.
  """
    def __init__(self,
                 url,
                 auto_commit_interval=DEFAULT_COMMIT_INTERVAL,
                 unique_key='_id',
                 chunk_size=DEFAULT_MAX_BULK,
                 **kwargs):
from pysolr import Solr, SolrError

from mongo_connector import errors
from mongo_connector.compat import u
from mongo_connector.constants import (DEFAULT_COMMIT_INTERVAL,
                                       DEFAULT_MAX_BULK)
from mongo_connector.compat import (
    Request, urlopen, urlencode, URLError, HTTPError)
from mongo_connector.util import exception_wrapper, retry_until_ok
from mongo_connector.doc_managers.doc_manager_base import DocManagerBase
from mongo_connector.doc_managers.formatters import DocumentFlattener

# Pairs Solr client errors with errors.OperationFailed and URL/HTTP errors
# with errors.ConnectionFailed; the mapping is handed to exception_wrapper.
wrap_exceptions = exception_wrapper({
    SolrError: errors.OperationFailed,
    URLError: errors.ConnectionFailed,
    HTTPError: errors.ConnectionFailed
})

# Relative URL (path plus query string) requesting the schema in JSON form.
ADMIN_URL = 'admin/luke?show=schema&wt=json'
# From the documentation of Solr 4.0 "classic" query parser.
# Stored as a set of single characters (set() over the string splits it).
ESCAPE_CHARACTERS = set('+-&|!(){}[]^"~*?:\\/')

# Single JSONDecoder instance created once at import time.
decoder = json.JSONDecoder()


class DocManager(DocManagerBase):
    """The DocManager class creates a connection to the backend engine and
    adds/removes documents, and in the case of rollback, searches for them.

    The reason for storing id/doc pairs as opposed to doc's is so that multiple
Exemple #10
0
Company: Innoplexus, Pune
"""

import os
import logging
import arango
from arango import ArangoClient
from arango.exceptions import DocumentInsertError, DatabaseCreateError, ServerConnectionError
from mongo_connector import errors, constants
from mongo_connector.util import exception_wrapper
from mongo_connector.doc_managers.doc_manager_base import DocManagerBase
from mongo_connector.command_helper import CommandHelper

# Pairs ArangoDB database-creation and document-insert errors with
# errors.OperationFailed; the mapping is handed to exception_wrapper.
# NOTE(review): ServerConnectionError is imported above but has no entry
# here -- confirm whether it should map to errors.ConnectionFailed.
wrap_exceptions = exception_wrapper({
    DatabaseCreateError: errors.OperationFailed,
    DocumentInsertError: errors.OperationFailed
})

# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)

__version__ = constants.__version__

__version__ = '0.1.4'
"""ArangoDB 3.X DocManager version."""


class DocManager(DocManagerBase):
    """ArangoDB implementation of the DocManager interface.

    Receives documents from an OplogThread and takes the appropriate actions on
    ArangoDB.
Exemple #11
0
from mongo_connector.util import exception_wrapper, retry_until_ok
from mongo_connector.doc_managers.doc_manager_base import DocManagerBase
from mongo_connector.doc_managers.formatters import DefaultDocumentFormatter

# Optional AWS support: _HAS_AWS stays True only if both boto3 and
# requests_aws_sign can be imported; an ImportError flips it to False
# instead of failing the module import.
_HAS_AWS = True
try:
    from boto3 import session
    from requests_aws_sign import AWSV4Sign
except ImportError:
    _HAS_AWS = False

# Pairs bulk-indexing and Elasticsearch client exceptions with
# mongo_connector error classes: connection errors and timeouts ->
# ConnectionFailed, everything else -> OperationFailed.
# NOTE(review): several keys are subclasses of TransportError in
# elasticsearch-py; whether the specific entries win depends on how
# exception_wrapper matches -- confirm.
wrap_exceptions = exception_wrapper(
    {
        BulkIndexError: errors.OperationFailed,
        es_exceptions.ConnectionError: errors.ConnectionFailed,
        es_exceptions.ConnectionTimeout: errors.ConnectionFailed,
        es_exceptions.TransportError: errors.OperationFailed,
        es_exceptions.NotFoundError: errors.OperationFailed,
        es_exceptions.RequestError: errors.OperationFailed,
    }
)

# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)

DEFAULT_SEND_INTERVAL = 5
"""The default interval in seconds to send buffered operations."""

# Region name used as the AWS default (where it is consumed is not visible
# in this part of the file).
DEFAULT_AWS_REGION = "us-east-1"

# Version is looked up from the installed "elastic2_doc_manager"
# distribution's metadata rather than hard-coded.
__version__ = importlib_metadata.version("elastic2_doc_manager")

import gridfs
from mongo_connector import errors, util

# Pairs gridfs.errors.CorruptGridFile with errors.OperationFailed. The
# resulting object is applied as the @wrap_exceptions decorator on
# GridFSFile methods below.
wrap_exceptions = util.exception_wrapper(
    {gridfs.errors.CorruptGridFile: errors.OperationFailed})


class GridFSFile(object):
    @wrap_exceptions
    def __init__(self, collection, doc):
        """Open ``doc`` with ``gridfs.GridOut`` and cache its metadata.

        :param collection: forwarded unchanged as the first argument of
            ``gridfs.GridOut``.
        :param doc: the GridFS file document; it must contain ``'_id'``.
        """
        # Read the id first so a malformed document fails before any
        # GridOut object is created.
        self._id = doc['_id']
        grid_out = gridfs.GridOut(collection, file_document=doc)
        self.f = grid_out
        # Mirror the GridOut fields onto the instance, in this fixed order.
        for field in ('filename', 'length', 'upload_date', 'md5'):
            setattr(self, field, getattr(grid_out, field))

    def get_metadata(self):
        """Return the cached metadata as a dict.

        ``'_id'``, ``'upload_date'`` and ``'md5'`` are always present;
        ``'filename'`` is added only when it is not ``None``.
        """
        metadata = dict(
            _id=self._id,
            upload_date=self.upload_date,
            md5=self.md5,
        )
        if self.filename is None:
            return metadata
        metadata['filename'] = self.filename
        return metadata

    def __len__(self):
        """Return the ``length`` value cached on the instance by ``__init__``."""
        cached_length = self.length
        return cached_length

    @wrap_exceptions