def parse_uri(uri, default_port=DEFAULT_PORT):
    """Parse and validate a MongoDB URI.

    Returns a dict of the form::

        {
            'nodelist': <list of (host, port) tuples>,
            'username': <username> or None,
            'password': <password> or None,
            'database': <database name> or None,
            'collection': <collection name> or None,
            'options': <dict of MongoDB URI options>
        }

    :Parameters:
        - `uri`: The MongoDB URI to parse.
        - `default_port`: The port number to use when one wasn't specified
                          for a host in the URI.
    """
    if not uri.startswith(SCHEME):
        raise InvalidURI("Invalid URI scheme: URI "
                         "must begin with '%s'" % (SCHEME, ))

    scheme_free = uri[SCHEME_LEN:]

    if not scheme_free:
        raise InvalidURI("Must provide at least one hostname or IP.")

    nodes = None
    user = None
    passwd = None
    dbase = None
    collection = None
    options = {}

    # Check for unix domain sockets in the uri
    if '.sock' in scheme_free:
        host_part, _, path_part = _rpartition(scheme_free, '/')
        try:
            parse_uri('%s%s' % (SCHEME, host_part))
        except (ConfigurationError, InvalidURI):
            host_part = scheme_free
            path_part = ""
    else:
        host_part, _, path_part = _partition(scheme_free, '/')

    if not path_part and '?' in host_part:
        raise InvalidURI("A '/' is required between "
                         "the host list and any options.")

    if '@' in host_part:
        userinfo, _, hosts = _rpartition(host_part, '@')
        user, passwd = parse_userinfo(userinfo)
    else:
        hosts = host_part

    nodes = split_hosts(hosts, default_port=default_port)

    if path_part:

        if path_part[0] == '?':
            opts = path_part[1:]
        else:
            dbase, _, opts = _partition(path_part, '?')
            if '.' in dbase:
                dbase, collection = dbase.split('.', 1)

        if opts:
            options = split_options(opts)

    return {
        'nodelist': nodes,
        'username': user,
        'password': passwd,
        'database': dbase,
        'collection': collection,
        'options': options
    }
    def __init__(self,
                 host=None,
                 port=None,
                 document_class=dict,
                 tz_aware=False,
                 connect=True,
                 **kwargs):
        """Client for a MongoDB instance, a replica set, or a set of mongoses.

        The client object is thread-safe and has connection-pooling built in.
        If an operation fails because of a network error,
        :class:`~pymongo.errors.ConnectionFailure` is raised and the client
        reconnects in the background. Application code should handle this
        exception (recognizing that the operation failed) and then continue to
        execute.

        The `host` parameter can be a full `mongodb URI
        <http://dochub.mongodb.org/core/connections>`_, in addition to
        a simple hostname. It can also be a list of hostnames or
        URIs. Any port specified in the host string(s) will override
        the `port` parameter. If multiple mongodb URIs containing
        database or auth information are passed, the last database,
        username, and password present will be used.  For username and
        passwords reserved characters like ':', '/', '+' and '@' must be
        escaped following RFC 2396.

        .. warning:: When using PyMongo in a multiprocessing context, please
          read :ref:`multiprocessing` first.

        :Parameters:
          - `host` (optional): hostname or IP address of the
            instance to connect to, or a mongodb URI, or a list of
            hostnames / mongodb URIs. If `host` is an IPv6 literal
            it must be enclosed in '[' and ']' characters following
            the RFC2732 URL syntax (e.g. '[::1]' for localhost)
          - `port` (optional): port number on which to connect
          - `document_class` (optional): default class to use for
            documents returned from queries on this client
          - `tz_aware` (optional): if ``True``,
            :class:`~datetime.datetime` instances returned as values
            in a document by this :class:`MongoClient` will be timezone
            aware (otherwise they will be naive)
          - `connect` (optional): if ``True`` (the default), immediately
            begin connecting to MongoDB in the background. Otherwise connect
            on the first operation.

          | **Other optional parameters can be passed as keyword arguments:**

          - `maxPoolSize` (optional): The maximum number of connections
            that the pool will open simultaneously. If this is set, operations
            will block if there are `maxPoolSize` outstanding connections
            from the pool. Defaults to 100. Cannot be 0.
          - `socketTimeoutMS`: (integer or None) Controls how long (in
            milliseconds) the driver will wait for a response after sending an
            ordinary (non-monitoring) database operation before concluding that
            a network error has occurred. Defaults to ``None`` (no timeout).
          - `connectTimeoutMS`: (integer or None) Controls how long (in
            milliseconds) the driver will wait during server monitoring when
            connecting a new socket to a server before concluding the server
            is unavailable. Defaults to ``20000`` (20 seconds).
          - `serverSelectionTimeoutMS`: (integer) Controls how long (in
            milliseconds) the driver will wait to find an available,
            appropriate server to carry out a database operation; while it is
            waiting, multiple server monitoring operations may be carried out,
            each controlled by `connectTimeoutMS`. Defaults to ``30000`` (30
            seconds).
          - `waitQueueTimeoutMS`: (integer or None) How long (in milliseconds)
            a thread will wait for a socket from the pool if the pool has no
            free sockets. Defaults to ``None`` (no timeout).
          - `waitQueueMultiple`: (integer or None) Multiplied by maxPoolSize
            to give the number of threads allowed to wait for a socket at one
            time. Defaults to ``None`` (no limit).
          - `socketKeepAlive`: (boolean) Whether to send periodic keep-alive
            packets on connected sockets. Defaults to ``False`` (do not send
            keep-alive packets).
          - `event_listeners`: a list or tuple of event listeners. See
            :mod:`~pymongo.monitoring` for details.

          | **Write Concern options:**
          | (Only set if passed. No default values.)

          - `w`: (integer or string) If this is a replica set, write operations
            will block until they have been replicated to the specified number
            or tagged set of servers. `w=<int>` always includes the replica set
            primary (e.g. w=3 means write to the primary and wait until
            replicated to **two** secondaries). Passing w=0 **disables write
            acknowledgement** and all other write concern options.
          - `wtimeout`: (integer) Used in conjunction with `w`. Specify a value
            in milliseconds to control how long to wait for write propagation
            to complete. If replication does not complete in the given
            timeframe, a timeout exception is raised.
          - `j`: If ``True`` block until write operations have been committed
            to the journal. Cannot be used in combination with `fsync`. Prior
            to MongoDB 2.6 this option was ignored if the server was running
            without journaling. Starting with MongoDB 2.6 write operations will
            fail with an exception if this option is used when the server is
            running without journaling.
          - `fsync`: If ``True`` and the server is running without journaling,
            blocks until the server has synced all data files to disk. If the
            server is running with journaling, this acts the same as the `j`
            option, blocking until write operations have been committed to the
            journal. Cannot be used in combination with `j`.

          | **Replica set keyword arguments for connecting with a replica set
            - either directly or via a mongos:**

          - `replicaSet`: (string or None) The name of the replica set to
            connect to. The driver will verify that all servers it connects to
            match this name. Implies that the hosts specified are a seed list
            and the driver should attempt to find all members of the set.
            Defaults to ``None``.
          - `read_preference`: The read preference for this client.
            See :class:`~pymongo.read_preferences.ReadPreference` for all
            available read preference options. Defaults to ``PRIMARY``.

          | **SSL configuration:**

          - `ssl`: If ``True``, create the connection to the server using SSL.
            Defaults to ``False``.
          - `ssl_certfile`: The certificate file used to identify the local
            connection against mongod. Implies ``ssl=True``. Defaults to
            ``None``.
          - `ssl_keyfile`: The private keyfile used to identify the local
            connection against mongod.  If included with the ``certfile`` then
            only the ``ssl_certfile`` is needed.  Implies ``ssl=True``.
            Defaults to ``None``.
          - `ssl_pem_passphrase`: The password or passphrase for decrypting
            the private key in ``ssl_certfile`` or ``ssl_keyfile``. Only
            necessary if the private key is encrypted. Only supported by python
            2.7.9+ (pypy 2.5.1+) and 3.3+. Defaults to ``None``.
          - `ssl_cert_reqs`: Specifies whether a certificate is required from
            the other side of the connection, and whether it will be validated
            if provided. It must be one of the three values ``ssl.CERT_NONE``
            (certificates ignored), ``ssl.CERT_OPTIONAL``
            (not required, but validated if provided), or ``ssl.CERT_REQUIRED``
            (required and validated). If the value of this parameter is not
            ``ssl.CERT_NONE`` and a value is not provided for ``ssl_ca_certs``
            PyMongo will attempt to load system provided CA certificates.
            If the python version in use does not support loading system CA
            certificates then the ``ssl_ca_certs`` parameter must point
            to a file of CA certificates. Implies ``ssl=True``. Defaults to
            ``ssl.CERT_REQUIRED`` if not provided and ``ssl=True``.
          - `ssl_ca_certs`: The ca_certs file contains a set of concatenated
            "certification authority" certificates, which are used to validate
            certificates passed from the other end of the connection.
            Implies ``ssl=True``. Defaults to ``None``.
          - `ssl_crlfile`: The path to a PEM or DER formatted certificate
            revocation list. Only supported by python 2.7.9+ (pypy 2.5.1+)
            and 3.4+. Defaults to ``None``.
          - `ssl_match_hostname`: If ``True`` (the default), and
            `ssl_cert_reqs` is not ``ssl.CERT_NONE``, enables hostname
            verification using the :func:`~ssl.match_hostname` function from
            python's :mod:`~ssl` module. Think very carefully before setting
            this to ``False`` as that could make your application vulnerable to
            man-in-the-middle attacks.

          | **Read Concern options:**
          | (If not set explicitly, this will use the server default)

          - `readConcernLevel`: (string) The read concern level specifies the
            level of isolation for read operations.  For example, a read
            operation using a read concern level of ``majority`` will only
            return data that has been written to a majority of nodes. If the
            level is left unspecified, the server default will be used.

        .. mongodoc:: connections

        .. versionchanged:: 3.0
           :class:`~pymongo.mongo_client.MongoClient` is now the one and only
           client class for a standalone server, mongos, or replica set.
           It includes the functionality that had been split into
           :class:`~pymongo.mongo_client.MongoReplicaSetClient`: it can connect
           to a replica set, discover all its members, and monitor the set for
           stepdowns, elections, and reconfigs.

           The :class:`~pymongo.mongo_client.MongoClient` constructor no
           longer blocks while connecting to the server or servers, and it no
           longer raises :class:`~pymongo.errors.ConnectionFailure` if they
           are unavailable, nor :class:`~pymongo.errors.ConfigurationError`
           if the user's credentials are wrong. Instead, the constructor
           returns immediately and launches the connection process on
           background threads.

           Therefore the ``alive`` method is removed since it no longer
           provides meaningful information; even if the client is disconnected,
           it may discover a server in time to fulfill the next operation.

           In PyMongo 2.x, :class:`~pymongo.MongoClient` accepted a list of
           standalone MongoDB servers and used the first it could connect to::

               MongoClient(['host1.com:27017', 'host2.com:27017'])

           A list of multiple standalones is no longer supported; if multiple
           servers are listed they must be members of the same replica set, or
           mongoses in the same sharded cluster.

           The behavior for a list of mongoses is changed from "high
           availability" to "load balancing". Before, the client connected to
           the lowest-latency mongos in the list, and used it until a network
           error prompted it to re-evaluate all mongoses' latencies and
           reconnect to one of them. In PyMongo 3, the client monitors its
           network latency to all the mongoses continuously, and distributes
           operations evenly among those with the lowest latency. See
           :ref:`mongos-load-balancing` for more information.

           The ``connect`` option is added.

           The ``start_request``, ``in_request``, and ``end_request`` methods
           are removed, as well as the ``auto_start_request`` option.

           The ``copy_database`` method is removed, see the
           :doc:`copy_database examples </examples/copydb>` for alternatives.

           The :meth:`MongoClient.disconnect` method is removed; it was a
           synonym for :meth:`~pymongo.MongoClient.close`.

           :class:`~pymongo.mongo_client.MongoClient` no longer returns an
           instance of :class:`~pymongo.database.Database` for attribute names
           with leading underscores. You must use dict-style lookups instead::

               client['__my_database__']

           Not::

               client.__my_database__
        """
        if host is None:
            host = self.HOST
        if isinstance(host, string_type):
            host = [host]
        if port is None:
            port = self.PORT
        if not isinstance(port, int):
            raise TypeError("port must be an instance of int")

        seeds = set()
        username = None
        password = None
        dbase = None
        opts = {}
        for entity in host:
            if "://" in entity:
                if entity.startswith("mongodb://"):
                    res = uri_parser.parse_uri(entity, port, warn=True)
                    seeds.update(res["nodelist"])
                    username = res["username"] or username
                    password = res["password"] or password
                    dbase = res["database"] or dbase
                    opts = res["options"]
                else:
                    idx = entity.find("://")
                    raise InvalidURI("Invalid URI scheme: "
                                     "%s" % (entity[:idx], ))
            else:
                seeds.update(uri_parser.split_hosts(entity, port))
        if not seeds:
            raise ConfigurationError("need to specify at least one host")

        # _pool_class, _monitor_class, and _condition_class are for deep
        # customization of PyMongo, e.g. Motor.
        pool_class = kwargs.pop('_pool_class', None)
        monitor_class = kwargs.pop('_monitor_class', None)
        condition_class = kwargs.pop('_condition_class', None)

        keyword_opts = kwargs
        keyword_opts['document_class'] = document_class
        keyword_opts['tz_aware'] = tz_aware
        keyword_opts['connect'] = connect
        # Validate all keyword options.
        keyword_opts = dict(
            common.validate(k, v) for k, v in keyword_opts.items())
        opts.update(keyword_opts)
        self.__options = options = ClientOptions(username, password, dbase,
                                                 opts)

        self.__default_database_name = dbase
        self.__lock = threading.Lock()
        self.__cursor_manager = CursorManager(self)
        self.__kill_cursors_queue = []

        self._event_listeners = options.pool_options.event_listeners

        # Cache of existing indexes used by ensure_index ops.
        self.__index_cache = {}
        self.__index_cache_lock = threading.Lock()

        super(MongoClient,
              self).__init__(options.codec_options, options.read_preference,
                             options.write_concern, options.read_concern)

        self.__all_credentials = {}
        creds = options.credentials
        if creds:
            self._cache_credentials(creds.source, creds)

        self._topology_settings = TopologySettings(
            seeds=seeds,
            replica_set_name=options.replica_set_name,
            pool_class=pool_class,
            pool_options=options.pool_options,
            monitor_class=monitor_class,
            condition_class=condition_class,
            local_threshold_ms=options.local_threshold_ms,
            server_selection_timeout=options.server_selection_timeout)

        self._topology = Topology(self._topology_settings)
        if connect:
            self._topology.open()

        def target():
            client = self_ref()
            if client is None:
                return False  # Stop the executor.
            MongoClient._process_kill_cursors_queue(client)
            return True

        executor = periodic_executor.PeriodicExecutor(
            interval=common.KILL_CURSOR_FREQUENCY,
            min_interval=0.5,
            target=target,
            name="pymongo_kill_cursors_thread")

        # We strongly reference the executor and it weakly references us via
        # this closure. When the client is freed, stop the executor soon.
        self_ref = weakref.ref(self, executor.close)
        self._kill_cursors_executor = executor
        executor.open()
Example #3
0
    def __init__(self,
                 host=None,
                 port=None,
                 max_pool_size=10,
                 network_timeout=None,
                 document_class=dict,
                 tz_aware=False,
                 _connect=True,
                 **kwargs):
        """Create a new connection to a single MongoDB instance at *host:port*.

        The resultant connection object has connection-pooling built
        in. It also performs auto-reconnection when necessary. If an
        operation fails because of a connection error,
        :class:`~pymongo.errors.ConnectionFailure` is raised. If
        auto-reconnection will be performed,
        :class:`~pymongo.errors.AutoReconnect` will be
        raised. Application code should handle this exception
        (recognizing that the operation failed) and then continue to
        execute.

        Raises :class:`TypeError` if port is not an instance of
        ``int``. Raises :class:`~pymongo.errors.ConnectionFailure` if
        the connection cannot be made.

        The `host` parameter can be a full `mongodb URI
        <http://dochub.mongodb.org/core/connections>`_, in addition to
        a simple hostname. It can also be a list of hostnames or
        URIs. Any port specified in the host string(s) will override
        the `port` parameter. If multiple mongodb URIs containing
        database or auth information are passed, the last database,
        username, and password present will be used.

        :Parameters:
          - `host` (optional): hostname or IP address of the
            instance to connect to, or a mongodb URI, or a list of
            hostnames / mongodb URIs. If `host` is an IPv6 literal
            it must be enclosed in '[' and ']' characters following
            the RFC2732 URL syntax (e.g. '[::1]' for localhost)
          - `port` (optional): port number on which to connect
          - `max_pool_size` (optional): The maximum size limit for
            the connection pool.
          - `network_timeout` (optional): timeout (in seconds) to use
            for socket operations - default is no timeout
          - `document_class` (optional): default class to use for
            documents returned from queries on this connection
          - `tz_aware` (optional): if ``True``,
            :class:`~datetime.datetime` instances returned as values
            in a document by this :class:`Connection` will be timezone
            aware (otherwise they will be naive)

          Other optional parameters can be passed as keyword arguments:

          - `safe`: Use getlasterror for each write operation?
          - `j` or `journal`: Block until write operations have been commited
            to the journal. Ignored if the server is running without journaling.
            Implies safe=True.
          - `w`: (integer or string) If this is a replica set write operations
            won't return until they have been replicated to the specified
            number or tagged set of servers.
            Implies safe=True.
          - `wtimeout`: Used in conjunction with `j` and/or `w`. Wait this many
            milliseconds for journal acknowledgement and/or write replication.
            Implies safe=True.
          - `fsync`: Force the database to fsync all files before returning
            When used with `j` the server awaits the next group commit before
            returning.
            Implies safe=True.
          - `replicaSet`: The name of the replica set to connect to. The driver
            will verify that the replica set it connects to matches this name.
            Implies that the hosts specified are a seed list and the driver should
            attempt to find all members of the set.
          - `socketTimeoutMS`: How long a send or receive on a socket can take
            before timing out.
          - `connectTimeoutMS`: How long a connection can take to be opened
            before timing out.
          - `ssl`: If True, create the connection to the server using SSL.
          - `read_preference`: The read preference for this connection.
            See :class:`~pymongo.ReadPreference` for available options.
          - `slave_okay` or `slaveOk` (deprecated): Use `read_preference`
            instead.

        .. seealso:: :meth:`end_request`
        .. versionchanged:: 2.1
           Support `w` = integer or string.
           Added `ssl` option.
           DEPRECATED slave_okay/slaveOk.
        .. versionchanged:: 2.0
           `slave_okay` is a pure keyword argument. Added support for safe,
           and getlasterror options as keyword arguments.
        .. versionchanged:: 1.11
           Added `max_pool_size`. Completely removed previously deprecated
           `pool_size`, `auto_start_request` and `timeout` parameters.
        .. versionchanged:: 1.8
           The `host` parameter can now be a full `mongodb URI
           <http://dochub.mongodb.org/core/connections>`_, in addition
           to a simple hostname. It can also be a list of hostnames or
           URIs.
        .. versionadded:: 1.8
           The `tz_aware` parameter.
        .. versionadded:: 1.7
           The `document_class` parameter.
        .. versionadded:: 1.1
           The `network_timeout` parameter.

        .. mongodoc:: connections
        """
        if host is None:
            host = self.HOST
        if isinstance(host, basestring):
            host = [host]
        if port is None:
            port = self.PORT
        if not isinstance(port, int):
            raise TypeError("port must be an instance of int")

        nodes = set()
        username = None
        password = None
        db = None
        options = {}
        for entity in host:
            if "://" in entity:
                if entity.startswith("mongodb://"):
                    res = uri_parser.parse_uri(entity, port)
                    nodes.update(res["nodelist"])
                    username = res["username"] or username
                    password = res["password"] or password
                    db = res["database"] or db
                    options = res["options"]
                else:
                    idx = entity.find("://")
                    raise InvalidURI("Invalid URI scheme: "
                                     "%s" % (entity[:idx], ))
            else:
                nodes.update(uri_parser.split_hosts(entity, port))
        if not nodes:
            raise ConfigurationError("need to specify at least one host")

        self.__nodes = nodes
        self.__host = None
        self.__port = None

        for option, value in kwargs.iteritems():
            option, value = common.validate(option, value)
            options[option] = value

        if not isinstance(max_pool_size, int):
            raise ConfigurationError("max_pool_size must be an integer")
        if max_pool_size < 0:
            raise ValueError("max_pool_size must be >= 0")
        self.__max_pool_size = max_pool_size

        self.__cursor_manager = CursorManager(self)

        self.__repl = options.get('replicaset')

        if network_timeout is not None:
            if (not isinstance(network_timeout, (int, float))
                    or network_timeout <= 0):
                raise ConfigurationError("network_timeout must "
                                         "be a positive integer")
        self.__net_timeout = (network_timeout
                              or options.get('sockettimeoutms'))
        self.__conn_timeout = options.get('connecttimeoutms')
        self.__use_ssl = options.get('ssl', False)
        if self.__use_ssl and not have_ssl:
            raise ConfigurationError("The ssl module is not available. If you "
                                     "are using a python version previous to "
                                     "2.6 you must install the ssl package "
                                     "from PyPI.")
        self.__pool = _Pool(self.__max_pool_size, self.__net_timeout,
                            self.__conn_timeout, self.__use_ssl)

        self.__last_checkout = time.time()

        self.__document_class = document_class
        self.__tz_aware = tz_aware

        # cache of existing indexes used by ensure_index ops
        self.__index_cache = {}
        self.__auth_credentials = {}

        super(Connection, self).__init__(**options)
        if self.slave_okay:
            warnings.warn(
                "slave_okay is deprecated. Please "
                "use read_preference instead.", DeprecationWarning)

        if _connect:
            self.__find_node()

        if db and username is None:
            warnings.warn("must provide a username and password "
                          "to authenticate to %s" % (db, ))
        if username:
            db = db or "admin"
            if not self[db].authenticate(username, password):
                raise ConfigurationError("authentication failed")
def parse_uri(uri, default_port=DEFAULT_PORT, validate=True, warn=False,
              normalize=True, connect_timeout=None):
    """Parse and validate a MongoDB URI.

    Returns a dict of the form::

        {
            'nodelist': <list of (host, port) tuples>,
            'username': <username> or None,
            'password': <password> or None,
            'database': <database name> or None,
            'collection': <collection name> or None,
            'options': <dict of MongoDB URI options>,
            'fqdn': <fqdn of the MongoDB+SRV URI> or None
        }

    If the URI scheme is "mongodb+srv://" DNS SRV and TXT lookups will be done
    to build nodelist and options.

    :Parameters:
        - `uri`: The MongoDB URI to parse.
        - `default_port`: The port number to use when one wasn't specified
          for a host in the URI.
        - `validate` (optional): If ``True`` (the default), validate and
          normalize all options. Default: ``True``.
        - `warn` (optional): When validating, if ``True`` then will warn
          the user then ignore any invalid options or values. If ``False``,
          validation will error when options are unsupported or values are
          invalid. Default: ``False``.
        - `normalize` (optional): If ``True``, convert names of URI options
          to their internally-used names. Default: ``True``.
        - `connect_timeout` (optional): The maximum time in milliseconds to
          wait for a response from the DNS server.

    .. versionchanged:: 3.9
        Added the ``normalize`` parameter.

    .. versionchanged:: 3.6
        Added support for mongodb+srv:// URIs.

    .. versionchanged:: 3.5
        Return the original value of the ``readPreference`` MongoDB URI option
        instead of the validated read preference mode.

    .. versionchanged:: 3.1
        ``warn`` added so invalid options can be ignored.
    """
    if uri.startswith(SCHEME):
        is_srv = False
        scheme_free = uri[SCHEME_LEN:]
    elif uri.startswith(SRV_SCHEME):
        if not _HAVE_DNSPYTHON:
            raise ConfigurationError('The "dnspython" module must be '
                                     'installed to use mongodb+srv:// URIs')
        is_srv = True
        scheme_free = uri[SRV_SCHEME_LEN:]
    else:
        raise InvalidURI("Invalid URI scheme: URI must "
                         "begin with '%s' or '%s'" % (SCHEME, SRV_SCHEME))

    if not scheme_free:
        raise InvalidURI("Must provide at least one hostname or IP.")

    user = None
    passwd = None
    dbase = None
    collection = None
    options = _CaseInsensitiveDictionary()

    host_part, _, path_part = scheme_free.partition('/')
    if not host_part:
        host_part = path_part
        path_part = ""

    if not path_part and '?' in host_part:
        raise InvalidURI("A '/' is required between "
                         "the host list and any options.")

    if path_part:
        dbase, _, opts = path_part.partition('?')
        if dbase:
            dbase = unquote_plus(dbase)
            if '.' in dbase:
                dbase, collection = dbase.split('.', 1)
            if _BAD_DB_CHARS.search(dbase):
                raise InvalidURI('Bad database name "%s"' % dbase)
        else:
            dbase = None

        if opts:
            options.update(split_options(opts, validate, warn, normalize))

    if '@' in host_part:
        userinfo, _, hosts = host_part.rpartition('@')
        user, passwd = parse_userinfo(userinfo)
    else:
        hosts = host_part

    if '/' in hosts:
        raise InvalidURI("Any '/' in a unix domain socket must be"
                         " percent-encoded: %s" % host_part)

    hosts = unquote_plus(hosts)
    fqdn = None

    if is_srv:
        nodes = split_hosts(hosts, default_port=None)
        if len(nodes) != 1:
            raise InvalidURI(
                "%s URIs must include one, "
                "and only one, hostname" % (SRV_SCHEME,))
        fqdn, port = nodes[0]
        if port is not None:
            raise InvalidURI(
                "%s URIs must not include a port number" % (SRV_SCHEME,))

        # Use the connection timeout. connectTimeoutMS passed as a keyword
        # argument overrides the same option passed in the connection string.
        connect_timeout = connect_timeout or options.get("connectTimeoutMS")
        dns_resolver = _SrvResolver(fqdn, connect_timeout=connect_timeout)
        nodes = dns_resolver.get_hosts()
        dns_options = dns_resolver.get_options()
        if dns_options:
            parsed_dns_options = split_options(
                dns_options, validate, warn, normalize)
            if set(parsed_dns_options) - _ALLOWED_TXT_OPTS:
                raise ConfigurationError(
                    "Only authSource and replicaSet are supported from DNS")
            for opt, val in parsed_dns_options.items():
                if opt not in options:
                    options[opt] = val
        if "ssl" not in options:
            options["ssl"] = True if validate else 'true'
    else:
        nodes = split_hosts(hosts, default_port=default_port)

    return {
        'nodelist': nodes,
        'username': user,
        'password': passwd,
        'database': dbase,
        'collection': collection,
        'options': options,
        'fqdn': fqdn
    }
Example #5
0
def parse_uri(uri, default_port=DEFAULT_PORT, validate=True, warn=False):
    """Parse and validate a MongoDB URI.

    Returns a dict of the form::

        {
            'nodelist': <list of (host, port) tuples>,
            'username': <username> or None,
            'password': <password> or None,
            'database': <database name> or None,
            'collection': <collection name> or None,
            'options': <dict of MongoDB URI options>
        }

    If the URI scheme is "mongodb+srv://" DNS SRV and TXT lookups will be done
    to build nodelist and options.

    :Parameters:
        - `uri`: The MongoDB URI to parse.
        - `default_port`: The port number to use when one wasn't specified
          for a host in the URI.
        - `validate`: If ``True`` (the default), validate and normalize all
          options.
        - `warn` (optional): When validating, if ``True`` then will warn
          the user then ignore any invalid options or values. If ``False``,
          validation will error when options are unsupported or values are
          invalid.

    .. versionchanged:: 3.6
        Added support for mongodb+srv:// URIs

    .. versionchanged:: 3.5
        Return the original value of the ``readPreference`` MongoDB URI option
        instead of the validated read preference mode.

    .. versionchanged:: 3.1
        ``warn`` added so invalid options can be ignored.
    """
    if uri.startswith(SCHEME):
        is_srv = False
        scheme_free = uri[SCHEME_LEN:]
    elif uri.startswith(SRV_SCHEME):
        if not _HAVE_DNSPYTHON:
            raise ConfigurationError('The "dnspython" module must be '
                                     'installed to use mongodb+srv:// URIs')
        is_srv = True
        scheme_free = uri[SRV_SCHEME_LEN:]
    else:
        raise InvalidURI("Invalid URI scheme: URI must "
                         "begin with '%s' or '%s'" % (SCHEME, SRV_SCHEME))

    if not scheme_free:
        raise InvalidURI("Must provide at least one hostname or IP.")

    user = None
    passwd = None
    dbase = None
    collection = None
    options = {}

    host_part, _, path_part = _partition(scheme_free, '/')
    if not host_part:
        host_part = path_part
        path_part = ""

    if not path_part and '?' in host_part:
        raise InvalidURI("A '/' is required between "
                         "the host list and any options.")

    if '@' in host_part:
        userinfo, _, hosts = _rpartition(host_part, '@')
        user, passwd = parse_userinfo(userinfo)
    else:
        hosts = host_part

    if '/' in hosts:
        raise InvalidURI("Any '/' in a unix domain socket must be"
                         " percent-encoded: %s" % host_part)

    hosts = unquote_plus(hosts)

    if is_srv:
        nodes = split_hosts(hosts, default_port=None)
        if len(nodes) != 1:
            raise InvalidURI("%s URIs must include one, "
                             "and only one, hostname" % (SRV_SCHEME, ))
        fqdn, port = nodes[0]
        if port is not None:
            raise InvalidURI("%s URIs must not include a port number" %
                             (SRV_SCHEME, ))
        nodes = _get_dns_srv_hosts(fqdn)

        try:
            plist = fqdn.split(".")[1:]
        except Exception:
            raise ConfigurationError("Invalid URI host")
        slen = len(plist)
        if slen < 2:
            raise ConfigurationError("Invalid URI host")
        for node in nodes:
            try:
                nlist = node[0].split(".")[1:][-slen:]
            except Exception:
                raise ConfigurationError("Invalid SRV host")
            if plist != nlist:
                raise ConfigurationError("Invalid SRV host")

        dns_options = _get_dns_txt_options(fqdn)
        if dns_options:
            options = split_options(dns_options, validate, warn)
            if set(options) - _ALLOWED_TXT_OPTS:
                raise ConfigurationError(
                    "Only authSource and replicaSet are supported from DNS")
        options["ssl"] = True if validate else 'true'
    else:
        nodes = split_hosts(hosts, default_port=default_port)

    if path_part:
        if path_part[0] == '?':
            opts = unquote_plus(path_part[1:])
        else:
            dbase, _, opts = map(unquote_plus, _partition(path_part, '?'))
            if '.' in dbase:
                dbase, collection = dbase.split('.', 1)

            if _BAD_DB_CHARS.search(dbase):
                raise InvalidURI('Bad database name "%s"' % dbase)

        if opts:
            options.update(split_options(opts, validate, warn))

    if dbase is not None:
        dbase = unquote_plus(dbase)
    if collection is not None:
        collection = unquote_plus(collection)

    return {
        'nodelist': nodes,
        'username': user,
        'password': passwd,
        'database': dbase,
        'collection': collection,
        'options': options
    }
Example #6
0
    def __init__(self,
                 host=None,
                 port=None,
                 max_pool_size=10,
                 document_class=dict,
                 tz_aware=False,
                 _connect=True,
                 **kwargs):
        """Create a new connection to a single MongoDB instance at *host:port*.

        The resultant client object has connection-pooling built
        in. It also performs auto-reconnection when necessary. If an
        operation fails because of a connection error,
        :class:`~pymongo.errors.ConnectionFailure` is raised. If
        auto-reconnection will be performed,
        :class:`~pymongo.errors.AutoReconnect` will be
        raised. Application code should handle this exception
        (recognizing that the operation failed) and then continue to
        execute.

        Raises :class:`TypeError` if port is not an instance of
        ``int``. Raises :class:`~pymongo.errors.ConnectionFailure` if
        the connection cannot be made.

        The `host` parameter can be a full `mongodb URI
        <http://dochub.mongodb.org/core/connections>`_, in addition to
        a simple hostname. It can also be a list of hostnames or
        URIs. Any port specified in the host string(s) will override
        the `port` parameter. If multiple mongodb URIs containing
        database or auth information are passed, the last database,
        username, and password present will be used.  For username and
        passwords reserved characters like ':', '/', '+' and '@' must be
        escaped following RFC 2396.

        :Parameters:
          - `host` (optional): hostname or IP address of the
            instance to connect to, or a mongodb URI, or a list of
            hostnames / mongodb URIs. If `host` is an IPv6 literal
            it must be enclosed in '[' and ']' characters following
            the RFC2732 URL syntax (e.g. '[::1]' for localhost)
          - `port` (optional): port number on which to connect
          - `max_pool_size` (optional): The maximum number of idle connections
            to keep open in the pool for future use
          - `document_class` (optional): default class to use for
            documents returned from queries on this client
          - `tz_aware` (optional): if ``True``,
            :class:`~datetime.datetime` instances returned as values
            in a document by this :class:`MongoClient` will be timezone
            aware (otherwise they will be naive)

          | **Other optional parameters can be passed as keyword arguments:**

          - `socketTimeoutMS`: (integer) How long (in milliseconds) a send or
            receive on a socket can take before timing out.
          - `connectTimeoutMS`: (integer) How long (in milliseconds) a
            connection can take to be opened before timing out.
          - `auto_start_request`: If ``True``, each thread that accesses
            this :class:`MongoClient` has a socket allocated to it for the
            thread's lifetime.  This ensures consistent reads, even if you
            read after an unacknowledged write. Defaults to ``False``
          - `use_greenlets`: If ``True``, :meth:`start_request()` will ensure
            that the current greenlet uses the same socket for all
            operations until :meth:`end_request()`

          | **Write Concern options:**

          - `w`: (integer or string) If this is a replica set, write operations
            will block until they have been replicated to the specified number
            or tagged set of servers. `w=<int>` always includes the replica set
            primary (e.g. w=3 means write to the primary and wait until
            replicated to **two** secondaries). Passing w=0 **disables write
            acknowledgement** and all other write concern options.
          - `wtimeout`: (integer) Used in conjunction with `w`. Specify a value
            in milliseconds to control how long to wait for write propagation
            to complete. If replication does not complete in the given
            timeframe, a timeout exception is raised.
          - `j`: If ``True`` block until write operations have been committed
            to the journal. Ignored if the server is running without journaling.
          - `fsync`: If ``True`` force the database to fsync all files before
            returning. When used with `j` the server awaits the next group
            commit before returning.

          | **Replica set keyword arguments for connecting with a replica set
            - either directly or via a mongos:**
          | (ignored by standalone mongod instances)

          - `replicaSet`: (string) The name of the replica set to connect to.
            The driver will verify that the replica set it connects to matches
            this name. Implies that the hosts specified are a seed list and the
            driver should attempt to find all members of the set. *Ignored by
            mongos*.
          - `read_preference`: The read preference for this client. If
            connecting to a secondary then a read preference mode *other* than
            PRIMARY is required - otherwise all queries will throw
            :class:`~pymongo.errors.AutoReconnect` "not master".
            See :class:`~pymongo.read_preferences.ReadPreference` for all
            available read preference options.
          - `tag_sets`: Ignored unless connecting to a replica set via mongos.
            Specify a priority-order for tag sets, provide a list of
            tag sets: ``[{'dc': 'ny'}, {'dc': 'la'}, {}]``. A final, empty tag
            set, ``{}``, means "read from any member that matches the mode,
            ignoring tags.

          | **SSL configuration:**

          - `ssl`: If ``True``, create the connection to the server using SSL.
          - `ssl_keyfile`: The private keyfile used to identify the local
            connection against mongod.  If included with the ``certfile` then
            only the ``ssl_certfile`` is needed.  Implies ``ssl=True``.
          - `ssl_certfile`: The certificate file used to identify the local
            connection against mongod. Implies ``ssl=True``.
          - `ssl_cert_reqs`: Specifies whether a certificate is required from
            the other side of the connection, and whether it will be validated
            if provided. It must be one of the three values ``ssl.CERT_NONE``
            (certificates ignored), ``ssl.CERT_OPTIONAL``
            (not required, but validated if provided), or ``ssl.CERT_REQUIRED``
            (required and validated). If the value of this parameter is not
            ``ssl.CERT_NONE``, then the ``ssl_ca_certs`` parameter must point
            to a file of CA certificates. Implies ``ssl=True``.
          - `ssl_ca_certs`: The ca_certs file contains a set of concatenated
            "certification authority" certificates, which are used to validate
            certificates passed from the other end of the connection.
            Implies ``ssl=True``.

        .. seealso:: :meth:`end_request`

        .. mongodoc:: connections

        .. versionchanged:: 2.5
           Added additional ssl options
        .. versionadded:: 2.4
        """
        if host is None:
            host = self.HOST
        if isinstance(host, basestring):
            host = [host]
        if port is None:
            port = self.PORT
        if not isinstance(port, int):
            raise TypeError("port must be an instance of int")

        seeds = set()
        username = None
        password = None
        db_name = None
        opts = {}
        for entity in host:
            if "://" in entity:
                if entity.startswith("mongodb://"):
                    res = uri_parser.parse_uri(entity, port)
                    seeds.update(res["nodelist"])
                    username = res["username"] or username
                    password = res["password"] or password
                    db_name = res["database"] or db_name
                    opts = res["options"]
                else:
                    idx = entity.find("://")
                    raise InvalidURI("Invalid URI scheme: "
                                     "%s" % (entity[:idx], ))
            else:
                seeds.update(uri_parser.split_hosts(entity, port))
        if not seeds:
            raise ConfigurationError("need to specify at least one host")

        self.__nodes = seeds
        self.__host = None
        self.__port = None
        self.__is_primary = False
        self.__is_mongos = False

        # _pool_class option is for deep customization of PyMongo, e.g. Motor.
        # SHOULD NOT BE USED BY DEVELOPERS EXTERNAL TO 10GEN.
        pool_class = kwargs.pop('_pool_class', pool.Pool)

        options = {}
        for option, value in kwargs.iteritems():
            option, value = common.validate(option, value)
            options[option] = value
        options.update(opts)

        self.__max_pool_size = common.validate_positive_integer(
            'max_pool_size', max_pool_size)

        self.__cursor_manager = CursorManager(self)

        self.__repl = options.get('replicaset')
        if len(seeds) == 1 and not self.__repl:
            self.__direct = True
        else:
            self.__direct = False
            self.__nodes = set()

        self.__net_timeout = options.get('sockettimeoutms')
        self.__conn_timeout = options.get('connecttimeoutms')
        self.__use_ssl = options.get('ssl', None)
        self.__ssl_keyfile = options.get('ssl_keyfile', None)
        self.__ssl_certfile = options.get('ssl_certfile', None)
        self.__ssl_cert_reqs = options.get('ssl_cert_reqs', None)
        self.__ssl_ca_certs = options.get('ssl_ca_certs', None)

        ssl_kwarg_keys = [k for k in kwargs.keys() if k.startswith('ssl_')]
        if self.__use_ssl == False and ssl_kwarg_keys:
            raise ConfigurationError("ssl has not been enabled but the "
                                     "following ssl parameters have been set: "
                                     "%s. Please set `ssl=True` or remove." %
                                     ', '.join(ssl_kwarg_keys))

        if self.__ssl_cert_reqs and not self.__ssl_ca_certs:
            raise ConfigurationError("If `ssl_cert_reqs` is not "
                                     "`ssl.CERT_NONE` then you must "
                                     "include `ssl_ca_certs` to be able "
                                     "to validate the server.")

        if ssl_kwarg_keys and self.__use_ssl is None:
            # ssl options imply ssl = True
            self.__use_ssl = True

        if self.__use_ssl and not HAS_SSL:
            raise ConfigurationError("The ssl module is not available. If you "
                                     "are using a python version previous to "
                                     "2.6 you must install the ssl package "
                                     "from PyPI.")

        self.__use_greenlets = options.get('use_greenlets', False)
        self.__pool = pool_class(None,
                                 self.__max_pool_size,
                                 self.__net_timeout,
                                 self.__conn_timeout,
                                 self.__use_ssl,
                                 use_greenlets=self.__use_greenlets,
                                 ssl_keyfile=self.__ssl_keyfile,
                                 ssl_certfile=self.__ssl_certfile,
                                 ssl_cert_reqs=self.__ssl_cert_reqs,
                                 ssl_ca_certs=self.__ssl_ca_certs)

        self.__document_class = document_class
        self.__tz_aware = common.validate_boolean('tz_aware', tz_aware)
        self.__auto_start_request = options.get('auto_start_request', False)

        # cache of existing indexes used by ensure_index ops
        self.__index_cache = {}
        self.__auth_credentials = {}

        super(MongoClient, self).__init__(**options)
        if self.slave_okay:
            warnings.warn(
                "slave_okay is deprecated. Please "
                "use read_preference instead.",
                DeprecationWarning,
                stacklevel=2)

        if _connect:
            try:
                self.__find_node(seeds)
            except AutoReconnect, e:
                # ConnectionFailure makes more sense here than AutoReconnect
                raise ConnectionFailure(str(e))
Example #7
0
    def __init__(self,
                 host=None,
                 port=None,
                 max_pool_size=10,
                 document_class=dict,
                 tz_aware=False,
                 _connect=True,
                 **kwargs):
        """Create a new connection to a single MongoDB instance at *host:port*.

        The resultant connection object has connection-pooling built
        in. It also performs auto-reconnection when necessary. If an
        operation fails because of a connection error,
        :class:`~pymongo.errors.ConnectionFailure` is raised. If
        auto-reconnection will be performed,
        :class:`~pymongo.errors.AutoReconnect` will be
        raised. Application code should handle this exception
        (recognizing that the operation failed) and then continue to
        execute.

        Raises :class:`TypeError` if port is not an instance of
        ``int``. Raises :class:`~pymongo.errors.ConnectionFailure` if
        the connection cannot be made.

        The `host` parameter can be a full `mongodb URI
        <http://dochub.mongodb.org/core/connections>`_, in addition to
        a simple hostname. It can also be a list of hostnames or
        URIs. Any port specified in the host string(s) will override
        the `port` parameter. If multiple mongodb URIs containing
        database or auth information are passed, the last database,
        username, and password present will be used.  For username and
        passwords reserved characters like ':', '/', '+' and '@' must be
        escaped following RFC 2396.

        :Parameters:
          - `host` (optional): hostname or IP address of the
            instance to connect to, or a mongodb URI, or a list of
            hostnames / mongodb URIs. If `host` is an IPv6 literal
            it must be enclosed in '[' and ']' characters following
            the RFC2732 URL syntax (e.g. '[::1]' for localhost)
          - `port` (optional): port number on which to connect
          - `max_pool_size` (optional): The maximum size limit for
            the connection pool.
          - `document_class` (optional): default class to use for
            documents returned from queries on this connection
          - `tz_aware` (optional): if ``True``,
            :class:`~datetime.datetime` instances returned as values
            in a document by this :class:`MongoClient` will be timezone
            aware (otherwise they will be naive)

          **Other optional parameters can be passed as keyword arguments:**

          - `w`: (integer or string) If this is a replica set, write operations
            will block until they have been replicated to the specified number
            or tagged set of servers. `w=<int>` always includes the replica set
            primary (e.g. w=3 means write to the primary and wait until
            replicated to **two** secondaries). **Passing w=0 disables write
            acknowledgement and all other write concern options.**
          - `wtimeout`: (integer) Used in conjunction with `w`. Specify a value
            in milliseconds to control how long to wait for write propagation
            to complete. If replication does not complete in the given
            timeframe, a timeout exception is raised.
          - `j`: If ``True`` block until write operations have been committed
            to the journal. Ignored if the server is running without journaling.
          - `fsync`: If ``True`` force the database to fsync all files before
            returning. When used with `j` the server awaits the next group
            commit before returning.
          - `replicaSet`: (string) The name of the replica set to connect to.
            The driver will verify that the replica set it connects to matches
            this name. Implies that the hosts specified are a seed list and the
            driver should attempt to find all members of the set.
          - `socketTimeoutMS`: (integer) How long (in milliseconds) a send or
            receive on a socket can take before timing out.
          - `connectTimeoutMS`: (integer) How long (in milliseconds) a
            connection can take to be opened before timing out.
          - `ssl`: If ``True``, create the connection to the server using SSL.
          - `read_preference`: The read preference for this connection.
            See :class:`~pymongo.read_preferences.ReadPreference` for available
            options.
          - `auto_start_request`: If ``True``, each thread that accesses
            this :class:`MongoClient` has a socket allocated to it for the
            thread's lifetime.  This ensures consistent reads, even if you
            read after an unacknowledged write. Defaults to ``False``
          - `use_greenlets`: If ``True``, :meth:`start_request()` will ensure
            that the current greenlet uses the same socket for all
            operations until :meth:`end_request()`

        .. seealso:: :meth:`end_request`

        .. mongodoc:: connections

        .. versionadded:: 2.4
        """
        if host is None:
            host = self.HOST
        if isinstance(host, basestring):
            host = [host]
        if port is None:
            port = self.PORT
        if not isinstance(port, int):
            raise TypeError("port must be an instance of int")

        seeds = set()
        username = None
        password = None
        db = None
        opts = {}
        for entity in host:
            if "://" in entity:
                if entity.startswith("mongodb://"):
                    res = uri_parser.parse_uri(entity, port)
                    seeds.update(res["nodelist"])
                    username = res["username"] or username
                    password = res["password"] or password
                    db = res["database"] or db
                    opts = res["options"]
                else:
                    idx = entity.find("://")
                    raise InvalidURI("Invalid URI scheme: "
                                     "%s" % (entity[:idx], ))
            else:
                seeds.update(uri_parser.split_hosts(entity, port))
        if not seeds:
            raise ConfigurationError("need to specify at least one host")

        self.__nodes = seeds
        self.__host = None
        self.__port = None
        self.__is_primary = False
        self.__is_mongos = False

        options = {}
        for option, value in kwargs.iteritems():
            option, value = common.validate(option, value)
            options[option] = value
        options.update(opts)

        self.__max_pool_size = common.validate_positive_integer(
            'max_pool_size', max_pool_size)

        self.__cursor_manager = CursorManager(self)

        self.__repl = options.get('replicaset')
        if len(seeds) == 1 and not self.__repl:
            self.__direct = True
        else:
            self.__direct = False
            self.__nodes = set()

        self.__net_timeout = options.get('sockettimeoutms')
        self.__conn_timeout = options.get('connecttimeoutms')
        self.__use_ssl = options.get('ssl', False)
        if self.__use_ssl and not pool.have_ssl:
            raise ConfigurationError("The ssl module is not available. If you "
                                     "are using a python version previous to "
                                     "2.6 you must install the ssl package "
                                     "from PyPI.")

        if options.get('use_greenlets', False):
            if not pool.have_greenlet:
                raise ConfigurationError(
                    "The greenlet module is not available. "
                    "Install the greenlet package from PyPI.")
            self.pool_class = pool.GreenletPool
        else:
            self.pool_class = pool.Pool

        self.__pool = self.pool_class(None, self.__max_pool_size,
                                      self.__net_timeout, self.__conn_timeout,
                                      self.__use_ssl)

        self.__document_class = document_class
        self.__tz_aware = common.validate_boolean('tz_aware', tz_aware)
        self.__auto_start_request = options.get('auto_start_request', False)

        # cache of existing indexes used by ensure_index ops
        self.__index_cache = {}
        self.__auth_credentials = {}

        super(MongoClient, self).__init__(**options)
        if self.slave_okay:
            warnings.warn(
                "slave_okay is deprecated. Please "
                "use read_preference instead.", DeprecationWarning)

        if _connect:
            try:
                self.__find_node(seeds)
            except AutoReconnect, e:
                # ConnectionFailure makes more sense here than AutoReconnect
                raise ConnectionFailure(str(e))
Example #8
0
def _handle_security_options(options):
    """Raise appropriate errors when conflicting TLS options are present in
    the options dictionary.

    :Parameters:
        - `options`: Instance of _CaseInsensitiveDictionary containing
          MongoDB URI options.
    """
    tlsinsecure = options.get('tlsinsecure')
    if tlsinsecure is not None:
        for opt in _TLSINSECURE_EXCLUDE_OPTS:
            if opt in options:
                err_msg = ("URI options %s and %s cannot be specified "
                           "simultaneously.")
                raise InvalidURI(
                    err_msg %
                    (options.cased_key('tlsinsecure'), options.cased_key(opt)))

    # Convenience function to retrieve option values based on public or private names.
    def _getopt(opt):
        return (options.get(opt)
                or options.get(INTERNAL_URI_OPTION_NAME_MAP[opt]))

    # Handle co-occurence of OCSP & tlsAllowInvalidCertificates options.
    tlsallowinvalidcerts = _getopt('tlsallowinvalidcertificates')
    if tlsallowinvalidcerts is not None:
        if 'tlsdisableocspendpointcheck' in options:
            err_msg = ("URI options %s and %s cannot be specified "
                       "simultaneously.")
            raise InvalidURI(
                err_msg % ('tlsallowinvalidcertificates',
                           options.cased_key('tlsdisableocspendpointcheck')))
        if tlsallowinvalidcerts is True:
            options['tlsdisableocspendpointcheck'] = True

    # Handle co-occurence of CRL and OCSP-related options.
    tlscrlfile = _getopt('tlscrlfile')
    if tlscrlfile is not None:
        for opt in ('tlsinsecure', 'tlsallowinvalidcertificates',
                    'tlsdisableocspendpointcheck'):
            if options.get(opt) is True:
                err_msg = ("URI option %s=True cannot be specified when "
                           "CRL checking is enabled.")
                raise InvalidURI(err_msg % (opt, ))

    if 'ssl' in options and 'tls' in options:

        def truth_value(val):
            if val in ('true', 'false'):
                return val == 'true'
            if isinstance(val, bool):
                return val
            return val

        if truth_value(options.get('ssl')) != truth_value(options.get('tls')):
            err_msg = ("Can not specify conflicting values for URI options %s "
                       "and %s.")
            raise InvalidURI(
                err_msg % (options.cased_key('ssl'), options.cased_key('tls')))

    return options
def parse_uri(uri, default_port=DEFAULT_PORT, validate=True, warn=False):
    """Parse and validate a MongoDB URI.

    Returns a dict of the form::

        {
            'nodelist': <list of (host, port) tuples>,
            'username': <username> or None,
            'password': <password> or None,
            'database': <database name> or None,
            'collection': <collection name> or None,
            'options': <dict of MongoDB URI options>
        }

    :Parameters:
        - `uri`: The MongoDB URI to parse.
        - `default_port`: The port number to use when one wasn't specified
          for a host in the URI.
        - `validate`: If ``True`` (the default), validate and normalize all
          options.
        - `warn` (optional): When validating, if ``True`` then will warn
          the user then ignore any invalid options or values. If ``False``,
          validation will error when options are unsupported or values are
          invalid.

    .. versionchanged:: 3.1
        ``warn`` added so invalid options can be ignored.
    """
    if not uri.startswith(SCHEME):
        raise InvalidURI("Invalid URI scheme: URI "
                         "must begin with '%s'" % (SCHEME,))

    scheme_free = uri[SCHEME_LEN:]

    if not scheme_free:
        raise InvalidURI("Must provide at least one hostname or IP.")

    user = None
    passwd = None
    dbase = None
    collection = None
    options = {}

    host_part, _, path_part = _partition(scheme_free, '/')
    if not host_part:
        host_part = path_part
        path_part = ""

    if not path_part and '?' in host_part:
        raise InvalidURI("A '/' is required between "
                         "the host list and any options.")

    if '@' in host_part:
        userinfo, _, hosts = _rpartition(host_part, '@')
        user, passwd = parse_userinfo(userinfo)
    else:
        hosts = host_part

    if '/' in hosts:
        raise InvalidURI("Any '/' in a unix domain socket must be"
                         " percent-encoded: %s" % host_part)

    hosts = unquote_plus(hosts)
    nodes = split_hosts(hosts, default_port=default_port)

    if path_part:
        if path_part[0] == '?':
            opts = unquote_plus(path_part[1:])
        else:
            dbase, _, opts = map(unquote_plus, _partition(path_part, '?'))
            if '.' in dbase:
                dbase, collection = dbase.split('.', 1)

            if _BAD_DB_CHARS.search(dbase):
                raise InvalidURI('Bad database name "%s"' % dbase)

        if opts:
            options = split_options(opts, validate, warn)

    if dbase is not None:
        dbase = unquote_plus(dbase)
    if collection is not None:
        collection = unquote_plus(collection)

    return {
        'nodelist': nodes,
        'username': user,
        'password': passwd,
        'database': dbase,
        'collection': collection,
        'options': options
    }
Example #10
0
    def __init__(self,
                 host=None,
                 port=None,
                 max_pool_size=10,
                 slave_okay=False,
                 network_timeout=None,
                 document_class=dict,
                 tz_aware=False,
                 _connect=True):
        """Create a new connection to a single MongoDB instance at *host:port*.

        The resultant connection object has connection-pooling built
        in. It also performs auto-reconnection when necessary. If an
        operation fails because of a connection error,
        :class:`~pymongo.errors.ConnectionFailure` is raised. If
        auto-reconnection will be performed,
        :class:`~pymongo.errors.AutoReconnect` will be
        raised. Application code should handle this exception
        (recognizing that the operation failed) and then continue to
        execute.

        Raises :class:`TypeError` if port is not an instance of
        ``int``. Raises :class:`~pymongo.errors.ConnectionFailure` if
        the connection cannot be made.

        The `host` parameter can be a full `mongodb URI
        <http://dochub.mongodb.org/core/connections>`_, in addition to
        a simple hostname. It can also be a list of hostnames or
        URIs. Any port specified in the host string(s) will override
        the `port` parameter. If multiple mongodb URIs containing
        database or auth information are passed, the last database,
        username, and password present will be used.

        :Parameters:
          - `host` (optional): hostname or IP address of the
            instance to connect to, or a mongodb URI, or a list of
            hostnames / mongodb URIs. If `host` is an IPv6 literal
            it must be enclosed in '[' and ']' characters following
            the RFC2732 URL syntax (e.g. '[::1]' for localhost)
          - `port` (optional): port number on which to connect
          - `max_pool_size` (optional): The maximum size limit for
            the connection pool.
          - `slave_okay` (optional): is it okay to connect directly to
            and perform queries on a slave instance
          - `network_timeout` (optional): timeout (in seconds) to use
            for socket operations - default is no timeout
          - `document_class` (optional): default class to use for
            documents returned from queries on this connection
          - `tz_aware` (optional): if ``True``,
            :class:`~datetime.datetime` instances returned as values
            in a document by this :class:`Connection` will be timezone
            aware (otherwise they will be naive)

        .. seealso:: :meth:`end_request`
        .. versionchanged:: 1.11
           Added `max_pool_size`. Completely removed previously deprecated
           `pool_size`, `auto_start_request` and `timeout` parameters.
        .. versionchanged:: 1.8
           The `host` parameter can now be a full `mongodb URI
           <http://dochub.mongodb.org/core/connections>`_, in addition
           to a simple hostname. It can also be a list of hostnames or
           URIs.
        .. versionadded:: 1.8
           The `tz_aware` parameter.
        .. versionadded:: 1.7
           The `document_class` parameter.
        .. versionchanged:: 1.4
           DEPRECATED The `pool_size`, `auto_start_request`, and `timeout`
           parameters.
        .. versionadded:: 1.1
           The `network_timeout` parameter.

        .. mongodoc:: connections
        """
        if host is None:
            host = self.HOST
        if isinstance(host, basestring):
            host = [host]
        if port is None:
            port = self.PORT
        if not isinstance(port, int):
            raise TypeError("port must be an instance of int")

        nodes = set()
        database = None
        username = None
        password = None
        collection = None
        options = {}
        for uri in host:
            (n, db, u, p, coll, opts) = _parse_uri(uri, port)
            nodes.update(n)
            database = db or database
            username = u or username
            password = p or password
            collection = coll or collection
            options = opts or options
        if not nodes:
            raise ConfigurationError("need to specify at least one host")
        self.__nodes = nodes
        if database and username is None:
            raise InvalidURI("cannot specify database without "
                             "a username and password")

        self.__host = None
        self.__port = None

        if "maxpoolsize" in options:
            self.__max_pool_size = options['maxpoolsize']
            if not self.__max_pool_size.isdigit():
                raise TypeError("maxPoolSize must be an integer >= 0.")
            self.__max_pool_size = int(self.__max_pool_size)
        else:
            self.__max_pool_size = max_pool_size
            if not isinstance(self.__max_pool_size, int):
                raise TypeError("max_pool_size must be an instance of int.")
        if self.__max_pool_size < 0:
            raise ValueError("the maximum pool size must be >= 0")

        if "slaveok" in options:
            self.__slave_okay = (options['slaveok'][0].upper() == 'T')
        else:
            self.__slave_okay = slave_okay

        if slave_okay and len(self.__nodes) > 1:
            raise ConfigurationError("cannot specify slave_okay for a paired "
                                     "or replica set connection")

        # TODO - Support using other options like w and fsync from URI
        self.__options = options
        # TODO - Support setting the collection from URI like the Java driver
        self.__collection = collection

        self.__cursor_manager = CursorManager(self)

        self.__network_timeout = network_timeout
        self.__pool = _Pool(self.__max_pool_size, self.__network_timeout)
        self.__last_checkout = time.time()

        self.__document_class = document_class
        self.__tz_aware = tz_aware

        # cache of existing indexes used by ensure_index ops
        self.__index_cache = {}

        if _connect:
            self.__find_master()

        if username:
            database = database or "admin"
            if not self[database].authenticate(username, password):
                raise ConfigurationError("authentication failed")