def __init__(self, host=None, port=None, max_pool_size=10, network_timeout=None, document_class=dict, tz_aware=False, _connect=True, **kwargs): """Create a new connection to a single MongoDB instance at *host:port*. .. warning:: **DEPRECATED:** :class:`Connection` is deprecated. Please use :class:`~pymongo.mongo_client.MongoClient` instead. The resultant connection object has connection-pooling built in. It also performs auto-reconnection when necessary. If an operation fails because of a connection error, :class:`~pymongo.errors.ConnectionFailure` is raised. If auto-reconnection will be performed, :class:`~pymongo.errors.AutoReconnect` will be raised. Application code should handle this exception (recognizing that the operation failed) and then continue to execute. Raises :class:`TypeError` if port is not an instance of ``int``. Raises :class:`~pymongo.errors.ConnectionFailure` if the connection cannot be made. The `host` parameter can be a full `mongodb URI <http://dochub.mongodb.org/core/connections>`_, in addition to a simple hostname. It can also be a list of hostnames or URIs. Any port specified in the host string(s) will override the `port` parameter. If multiple mongodb URIs containing database or auth information are passed, the last database, username, and password present will be used. For username and passwords reserved characters like ':', '/', '+' and '@' must be escaped following RFC 2396. :Parameters: - `host` (optional): hostname or IP address of the instance to connect to, or a mongodb URI, or a list of hostnames / mongodb URIs. If `host` is an IPv6 literal it must be enclosed in '[' and ']' characters following the RFC2732 URL syntax (e.g. '[::1]' for localhost) - `port` (optional): port number on which to connect - `max_pool_size` (optional): The maximum size limit for the connection pool. - `network_timeout` (optional): timeout (in seconds) to use for socket operations - default is no timeout - `document_class` (optional): default class to use for documents returned from queries on this connection - `tz_aware` (optional): if ``True``, :class:`~datetime.datetime` instances returned as values in a document by this :class:`Connection` will be timezone aware (otherwise they will be naive) **Other optional parameters can be passed as keyword arguments:** - `safe`: :class:`Connection` **disables** acknowledgement of write operations. Use ``safe=True`` to enable write acknowledgement. - `w`: (integer or string) If this is a replica set, write operations will block until they have been replicated to the specified number or tagged set of servers. `w=<int>` always includes the replica set primary (e.g. w=3 means write to the primary and wait until replicated to **two** secondaries). Implies safe=True. - `wtimeout`: (integer) Used in conjunction with `w`. Specify a value in milliseconds to control how long to wait for write propagation to complete. If replication does not complete in the given timeframe, a timeout exception is raised. Implies safe=True. - `j`: If ``True`` block until write operations have been committed to the journal. Ignored if the server is running without journaling. Implies safe=True. - `fsync`: If ``True`` force the database to fsync all files before returning. When used with `j` the server awaits the next group commit before returning. Implies safe=True. - `replicaSet`: (string) The name of the replica set to connect to. The driver will verify that the replica set it connects to matches this name. Implies that the hosts specified are a seed list and the driver should attempt to find all members of the set. - `socketTimeoutMS`: (integer) How long (in milliseconds) a send or receive on a socket can take before timing out. - `connectTimeoutMS`: (integer) How long (in milliseconds) a connection can take to be opened before timing out. - `ssl`: If ``True``, create the connection to the server using SSL. - `read_preference`: The read preference for this connection. See :class:`~pymongo.read_preferences.ReadPreference` for available options. - `auto_start_request`: If ``True`` (the default), each thread that accesses this Connection has a socket allocated to it for the thread's lifetime. This ensures consistent reads, even if you read after an unsafe write. - `use_greenlets`: if ``True``, :meth:`start_request()` will ensure that the current greenlet uses the same socket for all operations until :meth:`end_request()` - `slave_okay` or `slaveOk` (deprecated): Use `read_preference` instead. .. seealso:: :meth:`end_request` .. versionchanged:: 2.3 Added support for failover between mongos seed list members. .. versionchanged:: 2.2 Added `auto_start_request` option back. Added `use_greenlets` option. .. versionchanged:: 2.1 Support `w` = integer or string. Added `ssl` option. DEPRECATED slave_okay/slaveOk. .. versionchanged:: 2.0 `slave_okay` is a pure keyword argument. Added support for safe, and getlasterror options as keyword arguments. .. versionchanged:: 1.11 Added `max_pool_size`. Completely removed previously deprecated `pool_size`, `auto_start_request` and `timeout` parameters. .. versionchanged:: 1.8 The `host` parameter can now be a full `mongodb URI <http://dochub.mongodb.org/core/connections>`_, in addition to a simple hostname. It can also be a list of hostnames or URIs. .. versionadded:: 1.8 The `tz_aware` parameter. .. versionadded:: 1.7 The `document_class` parameter. .. versionadded:: 1.1 The `network_timeout` parameter. .. mongodoc:: connections """ if network_timeout is not None: if (not isinstance(network_timeout, (int, float)) or network_timeout <= 0): raise ConfigurationError("network_timeout must " "be a positive integer") kwargs['socketTimeoutMS'] = network_timeout * 1000 kwargs['auto_start_request'] = kwargs.get('auto_start_request', True) kwargs['safe'] = kwargs.get('safe', False) super(Connection, self).__init__(host, port, max_pool_size, document_class, tz_aware, _connect, **kwargs)
def __setitem__(self, key, value): if key not in SAFE_OPTIONS: raise ConfigurationError("%s is not a valid write " "concern option." % (key, )) key, value = validate(key, value) super(WriteConcern, self).__setitem__(key, value)
def updated_topology_description(topology_description, server_description): """Return an updated copy of a TopologyDescription. :Parameters: - `topology_description`: the current TopologyDescription - `server_description`: a new ServerDescription that resulted from a hello call Called after attempting (successfully or not) to call hello on the server at server_description.address. Does not modify topology_description. """ address = server_description.address # These values will be updated, if necessary, to form the new # TopologyDescription. topology_type = topology_description.topology_type set_name = topology_description.replica_set_name max_set_version = topology_description.max_set_version max_election_id = topology_description.max_election_id server_type = server_description.server_type # Don't mutate the original dict of server descriptions; copy it. sds = topology_description.server_descriptions() # Replace this server's description with the new one. sds[address] = server_description if topology_type == TOPOLOGY_TYPE.Single: # Set server type to Unknown if replica set name does not match. if (set_name is not None and set_name != server_description.replica_set_name): error = ConfigurationError( "client is configured to connect to a replica set named " "'%s' but this node belongs to a set named '%s'" % (set_name, server_description.replica_set_name)) sds[address] = server_description.to_unknown(error=error) # Single type never changes. return TopologyDescription(TOPOLOGY_TYPE.Single, sds, set_name, max_set_version, max_election_id, topology_description._topology_settings) if topology_type == TOPOLOGY_TYPE.Unknown: if server_type in (SERVER_TYPE.Standalone, SERVER_TYPE.LoadBalancer): if len(topology_description._topology_settings.seeds) == 1: topology_type = TOPOLOGY_TYPE.Single else: # Remove standalone from Topology when given multiple seeds. sds.pop(address) elif server_type not in (SERVER_TYPE.Unknown, SERVER_TYPE.RSGhost): topology_type = _SERVER_TYPE_TO_TOPOLOGY_TYPE[server_type] if topology_type == TOPOLOGY_TYPE.Sharded: if server_type not in (SERVER_TYPE.Mongos, SERVER_TYPE.Unknown): sds.pop(address) elif topology_type == TOPOLOGY_TYPE.ReplicaSetNoPrimary: if server_type in (SERVER_TYPE.Standalone, SERVER_TYPE.Mongos): sds.pop(address) elif server_type == SERVER_TYPE.RSPrimary: (topology_type, set_name, max_set_version, max_election_id) = _update_rs_from_primary( sds, set_name, server_description, max_set_version, max_election_id) elif server_type in (SERVER_TYPE.RSSecondary, SERVER_TYPE.RSArbiter, SERVER_TYPE.RSOther): topology_type, set_name = _update_rs_no_primary_from_member( sds, set_name, server_description) elif topology_type == TOPOLOGY_TYPE.ReplicaSetWithPrimary: if server_type in (SERVER_TYPE.Standalone, SERVER_TYPE.Mongos): sds.pop(address) topology_type = _check_has_primary(sds) elif server_type == SERVER_TYPE.RSPrimary: (topology_type, set_name, max_set_version, max_election_id) = _update_rs_from_primary( sds, set_name, server_description, max_set_version, max_election_id) elif server_type in (SERVER_TYPE.RSSecondary, SERVER_TYPE.RSArbiter, SERVER_TYPE.RSOther): topology_type = _update_rs_with_primary_from_member( sds, set_name, server_description) else: # Server type is Unknown or RSGhost: did we just lose the primary? topology_type = _check_has_primary(sds) # Return updated copy. return TopologyDescription(topology_type, sds, set_name, max_set_version, max_election_id, topology_description._topology_settings)
def parse_uri(uri, default_port=DEFAULT_PORT, validate=True, warn=False): """Parse and validate a MongoDB URI. Returns a dict of the form:: { 'nodelist': <list of (host, port) tuples>, 'username': <username> or None, 'password': <password> or None, 'database': <database name> or None, 'collection': <collection name> or None, 'options': <dict of MongoDB URI options> } If the URI scheme is "mongodb+srv://" DNS SRV and TXT lookups will be done to build nodelist and options. :Parameters: - `uri`: The MongoDB URI to parse. - `default_port`: The port number to use when one wasn't specified for a host in the URI. - `validate`: If ``True`` (the default), validate and normalize all options. - `warn` (optional): When validating, if ``True`` then will warn the user then ignore any invalid options or values. If ``False``, validation will error when options are unsupported or values are invalid. .. versionchanged:: 3.6 Added support for mongodb+srv:// URIs .. versionchanged:: 3.5 Return the original value of the ``readPreference`` MongoDB URI option instead of the validated read preference mode. .. versionchanged:: 3.1 ``warn`` added so invalid options can be ignored. """ if uri.startswith(SCHEME): is_srv = False scheme_free = uri[SCHEME_LEN:] elif uri.startswith(SRV_SCHEME): if not _HAVE_DNSPYTHON: raise ConfigurationError('The "dnspython" module must be ' 'installed to use mongodb+srv:// URIs') is_srv = True scheme_free = uri[SRV_SCHEME_LEN:] else: raise InvalidURI( "Invalid URI scheme: URI must " "begin with '%s' or '%s'", (SCHEME, SRV_SCHEME)) if not scheme_free: raise InvalidURI("Must provide at least one hostname or IP.") user = None passwd = None dbase = None collection = None options = {} host_part, _, path_part = _partition(scheme_free, '/') if not host_part: host_part = path_part path_part = "" if not path_part and '?' in host_part: raise InvalidURI("A '/' is required between " "the host list and any options.") if '@' in host_part: userinfo, _, hosts = _rpartition(host_part, '@') user, passwd = parse_userinfo(userinfo) else: hosts = host_part if '/' in hosts: raise InvalidURI("Any '/' in a unix domain socket must be" " percent-encoded: %s" % host_part) hosts = unquote_plus(hosts) if is_srv: nodes = split_hosts(hosts, default_port=None) if len(nodes) != 1: raise InvalidURI("%s URIs must include one, " "and only one, hostname" % (SRV_SCHEME, )) fqdn, port = nodes[0] if port is not None: raise InvalidURI("%s URIs must not include a port number" % (SRV_SCHEME, )) nodes = _get_dns_srv_hosts(fqdn) try: plist = fqdn.split(".")[1:] except Exception: raise ConfigurationError("Invalid URI host") slen = len(plist) if slen < 2: raise ConfigurationError("Invalid URI host") for node in nodes: try: nlist = node[0].split(".")[1:][-slen:] except Exception: raise ConfigurationError("Invalid SRV host") if plist != nlist: raise ConfigurationError("Invalid SRV host") dns_options = _get_dns_txt_options(fqdn) if dns_options: options = split_options(dns_options, validate, warn) if set(options) - _ALLOWED_TXT_OPTS: raise ConfigurationError( "Only authSource and replicaSet are supported from DNS") options["ssl"] = True if validate else 'true' else: nodes = split_hosts(hosts, default_port=default_port) if path_part: if path_part[0] == '?': opts = unquote_plus(path_part[1:]) else: dbase, _, opts = map(unquote_plus, _partition(path_part, '?')) if '.' in dbase: dbase, collection = dbase.split('.', 1) if _BAD_DB_CHARS.search(dbase): raise InvalidURI('Bad database name "%s"' % dbase) if opts: options.update(split_options(opts, validate, warn)) if dbase is not None: dbase = unquote_plus(dbase) if collection is not None: collection = unquote_plus(collection) return { 'nodelist': nodes, 'username': user, 'password': passwd, 'database': dbase, 'collection': collection, 'options': options }
def __init__(self, hosts_or_uri=None, max_pool_size=10, document_class=dict, tz_aware=False, **kwargs): """Create a new connection to a MongoDB replica set. The resultant connection object has connection-pooling built in. It also performs auto-reconnection when necessary. If an operation fails because of a connection error, :class:`~pymongo.errors.ConnectionFailure` is raised. If auto-reconnection will be performed, :class:`~pymongo.errors.AutoReconnect` will be raised. Application code should handle this exception (recognizing that the operation failed) and then continue to execute. Raises :class:`~pymongo.errors.ConnectionFailure` if the connection cannot be made. The `hosts_or_uri` parameter can be a full `mongodb URI <http://dochub.mongodb.org/core/connections>`_, in addition to a string of `host:port` pairs (e.g. 'host1:port1,host2:port2'). If `hosts_or_uri` is None 'localhost:27017' will be used. :Parameters: - `hosts_or_uri` (optional): A MongoDB URI or string of `host:port` pairs. If a host is an IPv6 literal it must be enclosed in '[' and ']' characters following the RFC2732 URL syntax (e.g. '[::1]' for localhost) - `max_pool_size` (optional): The maximum size limit for each connection pool. - `document_class` (optional): default class to use for documents returned from queries on this connection - `tz_aware` (optional): if ``True``, :class:`~datetime.datetime` instances returned as values in a document by this :class:`Connection` will be timezone aware (otherwise they will be naive) - `replicaSet`: (required) The name of the replica set to connect to. The driver will verify that each host it connects to is a member of this replica set. Can be passed as a keyword argument or as a MongoDB URI option. Other optional parameters can be passed as keyword arguments: - `safe`: Use getlasterror for each write operation? - `j` or `journal`: Block until write operations have been commited to the journal. Ignored if the server is running without journaling. Implies safe=True. - `w`: (integer or string) If this is a replica set write operations won't return until they have been replicated to the specified number or tagged set of servers. Implies safe=True. - `wtimeoutMS`: Used in conjunction with `j` and/or `w`. Wait this many milliseconds for journal acknowledgement and/or write replication. Implies safe=True. - `fsync`: Force the database to fsync all files before returning When used with `j` the server awaits the next group commit before returning. Implies safe=True. - `socketTimeoutMS`: How long a send or receive on a socket can take before timing out. - `connectTimeoutMS`: How long a connection can take to be opened before timing out. - `ssl`: If True, create the connection to the servers using SSL. - `read_preference`: The read preference for this connection. See :class:`~pymongo.ReadPreference` for available options. - `slave_okay` or `slaveOk` (deprecated): Use `read_preference` instead. .. versionadded:: 2.1 """ self.__max_pool_size = max_pool_size self.__document_class = document_class self.__tz_aware = tz_aware self.__opts = {} self.__seeds = set() self.__hosts = None self.__arbiters = set() self.__writer = None self.__readers = [] self.__pools = {} self.__index_cache = {} self.__auth_credentials = {} username = None db_name = None if hosts_or_uri is None: self.__seeds.add(('localhost', 27017)) elif '://' in hosts_or_uri: res = uri_parser.parse_uri(hosts_or_uri) self.__seeds.update(res['nodelist']) username = res['username'] password = res['password'] db_name = res['database'] self.__opts = res['options'] else: self.__seeds.update(uri_parser.split_hosts(hosts_or_uri)) for option, value in kwargs.iteritems(): option, value = common.validate(option, value) self.__opts[option] = value self.__name = self.__opts.get('replicaset') if not self.__name: raise ConfigurationError("the replicaSet " "keyword parameter is required.") self.__net_timeout = self.__opts.get('sockettimeoutms') self.__conn_timeout = self.__opts.get('connecttimeoutms') self.__use_ssl = self.__opts.get('ssl', False) if self.__use_ssl and not pool.have_ssl: raise ConfigurationError("The ssl module is not available. If you " "are using a python version previous to " "2.6 you must install the ssl package " "from PyPI.") super(ReplicaSetConnection, self).__init__(**self.__opts) if self.slave_okay: warnings.warn("slave_okay is deprecated. Please " "use read_preference instead.", DeprecationWarning) self.refresh() monitor_thread = Monitor(self) monitor_thread.setName("ReplicaSetMonitorThread") monitor_thread.setDaemon(True) monitor_thread.start() if db_name and username is None: warnings.warn("must provide a username and password " "to authenticate to %s" % (db_name,)) if username: db_name = db_name or 'admin' if not self[db_name].authenticate(username, password): raise ConfigurationError("authentication failed")
def get_ssl_context(*args): """Create and return an SSLContext object.""" (certfile, keyfile, passphrase, ca_certs, cert_reqs, crlfile, match_hostname) = args verify_mode = ssl.CERT_REQUIRED if cert_reqs is None else cert_reqs # Note PROTOCOL_SSLv23 is about the most misleading name imaginable. # This configures the server and client to negotiate the # highest protocol version they both support. A very good thing. # PROTOCOL_TLS_CLIENT was added in CPython 3.6, deprecating # PROTOCOL_SSLv23. ctx = SSLContext( getattr(ssl, "PROTOCOL_TLS_CLIENT", ssl.PROTOCOL_SSLv23)) # SSLContext.check_hostname was added in CPython 2.7.9 and 3.4. # PROTOCOL_TLS_CLIENT (added in Python 3.6) enables it by default. if hasattr(ctx, "check_hostname"): if _PY37PLUS and verify_mode != ssl.CERT_NONE: # Python 3.7 uses OpenSSL's hostname matching implementation # making it the obvious version to start using this with. # Python 3.6 might have been a good version, but it suffers # from https://bugs.python.org/issue32185. # We'll use our bundled match_hostname for older Python # versions, which also supports IP address matching # with Python < 3.5. ctx.check_hostname = match_hostname else: ctx.check_hostname = False if hasattr(ctx, "options"): # Explicitly disable SSLv2, SSLv3 and TLS compression. Note that # up to date versions of MongoDB 2.4 and above already disable # SSLv2 and SSLv3, python disables SSLv2 by default in >= 2.7.7 # and >= 3.3.4 and SSLv3 in >= 3.4.3. There is no way for us to do # any of this explicitly for python 2.6 or 2.7 before 2.7.9. ctx.options |= getattr(ssl, "OP_NO_SSLv2", 0) ctx.options |= getattr(ssl, "OP_NO_SSLv3", 0) # OpenSSL >= 1.0.0 ctx.options |= getattr(ssl, "OP_NO_COMPRESSION", 0) if certfile is not None: try: if passphrase is not None: vi = sys.version_info # Since python just added a new parameter to an existing method # this seems to be about the best we can do. if (vi[0] == 2 and vi < (2, 7, 9) or vi[0] == 3 and vi < (3, 3)): raise ConfigurationError( "Support for ssl_pem_passphrase requires " "python 2.7.9+ (pypy 2.5.1+) or 3.3+") ctx.load_cert_chain(certfile, keyfile, passphrase) else: ctx.load_cert_chain(certfile, keyfile) except ssl.SSLError as exc: raise ConfigurationError( "Private key doesn't match certificate: %s" % (exc,)) if crlfile is not None: if not hasattr(ctx, "verify_flags"): raise ConfigurationError( "Support for ssl_crlfile requires " "python 2.7.9+ (pypy 2.5.1+) or 3.4+") # Match the server's behavior. ctx.verify_flags = ssl.VERIFY_CRL_CHECK_LEAF ctx.load_verify_locations(crlfile) if ca_certs is not None: ctx.load_verify_locations(ca_certs) elif cert_reqs != ssl.CERT_NONE: # CPython >= 2.7.9 or >= 3.4.0, pypy >= 2.5.1 if hasattr(ctx, "load_default_certs"): ctx.load_default_certs() # Python >= 3.2.0, useless on Windows. elif (sys.platform != "win32" and hasattr(ctx, "set_default_verify_paths")): ctx.set_default_verify_paths() elif sys.platform == "win32" and HAVE_WINCERTSTORE: with _WINCERTSLOCK: if _WINCERTS is None: _load_wincerts() ctx.load_verify_locations(_WINCERTS.name) elif HAVE_CERTIFI: ctx.load_verify_locations(certifi.where()) else: raise ConfigurationError( "`ssl_cert_reqs` is not ssl.CERT_NONE and no system " "CA certificates could be loaded. `ssl_ca_certs` is " "required.") ctx.verify_mode = verify_mode return ctx
def __init__(self, host=None, port=None, max_pool_size=10, network_timeout=None, document_class=dict, tz_aware=False, _connect=True, **kwargs): """Create a new connection to a single MongoDB instance at *host:port*. The resultant connection object has connection-pooling built in. It also performs auto-reconnection when necessary. If an operation fails because of a connection error, :class:`~pymongo.errors.ConnectionFailure` is raised. If auto-reconnection will be performed, :class:`~pymongo.errors.AutoReconnect` will be raised. Application code should handle this exception (recognizing that the operation failed) and then continue to execute. Raises :class:`TypeError` if port is not an instance of ``int``. Raises :class:`~pymongo.errors.ConnectionFailure` if the connection cannot be made. The `host` parameter can be a full `mongodb URI <http://dochub.mongodb.org/core/connections>`_, in addition to a simple hostname. It can also be a list of hostnames or URIs. Any port specified in the host string(s) will override the `port` parameter. If multiple mongodb URIs containing database or auth information are passed, the last database, username, and password present will be used. :Parameters: - `host` (optional): hostname or IP address of the instance to connect to, or a mongodb URI, or a list of hostnames / mongodb URIs. If `host` is an IPv6 literal it must be enclosed in '[' and ']' characters following the RFC2732 URL syntax (e.g. '[::1]' for localhost) - `port` (optional): port number on which to connect - `max_pool_size` (optional): The maximum size limit for the connection pool. - `network_timeout` (optional): timeout (in seconds) to use for socket operations - default is no timeout - `document_class` (optional): default class to use for documents returned from queries on this connection - `tz_aware` (optional): if ``True``, :class:`~datetime.datetime` instances returned as values in a document by this :class:`Connection` will be timezone aware (otherwise they will be naive) Other optional parameters can be passed as keyword arguments: - `slave_okay` or `slaveok`: Is it OK to perform queries if this connection is to a secondary? - `safe`: Use getlasterror for each write operation? - `j`: Block until write operations have been commited to the journal. Ignored if the server is running without journaling. Implies safe=True. - `w`: If this is a replica set the server won't return until write operations have replicated to this many set members. Implies safe=True. - `wtimeout`: Used in conjunction with `j` and/or `w`. Wait this many milliseconds for journal acknowledgement and/or write replication. Implies safe=True. - `fsync`: Force the database to fsync all files before returning When used with `j` the server awaits the next group commit before returning. Implies safe=True. .. seealso:: :meth:`end_request` .. versionchanged:: 1.11+ `slave_okay` is a pure keyword argument. Added support for safe, and getlasterror options as keyword arguments. .. versionchanged:: 1.11 Added `max_pool_size`. Completely removed previously deprecated `pool_size`, `auto_start_request` and `timeout` parameters. .. versionchanged:: 1.8 The `host` parameter can now be a full `mongodb URI <http://dochub.mongodb.org/core/connections>`_, in addition to a simple hostname. It can also be a list of hostnames or URIs. .. versionadded:: 1.8 The `tz_aware` parameter. .. versionadded:: 1.7 The `document_class` parameter. .. versionchanged:: 1.4 DEPRECATED The `pool_size`, `auto_start_request`, and `timeout` parameters. .. versionadded:: 1.1 The `network_timeout` parameter. .. mongodoc:: connections """ super(Connection, self).__init__(**kwargs) if host is None: host = self.HOST if isinstance(host, basestring): host = [host] if port is None: port = self.PORT if not isinstance(port, int): raise TypeError("port must be an instance of int") nodes = set() username = None password = None db = None coll = None options = {} for entity in host: if "://" in entity: if entity.startswith("mongodb://"): res = uri_parser.parse_uri(entity, port) nodes.update(res["nodelist"]) username = res["username"] or username password = res["password"] or password db = res["database"] or db coll = res["collection"] or coll options = res["options"] else: idx = entity.find("://") raise InvalidURI("Invalid URI scheme: " "%s" % (entity[:idx], )) else: nodes.update(uri_parser.split_hosts(entity, port)) if not nodes: raise ConfigurationError("need to specify at least one host") self.__nodes = nodes self.__host = None self.__port = None if options: super(Connection, self)._set_options(**options) assert isinstance(max_pool_size, int), "max_pool_size must be an int" self.__max_pool_size = options.get("maxpoolsize") or max_pool_size if self.__max_pool_size < 0: raise ValueError("the maximum pool size must be >= 0") # TODO - Support using other options like w and fsync from URI self.__options = options # TODO - Support setting the collection from URI like the Java driver self.__collection = coll self.__cursor_manager = CursorManager(self) self.__network_timeout = network_timeout self.__pool = _Pool(self.__max_pool_size, self.__network_timeout) self.__last_checkout = time.time() self.__document_class = document_class self.__tz_aware = tz_aware # cache of existing indexes used by ensure_index ops self.__index_cache = {} if _connect: self.__find_node() if db and username is None: warnings.warn("must provide a username and password " "to authenticate to %s" % (db, )) if username: db = db or "admin" if not self[db].authenticate(username, password): raise ConfigurationError("authentication failed")
def __init__(self, root_collection, session=None, **kwargs): """Write a file to GridFS Application developers should generally not need to instantiate this class directly - instead see the methods provided by :class:`~gridfs.GridFS`. Raises :class:`TypeError` if `root_collection` is not an instance of :class:`~pymongo.collection.Collection`. Any of the file level options specified in the `GridFS Spec <http://dochub.mongodb.org/core/gridfsspec>`_ may be passed as keyword arguments. Any additional keyword arguments will be set as additional fields on the file document. Valid keyword arguments include: - ``"_id"``: unique ID for this file (default: :class:`~bson.objectid.ObjectId`) - this ``"_id"`` must not have already been used for another file - ``"filename"``: human name for the file - ``"contentType"`` or ``"content_type"``: valid mime-type for the file - ``"chunkSize"`` or ``"chunk_size"``: size of each of the chunks, in bytes (default: 255 kb) - ``"encoding"``: encoding used for this file. In Python 2, any :class:`unicode` that is written to the file will be converted to a :class:`str`. In Python 3, any :class:`str` that is written to the file will be converted to :class:`bytes`. :Parameters: - `root_collection`: root collection to write to - `session` (optional): a :class:`~pymongo.client_session.ClientSession` to use for all commands - `**kwargs` (optional): file level options (see above) .. versionchanged:: 3.6 Added ``session`` parameter. .. versionchanged:: 3.0 `root_collection` must use an acknowledged :attr:`~pymongo.collection.Collection.write_concern` """ if not isinstance(root_collection, Collection): raise TypeError("root_collection must be an " "instance of Collection") # With w=0, 'filemd5' might run before the final chunks are written. if not root_collection.write_concern.acknowledged: raise ConfigurationError('root_collection must use ' 'acknowledged write_concern') # Handle alternative naming if "content_type" in kwargs: kwargs["contentType"] = kwargs.pop("content_type") if "chunk_size" in kwargs: kwargs["chunkSize"] = kwargs.pop("chunk_size") coll = root_collection.with_options( read_preference=ReadPreference.PRIMARY) kwargs['md5'] = md5() # Defaults kwargs["_id"] = kwargs.get("_id", ObjectId()) kwargs["chunkSize"] = kwargs.get("chunkSize", DEFAULT_CHUNK_SIZE) object.__setattr__(self, "_session", session) object.__setattr__(self, "_coll", coll) object.__setattr__(self, "_chunks", coll.chunks) object.__setattr__(self, "_file", kwargs) object.__setattr__(self, "_buffer", StringIO()) object.__setattr__(self, "_position", 0) object.__setattr__(self, "_chunk_number", 0) object.__setattr__(self, "_closed", False) object.__setattr__(self, "_ensured_index", False)
def _execute_command(self, generator, write_concern, session, sock_info, op_id, retryable, full_result): if sock_info.max_wire_version < 5: if self.uses_collation: raise ConfigurationError( 'Must be connected to MongoDB 3.4+ to use a collation.') if self.uses_hint: raise ConfigurationError( 'Must be connected to MongoDB 3.4+ to use hint.') if sock_info.max_wire_version < 6 and self.uses_array_filters: raise ConfigurationError( 'Must be connected to MongoDB 3.6+ to use arrayFilters.') db_name = self.collection.database.name client = self.collection.database.client listeners = client._event_listeners if not self.current_run: self.current_run = next(generator) run = self.current_run # sock_info.command validates the session, but we use # sock_info.write_command. sock_info.validate_session(client, session) while run: cmd_name = _COMMANDS[run.op_type] bwc = self.bulk_ctx_class( db_name, cmd_name, sock_info, op_id, listeners, session, run.op_type, self.collection.codec_options) while run.idx_offset < len(run.ops): cmd = SON([(cmd_name, self.collection.name), ('ordered', self.ordered)]) if not write_concern.is_server_default: cmd['writeConcern'] = write_concern.document if self.bypass_doc_val and sock_info.max_wire_version >= 4: cmd['bypassDocumentValidation'] = True if session: # Start a new retryable write unless one was already # started for this command. if retryable and not self.started_retryable_write: session._start_retryable_write() self.started_retryable_write = True session._apply_to(cmd, retryable, ReadPreference.PRIMARY, sock_info) sock_info.send_cluster_time(cmd, session, client) sock_info.add_server_api(cmd) ops = islice(run.ops, run.idx_offset, None) # Run as many ops as possible in one command. result, to_send = bwc.execute(cmd, ops, client) # Retryable writeConcernErrors halt the execution of this run. wce = result.get('writeConcernError', {}) if wce.get('code', 0) in _RETRYABLE_ERROR_CODES: # Synthesize the full bulk result without modifying the # current one because this write operation may be retried. full = copy.deepcopy(full_result) _merge_command(run, full, run.idx_offset, result) _raise_bulk_write_error(full) _merge_command(run, full_result, run.idx_offset, result) # We're no longer in a retry once a command succeeds. self.retrying = False self.started_retryable_write = False if self.ordered and "writeErrors" in result: break run.idx_offset += len(to_send) # We're supposed to continue if errors are # at the write concern level (e.g. wtimeout) if self.ordered and full_result['writeErrors']: break # Reset our state self.current_run = run = next(generator, None)
def __init__(self, host=None, port=None, max_pool_size=10, document_class=dict, tz_aware=False, _connect=True, **kwargs): """Create a new connection to a single MongoDB instance at *host:port*. The resultant connection object has connection-pooling built in. It also performs auto-reconnection when necessary. If an operation fails because of a connection error, :class:`~pymongo.errors.ConnectionFailure` is raised. If auto-reconnection will be performed, :class:`~pymongo.errors.AutoReconnect` will be raised. Application code should handle this exception (recognizing that the operation failed) and then continue to execute. Raises :class:`TypeError` if port is not an instance of ``int``. Raises :class:`~pymongo.errors.ConnectionFailure` if the connection cannot be made. The `host` parameter can be a full `mongodb URI <http://dochub.mongodb.org/core/connections>`_, in addition to a simple hostname. It can also be a list of hostnames or URIs. Any port specified in the host string(s) will override the `port` parameter. If multiple mongodb URIs containing database or auth information are passed, the last database, username, and password present will be used. For username and passwords reserved characters like ':', '/', '+' and '@' must be escaped following RFC 2396. :Parameters: - `host` (optional): hostname or IP address of the instance to connect to, or a mongodb URI, or a list of hostnames / mongodb URIs. If `host` is an IPv6 literal it must be enclosed in '[' and ']' characters following the RFC2732 URL syntax (e.g. '[::1]' for localhost) - `port` (optional): port number on which to connect - `max_pool_size` (optional): The maximum number of idle connections to keep open in the pool for future use - `document_class` (optional): default class to use for documents returned from queries on this connection - `tz_aware` (optional): if ``True``, :class:`~datetime.datetime` instances returned as values in a document by this :class:`MongoClient` will be timezone aware (otherwise they will be naive) **Other optional parameters can be passed as keyword arguments:** - `w`: (integer or string) If this is a replica set, write operations will block until they have been replicated to the specified number or tagged set of servers. `w=<int>` always includes the replica set primary (e.g. w=3 means write to the primary and wait until replicated to **two** secondaries). **Passing w=0 disables write acknowledgement and all other write concern options.** - `wtimeout`: (integer) Used in conjunction with `w`. Specify a value in milliseconds to control how long to wait for write propagation to complete. If replication does not complete in the given timeframe, a timeout exception is raised. - `j`: If ``True`` block until write operations have been committed to the journal. Ignored if the server is running without journaling. - `fsync`: If ``True`` force the database to fsync all files before returning. When used with `j` the server awaits the next group commit before returning. - `replicaSet`: (string) The name of the replica set to connect to. The driver will verify that the replica set it connects to matches this name. Implies that the hosts specified are a seed list and the driver should attempt to find all members of the set. - `socketTimeoutMS`: (integer) How long (in milliseconds) a send or receive on a socket can take before timing out. - `connectTimeoutMS`: (integer) How long (in milliseconds) a connection can take to be opened before timing out. - `ssl`: If ``True``, create the connection to the server using SSL. - `read_preference`: The read preference for this connection. See :class:`~pymongo.read_preferences.ReadPreference` for available options. - `auto_start_request`: If ``True``, each thread that accesses this :class:`MongoClient` has a socket allocated to it for the thread's lifetime. This ensures consistent reads, even if you read after an unacknowledged write. Defaults to ``False`` - `use_greenlets`: If ``True``, :meth:`start_request()` will ensure that the current greenlet uses the same socket for all operations until :meth:`end_request()` .. seealso:: :meth:`end_request` .. mongodoc:: connections .. versionadded:: 2.4 """ if host is None: host = self.HOST if isinstance(host, basestring): host = [host] if port is None: port = self.PORT if not isinstance(port, int): raise TypeError("port must be an instance of int") seeds = set() username = None password = None db = None opts = {} for entity in host: if "://" in entity: if entity.startswith("mongodb://"): res = uri_parser.parse_uri(entity, port) seeds.update(res["nodelist"]) username = res["username"] or username password = res["password"] or password db = res["database"] or db opts = res["options"] else: idx = entity.find("://") raise InvalidURI("Invalid URI scheme: " "%s" % (entity[:idx], )) else: seeds.update(uri_parser.split_hosts(entity, port)) if not seeds: raise ConfigurationError("need to specify at least one host") self.__nodes = seeds self.__host = None self.__port = None self.__is_primary = False self.__is_mongos = False options = {} for option, value in kwargs.iteritems(): option, value = common.validate(option, value) options[option] = value options.update(opts) self.__max_pool_size = common.validate_positive_integer( 'max_pool_size', max_pool_size) self.__cursor_manager = CursorManager(self) self.__repl = options.get('replicaset') if len(seeds) == 1 and not self.__repl: self.__direct = True else: self.__direct = False self.__nodes = set() self.__net_timeout = options.get('sockettimeoutms') self.__conn_timeout = options.get('connecttimeoutms') self.__use_ssl = options.get('ssl', False) if self.__use_ssl and not pool.have_ssl: raise ConfigurationError("The ssl module is not available. If you " "are using a python version previous to " "2.6 you must install the ssl package " "from PyPI.") if options.get('use_greenlets', False): if not pool.have_greenlet: raise ConfigurationError( "The greenlet module is not available. " "Install the greenlet package from PyPI.") self.pool_class = pool.GreenletPool else: self.pool_class = pool.Pool self.__pool = self.pool_class(None, self.__max_pool_size, self.__net_timeout, self.__conn_timeout, self.__use_ssl) self.__document_class = document_class self.__tz_aware = common.validate_boolean('tz_aware', tz_aware) self.__auto_start_request = options.get('auto_start_request', False) # cache of existing indexes used by ensure_index ops self.__index_cache = {} self.__auth_credentials = {} super(MongoClient, self).__init__(**options) if self.slave_okay: warnings.warn( "slave_okay is deprecated. Please " "use read_preference instead.", DeprecationWarning) if _connect: try: self.__find_node(seeds) except AutoReconnect, e: # ConnectionFailure makes more sense here than AutoReconnect raise ConnectionFailure(str(e))
def __init__(self, kms_providers, key_vault_namespace, key_vault_client, codec_options): """Explicit client-side field level encryption. The ClientEncryption class encapsulates explicit operations on a key vault collection that cannot be done directly on a MongoClient. Similar to configuring auto encryption on a MongoClient, it is constructed with a MongoClient (to a MongoDB cluster containing the key vault collection), KMS provider configuration, and keyVaultNamespace. It provides an API for explicitly encrypting and decrypting values, and creating data keys. It does not provide an API to query keys from the key vault collection, as this can be done directly on the MongoClient. See :ref:`explicit-client-side-encryption` for an example. :Parameters: - `kms_providers`: Map of KMS provider options. Two KMS providers are supported: "aws" and "local". The kmsProviders map values differ by provider: - `aws`: Map with "accessKeyId" and "secretAccessKey" as strings. These are the AWS access key ID and AWS secret access key used to generate KMS messages. An optional "sessionToken" may be included to support temporary AWS credentials. - `azure`: Map with "tenantId", "clientId", and "clientSecret" as strings. Additionally, "identityPlatformEndpoint" may also be specified as a string (defaults to 'login.microsoftonline.com'). These are the Azure Active Directory credentials used to generate Azure Key Vault messages. - `gcp`: Map with "email" as a string and "privateKey" as `bytes` or a base64 encoded string. Additionally, "endpoint" may also be specified as a string (defaults to 'oauth2.googleapis.com'). These are the credentials used to generate Google Cloud KMS messages. - `local`: Map with "key" as `bytes` (96 bytes in length) or a base64 encoded string which decodes to 96 bytes. "key" is the master key used to encrypt/decrypt data keys. This key should be generated and stored as securely as possible. - `key_vault_namespace`: The namespace for the key vault collection. The key vault collection contains all data keys used for encryption and decryption. Data keys are stored as documents in this MongoDB collection. Data keys are protected with encryption by a KMS provider. - `key_vault_client`: A MongoClient connected to a MongoDB cluster containing the `key_vault_namespace` collection. - `codec_options`: An instance of :class:`~bson.codec_options.CodecOptions` to use when encoding a value for encryption and decoding the decrypted BSON value. This should be the same CodecOptions instance configured on the MongoClient, Database, or Collection used to access application data. .. versionadded:: 3.9 """ if not _HAVE_PYMONGOCRYPT: raise ConfigurationError( "client-side field level encryption requires the pymongocrypt " "library: install a compatible version with: " "python -m pip install 'pymongo[encryption]'") if not isinstance(codec_options, CodecOptions): raise TypeError("codec_options must be an instance of " "bson.codec_options.CodecOptions") self._kms_providers = kms_providers self._key_vault_namespace = key_vault_namespace self._key_vault_client = key_vault_client self._codec_options = codec_options db, coll = key_vault_namespace.split('.', 1) key_vault_coll = key_vault_client[db][coll] self._io_callbacks = _EncryptionIO(None, key_vault_coll, None, None) self._encryption = ExplicitEncrypter( self._io_callbacks, MongoCryptOptions(kms_providers, None))
def _execute_command(self, generator, write_concern, session, sock_info, op_id, retryable, full_result): if sock_info.max_wire_version < 5 and self.uses_collation: raise ConfigurationError( 'Must be connected to MongoDB 3.4+ to use a collation.') if sock_info.max_wire_version < 6 and self.uses_array_filters: raise ConfigurationError( 'Must be connected to MongoDB 3.6+ to use arrayFilters.') db_name = self.collection.database.name client = self.collection.database.client listeners = client._event_listeners if not self.current_run: self.current_run = next(generator) run = self.current_run # sock_info.command validates the session, but we use # sock_info.write_command. sock_info.validate_session(client, session) while run: cmd = SON([(_COMMANDS[run.op_type], self.collection.name), ('ordered', self.ordered)]) if write_concern.document: cmd['writeConcern'] = write_concern.document if self.bypass_doc_val and sock_info.max_wire_version >= 4: cmd['bypassDocumentValidation'] = True if session: cmd['lsid'] = session._use_lsid() bwc = _BulkWriteContext(db_name, cmd, sock_info, op_id, listeners, session) results = [] while run.idx_offset < len(run.ops): if session and retryable: cmd['txnNumber'] = session._transaction_id() sock_info.send_cluster_time(cmd, session, client) check_keys = run.op_type == _INSERT ops = islice(run.ops, run.idx_offset, None) # Run as many ops as possible. request_id, msg, to_send = _do_batched_write_command( self.namespace, run.op_type, cmd, ops, check_keys, self.collection.codec_options, bwc) if not to_send: raise InvalidOperation("cannot do an empty bulk write") result = bwc.write_command(request_id, msg, to_send) client._receive_cluster_time(result, session) results.append((run.idx_offset, result)) # We're no longer in a retry once a command succeeds. self.retrying = False if self.ordered and "writeErrors" in result: break run.idx_offset += len(to_send) _merge_command(run, full_result, results) # We're supposed to continue if errors are # at the write concern level (e.g. wtimeout) if self.ordered and full_result['writeErrors']: break # Reset our state self.current_run = run = next(generator, None)
def command(self, dbname, spec, slave_ok=False, read_preference=ReadPreference.PRIMARY, codec_options=DEFAULT_CODEC_OPTIONS, check=True, allowable_errors=None, check_keys=False, read_concern=DEFAULT_READ_CONCERN, write_concern=None, parse_write_concern_error=False, collation=None): """Execute a command or raise ConnectionFailure or OperationFailure. :Parameters: - `dbname`: name of the database on which to run the command - `spec`: a command document as a dict, SON, or mapping object - `slave_ok`: whether to set the SlaveOkay wire protocol bit - `read_preference`: a read preference - `codec_options`: a CodecOptions instance - `check`: raise OperationFailure if there are errors - `allowable_errors`: errors to ignore if `check` is True - `check_keys`: if True, check `spec` for invalid keys - `read_concern`: The read concern for this command. - `write_concern`: The write concern for this command. - `parse_write_concern_error`: Whether to parse the ``writeConcernError`` field in the command response. - `collation`: The collation for this command. """ if self.max_wire_version < 4 and not read_concern.ok_for_legacy: raise ConfigurationError( 'read concern level of %s is not valid ' 'with a max wire version of %d.' % (read_concern.level, self.max_wire_version)) if not (write_concern is None or write_concern.acknowledged or collation is None): raise ConfigurationError( 'Collation is unsupported for unacknowledged writes.') if self.max_wire_version >= 5 and write_concern: spec['writeConcern'] = write_concern.document elif self.max_wire_version < 5 and collation is not None: raise ConfigurationError( 'Must be connected to MongoDB 3.4+ to use a collation.') try: return command(self.sock, dbname, spec, slave_ok, self.is_mongos, read_preference, codec_options, check, allowable_errors, self.address, check_keys, self.listeners, self.max_bson_size, read_concern, parse_write_concern_error=parse_write_concern_error, collation=collation) except OperationFailure: raise # Catch socket.error, KeyboardInterrupt, etc. and close ourselves. except BaseException as error: self._raise_connection_failure(error)
def _check_compat(sock_info): # Older server version don't raise a descriptive error, so we raise # one instead. if not sock_info.max_wire_version >= 6: err_msg = "Database.aggregate() is only supported on MongoDB 3.6+." raise ConfigurationError(err_msg)
def validate_cert_reqs(option, dummy): """No ssl module, raise ConfigurationError.""" raise ConfigurationError("The value of %s is set but can't be " "validated. The ssl module is not available" % (option,))
def execute_no_results(self, sock_info, generator): """Execute all operations, returning no results (w=0). """ if self.uses_collation: raise ConfigurationError( 'Collation is unsupported for unacknowledged writes.') if self.uses_array_filters: raise ConfigurationError( 'arrayFilters is unsupported for unacknowledged writes.') if self.uses_hint: raise ConfigurationError( 'hint is unsupported for unacknowledged writes.') # Cannot have both unacknowledged writes and bypass document validation. if self.bypass_doc_val and sock_info.max_wire_version >= 4: raise OperationFailure("Cannot set bypass_document_validation with" " unacknowledged write concern") # OP_MSG if sock_info.max_wire_version > 5: if self.ordered: return self.execute_command_no_results(sock_info, generator) return self.execute_op_msg_no_results(sock_info, generator) coll = self.collection # If ordered is True we have to send GLE or use write # commands so we can abort on the first error. write_concern = WriteConcern(w=int(self.ordered)) op_id = _randint() next_run = next(generator) while next_run: # An ordered bulk write needs to send acknowledged writes to short # circuit the next run. However, the final message on the final # run can be unacknowledged. run = next_run next_run = next(generator, None) needs_ack = self.ordered and next_run is not None try: if run.op_type == _INSERT: self.execute_insert_no_results( sock_info, run, op_id, needs_ack) elif run.op_type == _UPDATE: for operation in run.ops: doc = operation['u'] check_keys = True if doc and next(iter(doc)).startswith('$'): check_keys = False coll._update( sock_info, operation['q'], doc, operation['upsert'], check_keys, operation['multi'], write_concern=write_concern, op_id=op_id, ordered=self.ordered, bypass_doc_val=self.bypass_doc_val) else: for operation in run.ops: coll._delete(sock_info, operation['q'], not operation['limit'], write_concern, op_id, self.ordered) except OperationFailure: if self.ordered: break
def get_ssl_context(*dummy): """No ssl module, raise ConfigurationError.""" raise ConfigurationError("The ssl module is not available.")
def _authenticate_gssapi(credentials, sock_info): """Authenticate using GSSAPI. """ if not HAVE_KERBEROS: raise ConfigurationError('The "kerberos" module must be ' 'installed to use GSSAPI authentication.') try: username = credentials.username password = credentials.password props = credentials.mechanism_properties # Starting here and continuing through the while loop below - establish # the security context. See RFC 4752, Section 3.1, first paragraph. host = sock_info.address[0] if props.canonicalize_host_name: host = socket.getfqdn(host) service = props.service_name + '@' + host if props.service_realm is not None: service = service + '@' + props.service_realm if password is not None: if _USE_PRINCIPAL: # Note that, though we use unquote_plus for unquoting URI # options, we use quote here. Microsoft's UrlUnescape (used # by WinKerberos) doesn't support +. principal = ":".join((quote(username), quote(password))) result, ctx = kerberos.authGSSClientInit( service, principal, gssflags=kerberos.GSS_C_MUTUAL_FLAG) else: if '@' in username: user, domain = username.split('@', 1) else: user, domain = username, None result, ctx = kerberos.authGSSClientInit( service, gssflags=kerberos.GSS_C_MUTUAL_FLAG, user=user, domain=domain, password=password) else: result, ctx = kerberos.authGSSClientInit( service, gssflags=kerberos.GSS_C_MUTUAL_FLAG) if result != kerberos.AUTH_GSS_COMPLETE: raise OperationFailure('Kerberos context failed to initialize.') try: # pykerberos uses a weird mix of exceptions and return values # to indicate errors. # 0 == continue, 1 == complete, -1 == error # Only authGSSClientStep can return 0. if kerberos.authGSSClientStep(ctx, '') != 0: raise OperationFailure('Unknown kerberos ' 'failure in step function.') # Start a SASL conversation with mongod/s # Note: pykerberos deals with base64 encoded byte strings. # Since mongo accepts base64 strings as the payload we don't # have to use bson.binary.Binary. payload = kerberos.authGSSClientResponse(ctx) cmd = SON([('saslStart', 1), ('mechanism', 'GSSAPI'), ('payload', payload), ('autoAuthorize', 1)]) response = sock_info.command('$external', cmd) # Limit how many times we loop to catch protocol / library issues for _ in range(10): result = kerberos.authGSSClientStep(ctx, str(response['payload'])) if result == -1: raise OperationFailure('Unknown kerberos ' 'failure in step function.') payload = kerberos.authGSSClientResponse(ctx) or '' cmd = SON([('saslContinue', 1), ('conversationId', response['conversationId']), ('payload', payload)]) response = sock_info.command('$external', cmd) if result == kerberos.AUTH_GSS_COMPLETE: break else: raise OperationFailure('Kerberos ' 'authentication failed to complete.') # Once the security context is established actually authenticate. # See RFC 4752, Section 3.1, last two paragraphs. if kerberos.authGSSClientUnwrap(ctx, str(response['payload'])) != 1: raise OperationFailure('Unknown kerberos ' 'failure during GSS_Unwrap step.') if kerberos.authGSSClientWrap(ctx, kerberos.authGSSClientResponse(ctx), username) != 1: raise OperationFailure('Unknown kerberos ' 'failure during GSS_Wrap step.') payload = kerberos.authGSSClientResponse(ctx) cmd = SON([('saslContinue', 1), ('conversationId', response['conversationId']), ('payload', payload)]) sock_info.command('$external', cmd) finally: kerberos.authGSSClientClean(ctx) except kerberos.KrbError as exc: raise OperationFailure(str(exc))
def parse_uri(uri, default_port=DEFAULT_PORT, validate=True, warn=False, normalize=True, connect_timeout=None): """Parse and validate a MongoDB URI. Returns a dict of the form:: { 'nodelist': <list of (host, port) tuples>, 'username': <username> or None, 'password': <password> or None, 'database': <database name> or None, 'collection': <collection name> or None, 'options': <dict of MongoDB URI options>, 'fqdn': <fqdn of the MongoDB+SRV URI> or None } If the URI scheme is "mongodb+srv://" DNS SRV and TXT lookups will be done to build nodelist and options. :Parameters: - `uri`: The MongoDB URI to parse. - `default_port`: The port number to use when one wasn't specified for a host in the URI. - `validate` (optional): If ``True`` (the default), validate and normalize all options. Default: ``True``. - `warn` (optional): When validating, if ``True`` then will warn the user then ignore any invalid options or values. If ``False``, validation will error when options are unsupported or values are invalid. Default: ``False``. - `normalize` (optional): If ``True``, convert names of URI options to their internally-used names. Default: ``True``. - `connect_timeout` (optional): The maximum time in milliseconds to wait for a response from the DNS server. .. versionchanged:: 3.9 Added the ``normalize`` parameter. .. versionchanged:: 3.6 Added support for mongodb+srv:// URIs. .. versionchanged:: 3.5 Return the original value of the ``readPreference`` MongoDB URI option instead of the validated read preference mode. .. versionchanged:: 3.1 ``warn`` added so invalid options can be ignored. """ if uri.startswith(SCHEME): is_srv = False scheme_free = uri[SCHEME_LEN:] elif uri.startswith(SRV_SCHEME): if not _HAVE_DNSPYTHON: raise ConfigurationError('The "dnspython" module must be ' 'installed to use mongodb+srv:// URIs') is_srv = True scheme_free = uri[SRV_SCHEME_LEN:] else: raise InvalidURI("Invalid URI scheme: URI must " "begin with '%s' or '%s'" % (SCHEME, SRV_SCHEME)) if not scheme_free: raise InvalidURI("Must provide at least one hostname or IP.") user = None passwd = None dbase = None collection = None options = _CaseInsensitiveDictionary() host_part, _, path_part = scheme_free.partition('/') if not host_part: host_part = path_part path_part = "" if not path_part and '?' in host_part: raise InvalidURI("A '/' is required between " "the host list and any options.") if path_part: dbase, _, opts = path_part.partition('?') if dbase: dbase = unquote_plus(dbase) if '.' in dbase: dbase, collection = dbase.split('.', 1) if _BAD_DB_CHARS.search(dbase): raise InvalidURI('Bad database name "%s"' % dbase) else: dbase = None if opts: options.update(split_options(opts, validate, warn, normalize)) if '@' in host_part: userinfo, _, hosts = host_part.rpartition('@') user, passwd = parse_userinfo(userinfo) else: hosts = host_part if '/' in hosts: raise InvalidURI("Any '/' in a unix domain socket must be" " percent-encoded: %s" % host_part) hosts = unquote_plus(hosts) fqdn = None if is_srv: if options.get('directConnection'): raise ConfigurationError( "Cannot specify directConnection=true with " "%s URIs" % (SRV_SCHEME, )) nodes = split_hosts(hosts, default_port=None) if len(nodes) != 1: raise InvalidURI("%s URIs must include one, " "and only one, hostname" % (SRV_SCHEME, )) fqdn, port = nodes[0] if port is not None: raise InvalidURI("%s URIs must not include a port number" % (SRV_SCHEME, )) # Use the connection timeout. connectTimeoutMS passed as a keyword # argument overrides the same option passed in the connection string. connect_timeout = connect_timeout or options.get("connectTimeoutMS") dns_resolver = _SrvResolver(fqdn, connect_timeout=connect_timeout) nodes = dns_resolver.get_hosts() dns_options = dns_resolver.get_options() if dns_options: parsed_dns_options = split_options(dns_options, validate, warn, normalize) if set(parsed_dns_options) - _ALLOWED_TXT_OPTS: raise ConfigurationError( "Only authSource and replicaSet are supported from DNS") for opt, val in parsed_dns_options.items(): if opt not in options: options[opt] = val if "ssl" not in options: options["ssl"] = True if validate else 'true' else: nodes = split_hosts(hosts, default_port=default_port) if len(nodes) > 1 and options.get('directConnection'): raise ConfigurationError( "Cannot specify multiple hosts with directConnection=true") return { 'nodelist': nodes, 'username': user, 'password': passwd, 'database': dbase, 'collection': collection, 'options': options, 'fqdn': fqdn }
def command(self, dbname, spec, slave_ok=False, read_preference=ReadPreference.PRIMARY, codec_options=DEFAULT_CODEC_OPTIONS, check=True, allowable_errors=None, check_keys=False, read_concern=None, write_concern=None, parse_write_concern_error=False, collation=None, session=None, client=None, retryable_write=False, publish_events=True): """Execute a command or raise an error. :Parameters: - `dbname`: name of the database on which to run the command - `spec`: a command document as a dict, SON, or mapping object - `slave_ok`: whether to set the SlaveOkay wire protocol bit - `read_preference`: a read preference - `codec_options`: a CodecOptions instance - `check`: raise OperationFailure if there are errors - `allowable_errors`: errors to ignore if `check` is True - `check_keys`: if True, check `spec` for invalid keys - `read_concern`: The read concern for this command. - `write_concern`: The write concern for this command. - `parse_write_concern_error`: Whether to parse the ``writeConcernError`` field in the command response. - `collation`: The collation for this command. - `session`: optional ClientSession instance. - `client`: optional MongoClient for gossipping $clusterTime. - `retryable_write`: True if this command is a retryable write. - `publish_events`: Should we publish events for this command? """ self.validate_session(client, session) if (read_concern and self.max_wire_version < 4 and not read_concern.ok_for_legacy): raise ConfigurationError( 'read concern level of %s is not valid ' 'with a max wire version of %d.' % (read_concern.level, self.max_wire_version)) if not (write_concern is None or write_concern.acknowledged or collation is None): raise ConfigurationError( 'Collation is unsupported for unacknowledged writes.') if self.max_wire_version >= 5 and write_concern: spec['writeConcern'] = write_concern.document elif self.max_wire_version < 5 and collation is not None: raise ConfigurationError( 'Must be connected to MongoDB 3.4+ to use a collation.') if (client or session) and not isinstance(spec, ORDERED_TYPES): # Ensure command name remains in first place. spec = SON(spec) if session: spec['lsid'] = session._use_lsid() if retryable_write: spec['txnNumber'] = session._transaction_id() self.send_cluster_time(spec, session, client) listeners = self.listeners if publish_events else None try: return command(self.sock, dbname, spec, slave_ok, self.is_mongos, read_preference, codec_options, session, client, check, allowable_errors, self.address, check_keys, listeners, self.max_bson_size, read_concern, parse_write_concern_error=parse_write_concern_error, collation=collation) except OperationFailure: raise # Catch socket.error, KeyboardInterrupt, etc. and close ourselves. except BaseException as error: self._raise_connection_failure(error)
def configure(self, proto): """ Configures the protocol using the information gathered from the remote Mongo instance. Such information may contain the max BSON document size, replica set configuration, and the master status of the instance. """ if not proto: defer.returnValue(None) query = Query(collection="admin.$cmd", query={"ismaster": 1}) reply = yield proto.send_QUERY(query) # Handle the reply from the "ismaster" query. The reply contains # configuration information about the peer. # Make sure we got a result document. if len(reply.documents) != 1: raise OperationFailure("TxMongo: invalid document length.") # Get the configuration document from the reply. config = reply.documents[0].decode() # Make sure the command was successful. if not config.get("ok"): code = config.get("code") msg = "TxMongo: " + config.get("err", "Unknown error") raise OperationFailure(msg, code) # Check that the replicaSet matches. set_name = config.get("setName") expected_set_name = self.uri["options"].get("replicaset") if expected_set_name and (expected_set_name != set_name): # Log the invalid replica set failure. msg = "TxMongo: Mongo instance does not match requested replicaSet." raise ConfigurationError(msg) # Track max bson object size limit. proto.max_bson_size = config.get("maxBsonObjectSize", DEFAULT_MAX_BSON_SIZE) proto.max_write_batch_size = config.get("maxWriteBatchSize", DEFAULT_MAX_WRITE_BATCH_SIZE) proto.set_wire_versions(config.get("minWireVersion", 0), config.get("maxWireVersion", 0)) # Track the other hosts in the replica set. hosts = config.get("hosts") if isinstance(hosts, list) and hosts: for host in hosts: if ':' not in host: host = (host, 27017) else: host = host.split(':', 1) host[1] = int(host[1]) host = tuple(host) if host not in self.__allnodes: self.__allnodes.append(host) # Check if this node is the master. ismaster = config.get("ismaster") if not ismaster: msg = "TxMongo: MongoDB host `%s` is not master." % config.get( 'me') raise AutoReconnect(msg)
def __init__(self, kms_providers, key_vault_namespace, key_vault_client=None, schema_map=None, bypass_auto_encryption=False, mongocryptd_uri='mongodb://localhost:27020', mongocryptd_bypass_spawn=False, mongocryptd_spawn_path='mongocryptd', mongocryptd_spawn_args=None): """Options to configure automatic client-side field level encryption. Automatic client-side field level encryption requires MongoDB 4.2 enterprise or a MongoDB 4.2 Atlas cluster. Automatic encryption is not supported for operations on a database or view and will result in error. Although automatic encryption requires MongoDB 4.2 enterprise or a MongoDB 4.2 Atlas cluster, automatic *decryption* is supported for all users. To configure automatic *decryption* without automatic *encryption* set ``bypass_auto_encryption=True``. Explicit encryption and explicit decryption is also supported for all users with the :class:`~pymongo.encryption.ClientEncryption` class. See :ref:`automatic-client-side-encryption` for an example. :Parameters: - `kms_providers`: Map of KMS provider options. Two KMS providers are supported: "aws" and "local". The kmsProviders map values differ by provider: - `aws`: Map with "accessKeyId" and "secretAccessKey" as strings. These are the AWS access key ID and AWS secret access key used to generate KMS messages. An optional "sessionToken" may be included to support temporary AWS credentials. - `azure`: Map with "tenantId", "clientId", and "clientSecret" as strings. Additionally, "identityPlatformEndpoint" may also be specified as a string (defaults to 'login.microsoftonline.com'). These are the Azure Active Directory credentials used to generate Azure Key Vault messages. - `gcp`: Map with "email" as a string and "privateKey" as `bytes` or a base64 encoded string (unicode on Python 2). Additionally, "endpoint" may also be specified as a string (defaults to 'oauth2.googleapis.com'). These are the credentials used to generate Google Cloud KMS messages. - `local`: Map with "key" as `bytes` (96 bytes in length) or a base64 encoded string (unicode on Python 2) which decodes to 96 bytes. "key" is the master key used to encrypt/decrypt data keys. This key should be generated and stored as securely as possible. - `key_vault_namespace`: The namespace for the key vault collection. The key vault collection contains all data keys used for encryption and decryption. Data keys are stored as documents in this MongoDB collection. Data keys are protected with encryption by a KMS provider. - `key_vault_client` (optional): By default the key vault collection is assumed to reside in the same MongoDB cluster as the encrypted MongoClient. Use this option to route data key queries to a separate MongoDB cluster. - `schema_map` (optional): Map of collection namespace ("db.coll") to JSON Schema. By default, a collection's JSONSchema is periodically polled with the listCollections command. But a JSONSchema may be specified locally with the schemaMap option. **Supplying a `schema_map` provides more security than relying on JSON Schemas obtained from the server. It protects against a malicious server advertising a false JSON Schema, which could trick the client into sending unencrypted data that should be encrypted.** Schemas supplied in the schemaMap only apply to configuring automatic encryption for client side encryption. Other validation rules in the JSON schema will not be enforced by the driver and will result in an error. - `bypass_auto_encryption` (optional): If ``True``, automatic encryption will be disabled but automatic decryption will still be enabled. Defaults to ``False``. - `mongocryptd_uri` (optional): The MongoDB URI used to connect to the *local* mongocryptd process. Defaults to ``'mongodb://localhost:27020'``. - `mongocryptd_bypass_spawn` (optional): If ``True``, the encrypted MongoClient will not attempt to spawn the mongocryptd process. Defaults to ``False``. - `mongocryptd_spawn_path` (optional): Used for spawning the mongocryptd process. Defaults to ``'mongocryptd'`` and spawns mongocryptd from the system path. - `mongocryptd_spawn_args` (optional): A list of string arguments to use when spawning the mongocryptd process. Defaults to ``['--idleShutdownTimeoutSecs=60']``. If the list does not include the ``idleShutdownTimeoutSecs`` option then ``'--idleShutdownTimeoutSecs=60'`` will be added. .. versionadded:: 3.9 """ if not _HAVE_PYMONGOCRYPT: raise ConfigurationError( "client side encryption requires the pymongocrypt library: " "install a compatible version with: " "python -m pip install 'pymongo[encryption]'") self._kms_providers = kms_providers self._key_vault_namespace = key_vault_namespace self._key_vault_client = key_vault_client self._schema_map = schema_map self._bypass_auto_encryption = bypass_auto_encryption self._mongocryptd_uri = mongocryptd_uri self._mongocryptd_bypass_spawn = mongocryptd_bypass_spawn self._mongocryptd_spawn_path = mongocryptd_spawn_path self._mongocryptd_spawn_args = (copy.copy(mongocryptd_spawn_args) or ['--idleShutdownTimeoutSecs=60']) if not isinstance(self._mongocryptd_spawn_args, list): raise TypeError('mongocryptd_spawn_args must be a list') if not any('idleShutdownTimeoutSecs' in s for s in self._mongocryptd_spawn_args): self._mongocryptd_spawn_args.append('--idleShutdownTimeoutSecs=60')
hosts.update([_partition_node(h) for h in response["hosts"]]) if "passives" in response: hosts.update([_partition_node(h) for h in response["passives"]]) except (ConnectionFailure, socket.error), why: if mongo: mongo['pool'].discard_socket() errors.append("%s:%d: %s" % (node[0], node[1], str(why))) if hosts: self.__hosts = hosts break else: if errors: raise AutoReconnect(', '.join(errors)) raise ConfigurationError('No suitable hosts found') self.__update_pools() def __check_is_primary(self, host): """Checks if this host is the primary for the replica set. """ try: mongo = None if host in self.__pools: mongo = self.__pools[host] sock = self.__socket(mongo) res = self.__simple_command(sock, 'admin', {'ismaster': 1}) else: res, conn = self.__is_master(host) bson_max = res.get('maxBsonObjectSize', MAX_BSON_SIZE)
def validate_uuid_subtype(dummy, value): """Validate the uuid subtype option, a numerical value whose acceptable values are defined in bson.binary.""" if value not in _UUID_SUBTYPES.values(): raise ConfigurationError("Not a valid setting for uuid_subtype.") return value
def validate_auth_mechanism(option, value): """Validate the authMechanism URI option. """ if value not in MECHANISMS: raise ConfigurationError("%s must be in " "%s" % (option, MECHANISMS)) return value
def __init__(self, hosts_or_uri=None, max_pool_size=None, document_class=dict, tz_aware=False, **kwargs): """Create a new connection to a MongoDB replica set. .. warning:: **DEPRECATED:** :class:`ReplicaSetConnection` is deprecated. Please use :class:`~pymongo.mongo_replica_set_client.MongoReplicaSetClient` instead The resultant connection object has connection-pooling built in. It also performs auto-reconnection when necessary. If an operation fails because of a connection error, :class:`~pymongo.errors.ConnectionFailure` is raised. If auto-reconnection will be performed, :class:`~pymongo.errors.AutoReconnect` will be raised. Application code should handle this exception (recognizing that the operation failed) and then continue to execute. Raises :class:`~pymongo.errors.ConnectionFailure` if the connection cannot be made. The `hosts_or_uri` parameter can be a full `mongodb URI <http://dochub.mongodb.org/core/connections>`_, in addition to a string of `host:port` pairs (e.g. 'host1:port1,host2:port2'). If `hosts_or_uri` is None 'localhost:27017' will be used. .. note:: Instances of :class:`~ReplicaSetConnection` start a background task to monitor the state of the replica set. This allows it to quickly respond to changes in replica set configuration. Before discarding an instance of :class:`~ReplicaSetConnection` make sure you call :meth:`~close` to ensure that the monitor task is cleanly shut down. :Parameters: - `hosts_or_uri` (optional): A MongoDB URI or string of `host:port` pairs. If a host is an IPv6 literal it must be enclosed in '[' and ']' characters following the RFC2732 URL syntax (e.g. '[::1]' for localhost) - `max_pool_size` (optional): The maximum number of connections each pool will open simultaneously. If this is set, operations will block if there are `max_pool_size` outstanding connections from the pool. By default the pool size is unlimited. - `document_class` (optional): default class to use for documents returned from queries on this connection - `tz_aware` (optional): if ``True``, :class:`~datetime.datetime` instances returned as values in a document by this :class:`ReplicaSetConnection` will be timezone aware (otherwise they will be naive) - `replicaSet`: (required) The name of the replica set to connect to. The driver will verify that each host it connects to is a member of this replica set. Can be passed as a keyword argument or as a MongoDB URI option. | **Other optional parameters can be passed as keyword arguments:** - `host`: For compatibility with connection.Connection. If both `host` and `hosts_or_uri` are specified `host` takes precedence. - `port`: For compatibility with connection.Connection. The default port number to use for hosts. - `network_timeout`: For compatibility with connection.Connection. The timeout (in seconds) to use for socket operations - default is no timeout. If both `network_timeout` and `socketTimeoutMS` are specified `network_timeout` takes precedence, matching connection.Connection. - `socketTimeoutMS`: (integer) How long (in milliseconds) a send or receive on a socket can take before timing out. - `connectTimeoutMS`: (integer) How long (in milliseconds) a connection can take to be opened before timing out. - `auto_start_request`: If ``True`` (the default), each thread that accesses this :class:`ReplicaSetConnection` has a socket allocated to it for the thread's lifetime, for each member of the set. For :class:`~pymongo.read_preferences.ReadPreference` PRIMARY, auto_start_request=True ensures consistent reads, even if you read after an unsafe write. For read preferences other than PRIMARY, there are no consistency guarantees. - `use_greenlets`: if ``True``, use a background Greenlet instead of a background thread to monitor state of replica set. Additionally, :meth:`start_request()` will ensure that the current greenlet uses the same socket for all operations until :meth:`end_request()`. `use_greenlets` with ReplicaSetConnection requires `Gevent <http://gevent.org/>`_ to be installed. | **Write Concern options:** - `safe`: :class:`ReplicaSetConnection` **disables** acknowledgement of write operations. Use ``safe=True`` to enable write acknowledgement. - `w`: (integer or string) Write operations will block until they have been replicated to the specified number or tagged set of servers. `w=<int>` always includes the replica set primary (e.g. w=3 means write to the primary and wait until replicated to **two** secondaries). Implies safe=True. - `wtimeout`: (integer) Used in conjunction with `w`. Specify a value in milliseconds to control how long to wait for write propagation to complete. If replication does not complete in the given timeframe, a timeout exception is raised. Implies safe=True. - `j`: If ``True`` block until write operations have been committed to the journal. Ignored if the server is running without journaling. Implies safe=True. - `fsync`: If ``True`` force the database to fsync all files before returning. When used with `j` the server awaits the next group commit before returning. Implies safe=True. | **Read preference options:** - `slave_okay` or `slaveOk` (deprecated): Use `read_preference` instead. - `read_preference`: The read preference for this connection. See :class:`~pymongo.read_preferences.ReadPreference` for available - `tag_sets`: Read from replica-set members with these tags. To specify a priority-order for tag sets, provide a list of tag sets: ``[{'dc': 'ny'}, {'dc': 'la'}, {}]``. A final, empty tag set, ``{}``, means "read from any member that matches the mode, ignoring tags." :class:`MongoReplicaSetClient` tries each set of tags in turn until it finds a set of tags with at least one matching member. - `secondary_acceptable_latency_ms`: (integer) Any replica-set member whose ping time is within secondary_acceptable_latency_ms of the nearest member may accept reads. Default 15 milliseconds. **Ignored by mongos** and must be configured on the command line. See the localThreshold_ option for more information. | **SSL configuration:** - `ssl`: If ``True``, create the connection to the servers using SSL. - `ssl_keyfile`: The private keyfile used to identify the local connection against mongod. If included with the ``certfile` then only the ``ssl_certfile`` is needed. Implies ``ssl=True``. - `ssl_certfile`: The certificate file used to identify the local connection against mongod. Implies ``ssl=True``. - `ssl_cert_reqs`: Specifies whether a certificate is required from the other side of the connection, and whether it will be validated if provided. It must be one of the three values ``ssl.CERT_NONE`` (certificates ignored), ``ssl.CERT_OPTIONAL`` (not required, but validated if provided), or ``ssl.CERT_REQUIRED`` (required and validated). If the value of this parameter is not ``ssl.CERT_NONE``, then the ``ssl_ca_certs`` parameter must point to a file of CA certificates. Implies ``ssl=True``. - `ssl_ca_certs`: The ca_certs file contains a set of concatenated "certification authority" certificates, which are used to validate certificates passed from the other end of the connection. Implies ``ssl=True``. .. versionchanged:: 2.5 Added additional ssl options .. versionchanged:: 2.3 Added `tag_sets` and `secondary_acceptable_latency_ms` options. .. versionchanged:: 2.2 Added `auto_start_request` and `use_greenlets` options. Added support for `host`, `port`, and `network_timeout` keyword arguments for compatibility with connection.Connection. .. versionadded:: 2.1 .. _localThreshold: http://docs.mongodb.org/manual/reference/mongos/#cmdoption-mongos--localThreshold """ network_timeout = kwargs.pop('network_timeout', None) if network_timeout is not None: if (not isinstance(network_timeout, (int, float)) or network_timeout <= 0): raise ConfigurationError("network_timeout must " "be a positive integer") kwargs['socketTimeoutMS'] = network_timeout * 1000 kwargs['auto_start_request'] = kwargs.get('auto_start_request', True) kwargs['safe'] = kwargs.get('safe', False) super(ReplicaSetConnection, self).__init__( hosts_or_uri, max_pool_size, document_class, tz_aware, **kwargs)
def raise_config_error(key, dummy): """Raise ConfigurationError with the given key name.""" raise ConfigurationError("Unknown option %s" % (key, ))
def __init__(self, pair, max_size, net_timeout, conn_timeout, use_ssl, use_greenlets, ssl_keyfile=None, ssl_certfile=None, ssl_cert_reqs=None, ssl_ca_certs=None, wait_queue_timeout=None, wait_queue_multiple=None, socket_keepalive=False, ssl_match_hostname=True): """ :Parameters: - `pair`: a (hostname, port) tuple - `max_size`: The maximum number of open sockets. Calls to `get_socket` will block if this is set, this pool has opened `max_size` sockets, and there are none idle. Set to `None` to disable. - `net_timeout`: timeout in seconds for operations on open connection - `conn_timeout`: timeout in seconds for establishing connection - `use_ssl`: bool, if True use an encrypted connection - `use_greenlets`: bool, if True then start_request() assigns a socket to the current greenlet - otherwise it is assigned to the current thread - `ssl_keyfile`: The private keyfile used to identify the local connection against mongod. If included with the ``certfile` then only the ``ssl_certfile`` is needed. Implies ``ssl=True``. - `ssl_certfile`: The certificate file used to identify the local connection against mongod. Implies ``ssl=True``. - `ssl_cert_reqs`: Specifies whether a certificate is required from the other side of the connection, and whether it will be validated if provided. It must be one of the three values ``ssl.CERT_NONE`` (certificates ignored), ``ssl.CERT_OPTIONAL`` (not required, but validated if provided), or ``ssl.CERT_REQUIRED`` (required and validated). If the value of this parameter is not ``ssl.CERT_NONE``, then the ``ssl_ca_certs`` parameter must point to a file of CA certificates. Implies ``ssl=True``. - `ssl_ca_certs`: The ca_certs file contains a set of concatenated "certification authority" certificates, which are used to validate certificates passed from the other end of the connection. Implies ``ssl=True``. - `wait_queue_timeout`: (integer) How long (in seconds) a thread will wait for a socket from the pool if the pool has no free sockets. - `wait_queue_multiple`: (integer) Multiplied by max_pool_size to give the number of threads allowed to wait for a socket at one time. - `socket_keepalive`: (boolean) Whether to send periodic keep-alive packets on connected sockets. Defaults to ``False`` (do not send keep-alive packets). - `ssl_match_hostname`: If ``True`` (the default), and `ssl_cert_reqs` is not ``ssl.CERT_NONE``, enables hostname verification using the :func:`~ssl.match_hostname` function from python's :mod:`~ssl` module. Think very carefully before setting this to ``False`` as that could make your application vulnerable to man-in-the-middle attacks. """ # Only check a socket's health with _closed() every once in a while. # Can override for testing: 0 to always check, None to never check. self._check_interval_seconds = 1 self.sockets = set() self.lock = threading.Lock() # Keep track of resets, so we notice sockets created before the most # recent reset and close them. self.pool_id = 0 self.pid = os.getpid() self.pair = pair self.max_size = max_size self.net_timeout = net_timeout self.conn_timeout = conn_timeout self.wait_queue_timeout = wait_queue_timeout self.wait_queue_multiple = wait_queue_multiple self.socket_keepalive = socket_keepalive self.use_ssl = use_ssl self.ssl_keyfile = ssl_keyfile self.ssl_certfile = ssl_certfile self.ssl_cert_reqs = ssl_cert_reqs self.ssl_ca_certs = ssl_ca_certs self.ssl_match_hostname = ssl_match_hostname if HAS_SSL and use_ssl and not ssl_cert_reqs: self.ssl_cert_reqs = ssl.CERT_NONE # Map self._ident.get() -> request socket self._tid_to_sock = {} if use_greenlets and not thread_util.have_gevent: raise ConfigurationError("The Gevent module is not available. " "Install the gevent package from PyPI.") self._ident = thread_util.create_ident(use_greenlets) # Count the number of calls to start_request() per thread or greenlet self._request_counter = thread_util.Counter(use_greenlets) if self.wait_queue_multiple is None or self.max_size is None: max_waiters = None else: max_waiters = self.max_size * self.wait_queue_multiple self._socket_semaphore = thread_util.create_semaphore( self.max_size, max_waiters, use_greenlets)
def __init__(self, host=None, port=None, document_class=dict, tz_aware=False, connect=True, **kwargs): """Client for a MongoDB instance, a replica set, or a set of mongoses. The client object is thread-safe and has connection-pooling built in. If an operation fails because of a network error, :class:`~pymongo.errors.ConnectionFailure` is raised and the client reconnects in the background. Application code should handle this exception (recognizing that the operation failed) and then continue to execute. The `host` parameter can be a full `mongodb URI <http://dochub.mongodb.org/core/connections>`_, in addition to a simple hostname. It can also be a list of hostnames or URIs. Any port specified in the host string(s) will override the `port` parameter. If multiple mongodb URIs containing database or auth information are passed, the last database, username, and password present will be used. For username and passwords reserved characters like ':', '/', '+' and '@' must be escaped following RFC 2396. .. warning:: When using PyMongo in a multiprocessing context, please read :ref:`multiprocessing` first. :Parameters: - `host` (optional): hostname or IP address of the instance to connect to, or a mongodb URI, or a list of hostnames / mongodb URIs. If `host` is an IPv6 literal it must be enclosed in '[' and ']' characters following the RFC2732 URL syntax (e.g. '[::1]' for localhost) - `port` (optional): port number on which to connect - `document_class` (optional): default class to use for documents returned from queries on this client - `tz_aware` (optional): if ``True``, :class:`~datetime.datetime` instances returned as values in a document by this :class:`MongoClient` will be timezone aware (otherwise they will be naive) - `connect` (optional): if ``True`` (the default), immediately begin connecting to MongoDB in the background. Otherwise connect on the first operation. | **Other optional parameters can be passed as keyword arguments:** - `maxPoolSize` (optional): The maximum number of connections that the pool will open simultaneously. If this is set, operations will block if there are `maxPoolSize` outstanding connections from the pool. Defaults to 100. Cannot be 0. - `socketTimeoutMS`: (integer or None) Controls how long (in milliseconds) the driver will wait for a response after sending an ordinary (non-monitoring) database operation before concluding that a network error has occurred. Defaults to ``None`` (no timeout). - `connectTimeoutMS`: (integer or None) Controls how long (in milliseconds) the driver will wait during server monitoring when connecting a new socket to a server before concluding the server is unavailable. Defaults to ``20000`` (20 seconds). - `serverSelectionTimeoutMS`: (integer) Controls how long (in milliseconds) the driver will wait to find an available, appropriate server to carry out a database operation; while it is waiting, multiple server monitoring operations may be carried out, each controlled by `connectTimeoutMS`. Defaults to ``30000`` (30 seconds). - `waitQueueTimeoutMS`: (integer or None) How long (in milliseconds) a thread will wait for a socket from the pool if the pool has no free sockets. Defaults to ``None`` (no timeout). - `waitQueueMultiple`: (integer or None) Multiplied by maxPoolSize to give the number of threads allowed to wait for a socket at one time. Defaults to ``None`` (no limit). - `socketKeepAlive`: (boolean) Whether to send periodic keep-alive packets on connected sockets. Defaults to ``False`` (do not send keep-alive packets). | **Write Concern options:** | (Only set if passed. No default values.) - `w`: (integer or string) If this is a replica set, write operations will block until they have been replicated to the specified number or tagged set of servers. `w=<int>` always includes the replica set primary (e.g. w=3 means write to the primary and wait until replicated to **two** secondaries). Passing w=0 **disables write acknowledgement** and all other write concern options. - `wtimeout`: (integer) Used in conjunction with `w`. Specify a value in milliseconds to control how long to wait for write propagation to complete. If replication does not complete in the given timeframe, a timeout exception is raised. - `j`: If ``True`` block until write operations have been committed to the journal. Cannot be used in combination with `fsync`. Prior to MongoDB 2.6 this option was ignored if the server was running without journaling. Starting with MongoDB 2.6 write operations will fail with an exception if this option is used when the server is running without journaling. - `fsync`: If ``True`` and the server is running without journaling, blocks until the server has synced all data files to disk. If the server is running with journaling, this acts the same as the `j` option, blocking until write operations have been committed to the journal. Cannot be used in combination with `j`. | **Replica set keyword arguments for connecting with a replica set - either directly or via a mongos:** - `replicaSet`: (string or None) The name of the replica set to connect to. The driver will verify that all servers it connects to match this name. Implies that the hosts specified are a seed list and the driver should attempt to find all members of the set. Defaults to ``None``. - `read_preference`: The read preference for this client. If connecting directly to a secondary then a read preference mode *other* than PRIMARY is required - otherwise all queries will throw :class:`~pymongo.errors.AutoReconnect` "not master". See :class:`~pymongo.read_preferences.ReadPreference` for all available read preference options. Defaults to ``PRIMARY``. | **SSL configuration:** - `ssl`: If ``True``, create the connection to the server using SSL. Defaults to ``False``. - `ssl_keyfile`: The private keyfile used to identify the local connection against mongod. If included with the ``certfile`` then only the ``ssl_certfile`` is needed. Implies ``ssl=True``. Defaults to ``None``. - `ssl_certfile`: The certificate file used to identify the local connection against mongod. Implies ``ssl=True``. Defaults to ``None``. - `ssl_cert_reqs`: Specifies whether a certificate is required from the other side of the connection, and whether it will be validated if provided. It must be one of the three values ``ssl.CERT_NONE`` (certificates ignored), ``ssl.CERT_OPTIONAL`` (not required, but validated if provided), or ``ssl.CERT_REQUIRED`` (required and validated). If the value of this parameter is not ``ssl.CERT_NONE`` and a value is not provided for ``ssl_ca_certs`` PyMongo will attempt to load system provided CA certificates. If the python version in use does not support loading system CA certificates then the ``ssl_ca_certs`` parameter must point to a file of CA certificates. Implies ``ssl=True``. Defaults to ``ssl.CERT_REQUIRED`` if not provided and ``ssl=True``. - `ssl_ca_certs`: The ca_certs file contains a set of concatenated "certification authority" certificates, which are used to validate certificates passed from the other end of the connection. Implies ``ssl=True``. Defaults to ``None``. - `ssl_match_hostname`: If ``True`` (the default), and `ssl_cert_reqs` is not ``ssl.CERT_NONE``, enables hostname verification using the :func:`~ssl.match_hostname` function from python's :mod:`~ssl` module. Think very carefully before setting this to ``False`` as that could make your application vulnerable to man-in-the-middle attacks. .. mongodoc:: connections .. versionchanged:: 3.0 :class:`~pymongo.mongo_client.MongoClient` is now the one and only client class for a standalone server, mongos, or replica set. It includes the functionality that had been split into :class:`~pymongo.mongo_client.MongoReplicaSetClient`: it can connect to a replica set, discover all its members, and monitor the set for stepdowns, elections, and reconfigs. The :class:`~pymongo.mongo_client.MongoClient` constructor no longer blocks while connecting to the server or servers, and it no longer raises :class:`~pymongo.errors.ConnectionFailure` if they are unavailable, nor :class:`~pymongo.errors.ConfigurationError` if the user's credentials are wrong. Instead, the constructor returns immediately and launches the connection process on background threads. Therefore the ``alive`` method is removed since it no longer provides meaningful information; even if the client is disconnected, it may discover a server in time to fulfill the next operation. In PyMongo 2.x, :class:`~pymongo.MongoClient` accepted a list of standalone MongoDB servers and used the first it could connect to:: MongoClient(['host1.com:27017', 'host2.com:27017']) A list of multiple standalones is no longer supported; if multiple servers are listed they must be members of the same replica set, or mongoses in the same sharded cluster. The behavior for a list of mongoses is changed from "high availability" to "load balancing". Before, the client connected to the lowest-latency mongos in the list, and used it until a network error prompted it to re-evaluate all mongoses' latencies and reconnect to one of them. In PyMongo 3, the client monitors its network latency to all the mongoses continuously, and distributes operations evenly among those with the lowest latency. See :ref:`mongos-load-balancing` for more information. The ``connect`` option is added. The ``start_request``, ``in_request``, and ``end_request`` methods are removed, as well as the ``auto_start_request`` option. The ``copy_database`` method is removed, see the :doc:`copy_database examples </examples/copydb>` for alternatives. The :meth:`MongoClient.disconnect` method is removed; it was a synonym for :meth:`~pymongo.MongoClient.close`. :class:`~pymongo.mongo_client.MongoClient` no longer returns an instance of :class:`~pymongo.database.Database` for attribute names with leading underscores. You must use dict-style lookups instead:: client['__my_database__'] Not:: client.__my_database__ """ if host is None: host = self.HOST if isinstance(host, string_type): host = [host] if port is None: port = self.PORT if not isinstance(port, int): raise TypeError("port must be an instance of int") seeds = set() username = None password = None dbase = None opts = {} for entity in host: if "://" in entity: if entity.startswith("mongodb://"): res = uri_parser.parse_uri(entity, port, warn=True) seeds.update(res["nodelist"]) username = res["username"] or username password = res["password"] or password dbase = res["database"] or dbase opts = res["options"] else: idx = entity.find("://") raise InvalidURI("Invalid URI scheme: " "%s" % (entity[:idx], )) else: seeds.update(uri_parser.split_hosts(entity, port)) if not seeds: raise ConfigurationError("need to specify at least one host") # _pool_class, _monitor_class, and _condition_class are for deep # customization of PyMongo, e.g. Motor. pool_class = kwargs.pop('_pool_class', None) monitor_class = kwargs.pop('_monitor_class', None) condition_class = kwargs.pop('_condition_class', None) keyword_opts = kwargs keyword_opts['document_class'] = document_class keyword_opts['tz_aware'] = tz_aware keyword_opts['connect'] = connect # Validate all keyword options. keyword_opts = dict( common.validate(k, v) for k, v in keyword_opts.items()) opts.update(keyword_opts) self.__options = options = ClientOptions(username, password, dbase, opts) self.__default_database_name = dbase self.__lock = threading.Lock() self.__cursor_manager = CursorManager(self) self.__kill_cursors_queue = [] # Cache of existing indexes used by ensure_index ops. self.__index_cache = {} super(MongoClient, self).__init__(options.codec_options, options.read_preference, options.write_concern) self.__all_credentials = {} creds = options.credentials if creds: self._cache_credentials(creds.source, creds) self._topology_settings = TopologySettings( seeds=seeds, replica_set_name=options.replica_set_name, pool_class=pool_class, pool_options=options.pool_options, monitor_class=monitor_class, condition_class=condition_class, local_threshold_ms=options.local_threshold_ms, server_selection_timeout=options.server_selection_timeout) self._topology = Topology(self._topology_settings) if connect: self._topology.open() def target(): client = self_ref() if client is None: return False # Stop the executor. MongoClient._process_kill_cursors_queue(client) return True executor = periodic_executor.PeriodicExecutor( condition_class=self._topology_settings.condition_class, interval=common.KILL_CURSOR_FREQUENCY, min_interval=0, target=target) # We strongly reference the executor and it weakly references us via # this closure. When the client is freed, stop the executor soon. self_ref = weakref.ref(self, executor.close) self._kill_cursors_executor = executor executor.open()
def send_message_with_response(self, operation, set_slave_okay, all_credentials, listeners, exhaust=False): """Send a message to MongoDB and return a Response object. Can raise ConnectionFailure. :Parameters: - `operation`: A _Query or _GetMore object. - `set_slave_okay`: Pass to operation.get_message. - `all_credentials`: dict, maps auth source to MongoCredential. - `exhaust` (optional): If True, the socket used stays checked out. It is returned along with its Pool in the Response. """ with self.get_socket(all_credentials, exhaust) as sock_info: duration = None publish = listeners.enabled_for_commands if publish: start = datetime.now() use_find_cmd = False if sock_info.max_wire_version >= 4: if not exhaust: use_find_cmd = True elif (isinstance(operation, _Query) and not operation.read_concern.ok_for_legacy): raise ConfigurationError( 'read concern level of %s is not valid ' 'with a max wire version of %d.' % (operation.read_concern.level, sock_info.max_wire_version)) message = operation.get_message(set_slave_okay, sock_info.is_mongos, use_find_cmd) request_id, data, max_doc_size = self._split_message(message) if publish: encoding_duration = datetime.now() - start cmd, dbn = operation.as_command() listeners.publish_command_start(cmd, dbn, request_id, sock_info.address) start = datetime.now() try: sock_info.send_message(data, max_doc_size) response_data = sock_info.receive_message(1, request_id) except Exception as exc: if publish: duration = (datetime.now() - start) + encoding_duration failure = _convert_exception(exc) listeners.publish_command_failure(duration, failure, next(iter(cmd)), request_id, sock_info.address) raise if publish: duration = (datetime.now() - start) + encoding_duration if exhaust: return ExhaustResponse(data=response_data, address=self._description.address, socket_info=sock_info, pool=self._pool, duration=duration, request_id=request_id, from_command=use_find_cmd) else: return Response(data=response_data, address=self._description.address, duration=duration, request_id=request_id, from_command=use_find_cmd)