def _get_collection_for_targetted_upsert(
        collection_name, query, update, with_options=None):
    """Return the collection on the single shard targetted by an upsert.

    The shard key is looked up first at the top level of ``update`` and,
    failing that, inside its ``'$set'`` sub-document.

    :param collection_name: Name of the sharded collection.
    :param query: The query part of the upsert (present for interface
        symmetry with the other multishard helpers; not used for targetting).
    :param update: The update document; must carry the shard key either at
        the top level or under ``'$set'``.
    :param with_options: Optional dict of keyword arguments forwarded to
        ``Collection.with_options`` (e.g. read preference / write concern).
    :raises KeyError: If the shard key is absent and ``update`` has no
        ``'$set'`` key.
    """
    shard_key = _get_query_target(collection_name, update)
    if not shard_key:
        shard_key = _get_query_target(collection_name, update['$set'])
    realm = _get_realm_for_collection(collection_name)
    location = _get_location_for_shard(realm, shard_key)

    cluster_name, database_name = parse_location(location.location)
    connection = get_connection(cluster_name)
    collection = connection[database_name][collection_name]
    if with_options:
        # Bug fix: with_options takes keyword arguments (the sibling
        # _create_collection_iterator already unpacks the dict); passing the
        # dict positionally would bind it to with_options' first parameter.
        collection = collection.with_options(**with_options)
    return collection
def _create_collection_iterator(collection_name, query, with_options={},
                                log_untargetted_queries=True):
    """Creates an iterator that returns collections and queries that can then
    be used to perform multishard operations:

        for collection, query, location in _create_collection_iterator(...):
            for doc in collection.find(query):
                yield doc

    This does all the hardwork of figuring out what collections to query and
    how to adjust the query to account for any shards that are currently
    moving.

    :param collection_name: Name of the sharded collection to resolve.
    :param query: Query document; if it pins the shard key, only the one
        matching location is yielded, otherwise every location in the realm.
    :param with_options: Dict of keyword arguments forwarded to
        ``Collection.with_options`` for each yielded collection.
    :param log_untargetted_queries: When True, an untargetted (multi-shard)
        query is reported through the module-level callback, if one is set.
    """
    realm = _get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']
    shard_key = _get_query_target(collection_name, query)
    if shard_key:
        # Targetted query: exactly one location to visit.
        location = _get_location_for_shard(realm, shard_key)
        locations = {location.location: location}
    else:
        # Untargetted query: fan out to every location in the realm.
        locations = _get_all_locations_for_realm(realm)
        global untargetted_query_callback
        if untargetted_query_callback and log_untargetted_queries:
            untargetted_query_callback(collection_name, query)

    for location, location_meta in locations.iteritems():
        cluster_name, database_name = parse_location(location)
        connection = get_connection(cluster_name)
        collection = connection[database_name][collection_name]
        if with_options:
            collection = collection.with_options(**with_options)
        if location_meta.excludes:
            # A shard currently migrating away from this location must be
            # filtered out so its documents are not read from both sides.
            if len(location_meta.excludes) == 1:
                query = {
                    '$and': [query, {
                        shard_field: {
                            '$ne': location_meta.excludes[0]
                        }
                    }]
                }
            else:
                raise Exception('Multiple shards in transit. Aborting')
        yield collection, query, location
        if location_meta.excludes:
            # Restore the caller's original query before the next location;
            # the exclusion wrapper above is location-specific.
            query = query['$and'][0]
def _get_collection_for_targetted_upsert(collection_name, query, update,
                                         with_options=None):
    """Return the collection on the single shard targetted by an upsert.

    The shard key is looked up first at the top level of ``update`` and,
    failing that, inside its ``'$set'`` sub-document.

    :param collection_name: Name of the sharded collection.
    :param query: The query part of the upsert (present for interface
        symmetry with the other multishard helpers; not used for targetting).
    :param update: The update document; must carry the shard key either at
        the top level or under ``'$set'``.
    :param with_options: Optional dict of keyword arguments forwarded to
        ``Collection.with_options`` (e.g. read preference / write concern).
    :raises KeyError: If the shard key is absent and ``update`` has no
        ``'$set'`` key.
    """
    shard_key = _get_query_target(collection_name, update)
    if not shard_key:
        shard_key = _get_query_target(collection_name, update['$set'])
    realm = _get_realm_for_collection(collection_name)
    location = _get_location_for_shard(realm, shard_key)

    cluster_name, database_name = parse_location(location.location)
    connection = get_connection(cluster_name)
    collection = connection[database_name][collection_name]
    if with_options:
        # Bug fix: with_options takes keyword arguments (the sibling
        # _create_collection_iterator already unpacks the dict); passing the
        # dict positionally would bind it to with_options' first parameter.
        collection = collection.with_options(**with_options)
    return collection
def _create_collection_iterator(collection_name, query, with_options={}):
    """Creates an iterator that returns collections and queries that can then
    be used to perform multishard operations:

        for collection, query in _create_collection_iterator(...):
            for doc in collection.find(query):
                yield doc

    This does all the hardwork of figuring out what collections to query and
    how to adjust the query to account for any shards that are currently
    moving.

    :param collection_name: Name of the sharded collection to resolve.
    :param query: Query document; if it pins the shard key, only the one
        matching location is visited, otherwise every location in the realm.
    :param with_options: Dict of keyword arguments forwarded to
        ``Collection.with_options`` for each yielded collection.
    """
    realm = _get_realm_for_collection(collection_name)
    shard_field = realm['shard_field']
    shard_key = _get_query_target(collection_name, query)
    if shard_key:
        # Targetted query: exactly one location to visit.
        location = _get_location_for_shard(realm, shard_key)
        locations = {location.location: location}
    else:
        # Untargetted query: fan out to every location in the realm.
        locations = _get_all_locations_for_realm(realm)
        global untargetted_query_callback
        if untargetted_query_callback:
            untargetted_query_callback(collection_name, query)

    for location, location_meta in locations.iteritems():
        cluster_name, database_name = parse_location(location)
        connection = get_connection(cluster_name)
        collection = connection[database_name][collection_name]
        if with_options:
            collection = collection.with_options(**with_options)
        if location_meta.excludes:
            # A shard currently migrating away from this location must be
            # filtered out so its documents are not read from both sides.
            if len(location_meta.excludes) == 1:
                query = {'$and': [
                    query,
                    {shard_field: {'$ne': location_meta.excludes[0]}}]}
            else:
                raise Exception('Multiple shards in transit. Aborting')
        yield collection, query
        if location_meta.excludes:
            # Restore the caller's original query before the next location;
            # the exclusion wrapper above is location-specific.
            query = query['$and'][0]
def _assert_valid_location(location):
    """Validate *location* by resolving its cluster name to a URI.

    ``get_cluster_uri`` raises for a non-existant cluster, which is exactly
    the check we want; its return value is deliberately discarded.
    """
    cluster_name = parse_location(location)[0]
    get_cluster_uri(cluster_name)
def _get_collection_from_location_string(location, collection_name):
    """Resolve a location string to the named collection object.

    The location encodes a server address and a database name; a connection
    to that server is obtained and the collection looked up on the database.
    """
    server_addr, database_name = parse_location(location)
    database = get_connection(server_addr)[database_name]
    return database[collection_name]