def do_ensure_full_commit(self, request):
    """
    TODO: We should pass the instance_start_time back.  CouchDB uses this
    to ensure that the target database didn't crash between checkpoints
    during replication.  In our case, we strip it from here and from the
    /db info response.  This causes replication to see undefined in both
    cases, which always matches.  This is safe so long as the shard
    configuration does not change and we use delayed_commits=false or a
    battery-backed cache on every node.

    The more general solution is to request all available information by
    contacting all replicas of all shards, and to force replication to
    retry from the last checkpoint any time the instance_start_time
    vector does not match.  However, we don't want to do this until we
    feel confident that we can store checkpoints in a redundant way.
    That means opening up the read repair can of worms, or making
    smartproxy a real replication middle-man with its own database for
    storing and replicating cluster-wide checkpoint logs.
    """
    def finish_request(results):
        request.setResponseCode(201)
        request.setHeader('Content-Length', 12)
        request.write('{"ok":true}\n')
        request.finish()

    database, rest = request.path[1:].split('/', 1)
    shards = self.conf_data.shards(database)
    deferred = defer.DeferredList(
        [getPageFromAny([
            (shard_idx,
             '/'.join([shard_uri, rest]),
             [],
             {'method': 'POST', 'headers': request.getAllHeaders()})
            for shard_uri in self.conf_data.nodes(shard)])
         for shard_idx, shard in enumerate(shards)],
        fireOnOneErrback=1, fireOnOneCallback=0, consumeErrors=1)
    deferred.addCallback(finish_request)
    deferred.addErrback(make_errback(request))
    return server.NOT_DONE_YET
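# The fan-out above leans on getPageFromAny for per-shard fail-over.  A
# minimal sketch of the contract assumed here (illustrative only; the real
# getPageFromAny is defined elsewhere in smartproxy and may differ): walk the
# (identifier, url, factory_args, factory_kwargs) tuples in order, fire with
# (page, identifier, factory) for the first replica that answers, and fail
# only once every replica has failed.
def get_page_from_any_sketch(upstreams):
    from twisted.web.client import getPage
    upstreams = list(upstreams)  # assumes at least one upstream

    def try_next(failure=None):
        if not upstreams:
            return failure  # every replica failed; propagate the last error
        identifier, url, args, kwargs = upstreams.pop(0)
        d = getPage(url, *args, **kwargs)
        # the real version also hands back the HTTP factory; None stands in here
        d.addCallback(lambda page: (page, identifier, None))
        d.addErrback(try_next)
        return d

    return try_next()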
def do_missing_revs(self, request):
    def finish_missing_revs(results):
        def combine_results(acc, shard_result):
            # DeferredList packs success as (True, result)
            shard_result = shard_result[1]
            # getPageFromAny packs result as (result, identifier, factory)
            shard_result, shard_idx, factory = shard_result
            acc.update(cjson.decode(shard_result)['missing_revs'])
            return acc

        all_results = reduce(combine_results, results, dict())
        output = {'missing_revs': all_results}
        log.msg(cjson.encode(output) + '\n')
        request.write(cjson.encode(output) + '\n')
        request.finish()

    database, rest = request.path[1:].split('/', 1)
    shards = self.conf_data.shards(database)

    # sort the docs into shard buckets by hashing the keys
    numShards = len(shards)
    shardContent = [{} for x in shards]
    body = get_body(request, {})
    for doc_id in body:
        where = which_shard(lounge_hash(doc_id), numShards)
        shardContent[where][doc_id] = body[doc_id]

    deferred = defer.DeferredList(
        [getPageFromAny([
            (shard_idx,
             "/".join([shard_uri, rest]),
             [],
             {'method': 'POST',
              'postdata': cjson.encode(shardContent[shard_idx])})
            for shard_uri in self.conf_data.nodes(shard)])
         for shard_idx, shard in enumerate(shards)],
        fireOnOneErrback=1, fireOnOneCallback=0, consumeErrors=1)
    deferred.addCallback(finish_missing_revs)
    deferred.addErrback(make_errback(request))
    return server.NOT_DONE_YET
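# which_shard and lounge_hash come from the lounge package; the bucketing
# above only needs them to map a doc id to a stable shard index.  A minimal
# stand-in (an assumption for illustration -- the production hash may differ)
# using a crc32-style hash with modulus placement:
def lounge_hash_sketch(doc_id):
    # assumption: any stable 32-bit hash of the document id works here
    import binascii
    return binascii.crc32(doc_id) & 0xffffffff

def which_shard_sketch(hash_value, num_shards):
    # assumption: shard placement is a simple modulus over the hash
    return hash_value % num_shards

def demo_missing_revs_bucketing():
    # mirrors the bucketing loop in do_missing_revs with fabricated revs
    body = {'doc1': ['1-abc'], 'doc2': ['2-def']}
    num_shards = 4
    buckets = [{} for _ in xrange(num_shards)]
    for doc_id in body:
        where = which_shard_sketch(lounge_hash_sketch(doc_id), num_shards)
        buckets[where][doc_id] = body[doc_id]
    return buckets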
def get_db(self, request):
    """Get general information about a database."""
    # chop off the leading /
    db_name = request.uri.strip('/')

    # fold function to reduce the sharded results
    def fold_results_fun(acc, result):
        result, shard_idx = result  # packed by the izip in finish_request
        result, node_idx, factory = result  # packed by getPageFromAny
        result = cjson.decode(result)
        acc['doc_count'] += result['doc_count']
        acc['doc_del_count'] += result['doc_del_count']
        acc['update_seq'][str(shard_idx)] = {str(node_idx): result['update_seq']}
        acc['purge_seq'][str(shard_idx)] = {str(node_idx): result['purge_seq']}
        acc['compact_running'].append(result['compact_running'])
        acc['disk_size'] += result['disk_size']
        return acc

    # success callback
    def finish_request(results):
        # results looks like (True, result) since we get here only if all succeed;
        # reduce over these results with fold_results_fun to produce output
        output = reduce(fold_results_fun,
                        itertools.izip(
                            itertools.imap(lambda x: x[1], results),  # pull out result
                            itertools.count()),
                        {'db_name': db_name,
                         'doc_count': 0,
                         'doc_del_count': 0,
                         'update_seq': {},
                         'purge_seq': {},
                         'compact_running': [],
                         'disk_size': 0})
        # encode the sequence information
        output['update_seq'] = changes.encode_seq(output['update_seq'])
        output['purge_seq'] = changes.encode_seq(output['purge_seq'])
        request.write(cjson.encode(output) + '\n')
        request.finish()

    # error callback
    def handle_error(reason):
        reason = reason.value.subFailure  # unpack FirstError from DeferredList
        # Nest try because python 2.4 doesn't fully support try-except-finally
        try:
            try:
                reason.trap(error.Error)  # trap http error from subrequest
                request.setResponseCode(int(reason.value.status))
                request.write(reason.value.response)
            except:
                # if we get back some non-http response type error, we should
                # return 500
                request.setResponseCode(http.INTERNAL_SERVER_ERROR)
                reason.raiseException()
        finally:
            request.finish()

    # construct a DeferredList of the deferred sub-requests;
    # fetches shard results from any replica of each shard;
    # if any shard fails completely the whole thing fails fast
    nodes = self.conf_data.nodelist
    deferred = defer.DeferredList(
        # map over all the shards and get a deferred that handles fail-over
        map(lambda s, rl: getPageFromAny(
                # create the upstream descriptions by mapping over the replica list
                itertools.imap(
                    lambda r: (r,  # upstream identifier
                               "http://%s:%s/%s%d" % (  # url
                                   nodes[r][0], nodes[r][1], db_name, s),
                               [],  # factory args
                               {}),  # factory kwargs
                    rl)),
            xrange(len(self.conf_data.shardmap)),
            self.conf_data.shardmap),
        fireOnOneErrback=1,
        consumeErrors=1).addCallbacks(finish_request, handle_error)
    return server.NOT_DONE_YET
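# A small, runnable illustration (fabricated numbers) of what the fold above
# accumulates before changes.encode_seq flattens the per-shard sequence
# vectors into the opaque strings clients see in the combined /db response.
def demo_get_db_fold():
    shard_results = [  # (decoded per-shard /db response, shard_idx, node_idx)
        ({'doc_count': 10, 'doc_del_count': 1, 'update_seq': 42,
          'purge_seq': 0, 'compact_running': False, 'disk_size': 4096}, 0, 0),
        ({'doc_count': 7, 'doc_del_count': 0, 'update_seq': 17,
          'purge_seq': 0, 'compact_running': False, 'disk_size': 8192}, 1, 1),
    ]
    acc = {'db_name': 'example', 'doc_count': 0, 'doc_del_count': 0,
           'update_seq': {}, 'purge_seq': {}, 'compact_running': [],
           'disk_size': 0}
    for result, shard_idx, node_idx in shard_results:
        acc['doc_count'] += result['doc_count']
        acc['doc_del_count'] += result['doc_del_count']
        acc['update_seq'][str(shard_idx)] = {str(node_idx): result['update_seq']}
        acc['purge_seq'][str(shard_idx)] = {str(node_idx): result['purge_seq']}
        acc['compact_running'].append(result['compact_running'])
        acc['disk_size'] += result['disk_size']
    # acc['doc_count'] == 17, acc['disk_size'] == 12288, and
    # acc['update_seq'] == {'0': {'0': 42}, '1': {'1': 17}}
    return acc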
def get_db(self, request):
    """Get general information about a database."""
    # chop off the leading /
    db_name = request.uri.strip('/')

    # fold function to reduce the sharded results
    def fold_results_fun(acc, result):
        result, idx = result  # packed by the izip in finish_request
        result, rep_id, factory = result  # packed by getPageFromAny
        shard, rep_idx = rep_id
        shard_idx = self.conf_data.get_index_from_shard(shard)
        node_idx = self.conf_data.shardmap[shard_idx][rep_idx]
        result = cjson.decode(result)
        acc['doc_count'] += result['doc_count']
        acc['doc_del_count'] += result['doc_del_count']
        acc['update_seq'][str(shard_idx)] = {str(node_idx): result['update_seq']}
        acc['purge_seq'][str(shard_idx)] = {str(node_idx): result['purge_seq']}
        acc['compact_running'].append(result['compact_running'])
        acc['disk_size'] += result['disk_size']
        return acc

    # success callback
    def finish_request(results):
        # results looks like (True, result) since we get here only if all succeed;
        # reduce over these results with fold_results_fun to produce output
        output = reduce(fold_results_fun,
                        itertools.izip(
                            itertools.imap(lambda x: x[1], results),  # pull out result
                            itertools.count()),
                        {'db_name': db_name,
                         'doc_count': 0,
                         'doc_del_count': 0,
                         'update_seq': {},
                         'purge_seq': {},
                         'compact_running': [],
                         'disk_size': 0})
        # encode the sequence information
        output['update_seq'] = changes.encode_seq(output['update_seq'])
        output['purge_seq'] = changes.encode_seq(output['purge_seq'])
        request.write(cjson.encode(output) + '\n')
        request.finish()

    # construct a DeferredList of the deferred sub-requests;
    # fetches shard results from any replica of each shard;
    # if any shard fails completely the whole thing fails fast
    deferred = defer.DeferredList(
        # map over all the shards and get a deferred that handles fail-over
        map(lambda shard: getPageFromAny(
                # create the upstream descriptions
                itertools.imap(
                    lambda rep_idx, rep_uri: ((shard, rep_idx),  # upstream identifier
                                              rep_uri,  # url
                                              [],  # factory args
                                              {}),  # factory kwargs
                    *zip(*enumerate(self.conf_data.nodes(shard))))),
            self.conf_data.shards(db_name)),
        fireOnOneErrback=1,
        consumeErrors=1)
    deferred.addCallback(finish_request)
    deferred.addErrback(make_errback(request))
    return server.NOT_DONE_YET
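# The upstream construction above packs a dense idiom: zip(*enumerate(seq))
# turns ['a', 'b'] into ((0, 1), ('a', 'b')), so imap(f, *zip(*enumerate(seq)))
# calls f(rep_idx, rep_uri) pairwise across the replica list.  A standalone
# demonstration with fabricated replica URIs:
def demo_upstream_descriptions():
    import itertools
    shard = 'db_0'  # illustrative shard name
    replica_uris = ['http://node0:5984/db_0', 'http://node1:5984/db_0']
    upstreams = list(itertools.imap(
        lambda rep_idx, rep_uri: ((shard, rep_idx), rep_uri, [], {}),
        *zip(*enumerate(replica_uris))))
    # upstreams == [(('db_0', 0), 'http://node0:5984/db_0', [], {}),
    #               (('db_0', 1), 'http://node1:5984/db_0', [], {})]
    return upstreams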