Example #1
0
	def do_ensure_full_commit(self, request):
		"""
		Forward a full-commit POST to every shard of a database.

		The request path is split into the database name and the
		remainder of the path. For each shard that conf_data reports
		for the database, the remainder is POSTed (with the headers of
		the incoming request) through getPageFromAny, which is given
		one upstream description per node of that shard.

		When every shard has answered, the client gets a 201 response
		with the body {"ok":true}. The first shard failure is handed
		to the errback built by make_errback(request).

		Returns server.NOT_DONE_YET; the response is completed later
		from the deferred's callbacks.

		TODO: We should pass the instance_start_time back
		CouchDB uses this to ensure that the target database
		didn't crash between checkpoints during replication.

		In our case, we strip it from here and the /db info
		response. This causes replication to see undefined
		in both cases, which always matches.

		This is safe so long as the shard configuration does
		not change and we use delayed_commits=false or a
		battery-backed cache on every node.

		The more general solution is to request all available
		information by contacting all replicas of all shards
		and forcing replication to retry from the last checkpoint
		any time the instance_start_time vector does not match.

		However, we don't want to do this until we feel confident
		that we can store checkpoints in a redundant way. This means
		opening up the read repair can of worms or making smartproxy
		a real replication middle-man with its own database for
		storing and replicating cluster-wide checkpoint logs.
		"""
		# The body is fixed; Content-Length is derived from it so the two
		# can never drift apart if the body is edited.
		response_body = '{"ok":true}\n'

		def finish_request(results):
			# Only reached when every shard request succeeded
			# (fireOnOneErrback short-circuits on the first failure).
			request.setResponseCode(201)
			# header values are text on the wire, so send a string
			request.setHeader('Content-Length', str(len(response_body)))
			request.write(response_body)
			request.finish()

		database, rest = request.path[1:].split('/', 1)
		shards = self.conf_data.shards(database)

		shard_requests = []
		for shard_idx, shard in enumerate(shards):
			# one (identifier, url, factory args, factory kwargs) tuple
			# per node that holds this shard
			upstreams = [
				(shard_idx,
				'/'.join([shard_uri, rest]),
				[],
				{'method': 'POST',
				'headers': request.getAllHeaders()})
				for shard_uri in self.conf_data.nodes(shard)]
			shard_requests.append(getPageFromAny(upstreams))

		deferred = defer.DeferredList(
			shard_requests,
			fireOnOneErrback=1,
			fireOnOneCallback=0,
			consumeErrors=1)
		deferred.addCallback(finish_request)
		deferred.addErrback(make_errback(request))

		return server.NOT_DONE_YET
Example #2
0
	def do_missing_revs(self, request):
		"""
		Split a missing-revs query across the shards and merge the answers.

		The request body (read with get_body, defaulting to {}) is
		iterated by document id; each id is assigned to a shard with
		which_shard(lounge_hash(doc_id), number_of_shards). Every shard
		is then POSTed the JSON encoding of its own bucket -- including
		shards whose bucket is empty -- through getPageFromAny, which is
		given one upstream description per node of that shard.

		When all shards have answered, the 'missing_revs' members of the
		shard responses are merged into one dict and written to the
		client as {"missing_revs": ...} followed by a newline. The first
		shard failure is handed to the errback built by
		make_errback(request).

		Returns server.NOT_DONE_YET; the response is completed later
		from the deferred's callbacks.
		"""
		def finish_missing_revs(results):
			merged = {}
			# DeferredList packs each success as (True, result)
			for success, shard_result in results:
				# getPageFromAny packs result as (result, identifier, factory)
				page, shard_idx, factory = shard_result
				merged.update(cjson.decode(page)['missing_revs'])
			# encode once and reuse the text for both the log and the client
			encoded = cjson.encode({'missing_revs': merged}) + '\n'
			log.msg(encoded)
			request.write(encoded)
			request.finish()

		database, rest = request.path[1:].split('/', 1)
		shards = self.conf_data.shards(database)

		# sort the docs into shard buckets by hashing the keys
		shard_count = len(shards)
		shard_content = [{} for _ in shards]
		body = get_body(request, {})
		for doc_id in body:
			where = which_shard(lounge_hash(doc_id), shard_count)
			shard_content[where][doc_id] = body[doc_id]

		shard_requests = []
		for shard_idx, shard in enumerate(shards):
			postdata = cjson.encode(shard_content[shard_idx])
			# one (identifier, url, factory args, factory kwargs) tuple
			# per node that holds this shard
			upstreams = [
				(shard_idx,
				"/".join([shard_uri, rest]),
				[],
				{'method': 'POST',
				'postdata': postdata})
				for shard_uri in self.conf_data.nodes(shard)]
			shard_requests.append(getPageFromAny(upstreams))

		deferred = defer.DeferredList(
			shard_requests,
			fireOnOneErrback=1,
			fireOnOneCallback=0,
			consumeErrors=1)
		deferred.addCallback(finish_missing_revs)
		deferred.addErrback(make_errback(request))

		return server.NOT_DONE_YET
Example #3
0
	def get_db(self, request):
		"""Get general information about a database.

		One request per entry of conf_data.shardmap is issued through
		getPageFromAny; each is given an upstream description for every
		replica listed for that shard, with the URL built from
		conf_data.nodelist and the database name suffixed by the shard
		index.

		On success the shard responses are folded into a single JSON
		document: doc_count, doc_del_count and disk_size are summed,
		compact_running is collected into a list, and update_seq /
		purge_seq are recorded per shard (keyed by shard index, then by
		the identifier of the replica that answered) and passed through
		changes.encode_seq.

		On failure the client receives the status and body of the
		failed HTTP sub-request, or a 500 for any other kind of error.

		Returns server.NOT_DONE_YET; the response is completed later
		from the deferred's callbacks.
		"""

		# chop off the leading /
		db_name = request.uri.strip('/')
		nodes = self.conf_data.nodelist

		# success callback
		def finish_request(results):
			output = {'db_name': db_name,
				'doc_count': 0,
				'doc_del_count': 0,
				'update_seq': {},
				'purge_seq': {},
				'compact_running': [],
				'disk_size': 0}
			# results are in shard order and each looks like (True, result),
			# since we get here only if all shards succeeded
			for shard_idx, (success, shard_result) in enumerate(results):
				# getPageFromAny packs result as (result, identifier, factory)
				page, node_idx, factory = shard_result
				info = cjson.decode(page)
				output['doc_count'] += info['doc_count']
				output['doc_del_count'] += info['doc_del_count']
				output['update_seq'][str(shard_idx)] = {str(node_idx): info['update_seq']}
				output['purge_seq'][str(shard_idx)] = {str(node_idx): info['purge_seq']}
				output['compact_running'].append(info['compact_running'])
				output['disk_size'] += info['disk_size']
			# encode the sequence information
			output['update_seq'] = changes.encode_seq(output['update_seq'])
			output['purge_seq'] = changes.encode_seq(output['purge_seq'])
			request.write(cjson.encode(output) + '\n')
			request.finish()

		# error callback
		def handle_error(reason):
			# A failed sub-request arrives wrapped in FirstError by the
			# DeferredList; an exception raised by finish_request arrives
			# unwrapped. Only unwrap when there is something to unwrap.
			if reason.check(defer.FirstError):
				reason = reason.value.subFailure
			# Nest try because python 2.4 doesn't fully support try-except-finally
			try:
				try:
					reason.trap(error.Error) # trap http error from subrequest
					request.setResponseCode(int(reason.value.status))
					request.write(reason.value.response)
				except:
					# if we get back some non-http response type error, we should
					# return 500; the original error is re-raised so that it is
					# still reported rather than swallowed
					request.setResponseCode(http.INTERNAL_SERVER_ERROR)
					reason.raiseException()
			finally:
				request.finish()

		def upstreams_for(shard_idx, replicas):
			# Built in its own function so that the generator is bound to
			# this shard's index even if it is consumed lazily later on.
			return ((r,    # upstream identifier
				"http://%s:%s/%s%d" # url
				% (nodes[r][0], nodes[r][1], db_name, shard_idx),
				[],   # factory args
				{})   # factory kwargs
				for r in replicas)

		# construct a DeferredList of the deferred sub-requests
		# fetches shard results from any replica of each shard
		# if any shard fails completely the whole thing fails fast
		deferred = defer.DeferredList(
			[getPageFromAny(upstreams_for(shard_idx, replicas))
				for shard_idx, replicas in enumerate(self.conf_data.shardmap)],
			fireOnOneErrback=1,
			consumeErrors=1)
		# Chain the errback after the callback (instead of addCallbacks) so
		# that an exception raised while building the response also reaches
		# handle_error; otherwise the client request would never be finished.
		deferred.addCallback(finish_request)
		deferred.addErrback(handle_error)

		return server.NOT_DONE_YET
Example #4
0
	def get_db(self, request):
		"""Get general information about a database.

		One request per shard returned by conf_data.shards(db_name) is
		issued through getPageFromAny; each is given an upstream
		description for every URI returned by conf_data.nodes(shard),
		identified by the pair (shard, replica position).

		On success the shard responses are folded into a single JSON
		document: doc_count, doc_del_count and disk_size are summed,
		compact_running is collected into a list, and update_seq /
		purge_seq are recorded per shard (keyed by shard index, then by
		the node index looked up in conf_data.shardmap) and passed
		through changes.encode_seq. The first shard failure is handed to
		the errback built by make_errback(request).

		Returns server.NOT_DONE_YET; the response is completed later
		from the deferred's callbacks.
		"""

		# chop off the leading /
		db_name = request.uri.strip('/')

		# success callback
		def finish_request(results):
			output = {'db_name': db_name,
				'doc_count': 0,
				'doc_del_count': 0,
				'update_seq': {},
				'purge_seq': {},
				'compact_running': [],
				'disk_size': 0}
			# each entry looks like (True, result) since we get here only
			# if all shards succeeded
			for success, shard_result in results:
				# getPageFromAny packs result as (result, identifier, factory)
				page, rep_id, factory = shard_result
				shard, rep_idx = rep_id
				shard_idx = self.conf_data.get_index_from_shard(shard)
				node_idx = self.conf_data.shardmap[shard_idx][rep_idx]
				info = cjson.decode(page)
				output['doc_count'] += info['doc_count']
				output['doc_del_count'] += info['doc_del_count']
				output['update_seq'][str(shard_idx)] = {str(node_idx): info['update_seq']}
				output['purge_seq'][str(shard_idx)] = {str(node_idx): info['purge_seq']}
				output['compact_running'].append(info['compact_running'])
				output['disk_size'] += info['disk_size']
			# encode the sequence information
			output['update_seq'] = changes.encode_seq(output['update_seq'])
			output['purge_seq'] = changes.encode_seq(output['purge_seq'])
			request.write(cjson.encode(output) + '\n')
			request.finish()

		def upstreams_for(shard):
			# Built in its own function so that the generator is bound to
			# this shard even if it is consumed lazily later on.
			# A shard with no replicas yields an empty sequence here; how
			# getPageFromAny treats that is not visible in this method.
			return (((shard, rep_idx),    # upstream identifier
				rep_uri,              # url
				[],                   # factory args
				{})                   # factory kwargs
				for rep_idx, rep_uri in enumerate(self.conf_data.nodes(shard)))

		# construct a DeferredList of the deferred sub-requests
		# fetches shard results from any replica of each shard
		# if any shard fails completely the whole thing fails fast
		deferred = defer.DeferredList(
			[getPageFromAny(upstreams_for(shard))
				for shard in self.conf_data.shards(db_name)],
			fireOnOneErrback=1,
			consumeErrors=1)
		deferred.addCallback(finish_request)
		deferred.addErrback(make_errback(request))

		return server.NOT_DONE_YET