def read_with_timeout(self, size): timeout = Timeout(self.timeout) try: chunk = os.read(self.obj_data, size) except Timeout as t: if t is timeout: self.close() raise t except Exception as e: self.close() raise e finally: timeout.cancel() return chunk
def _call(self, method_name, timeout_sec, **kwargs): LOG.debug("Calling %s" % method_name) timeout = Timeout(timeout_sec) try: result = rpc.call(self.context, self._get_routing_key(), {'method': method_name, 'args': kwargs}) LOG.debug("Result is %s" % result) return result except Exception as e: LOG.error(e) raise exception.GuestError(original_message=str(e)) except Timeout as t: if t is not timeout: raise else: raise exception.GuestTimeout() finally: timeout.cancel()
def poll_response(self, response): """ The API might return a job nr in the response in case of a async response: https://github.com/fog/fog/issues/575 """ status = response.status timeout = Timeout(CONF[CFG_GROUP].job_timeout) try: while status == 307: time.sleep(1) url = response.headers.get('Location') LOG.debug("Polling %s" % url) polled_response = self.get(url) status = response.status except Timeout as t: if t == timeout: raise DynTimeoutError('Timeout reached when pulling job.') finally: timeout.cancel() return polled_response
def _make_req(node, part, method, path, headers, stype, conn_timeout=5, response_timeout=15, send_timeout=15, contents=None, content_length=None, chunk_size=65535): """ Make request to backend storage node. (i.e. 'Account', 'Container', 'Object') :param node: a node dict from a ring :param part: an integer, the partition number :param method: a string, the HTTP method (e.g. 'PUT', 'DELETE', etc) :param path: a string, the request path :param headers: a dict, header name => value :param stype: a string, describing the type of service :param conn_timeout: timeout while waiting for connection; default is 5 seconds :param response_timeout: timeout while waiting for response; default is 15 seconds :param send_timeout: timeout for sending request body; default is 15 seconds :param contents: an iterable or string to read object data from :param content_length: value to send as content-length header :param chunk_size: if defined, chunk size of data to send :returns: an HTTPResponse object :raises DirectClientException: if the response status is not 2xx :raises eventlet.Timeout: if either conn_timeout or response_timeout is exceeded """ if contents is not None: if content_length is not None: headers['Content-Length'] = str(content_length) else: for n, v in headers.items(): if n.lower() == 'content-length': content_length = int(v) if not contents: headers['Content-Length'] = '0' if isinstance(contents, six.string_types): contents = [contents] if content_length is None: headers['Transfer-Encoding'] = 'chunked' ip, port = get_ip_port(node, headers) headers.setdefault('X-Backend-Allow-Reserved-Names', 'true') with Timeout(conn_timeout): conn = http_connect(ip, port, node['device'], part, method, path, headers=headers) if contents is not None: contents_f = FileLikeIter(contents) with Timeout(send_timeout): if content_length is None: chunk = contents_f.read(chunk_size) while chunk: conn.send(b'%x\r\n%s\r\n' % (len(chunk), chunk)) chunk = contents_f.read(chunk_size) conn.send(b'0\r\n\r\n') else: left = content_length while left > 0: size = chunk_size if size > left: size = left chunk = contents_f.read(size) if not chunk: break conn.send(chunk) left -= len(chunk) with Timeout(response_timeout): resp = conn.getresponse() resp.read() if not is_success(resp.status): raise DirectClientException(stype, method, node, part, path, resp) return resp
def account_update(self, req, account, container, broker): """ Update the account server(s) with latest container info. :param req: swob.Request object :param account: account name :param container: container name :param broker: container DB broker object :returns: if all the account requests return a 404 error code, HTTPNotFound response object, if the account cannot be updated due to a malformed header, an HTTPBadRequest response object, otherwise None. """ account_hosts = [ h.strip() for h in req.headers.get('X-Account-Host', '').split(',') ] account_devices = [ d.strip() for d in req.headers.get('X-Account-Device', '').split(',') ] account_partition = req.headers.get('X-Account-Partition', '') if len(account_hosts) != len(account_devices): # This shouldn't happen unless there's a bug in the proxy, # but if there is, we want to know about it. self.logger.error( _('ERROR Account update failed: different ' 'numbers of hosts and devices in request: ' '"%(hosts)s" vs "%(devices)s"') % { 'hosts': req.headers.get('X-Account-Host', ''), 'devices': req.headers.get('X-Account-Device', '') }) return HTTPBadRequest(req=req) if account_partition: # zip is lazy on py3, but we need a list, so force evaluation. # On py2 it's an extra list copy, but the list is so small # (one element per replica in account ring, usually 3) that it # doesn't matter. updates = list(zip(account_hosts, account_devices)) else: updates = [] account_404s = 0 for account_host, account_device in updates: account_ip, account_port = account_host.rsplit(':', 1) new_path = '/' + '/'.join([account, container]) info = broker.get_info() account_headers = HeaderKeyDict({ 'x-put-timestamp': info['put_timestamp'], 'x-delete-timestamp': info['delete_timestamp'], 'x-object-count': info['object_count'], 'x-bytes-used': info['bytes_used'], 'x-trans-id': req.headers.get('x-trans-id', '-'), 'X-Backend-Storage-Policy-Index': info['storage_policy_index'], 'user-agent': 'container-server %s' % os.getpid(), 'referer': req.as_referer() }) if req.headers.get('x-account-override-deleted', 'no').lower() == \ 'yes': account_headers['x-account-override-deleted'] = 'yes' try: with ConnectionTimeout(self.conn_timeout): conn = http_connect(account_ip, account_port, account_device, account_partition, 'PUT', new_path, account_headers) with Timeout(self.node_timeout): account_response = conn.getresponse() account_response.read() if account_response.status == HTTP_NOT_FOUND: account_404s += 1 elif not is_success(account_response.status): self.logger.error( _('ERROR Account update failed ' 'with %(ip)s:%(port)s/%(device)s (will retry ' 'later): Response %(status)s %(reason)s'), { 'ip': account_ip, 'port': account_port, 'device': account_device, 'status': account_response.status, 'reason': account_response.reason }) except (Exception, Timeout): self.logger.exception( _('ERROR account update failed with ' '%(ip)s:%(port)s/%(device)s (will retry later)'), { 'ip': account_ip, 'port': account_port, 'device': account_device }) if updates and account_404s == len(updates): return HTTPNotFound(req=req) else: return None
def wait(self, timeout=None): with Timeout(timeout): return self.ready.wait()
def _get_suffixes_to_sync(self, job, node): """ For SYNC jobs we need to make a remote REPLICATE request to get the remote node's current suffix's hashes and then compare to our local suffix's hashes to decide which suffixes (if any) are out of sync. :param: the job dict, with the keys defined in ``_get_part_jobs`` :param node: the remote node dict :returns: a (possibly empty) list of strings, the suffixes to be synced with the remote node. """ # get hashes from the remote node remote_suffixes = None try: with Timeout(self.http_timeout): resp = http_connect(node['replication_ip'], node['replication_port'], node['device'], job['partition'], 'REPLICATE', '', headers=self.headers).getresponse() if resp.status == HTTP_INSUFFICIENT_STORAGE: self.logger.error( _('%s responded as unmounted'), _full_path(node, job['partition'], '', job['policy'])) elif resp.status != HTTP_OK: full_path = _full_path(node, job['partition'], '', job['policy']) self.logger.error( _("Invalid response %(resp)s from %(full_path)s"), { 'resp': resp.status, 'full_path': full_path }) else: remote_suffixes = pickle.loads(resp.read()) except (Exception, Timeout): # all exceptions are logged here so that our caller can # safely catch our exception and continue to the next node # without logging self.logger.exception( 'Unable to get remote suffix hashes ' 'from %r' % _full_path(node, job['partition'], '', job['policy'])) if remote_suffixes is None: raise SuffixSyncError('Unable to get remote suffix hashes') suffixes = self.get_suffix_delta(job['hashes'], job['frag_index'], remote_suffixes, node['index']) # now recalculate local hashes for suffixes that don't # match so we're comparing the latest local_suff = self._get_hashes(job['local_dev']['device'], job['partition'], job['policy'], recalculate=suffixes) suffixes = self.get_suffix_delta(local_suff, job['frag_index'], remote_suffixes, node['index']) self.suffix_count += len(suffixes) return suffixes
def run_wsgi(conf_path, app_section, *args, **kwargs): """ Runs the server according to some strategy. The default strategy runs a specified number of workers in pre-fork model. The object-server (only) may use a servers-per-port strategy if its config has a servers_per_port setting with a value greater than zero. :param conf_path: Path to paste.deploy style configuration file/directory :param app_section: App name from conf file to load config from :returns: 0 if successful, nonzero otherwise """ # Load configuration, Set logger and Load request processor try: (conf, logger, log_name) = \ _initrp(conf_path, app_section, *args, **kwargs) except ConfigFileError as e: print(e) return 1 # optional nice/ionice priority scheduling utils.modify_priority(conf, logger) servers_per_port = int(conf.get('servers_per_port', '0') or 0) # NOTE: for now servers_per_port is object-server-only; future work could # be done to test and allow it to be used for account and container # servers, but that has not been done yet. if servers_per_port and app_section == 'object-server': strategy = ServersPerPortStrategy(conf, logger, servers_per_port=servers_per_port) else: strategy = WorkersStrategy(conf, logger) error_msg = strategy.bind_ports() if error_msg: logger.error(error_msg) print(error_msg) return 1 # Ensure the configuration and application can be loaded before proceeding. global_conf = {'log_name': log_name} if 'global_conf_callback' in kwargs: kwargs['global_conf_callback'](conf, global_conf) loadapp(conf_path, global_conf=global_conf) # set utils.FALLOCATE_RESERVE if desired utils.FALLOCATE_RESERVE, utils.FALLOCATE_IS_PERCENT = \ utils.config_fallocate_value(conf.get('fallocate_reserve', '1%')) # redirect errors to logger and close stdio capture_stdio(logger) no_fork_sock = strategy.no_fork_sock() if no_fork_sock: run_server(conf, logger, no_fork_sock, global_conf=global_conf) return 0 def kill_children(*args): """Kills the entire process group.""" logger.error('SIGTERM received') signal.signal(signal.SIGTERM, signal.SIG_IGN) running[0] = False os.killpg(0, signal.SIGTERM) def hup(*args): """Shuts down the server, but allows running requests to complete""" logger.error('SIGHUP received') signal.signal(signal.SIGHUP, signal.SIG_IGN) running[0] = False running = [True] signal.signal(signal.SIGTERM, kill_children) signal.signal(signal.SIGHUP, hup) while running[0]: for sock, sock_info in strategy.new_worker_socks(): pid = os.fork() if pid == 0: signal.signal(signal.SIGHUP, signal.SIG_DFL) signal.signal(signal.SIGTERM, signal.SIG_DFL) strategy.post_fork_hook() run_server(conf, logger, sock) strategy.log_sock_exit(sock, sock_info) return 0 else: strategy.register_worker_start(sock, sock_info, pid) # The strategy may need to pay attention to something in addition to # child process exits (like new ports showing up in a ring). # # NOTE: a timeout value of None will just instantiate the Timeout # object and not actually schedule it, which is equivalent to no # timeout for the green_os.wait(). loop_timeout = strategy.loop_timeout() with Timeout(loop_timeout, exception=False): try: try: pid, status = green_os.wait() if os.WIFEXITED(status) or os.WIFSIGNALED(status): strategy.register_worker_exit(pid) except OSError as err: if err.errno not in (errno.EINTR, errno.ECHILD): raise if err.errno == errno.ECHILD: # If there are no children at all (ECHILD), then # there's nothing to actually wait on. We sleep # for a little bit to avoid a tight CPU spin # and still are able to catch any KeyboardInterrupt # events that happen. The value of 0.01 matches the # value in eventlet's waitpid(). sleep(0.01) except KeyboardInterrupt: logger.notice('User quit') running[0] = False break strategy.shutdown_sockets() logger.notice('Exited') return 0
self.app.logger.exception( _('ERROR Exception causing client disconnect')) return HTTPClientDisconnect(request=req) if req.content_length and bytes_transferred < req.content_length: req.client_disconnect = True self.app.logger.warn( _('Client disconnected without sending enough data')) self.app.logger.increment('client_disconnects') return HTTPClientDisconnect(request=req) statuses = [] reasons = [] bodies = [] etags = set() for conn in conns: try: with Timeout(self.app.node_timeout): response = conn.getresponse() statuses.append(response.status) reasons.append(response.reason) bodies.append(response.read()) if response.status >= HTTP_INTERNAL_SERVER_ERROR: self.error_occurred(conn.node, _('ERROR %(status)d %(body)s From Object Server ' \ 're: %(path)s') % {'status': response.status, 'body': bodies[-1][:1024], 'path': req.path}) elif is_success(response.status): etags.add(response.getheader('etag').strip('"')) except (Exception, Timeout): self.exception_occurred( conn.node, _('Object'), _('Trying to get final status of PUT to %s') % req.path)
def __str__(self): return '%s: %s' % (Timeout.__str__(self), self.msg)
def direct_get_object(node, part, account, container, obj, conn_timeout=5, response_timeout=15, resp_chunk_size=None, headers={}): """ Get object directly from the object server. :param node: node dictionary from the ring :param part: partition the container is on :param account: account name :param container: container name :param obj: object name :param conn_timeout: timeout in seconds for establishing the connection :param response_timeout: timeout in seconds for getting the response :param resp_chunk_size: if defined, chunk size of data to read. :param headers: dict to be passed into HTTPConnection headers :returns: a tuple of (response headers, the object's contents) The response headers will be a dict and all header names will be lowercase. """ path = '/%s/%s/%s' % (account, container, obj) with Timeout(conn_timeout): conn = http_connect(node['ip'], node['port'], node['device'], part, 'GET', path, headers=headers) with Timeout(response_timeout): resp = conn.getresponse() if resp.status < 200 or resp.status >= 300: resp.read() raise ClientException( 'Object server %s:%s direct GET %s gave status %s' % (node['ip'], node['port'], repr('/%s/%s%s' % (node['device'], part, path)), resp.status), http_host=node['ip'], http_port=node['port'], http_device=node['device'], http_status=resp.status, http_reason=resp.reason) if resp_chunk_size: def _object_body(): buf = resp.read(resp_chunk_size) while buf: yield buf buf = resp.read(resp_chunk_size) object_body = _object_body() else: object_body = resp.read() resp_headers = {} for header, value in resp.getheaders(): resp_headers[header.lower()] = value return resp_headers, object_body
def direct_put_object(node, part, account, container, name, contents, content_length=None, etag=None, content_type=None, headers=None, conn_timeout=5, response_timeout=15, resp_chunk_size=None): """ Put object directly from the object server. :param node: node dictionary from the ring :param part: partition the container is on :param account: account name :param container: container name :param name: object name :param contents: a string to read object data from :param content_length: value to send as content-length header :param etag: etag of contents :param content_type: value to send as content-type header :param headers: additional headers to include in the request :param conn_timeout: timeout in seconds for establishing the connection :param response_timeout: timeout in seconds for getting the response :param chunk_size: if defined, chunk size of data to send. :returns: etag from the server response """ # TODO: Add chunked puts path = '/%s/%s/%s' % (account, container, name) if headers is None: headers = {} if etag: headers['ETag'] = etag.strip('"') if content_length is not None: headers['Content-Length'] = str(content_length) if content_type is not None: headers['Content-Type'] = content_type else: headers['Content-Type'] = 'application/octet-stream' if not contents: headers['Content-Length'] = '0' headers['X-Timestamp'] = normalize_timestamp(time()) with Timeout(conn_timeout): conn = http_connect(node['ip'], node['port'], node['device'], part, 'PUT', path, headers=headers) conn.send(contents) with Timeout(response_timeout): resp = conn.getresponse() resp.read() if resp.status < 200 or resp.status >= 300: raise ClientException( 'Object server %s:%s direct PUT %s gave status %s' % (node['ip'], node['port'], repr('/%s/%s%s' % (node['device'], part, path)), resp.status), http_host=node['ip'], http_port=node['port'], http_device=node['device'], http_status=resp.status, http_reason=resp.reason) return resp.getheader('etag').strip('"')
def update(self, job): """ High-level method that replicates a single partition. :param job: a dict containing info about the partition to be replicated """ self.replication_count += 1 self.logger.increment('partition.update.count.%s' % (job['device'], )) headers = dict(self.default_headers) headers['X-Backend-Storage-Policy-Index'] = int(job['policy']) target_devs_info = set() failure_devs_info = set() begin = time.time() df_mgr = self._df_router[job['policy']] try: hashed, local_hash = tpool_reraise(df_mgr._get_hashes, job['device'], job['partition'], job['policy'], do_listdir=_do_listdir( int(job['partition']), self.replication_cycle)) self.suffix_hash += hashed self.logger.update_stats('suffix.hashes', hashed) attempts_left = len(job['nodes']) synced_remote_regions = set() random.shuffle(job['nodes']) nodes = itertools.chain( job['nodes'], job['policy'].object_ring.get_more_nodes( int(job['partition']))) while attempts_left > 0: # If this throws StopIteration it will be caught way below node = next(nodes) target_devs_info.add((node['replication_ip'], node['device'])) attempts_left -= 1 # if we have already synced to this remote region, # don't sync again on this replication pass if node['region'] in synced_remote_regions: continue try: with Timeout(self.http_timeout): resp = http_connect(node['replication_ip'], node['replication_port'], node['device'], job['partition'], 'REPLICATE', '', headers=headers).getresponse() if resp.status == HTTP_INSUFFICIENT_STORAGE: self.logger.error( _('%(replication_ip)s/%(device)s ' 'responded as unmounted'), node) attempts_left += 1 failure_devs_info.add( (node['replication_ip'], node['device'])) continue if resp.status != HTTP_OK: self.logger.error( _("Invalid response %(resp)s " "from %(ip)s"), { 'resp': resp.status, 'ip': node['replication_ip'] }) failure_devs_info.add( (node['replication_ip'], node['device'])) continue remote_hash = pickle.loads(resp.read()) del resp suffixes = [ suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1) ] if not suffixes: self.stats['hashmatch'] += 1 continue hashed, recalc_hash = tpool_reraise(df_mgr._get_hashes, job['device'], job['partition'], job['policy'], recalculate=suffixes) self.logger.update_stats('suffix.hashes', hashed) local_hash = recalc_hash suffixes = [ suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1) ] self.stats['rsync'] += 1 success, _junk = self.sync(node, job, suffixes) with Timeout(self.http_timeout): conn = http_connect(node['replication_ip'], node['replication_port'], node['device'], job['partition'], 'REPLICATE', '/' + '-'.join(suffixes), headers=headers) conn.getresponse().read() if not success: failure_devs_info.add( (node['replication_ip'], node['device'])) # add only remote region when replicate succeeded if success and node['region'] != job['region']: synced_remote_regions.add(node['region']) self.suffix_sync += len(suffixes) self.logger.update_stats('suffix.syncs', len(suffixes)) except (Exception, Timeout): failure_devs_info.add( (node['replication_ip'], node['device'])) self.logger.exception( _("Error syncing with node: %s") % node) self.suffix_count += len(local_hash) except (Exception, Timeout): failure_devs_info.update(target_devs_info) self._add_failure_stats(failure_devs_info) self.logger.exception(_("Error syncing partition")) finally: self.stats['success'] += len(target_devs_info - failure_devs_info) self.partition_times.append(time.time() - begin) self.logger.timing_since('partition.update.timing', begin)
def replicate(self, override_devices=None, override_partitions=None, override_policies=None): """Run a replication pass""" self.start = time.time() self.suffix_count = 0 self.suffix_sync = 0 self.suffix_hash = 0 self.replication_count = 0 self.last_replication_count = -1 self.replication_cycle = (self.replication_cycle + 1) % 10 self.partition_times = [] self.my_replication_ips = self._get_my_replication_ips() self.all_devs_info = set() self.handoffs_remaining = 0 stats = eventlet.spawn(self.heartbeat) lockup_detector = eventlet.spawn(self.detect_lockups) eventlet.sleep() # Give spawns a cycle current_nodes = None try: self.run_pool = GreenPool(size=self.concurrency) jobs = self.collect_jobs(override_devices=override_devices, override_partitions=override_partitions, override_policies=override_policies) for job in jobs: current_nodes = job['nodes'] if override_devices and job['device'] not in override_devices: continue if override_partitions and \ job['partition'] not in override_partitions: continue dev_path = join(self.devices_dir, job['device']) if self.mount_check and not ismount(dev_path): self._add_failure_stats([(failure_dev['replication_ip'], failure_dev['device']) for failure_dev in job['nodes']]) self.logger.warning(_('%s is not mounted'), job['device']) continue if self.handoffs_first and not job['delete']: # in handoffs first mode, we won't process primary # partitions until rebalance was successful! if self.handoffs_remaining: self.logger.warning( _("Handoffs first mode still has handoffs " "remaining. Aborting current " "replication pass.")) break if not self.check_ring(job['policy'].object_ring): self.logger.info( _("Ring change detected. Aborting " "current replication pass.")) return try: if isfile(job['path']): # Clean up any (probably zero-byte) files where a # partition should be. self.logger.warning( 'Removing partition directory ' 'which was a file: %s', job['path']) os.remove(job['path']) continue except OSError: continue if job['delete']: self.run_pool.spawn(self.update_deleted, job) else: self.run_pool.spawn(self.update, job) current_nodes = None with Timeout(self.lockup_timeout): self.run_pool.waitall() except (Exception, Timeout): if current_nodes: self._add_failure_stats([(failure_dev['replication_ip'], failure_dev['device']) for failure_dev in current_nodes]) else: self._add_failure_stats(self.all_devs_info) self.logger.exception(_("Exception in top-level replication loop")) self.kill_coros() finally: stats.kill() lockup_detector.kill() self.stats_line() self.stats['attempted'] = self.replication_count
def update_deleted(self, job): """ High-level method that replicates a single partition that doesn't belong on this node. :param job: a dict containing info about the partition to be replicated """ def tpool_get_suffixes(path): return [ suff for suff in os.listdir(path) if len(suff) == 3 and isdir(join(path, suff)) ] self.replication_count += 1 self.logger.increment('partition.delete.count.%s' % (job['device'], )) headers = dict(self.default_headers) headers['X-Backend-Storage-Policy-Index'] = int(job['policy']) failure_devs_info = set() begin = time.time() handoff_partition_deleted = False try: responses = [] suffixes = tpool.execute(tpool_get_suffixes, job['path']) synced_remote_regions = {} delete_objs = None if suffixes: for node in job['nodes']: self.stats['rsync'] += 1 kwargs = {} if node['region'] in synced_remote_regions and \ self.conf.get('sync_method', 'rsync') == 'ssync': kwargs['remote_check_objs'] = \ synced_remote_regions[node['region']] # candidates is a dict(hash=>timestamp) of objects # for deletion success, candidates = self.sync(node, job, suffixes, **kwargs) if success: with Timeout(self.http_timeout): conn = http_connect(node['replication_ip'], node['replication_port'], node['device'], job['partition'], 'REPLICATE', '/' + '-'.join(suffixes), headers=headers) conn.getresponse().read() if node['region'] != job['region']: synced_remote_regions[node['region']] = viewkeys( candidates) else: failure_devs_info.add( (node['replication_ip'], node['device'])) responses.append(success) for cand_objs in synced_remote_regions.values(): if delete_objs is None: delete_objs = cand_objs else: delete_objs = delete_objs & cand_objs if self.handoff_delete: # delete handoff if we have had handoff_delete successes delete_handoff = len([resp for resp in responses if resp]) >= \ self.handoff_delete else: # delete handoff if all syncs were successful delete_handoff = len(responses) == len(job['nodes']) and \ all(responses) if delete_handoff: self.stats['remove'] += 1 if (self.conf.get('sync_method', 'rsync') == 'ssync' and delete_objs is not None): self.logger.info(_("Removing %s objects"), len(delete_objs)) _junk, error_paths = self.delete_handoff_objs( job, delete_objs) # if replication works for a hand-off device and it failed, # the remote devices which are target of the replication # from the hand-off device will be marked. Because cleanup # after replication failed means replicator needs to # replicate again with the same info. if error_paths: failure_devs_info.update([ (failure_dev['replication_ip'], failure_dev['device']) for failure_dev in job['nodes'] ]) else: self.delete_partition(job['path']) handoff_partition_deleted = True elif not suffixes: self.delete_partition(job['path']) handoff_partition_deleted = True except (Exception, Timeout): self.logger.exception(_("Error syncing handoff partition")) self._add_failure_stats(failure_devs_info) finally: target_devs_info = set([(target_dev['replication_ip'], target_dev['device']) for target_dev in job['nodes']]) self.stats['success'] += len(target_devs_info - failure_devs_info) if not handoff_partition_deleted: self.handoffs_remaining += 1 self.partition_times.append(time.time() - begin) self.logger.timing_since('partition.delete.timing', begin)
def update_deleted(self, job): """ High-level method that replicates a single partition that doesn't belong on this node. :param job: a dict containing info about the partition to be replicated """ def tpool_get_suffixes(path): return [ suff for suff in os.listdir(path) if len(suff) == 3 and isdir(join(path, suff)) ] self.replication_count += 1 self.logger.increment('partition.delete.count.%s' % (job['device'], )) self.headers['X-Backend-Storage-Policy-Index'] = int(job['policy']) begin = time.time() try: responses = [] suffixes = tpool.execute(tpool_get_suffixes, job['path']) synced_remote_regions = {} delete_objs = None if suffixes: for node in job['nodes']: kwargs = {} if node['region'] in synced_remote_regions and \ self.conf.get('sync_method', 'rsync') == 'ssync': kwargs['remote_check_objs'] = \ synced_remote_regions[node['region']] # candidates is a dict(hash=>timestamp) of objects # for deletion success, candidates = self.sync(node, job, suffixes, **kwargs) if success: with Timeout(self.http_timeout): conn = http_connect(node['replication_ip'], node['replication_port'], node['device'], job['partition'], 'REPLICATE', '/' + '-'.join(suffixes), headers=self.headers) conn.getresponse().read() if node['region'] != job['region']: synced_remote_regions[node['region']] = \ candidates.keys() responses.append(success) for region, cand_objs in synced_remote_regions.iteritems(): if delete_objs is None: delete_objs = set(cand_objs) else: delete_objs = delete_objs.intersection(cand_objs) if self.handoff_delete: # delete handoff if we have had handoff_delete successes delete_handoff = len([resp for resp in responses if resp]) >= \ self.handoff_delete else: # delete handoff if all syncs were successful delete_handoff = len(responses) == len(job['nodes']) and \ all(responses) if delete_handoff: if (self.conf.get('sync_method', 'rsync') == 'ssync' and delete_objs is not None): self.logger.info(_("Removing %s objects"), len(delete_objs)) self.delete_handoff_objs(job, delete_objs) else: self.delete_partition(job['path']) elif not suffixes: self.delete_partition(job['path']) except (Exception, Timeout): self.logger.exception(_("Error syncing handoff partition")) finally: self.partition_times.append(time.time() - begin) self.logger.timing_since('partition.delete.timing', begin)
def account_update(self, req, account, container, broker): """ Update the account server(s) with latest container info. :param req: swob.Request object :param account: account name :param container: container name :param broker: container DB broker object :returns: if all the account requests return a 404 error code, HTTPNotFound response object, otherwise None. """ account_hosts = [ h.strip() for h in req.headers.get('X-Account-Host', '').split(',') ] account_devices = [ d.strip() for d in req.headers.get('X-Account-Device', '').split(',') ] account_partition = req.headers.get('X-Account-Partition', '') if len(account_hosts) != len(account_devices): # This shouldn't happen unless there's a bug in the proxy, # but if there is, we want to know about it. self.logger.error( _('ERROR Account update failed: different ' 'numbers of hosts and devices in request: ' '"%s" vs "%s"' % (req.headers.get('X-Account-Host', ''), req.headers.get('X-Account-Device', '')))) return if account_partition: updates = zip(account_hosts, account_devices) else: updates = [] account_404s = 0 for account_host, account_device in updates: account_ip, account_port = account_host.rsplit(':', 1) new_path = '/' + '/'.join([account, container]) info = broker.get_info() account_headers = { 'x-put-timestamp': info['put_timestamp'], 'x-delete-timestamp': info['delete_timestamp'], 'x-object-count': info['object_count'], 'x-bytes-used': info['bytes_used'], 'x-trans-id': req.headers.get('x-trans-id', '-') } if req.headers.get('x-account-override-deleted', 'no').lower() == \ 'yes': account_headers['x-account-override-deleted'] = 'yes' try: with ConnectionTimeout(self.conn_timeout): conn = http_connect(account_ip, account_port, account_device, account_partition, 'PUT', new_path, account_headers) with Timeout(self.node_timeout): account_response = conn.getresponse() account_response.read() if account_response.status == HTTP_NOT_FOUND: account_404s += 1 elif not is_success(account_response.status): self.logger.error( _('ERROR Account update failed ' 'with %(ip)s:%(port)s/%(device)s (will retry ' 'later): Response %(status)s %(reason)s'), { 'ip': account_ip, 'port': account_port, 'device': account_device, 'status': account_response.status, 'reason': account_response.reason }) except (Exception, Timeout): self.logger.exception( _('ERROR account update failed with ' '%(ip)s:%(port)s/%(device)s (will retry later)'), { 'ip': account_ip, 'port': account_port, 'device': account_device }) if updates and account_404s == len(updates): return HTTPNotFound(req=req) else: return None
def test_api_call_timeout(self): self.call_context.call.side_effect = Timeout() self.assertRaises(exception.GuestTimeout, self.api.restart)
def direct_get_account(node, part, account, marker=None, limit=None, prefix=None, delimiter=None, conn_timeout=5, response_timeout=15): """ Get listings directly from the account server. :param node: node dictionary from the ring :param part: partition the account is on :param account: account name :param marker: marker query :param limit: query limit :param prefix: prefix query :param delimeter: delimeter for the query :param conn_timeout: timeout in seconds for establishing the connection :param response_timeout: timeout in seconds for getting the response :returns: a tuple of (response headers, a list of containers) The response headers will be a dict and all header names will be lowercase. """ path = '/' + account qs = 'format=json' if marker: qs += '&marker=%s' % quote(marker) if limit: qs += '&limit=%d' % limit if prefix: qs += '&prefix=%s' % quote(prefix) if delimiter: qs += '&delimiter=%s' % quote(delimiter) with Timeout(conn_timeout): conn = http_connect(node['ip'], node['port'], node['device'], part, 'GET', path, query_string=qs) with Timeout(response_timeout): resp = conn.getresponse() if resp.status < 200 or resp.status >= 300: resp.read() raise ClientException( 'Account server %s:%s direct GET %s gave status %s' % (node['ip'], node['port'], repr('/%s/%s%s' % (node['device'], part, path)), resp.status), http_host=node['ip'], http_port=node['port'], http_device=node['device'], http_status=resp.status, http_reason=resp.reason) resp_headers = {} for header, value in resp.getheaders(): resp_headers[header.lower()] = value if resp.status == 204: resp.read() return resp_headers, [] return resp_headers, json_loads(resp.read())
def getresponse(self): if kwargs.get('raise_exc'): raise Exception('test') if kwargs.get('raise_timeout_exc'): raise Timeout() return self
def direct_put_object(node, part, account, container, name, contents, content_length=None, etag=None, content_type=None, headers=None, conn_timeout=5, response_timeout=15, chunk_size=65535): """ Put object directly from the object server. :param node: node dictionary from the ring :param part: partition the container is on :param account: account name :param container: container name :param name: object name :param contents: an iterable or string to read object data from :param content_length: value to send as content-length header :param etag: etag of contents :param content_type: value to send as content-type header :param headers: additional headers to include in the request :param conn_timeout: timeout in seconds for establishing the connection :param response_timeout: timeout in seconds for getting the response :param chunk_size: if defined, chunk size of data to send. :returns: etag from the server response :raises ClientException: HTTP PUT request failed """ path = '/%s/%s/%s' % (account, container, name) if headers is None: headers = {} if etag: headers['ETag'] = etag.strip('"') if content_length is not None: headers['Content-Length'] = str(content_length) else: for n, v in headers.items(): if n.lower() == 'content-length': content_length = int(v) if content_type is not None: headers['Content-Type'] = content_type else: headers['Content-Type'] = 'application/octet-stream' if not contents: headers['Content-Length'] = '0' if isinstance(contents, six.string_types): contents = [contents] # Incase the caller want to insert an object with specific age add_ts = 'X-Timestamp' not in headers if content_length is None: headers['Transfer-Encoding'] = 'chunked' with Timeout(conn_timeout): conn = http_connect(node['ip'], node['port'], node['device'], part, 'PUT', path, headers=gen_headers(headers, add_ts)) contents_f = FileLikeIter(contents) if content_length is None: chunk = contents_f.read(chunk_size) while chunk: conn.send('%x\r\n%s\r\n' % (len(chunk), chunk)) chunk = contents_f.read(chunk_size) conn.send('0\r\n\r\n') else: left = content_length while left > 0: size = chunk_size if size > left: size = left chunk = contents_f.read(size) if not chunk: break conn.send(chunk) left -= len(chunk) with Timeout(response_timeout): resp = conn.getresponse() resp.read() if not is_success(resp.status): raise DirectClientException('Object', 'PUT', node, part, path, resp) return resp.getheader('etag').strip('"')
def __init__(self, seconds=None, msg=None): Timeout.__init__(self, seconds=seconds) self.msg = msg
def container_update(self, op, account, container, obj, request, headers_out, objdevice, policy): """ Update the container when objects are updated. :param op: operation performed (ex: 'PUT', or 'DELETE') :param account: account name for the object :param container: container name for the object :param obj: object name :param request: the original request object driving the update :param headers_out: dictionary of headers to send in the container request(s) :param objdevice: device name that the object is in :param policy: the BaseStoragePolicy instance """ headers_in = request.headers conthosts = [h.strip() for h in headers_in.get('X-Container-Host', '').split(',')] contdevices = [d.strip() for d in headers_in.get('X-Container-Device', '').split(',')] contpartition = headers_in.get('X-Container-Partition', '') if len(conthosts) != len(contdevices): # This shouldn't happen unless there's a bug in the proxy, # but if there is, we want to know about it. self.logger.error(_('ERROR Container update failed: different ' 'numbers of hosts and devices in request: ' '"%s" vs "%s"') % (headers_in.get('X-Container-Host', ''), headers_in.get('X-Container-Device', ''))) return if contpartition: updates = zip(conthosts, contdevices) else: updates = [] headers_out['x-trans-id'] = headers_in.get('x-trans-id', '-') headers_out['referer'] = request.as_referer() headers_out['X-Backend-Storage-Policy-Index'] = int(policy) update_greenthreads = [] for conthost, contdevice in updates: gt = spawn(self.async_update, op, account, container, obj, conthost, contpartition, contdevice, headers_out, objdevice, policy, logger_thread_locals=self.logger.thread_locals) update_greenthreads.append(gt) # Wait a little bit to see if the container updates are successful. # If we immediately return after firing off the greenthread above, then # we're more likely to confuse the end-user who does a listing right # after getting a successful response to the object create. The # `container_update_timeout` bounds the length of time we wait so that # one slow container server doesn't make the entire request lag. try: with Timeout(self.container_update_timeout): for gt in update_greenthreads: gt.wait() except Timeout: # updates didn't go through, log it and return self.logger.debug( 'Container update timeout (%.4fs) waiting for %s', self.container_update_timeout, updates)
def async_update(self, op, account, container, obj, host, partition, contdevice, headers_out, objdevice): """ Sends or saves an async update. :param op: operation performed (ex: 'PUT', or 'DELETE') :param account: account name for the object :param container: container name for the object :param obj: object name :param host: host that the container is on :param partition: partition that the container is on :param contdevice: device name that the container is on :param headers_out: dictionary of headers to send in the container request :param objdevice: device name that the object is in """ full_path = '/%s/%s/%s' % (account, container, obj) if all([host, partition, contdevice]): try: with ConnectionTimeout(self.conn_timeout): ip, port = host.rsplit(':', 1) conn = http_connect(ip, port, contdevice, partition, op, full_path, headers_out) with Timeout(self.node_timeout): response = conn.getresponse() response.read() if is_success(response.status): return else: self.logger.error( _('ERROR Container update failed ' '(saving for async update later): %(status)d ' 'response from %(ip)s:%(port)s/%(dev)s'), { 'status': response.status, 'ip': ip, 'port': port, 'dev': contdevice }) except (Exception, Timeout): self.logger.exception( _('ERROR container update failed with ' '%(ip)s:%(port)s/%(dev)s (saving for async update later)' ), { 'ip': ip, 'port': port, 'dev': contdevice }) async_dir = os.path.join(self.devices, objdevice, ASYNCDIR) ohash = hash_path(account, container, obj) self.logger.increment('async_pendings') write_pickle( { 'op': op, 'account': account, 'container': container, 'obj': obj, 'headers': headers_out }, os.path.join( async_dir, ohash[-3:], ohash + '-' + normalize_timestamp(headers_out['x-timestamp'])), os.path.join(self.devices, objdevice, 'tmp'))
def run_wsgi(conf_path, app_section, *args, **kwargs): """ Runs the server according to some strategy. The default strategy runs a specified number of workers in pre-fork model. The object-server (only) may use a servers-per-port strategy if its config has a servers_per_port setting with a value greater than zero. :param conf_path: Path to paste.deploy style configuration file/directory :param app_section: App name from conf file to load config from :returns: 0 if successful, nonzero otherwise """ # Load configuration, Set logger and Load request processor # conf: 包含配置信息的字典 # logger: log对象 # log_name: log名,eg:proxy_logging try: (conf, logger, log_name) = \ _initrp(conf_path, app_section, *args, **kwargs) except ConfigFileError as e: print(e) return 1 # optional nice/ionice priority scheduling utils.modify_priority(conf, logger) servers_per_port = int(conf.get('servers_per_port', '0') or 0) # NOTE: for now servers_per_port is object-server-only; future work could # be done to test and allow it to be used for account and container # servers, but that has not been done yet. if servers_per_port and app_section == 'object-server': strategy = ServersPerPortStrategy( conf, logger, servers_per_port=servers_per_port) else: # 多个 server 进程监听一个端口,默认为逻辑cpu的个数 strategy = WorkersStrategy(conf, logger) # patch event before loadapp # 运行时修改已有的代码,动态替换已有的标准库 utils.eventlet_monkey_patch() # Ensure the configuration and application can be loaded before proceeding. global_conf = {'log_name': log_name} if 'global_conf_callback' in kwargs: kwargs['global_conf_callback'](conf, global_conf) loadapp(conf_path, global_conf=global_conf) # set utils.FALLOCATE_RESERVE if desired utils.FALLOCATE_RESERVE, utils.FALLOCATE_IS_PERCENT = \ utils.config_fallocate_value(conf.get('fallocate_reserve', '1%')) # Start listening on bind_addr/port error_msg = strategy.do_bind_ports() if error_msg: logger.error(error_msg) print(error_msg) return 1 # Redirect errors to logger and close stdio. Do this *after* binding ports; # we use this to signal that the service is ready to accept connections. capture_stdio(logger) no_fork_sock = strategy.no_fork_sock() if no_fork_sock: run_server(conf, logger, no_fork_sock, global_conf=global_conf) return 0 def stop_with_signal(signum, *args): """Set running flag to False and capture the signum""" running_context[0] = False running_context[1] = signum # context to hold boolean running state and stop signum running_context = [True, None] signal.signal(signal.SIGTERM, stop_with_signal) signal.signal(signal.SIGHUP, stop_with_signal) while running_context[0]: for sock, sock_info in strategy.new_worker_socks(): pid = os.fork() if pid == 0: # in child process signal.signal(signal.SIGHUP, signal.SIG_DFL) signal.signal(signal.SIGTERM, signal.SIG_DFL) strategy.post_fork_hook() run_server(conf, logger, sock) strategy.log_sock_exit(sock, sock_info) return 0 else: # in parent process strategy.register_worker_start(sock, sock_info, pid) # The strategy may need to pay attention to something in addition to # child process exits (like new ports showing up in a ring). # # NOTE: a timeout value of None will just instantiate the Timeout # object and not actually schedule it, which is equivalent to no # timeout for the green_os.wait(). loop_timeout = strategy.loop_timeout() with Timeout(loop_timeout, exception=False): try: try: pid, status = green_os.wait() if os.WIFEXITED(status) or os.WIFSIGNALED(status): strategy.register_worker_exit(pid) except OSError as err: if err.errno not in (errno.EINTR, errno.ECHILD): raise if err.errno == errno.ECHILD: # If there are no children at all (ECHILD), then # there's nothing to actually wait on. We sleep # for a little bit to avoid a tight CPU spin # and still are able to catch any KeyboardInterrupt # events that happen. The value of 0.01 matches the # value in eventlet's waitpid(). sleep(0.01) except KeyboardInterrupt: logger.notice('User quit') running_context[0] = False break if running_context[1] is not None: try: signame = SIGNUM_TO_NAME[running_context[1]] except KeyError: logger.error('Stopping with unexpected signal %r' % running_context[1]) else: logger.error('%s received', signame) if running_context[1] == signal.SIGTERM: os.killpg(0, signal.SIGTERM) strategy.shutdown_sockets() signal.signal(signal.SIGTERM, signal.SIG_IGN) logger.notice('Exited (%s)', os.getpid()) return 0
def fake_get_hashes(*args, **kwargs): self.get_hash_count += 1 if self.get_hash_count == 3: # raise timeout on last call to get hashes raise Timeout() return 2, {'abc': 'def'}
def read(self, size): raise Timeout()
class TestContainerController(unittest.TestCase): """ Test swift.container_server.ContainerController """ def setUp(self): """ Set up for testing swift.object_server.ObjectController """ self.testdir = os.path.join(mkdtemp(), 'tmp_test_object_server_ObjectController') mkdirs(self.testdir) rmtree(self.testdir) mkdirs(os.path.join(self.testdir, 'sda1')) mkdirs(os.path.join(self.testdir, 'sda1', 'tmp')) self.controller = container_server.ContainerController( {'devices': self.testdir, 'mount_check': 'false'}) def tearDown(self): """ Tear down for testing swift.object_server.ObjectController """ rmtree(os.path.dirname(self.testdir), ignore_errors=1) def test_acl_container(self): # Ensure no acl by default req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT'}, headers={'X-Timestamp': '0'}) self.controller.PUT(req) req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'HEAD'}) response = self.controller.HEAD(req) self.assert_(response.status.startswith('204')) self.assert_('x-container-read' not in response.headers) self.assert_('x-container-write' not in response.headers) # Ensure POSTing acls works req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'POST'}, headers={'X-Timestamp': '1', 'X-Container-Read': '.r:*', 'X-Container-Write': 'account:user'}) self.controller.POST(req) req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'HEAD'}) response = self.controller.HEAD(req) self.assert_(response.status.startswith('204')) self.assertEquals(response.headers.get('x-container-read'), '.r:*') self.assertEquals(response.headers.get('x-container-write'), 'account:user') # Ensure we can clear acls on POST req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'POST'}, headers={'X-Timestamp': '3', 'X-Container-Read': '', 'X-Container-Write': ''}) self.controller.POST(req) req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'HEAD'}) response = self.controller.HEAD(req) self.assert_(response.status.startswith('204')) self.assert_('x-container-read' not in response.headers) self.assert_('x-container-write' not in response.headers) # Ensure PUTing acls works req = Request.blank('/sda1/p/a/c2', environ={'REQUEST_METHOD': 'PUT'}, headers={'X-Timestamp': '4', 'X-Container-Read': '.r:*', 'X-Container-Write': 'account:user'}) self.controller.PUT(req) req = Request.blank('/sda1/p/a/c2', environ={'REQUEST_METHOD': 'HEAD'}) response = self.controller.HEAD(req) self.assert_(response.status.startswith('204')) self.assertEquals(response.headers.get('x-container-read'), '.r:*') self.assertEquals(response.headers.get('x-container-write'), 'account:user') def test_HEAD(self): req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT', 'HTTP_X_TIMESTAMP': '0'}) self.controller.PUT(req) response = self.controller.HEAD(req) self.assert_(response.status.startswith('204')) self.assertEquals(int(response.headers['x-container-bytes-used']), 0) self.assertEquals(int(response.headers['x-container-object-count']), 0) req2 = Request.blank('/sda1/p/a/c/o', environ= {'HTTP_X_TIMESTAMP': '1', 'HTTP_X_SIZE': 42, 'HTTP_X_CONTENT_TYPE': 'text/plain', 'HTTP_X_ETAG': 'x'}) self.controller.PUT(req2) response = self.controller.HEAD(req) self.assertEquals(int(response.headers['x-container-bytes-used']), 42) self.assertEquals(int(response.headers['x-container-object-count']), 1) def test_HEAD_not_found(self): req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'HEAD'}) resp = self.controller.HEAD(req) self.assertEquals(resp.status_int, 404) def test_PUT(self): req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT', 'HTTP_X_TIMESTAMP': '1'}) resp = self.controller.PUT(req) self.assertEquals(resp.status_int, 201) req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT', 'HTTP_X_TIMESTAMP': '2'}) resp = self.controller.PUT(req) self.assertEquals(resp.status_int, 202) def test_PUT_obj_not_found(self): req = Request.blank('/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'PUT'}, headers={'X-Timestamp': '1', 'X-Size': '0', 'X-Content-Type': 'text/plain', 'X-ETag': 'e'}) resp = self.controller.PUT(req) self.assertEquals(resp.status_int, 404) def test_PUT_GET_metadata(self): # Set metadata header req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT'}, headers={'X-Timestamp': normalize_timestamp(1), 'X-Container-Meta-Test': 'Value'}) resp = self.controller.PUT(req) self.assertEquals(resp.status_int, 201) req = Request.blank('/sda1/p/a/c') resp = self.controller.GET(req) self.assertEquals(resp.status_int, 204) self.assertEquals(resp.headers.get('x-container-meta-test'), 'Value') # Set another metadata header, ensuring old one doesn't disappear req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'POST'}, headers={'X-Timestamp': normalize_timestamp(1), 'X-Container-Meta-Test2': 'Value2'}) resp = self.controller.POST(req) self.assertEquals(resp.status_int, 204) req = Request.blank('/sda1/p/a/c') resp = self.controller.GET(req) self.assertEquals(resp.status_int, 204) self.assertEquals(resp.headers.get('x-container-meta-test'), 'Value') self.assertEquals(resp.headers.get('x-container-meta-test2'), 'Value2') # Update metadata header req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT'}, headers={'X-Timestamp': normalize_timestamp(3), 'X-Container-Meta-Test': 'New Value'}) resp = self.controller.PUT(req) self.assertEquals(resp.status_int, 202) req = Request.blank('/sda1/p/a/c') resp = self.controller.GET(req) self.assertEquals(resp.status_int, 204) self.assertEquals(resp.headers.get('x-container-meta-test'), 'New Value') # Send old update to metadata header req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT'}, headers={'X-Timestamp': normalize_timestamp(2), 'X-Container-Meta-Test': 'Old Value'}) resp = self.controller.PUT(req) self.assertEquals(resp.status_int, 202) req = Request.blank('/sda1/p/a/c') resp = self.controller.GET(req) self.assertEquals(resp.status_int, 204) self.assertEquals(resp.headers.get('x-container-meta-test'), 'New Value') # Remove metadata header (by setting it to empty) req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT'}, headers={'X-Timestamp': normalize_timestamp(4), 'X-Container-Meta-Test': ''}) resp = self.controller.PUT(req) self.assertEquals(resp.status_int, 202) req = Request.blank('/sda1/p/a/c') resp = self.controller.GET(req) self.assertEquals(resp.status_int, 204) self.assert_('x-container-meta-test' not in resp.headers) def test_POST_HEAD_metadata(self): req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT'}, headers={'X-Timestamp': normalize_timestamp(1)}) resp = self.controller.PUT(req) self.assertEquals(resp.status_int, 201) # Set metadata header req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'POST'}, headers={'X-Timestamp': normalize_timestamp(1), 'X-Container-Meta-Test': 'Value'}) resp = self.controller.POST(req) self.assertEquals(resp.status_int, 204) req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'HEAD'}) resp = self.controller.HEAD(req) self.assertEquals(resp.status_int, 204) self.assertEquals(resp.headers.get('x-container-meta-test'), 'Value') # Update metadata header req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'POST'}, headers={'X-Timestamp': normalize_timestamp(3), 'X-Container-Meta-Test': 'New Value'}) resp = self.controller.POST(req) self.assertEquals(resp.status_int, 204) req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'HEAD'}) resp = self.controller.HEAD(req) self.assertEquals(resp.status_int, 204) self.assertEquals(resp.headers.get('x-container-meta-test'), 'New Value') # Send old update to metadata header req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'POST'}, headers={'X-Timestamp': normalize_timestamp(2), 'X-Container-Meta-Test': 'Old Value'}) resp = self.controller.POST(req) self.assertEquals(resp.status_int, 204) req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'HEAD'}) resp = self.controller.HEAD(req) self.assertEquals(resp.status_int, 204) self.assertEquals(resp.headers.get('x-container-meta-test'), 'New Value') # Remove metadata header (by setting it to empty) req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'POST'}, headers={'X-Timestamp': normalize_timestamp(4), 'X-Container-Meta-Test': ''}) resp = self.controller.POST(req) self.assertEquals(resp.status_int, 204) req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'HEAD'}) resp = self.controller.HEAD(req) self.assertEquals(resp.status_int, 204) self.assert_('x-container-meta-test' not in resp.headers) def test_DELETE_obj_not_found(self): req = Request.blank('/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'DELETE'}, headers={'X-Timestamp': '1'}) resp = self.controller.DELETE(req) self.assertEquals(resp.status_int, 404) def test_PUT_utf8(self): snowman = u'\u2603' container_name = snowman.encode('utf-8') req = Request.blank('/sda1/p/a/%s'%container_name, environ={'REQUEST_METHOD': 'PUT', 'HTTP_X_TIMESTAMP': '1'}) resp = self.controller.PUT(req) self.assertEquals(resp.status_int, 201) def test_PUT_account_update(self): bindsock = listen(('127.0.0.1', 0)) def accept(return_code, expected_timestamp): try: with Timeout(3): sock, addr = bindsock.accept() inc = sock.makefile('rb') out = sock.makefile('wb') out.write('HTTP/1.1 %d OK\r\nContent-Length: 0\r\n\r\n' % return_code) out.flush() self.assertEquals(inc.readline(), 'PUT /sda1/123/a/c HTTP/1.1\r\n') headers = {} line = inc.readline() while line and line != '\r\n': headers[line.split(':')[0].lower()] = \ line.split(':')[1].strip() line = inc.readline() self.assertEquals(headers['x-put-timestamp'], expected_timestamp) except BaseException, err: return err return None req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT'}, headers={'X-Timestamp': '0000000001.00000', 'X-Account-Host': '%s:%s' % bindsock.getsockname(), 'X-Account-Partition': '123', 'X-Account-Device': 'sda1'}) event = spawn(accept, 201, '0000000001.00000') try: with Timeout(3): resp = self.controller.PUT(req) self.assertEquals(resp.status_int, 201) finally: err = event.wait() if err: raise Exception(err) req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'DELETE'}, headers={'X-Timestamp': '2'}) resp = self.controller.DELETE(req) self.assertEquals(resp.status_int, 204) req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT'}, headers={'X-Timestamp': '0000000003.00000', 'X-Account-Host': '%s:%s' % bindsock.getsockname(), 'X-Account-Partition': '123', 'X-Account-Device': 'sda1'}) event = spawn(accept, 404, '0000000003.00000') try: with Timeout(3): resp = self.controller.PUT(req) self.assertEquals(resp.status_int, 404) finally: err = event.wait() if err: raise Exception(err) req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT'}, headers={'X-Timestamp': '0000000005.00000', 'X-Account-Host': '%s:%s' % bindsock.getsockname(), 'X-Account-Partition': '123', 'X-Account-Device': 'sda1'}) event = spawn(accept, 503, '0000000005.00000') got_exc = False try: with Timeout(3): resp = self.controller.PUT(req) except BaseException, err: got_exc = True
def update(self, job): """ High-level method that replicates a single partition. :param job: a dict containing info about the partition to be replicated """ self.replication_count += 1 begin = time.time() try: hashed, local_hash = tpool.execute( tpooled_get_hashes, job['path'], do_listdir=(self.replication_count % 10) == 0, reclaim_age=self.reclaim_age) # See tpooled_get_hashes "Hack". if isinstance(hashed, BaseException): raise hashed self.suffix_hash += hashed attempts_left = self.object_ring.replica_count - 1 nodes = itertools.chain( job['nodes'], self.object_ring.get_more_nodes(int(job['partition']))) while attempts_left > 0: # If this throws StopIterator it will be caught way below node = next(nodes) attempts_left -= 1 try: with Timeout(self.http_timeout): resp = http_connect(node['ip'], node['port'], node['device'], job['partition'], 'REPLICATE', '', headers={ 'Content-Length': '0' }).getresponse() if resp.status == 507: self.logger.error( _('%(ip)s/%(device)s responded' ' as unmounted'), node) attempts_left += 1 continue if resp.status != 200: self.logger.error( _("Invalid response %(resp)s " "from %(ip)s"), { 'resp': resp.status, 'ip': node['ip'] }) continue remote_hash = pickle.loads(resp.read()) del resp suffixes = [ suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1) ] if not suffixes: continue hashed, recalc_hash = tpool.execute( tpooled_get_hashes, job['path'], recalculate=suffixes, reclaim_age=self.reclaim_age) # See tpooled_get_hashes "Hack". if isinstance(hashed, BaseException): raise hashed local_hash = recalc_hash suffixes = [ suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1) ] self.rsync(node, job, suffixes) with Timeout(self.http_timeout): conn = http_connect(node['ip'], node['port'], node['device'], job['partition'], 'REPLICATE', '/' + '-'.join(suffixes), headers={'Content-Length': '0'}) conn.getresponse().read() self.suffix_sync += len(suffixes) except (Exception, Timeout): self.logger.exception( _("Error syncing with node: %s") % node) self.suffix_count += len(local_hash) except (Exception, Timeout): self.logger.exception(_("Error syncing partition")) finally: self.partition_times.append(time.time() - begin)
db_conns = [] for i in range(num_locks): db_conn = connect(db_files[i]) db_conn.execute('begin exclusive transaction') db_conns.append(db_conn) if catch_503: exc = None try: client.delete_container(self.url, self.token, container) except client.ClientException, err: exc = err self.assertEquals(exc.http_status, 503) else: client.delete_container(self.url, self.token, container) pool = GreenPool() try: with Timeout(15): pool.spawn(run_test, 1, False) pool.spawn(run_test, 2, True) pool.spawn(run_test, 3, True) pool.waitall() except Timeout, err: raise Exception( "The server did not return a 503 on container db locks, " "it just hangs: %s" % err) if __name__ == '__main__': main()
def update(self, job): """ High-level method that replicates a single partition. :param job: a dict containing info about the partition to be replicated """ self.replication_count += 1 self.logger.increment('partition.update.count.%s' % (job['device'], )) self.headers['X-Backend-Storage-Policy-Index'] = job['policy_idx'] begin = time.time() try: hashed, local_hash = tpool_reraise( get_hashes, job['path'], do_listdir=(self.replication_count % 10) == 0, reclaim_age=self.reclaim_age) self.suffix_hash += hashed self.logger.update_stats('suffix.hashes', hashed) attempts_left = len(job['nodes']) nodes = itertools.chain( job['nodes'], job['object_ring'].get_more_nodes(int(job['partition']))) while attempts_left > 0: # If this throws StopIterator it will be caught way below node = next(nodes) attempts_left -= 1 try: with Timeout(self.http_timeout): resp = http_connect( node['replication_ip'], node['replication_port'], node['device'], job['partition'], 'REPLICATE', '', headers=self.headers).getresponse() if resp.status == HTTP_INSUFFICIENT_STORAGE: self.logger.error( _('%(ip)s/%(device)s responded' ' as unmounted'), node) attempts_left += 1 continue if resp.status != HTTP_OK: self.logger.error( _("Invalid response %(resp)s " "from %(ip)s"), { 'resp': resp.status, 'ip': node['replication_ip'] }) continue remote_hash = pickle.loads(resp.read()) del resp suffixes = [ suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1) ] if not suffixes: continue hashed, recalc_hash = tpool_reraise( get_hashes, job['path'], recalculate=suffixes, reclaim_age=self.reclaim_age) self.logger.update_stats('suffix.hashes', hashed) local_hash = recalc_hash suffixes = [ suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1) ] self.sync(node, job, suffixes) with Timeout(self.http_timeout): conn = http_connect(node['replication_ip'], node['replication_port'], node['device'], job['partition'], 'REPLICATE', '/' + '-'.join(suffixes), headers=self.headers) conn.getresponse().read() self.suffix_sync += len(suffixes) self.logger.update_stats('suffix.syncs', len(suffixes)) except (Exception, Timeout): self.logger.exception( _("Error syncing with node: %s") % node) self.suffix_count += len(local_hash) except (Exception, Timeout): self.logger.exception(_("Error syncing partition")) finally: self.partition_times.append(time.time() - begin) self.logger.timing_since('partition.update.timing', begin)
def get_node_timeout(self, timeout): """ Return node timeout. """ return Timeout(timeout)
except BaseException, err: return err return None req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT'}, headers={'X-Timestamp': '1'}) resp = self.controller.PUT(req) self.assertEquals(resp.status_int, 201) req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'DELETE'}, headers={'X-Timestamp': '0000000002.00000', 'X-Account-Host': '%s:%s' % bindsock.getsockname(), 'X-Account-Partition': '123', 'X-Account-Device': 'sda1'}) event = spawn(accept, 204, '0000000002.00000') try: with Timeout(3): resp = self.controller.DELETE(req) self.assertEquals(resp.status_int, 204) finally: err = event.wait() if err: raise Exception(err) req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT', 'HTTP_X_TIMESTAMP': '2'}) resp = self.controller.PUT(req) self.assertEquals(resp.status_int, 201) req = Request.blank('/sda1/p/a/c', environ={'REQUEST_METHOD': 'DELETE'}, headers={'X-Timestamp': '0000000003.00000', 'X-Account-Host': '%s:%s' % bindsock.getsockname(), 'X-Account-Partition': '123',