def synchronize_flavors(self, ctx):
    """Synchronize PowerVC flavors into the local tables.

    Every flavor returned by the driver is logged.  A flavor is handed to
    ``_sync_flavor`` only when it is public, ``_check_for_sync`` accepts
    its name and ``_check_for_extraspecs`` returns a result (presumably
    the configured white/black list and extra-specs checks; confirm in
    those helpers).
    """
    LOG.info(_("Flavors synchronization starts."))

    # By default the driver is asked for detailed, public flavors.
    for pvc_flavor in self.driver.list_flavors():
        LOG.info(_("Flavor:%s") % str(pvc_flavor))
        # Give other greenthreads a chance to run between flavors.
        greenthread.sleep(0)

        # Private flavors can still show up in the listing; skip them.
        # Can be removed once PowerVC returns only public flavors by
        # default.
        is_public = pvc_flavor.__dict__.get(constants.IS_PUBLIC)
        if not is_public:
            continue
        if not self._check_for_sync(pvc_flavor.name):
            continue

        extraspecs = self._check_for_extraspecs(pvc_flavor)
        if extraspecs is None:
            continue
        self._sync_flavor(ctx, pvc_flavor, extraspecs[1])

    LOG.info(_("Flavors synchronization ends."))
def _inner():
    """Call ``self.f`` repeatedly, aiming for one start per ``interval``.

    After the optional ``initial_delay``, each pass sleeps for whatever is
    left of ``interval`` once the call returns; an overrun is logged and
    followed by a zero-length sleep.  ``done`` receives the
    ``LoopingCallDone`` return value, the exception info of any other
    failure, or ``True`` when the loop simply stops running.
    """
    if initial_delay:
        greenthread.sleep(initial_delay)

    try:
        while self._running:
            began = timeutils.utcnow()
            self.f(*self.args, **self.kw)
            finished = timeutils.utcnow()
            if not self._running:
                break
            remaining = interval - timeutils.delta_seconds(began, finished)
            if remaining <= 0:
                LOG.warn(_('task run outlasted interval by %s sec') %
                         -remaining)
            pause = remaining if remaining > 0 else 0
            greenthread.sleep(pause)
    except LoopingCallDone as finished_exc:
        self.stop()
        done.send(finished_exc.retvalue)
    except Exception:
        LOG.exception(_('in fixed duration looping call'))
        done.send_exception(*sys.exc_info())
        return
    else:
        done.send(True)
def _inner():
    """Fixed-interval loop body timed with ``_ts()``.

    Calls ``self.f`` until the loop is stopped.  When a call takes longer
    than ``interval`` a warning is logged and the next pass starts after a
    zero-length sleep; otherwise the loop sleeps for the unused part of
    the interval.  The outcome is reported through ``done``.
    """
    if initial_delay:
        greenthread.sleep(initial_delay)

    try:
        while self._running:
            began = _ts()
            self.f(*self.args, **self.kw)
            finished = _ts()
            if not self._running:
                break
            # Positive means the call overran the interval.
            overrun = finished - began - interval
            if overrun > 0:
                warn_args = {'func_name': self.f, 'delay': overrun}
                LOG.warn(_LW('task %(func_name)r run outlasted '
                             'interval by %(delay).2f sec'),
                         warn_args)
            pause = -overrun if overrun < 0 else 0
            greenthread.sleep(pause)
    except LoopingCallDone as finished_exc:
        self.stop()
        done.send(finished_exc.retvalue)
    except Exception:
        LOG.exception(_LE('in fixed duration looping call'))
        done.send_exception(*sys.exc_info())
        return
    else:
        done.send(True)
def _inner():
    """Dynamic loop body: the callback's return value is the next sleep.

    The value returned by ``self.f`` is capped at
    ``periodic_interval_max`` when that is set.  ``done`` receives the
    ``LoopingCallDone`` return value, the exception info of any other
    failure, or ``True`` when the loop stops running.
    """
    if initial_delay:
        greenthread.sleep(initial_delay)

    try:
        while self._running:
            nap = self.f(*self.args, **self.kw)
            if not self._running:
                break
            if periodic_interval_max is not None:
                nap = min(nap, periodic_interval_max)
            log_args = {"func_name": repr(self.f), "idle": nap}
            LOG.debug("Dynamic looping call %(func_name)s sleeping "
                      "for %(idle).02f seconds", log_args)
            greenthread.sleep(nap)
    except LoopingCallDone as finished_exc:
        self.stop()
        done.send(finished_exc.retvalue)
    except Exception:
        LOG.exception(_LE("in dynamic looping call"))
        done.send_exception(*sys.exc_info())
        return
    else:
        done.send(True)
def _inner():
    """Upload the image data to Glance, then poll until it settles.

    The data is pushed with ``update_image``.  The image status is then
    polled: ``active`` completes ``self.done`` with ``True``, ``saving``
    and ``queued`` wait ``GLANCE_POLL_INTERVAL`` seconds before the next
    poll, and ``killed`` or any other status completes ``self.done`` with
    an ``exception.Error``.  An exception raised while polling stops the
    loop and is forwarded through ``self.done``.
    """
    self.glance_client.update_image(self.image_id,
                                    image_meta=self.image_meta,
                                    image_data=self.input)
    self._running = True
    while self._running:
        try:
            image_status = self.glance_client.get_image_meta(
                self.image_id).get("status")
            if image_status == "active":
                self.stop()
                self.done.send(True)
            # If the state is killed, then raise an exception.
            elif image_status == "killed":
                self.stop()
                exc_msg = (_("Glance image %s is in killed state") %
                           self.image_id)
                # No exception is being handled here, so log a plain
                # error rather than LOG.exception, which would attach an
                # empty traceback.
                LOG.error(exc_msg)
                self.done.send_exception(exception.Error(exc_msg))
            elif image_status in ["saving", "queued"]:
                greenthread.sleep(GLANCE_POLL_INTERVAL)
            else:
                self.stop()
                exc_msg = _("Glance image "
                            "%(image_id)s is in unknown state "
                            "- %(state)s") % {
                                "image_id": self.image_id,
                                "state": image_status,
                            }
                LOG.error(exc_msg)
                self.done.send_exception(exception.Error(exc_msg))
        # "except X as name" is valid on Python 2.6+ and Python 3, unlike
        # the comma form.
        except Exception as exc:
            self.stop()
            self.done.send_exception(exc)
def collect_disk_info(self):
    """Sample every running domain several times and store the result.

    Takes ``cfg.CONF.net_task_times`` samples, five seconds apart, and
    passes the collected samples to ``_static_result``.
    """
    connection = self.connection
    domains = [connection.lookupByID(dom_id)
               for dom_id in connection.listDomainsID()]

    # samples_by_uuid looks like this:
    # {'instance_uuid1': [{'instance_uuid': 'uuid',
    #                      'tenant_id': 'tenant-id',
    #                      'disk': {'rd_req': 100,
    #                               'rd_bytes': 101,
    #                               'wr_req': 102,
    #                               'wr_bytes': 103},
    #                      'time': 1234567889}],
    #  'instance_uuid2': [{}, {}]}
    samples_by_uuid = {dom.UUIDString(): [] for dom in domains}

    # Sample cfg.CONF.net_task_times times in total; the values are
    # combined afterwards.
    for _round in range(cfg.CONF.net_task_times):
        self._get_all_domains_one_sample(domains, samples_by_uuid)
        # Wait before taking the next sample.  (A randomized wait derived
        # from cfg.CONF.net_task_period was used here previously.)
        greenthread.sleep(5)

    # Compute the statistics and write them to the database.
    self._static_result(samples_by_uuid)
def _inner():
    """Run ``self.f`` in a loop, sleeping for the time it asks for.

    Each call's return value is the number of seconds to sleep before the
    next call, limited to ``periodic_interval_max`` when that is set.
    The result of the loop is delivered through ``done``.
    """
    if initial_delay:
        greenthread.sleep(initial_delay)

    try:
        while self._running:
            requested = self.f(*self.args, **self.kw)
            if not self._running:
                break
            if periodic_interval_max is None:
                nap = requested
            else:
                nap = min(requested, periodic_interval_max)
            LOG.debug('Dynamic looping call %(func_name)r sleeping '
                      'for %(idle).02f seconds',
                      {'func_name': self.f, 'idle': nap})
            greenthread.sleep(nap)
    except LoopingCallDone as finished_exc:
        self.stop()
        done.send(finished_exc.retvalue)
    except Exception:
        LOG.exception(_LE('in dynamic looping call'))
        done.send_exception(*sys.exc_info())
        return
    else:
        done.send(True)
def _inner():
    """Dynamic looping-call body that logs through ``logger``.

    The callback's return value, capped at ``periodic_interval_max`` when
    that is set, is the time to sleep before the next call.  The outcome
    is delivered through ``done``.
    """
    if initial_delay:
        greenthread.sleep(initial_delay)

    try:
        while self._running:
            nap = self.f(*self.args, **self.kw)  # callback
            if not self._running:
                break
            if periodic_interval_max is not None:
                nap = min(nap, periodic_interval_max)
            logger.debug("Dynamic looping call sleeping for %d seconds",
                         nap)
            greenthread.sleep(nap)
    except LoopingCallDone as finished_exc:
        logger.info('DynamicLoopingCall _inner: Exception1')
        self.stop()
        done.send(finished_exc.retvalue)
    except Exception as failure:
        logger.info('DynamicLoopingCall _inner: Exception2')
        logger.debug(failure)
        done.send_exception(*sys.exc_info())
        return
    else:
        done.send(True)
def _prepare_fc_map(self, fc_map_id, timeout):
    """Prepare a FlashCopy mapping and wait until it reports 'prepared'.

    The mapping is polled every five seconds for up to ``timeout``
    seconds.  A 'stopped' mapping is prepared again, and 'preparing' just
    keeps waiting.  Polling ends early when the attributes are missing or
    have no 'status' key.

    :param fc_map_id: identifier of the FlashCopy mapping.
    :param timeout: maximum time to wait, in seconds.
    :raises VolumeBackendAPIException: on an unexpected mapping status.
    :raises VolumeDriverException: if the mapping never became prepared.
    """
    self.ssh.prestartfcmap(fc_map_id)
    mapping_ready = False
    wait_time = 5
    # Floor division keeps this an integer under true division as well;
    # range() rejects floats.
    max_retries = int(timeout // wait_time) + 1
    for _try_number in range(1, max_retries):
        mapping_attrs = self._get_flashcopy_mapping_attributes(fc_map_id)
        if (mapping_attrs is None or
                'status' not in mapping_attrs):
            break
        status = mapping_attrs['status']
        if status == 'prepared':
            mapping_ready = True
            break
        elif status == 'stopped':
            self.ssh.prestartfcmap(fc_map_id)
        elif status != 'preparing':
            # The trailing space keeps "mapping" and the id apart once
            # the two literals are concatenated.
            msg = (_('Unexecpted mapping status %(status)s for mapping '
                     '%(id)s. Attributes: %(attr)s')
                   % {'status': status,
                      'id': fc_map_id,
                      'attr': mapping_attrs})
            LOG.error(msg)
            raise exception.VolumeBackendAPIException(data=msg)
        greenthread.sleep(wait_time)

    if not mapping_ready:
        msg = (_('Mapping %(id)s prepare failed to complete within the '
                 'allotted %(to)d seconds timeout. Terminating.')
               % {'id': fc_map_id,
                  'to': timeout})
        LOG.error(msg)
        raise exception.VolumeDriverException(message=msg)
def _run_ssh(self, cmd_list, check_exit=True, attempts=1):
    """Run a command on the SAN over a pooled SSH connection.

    ``cmd_list`` is checked with ``utils.check_ssh_injection`` and joined
    with spaces.  The pool is created on first use.  The command is tried
    up to ``attempts`` times, with a random 0.2-5 second pause after each
    failure.  Returns whatever ``_ssh_execute`` returns.  When every
    attempt fails, a ``paramiko.SSHException`` is raised and the error is
    logged before being re-raised.
    """
    utils.check_ssh_injection(cmd_list)
    command = ' '.join(cmd_list)

    if not self.sshpool:
        cfg = self.config
        self.sshpool = utils.SSHPool(cfg.san_ip,
                                     cfg.san_ssh_port,
                                     cfg.ssh_conn_timeout,
                                     cfg.san_login,
                                     password=cfg.san_password,
                                     privatekey=cfg.san_private_key,
                                     min_size=cfg.ssh_min_pool_conn,
                                     max_size=cfg.ssh_max_pool_conn)
    try:
        total_attempts = attempts
        with self.sshpool.item() as ssh:
            while attempts > 0:
                attempts -= 1
                try:
                    return self._ssh_execute(ssh, command,
                                             check_exit_code=check_exit)
                except Exception as failure:
                    LOG.error(failure)
                    greenthread.sleep(randint(20, 500) / 100.0)
            details = {'total_attempts': total_attempts,
                       'command': command}
            msg = (_("SSH Command failed after '%(total_attempts)r' "
                     "attempts : '%(command)s'") % details)
            raise paramiko.SSHException(msg)
    except Exception:
        with excutils.save_and_reraise_exception():
            LOG.error(_("Error running ssh command: %s") % command)
def _inner():
    """Fixed-interval loop body timed with ``datetime.utcnow()``.

    Calls ``self.f`` until the loop is stopped, sleeping for the unused
    part of ``interval`` after each call (zero when the call overran).
    The outcome is reported through ``done``.
    """
    if initial_delay:
        greenthread.sleep(initial_delay)

    try:
        while self._running:
            began = datetime.datetime.utcnow()
            self.f(*self.args, **self.kw)  # callback
            finished = datetime.datetime.utcnow()
            if not self._running:
                break
            remaining = interval - (finished - began).total_seconds()
            logger.debug('delay was %f sec', remaining)
            if remaining <= 0:
                logger.info('task run outlasted interval by %s sec',
                            -remaining)
            pause = remaining if remaining > 0 else 0
            greenthread.sleep(pause)
    except LoopingCallDone as finished_exc:
        self.stop()
        done.send(finished_exc.retvalue)
    except Exception as failure:
        logger.info('exception taken')
        logger.exception(failure)
        done.send_exception(*sys.exc_info())
        return
    else:
        done.send(True)
def start(self, interval=None):
    """Start the worker greenthread and its periodic monitor.

    Does nothing if a worker thread already exists.  ``interval`` is in
    milliseconds; ``None`` or ``0`` selects ``self._interval``.

    :returns: ``self``.
    """
    if self._thread is not None:
        return self

    def _run_worker():
        self.run()

    def _monitor_tick():
        # The busy flag brackets each pending-task check.
        self._monitor_busy = True
        try:
            self._check_pending_tasks()
        except Exception:
            LOG.exception(_("Exception in _check_pending_tasks"))
        self._monitor_busy = False

    if interval is None or interval == 0:
        interval = self._interval
    period = interval / 1000.0

    self._stopped = False
    self._thread = greenthread.spawn(_run_worker)
    self._monitor = loopingcall.FixedIntervalLoopingCall(_monitor_tick)
    self._monitor.start(period, period)
    # To allow the created thread start running
    greenthread.sleep(0)
    return self
def _run_loop(self, kind, event, idle_for_func, initial_delay=None,
              stop_on_exception=True):
    """Drive the looping call until it is stopped or fails.

    :param kind: label used in log messages.
    :param event: event that receives the loop's outcome.
    :param idle_for_func: callable ``(result, elapsed)`` returning the
        number of seconds to sleep before the next call.
    :param initial_delay: optional sleep before the first call.
    :param stop_on_exception: when false, ``self.f`` is wrapped with
        ``_safe_wrapper`` before being called.
    """
    func_name = reflection.get_callable_name(self.f)
    if stop_on_exception:
        func = self.f
    else:
        func = _safe_wrapper(self.f, kind, func_name)

    if initial_delay:
        greenthread.sleep(initial_delay)

    try:
        timer = timeutils.StopWatch()
        while self._running:
            timer.restart()
            outcome = func(*self.args, **self.kw)
            timer.stop()
            if not self._running:
                break
            nap = idle_for_func(outcome, timer.elapsed())
            log_args = {'func_name': func_name, 'idle': nap, 'kind': kind}
            LOG.trace('%(kind)s %(func_name)r sleeping '
                      'for %(idle).02f seconds', log_args)
            greenthread.sleep(nap)
    except LoopingCallDone as finished_exc:
        event.send(finished_exc.retvalue)
    except Exception:
        exc_info = sys.exc_info()
        try:
            LOG.error(_LE('%(kind)s %(func_name)r failed'),
                      {'kind': kind, 'func_name': func_name},
                      exc_info=exc_info)
            event.send_exception(*exc_info)
        finally:
            # Drop the reference so the traceback is not kept alive.
            del exc_info
        return
    else:
        event.send(True)
def _server_update_thread(self, context, id, zone, image, flavor,
                          gateway_name=None, device_mapping=None,
                          nics=None):
    """Poll a new server until it is ACTIVE or ERROR, then record it.

    Polls at most ``CONF.instance_try`` times, two seconds apart.  For an
    ACTIVE server a firewall entry is created per image port and creation
    notifications are sent.  In both final states the server record is
    updated in the database.
    """
    polls_left = CONF.instance_try
    while polls_left > 0:
        polls_left -= 1
        server = agent.server_get(self.request_get(context, zone), id)
        settled = server and server.status.lower() in (vm_states.ACTIVE,
                                                       vm_states.ERROR)
        if not settled:
            greenthread.sleep(2)
            continue

        server_values = self._format_server(server, nics=nics)
        if server.status.lower() == vm_states.ACTIVE:
            image_obj = self.db.image_get(context, image)
            if gateway_name is None:
                gateway_name = self.available_gateway(
                    context, server_values['host'])
            firewall = {'instance_id': id, 'hostname': gateway_name}
            for service_port in image_obj.get('property', []):
                gateway = self.db.gateway_count(context, gateway_name)
                firewall['gateway_port'] = gateway.count
                firewall['service_port'] = service_port
                self._firewall_create(context, firewall)
            self.resource_notify(context, 'instance.create', id,
                                 zone=zone, flavor=flavor)
            if device_mapping is not None:
                self.resource_notify(context, 'disk.create', id,
                                     device_mapping=device_mapping)
        self.db.server_update(context, id, server_values)
        break
def execute(cmd, root_helper=None, process_input=None, addl_env=None,
            check_exit_code=True, return_stderr=False,
            log_fail_as_error=True, extra_ok_codes=None):
    """Run a command and return its output.

    :param process_input: data sent to the process's stdin, if any.
    :param check_exit_code: raise ``RuntimeError`` on a non-zero exit.
    :param return_stderr: return ``(stdout, stderr)`` instead of stdout.
    :param log_fail_as_error: log failures at error rather than debug
        level.
    :param extra_ok_codes: non-zero exit codes to treat as success.
    """
    try:
        obj, cmd = create_process(cmd, root_helper=root_helper,
                                  addl_env=addl_env)
        if process_input:
            _stdout, _stderr = obj.communicate(process_input)
        else:
            _stdout, _stderr = obj.communicate()
        obj.stdin.close()
        m = "\nCommand: %s\nExit code: %s\nStdout: %r\n, Stderr: %r" % (
            cmd, obj.returncode, _stdout, _stderr)

        extra_ok_codes = extra_ok_codes or []
        if obj.returncode and obj.returncode in extra_ok_codes:
            # Treat the explicitly tolerated exit codes as success.
            obj.returncode = None

        if obj.returncode and log_fail_as_error:
            LOG.error(m)
        else:
            LOG.debug(m)

        if obj.returncode and check_exit_code:
            raise RuntimeError(m)
    finally:
        # NOTE(termie): this appears to be necessary to let the subprocess
        #               call clean something up in between calls, without
        #               it two execute calls in a row hangs the second one
        greenthread.sleep(0)

    if return_stderr:
        return (_stdout, _stderr)
    return _stdout
def _inner():
    """Copy the input file to the output file chunk by chunk.

    Reads ``rw_handles.READ_CHUNKSIZE`` at a time.  An empty read stops
    the task and completes ``self._done`` with ``True``.  Any error stops
    the task and completes ``self._done`` with an
    ``ImageTransferException``.
    """
    self._running = True
    while self._running:
        try:
            chunk = self._input_file.read(rw_handles.READ_CHUNKSIZE)
            if not chunk:
                LOG.debug("File read-write task is done.")
                self.stop()
                self._done.send(True)
            self._output_file.write(chunk)

            # update lease progress if applicable
            for handle in (self._input_file, self._output_file):
                if hasattr(handle, "update_progress"):
                    handle.update_progress()
            greenthread.sleep(FILE_READ_WRITE_TASK_SLEEP_TIME)
        except Exception as excep:
            self.stop()
            excep_msg = _("Error occurred during file read-write "
                          "task.")
            LOG.exception(excep_msg)
            excep = exceptions.ImageTransferException(excep_msg, excep)
            self._done.send_exception(excep)
def wait_until_dvs_portgroup_available(session, vm_ref, pg_name, wait_time):
    """Poll the VM's host until the DVS portgroup shows up.

    Checks every five seconds for up to ``wait_time`` seconds.

    :returns: True once a DistributedVirtualPortgroup whose name is in
        ``pg_name`` is found, False if the wait time runs out.
    """
    waited = 0
    while waited < wait_time:
        host = session._call_method(vim_util, "get_dynamic_property",
                                    vm_ref, "VirtualMachine",
                                    "runtime.host")
        networks_prop = session._call_method(vim_util,
                                             "get_dynamic_property",
                                             host, "HostSystem",
                                             "network")
        networks = (networks_prop.ManagedObjectReference
                    if networks_prop else [])
        for net in networks:
            if net._type != 'DistributedVirtualPortgroup':
                continue
            config = session._call_method(vim_util,
                                          "get_dynamic_property",
                                          net, net._type, "config")
            if config.name in pg_name:
                LOG.debug("DistributedVirtualPortgroup created %s ",
                          pg_name)
                return True

        LOG.debug("Portgroup %s not created yet. Retrying again "
                  "after 5 seconds", pg_name)
        greenthread.sleep(5)
        waited += 5

    if waited >= wait_time:
        LOG.debug("Portgroup %(pg)s not created within %(secs)s secs",
                  {'pg': pg_name, 'secs': wait_time})
    return False
def tunnel_exists(self, name=None, folder='Common'):
    """Return whether the named tunnel exists in the given folder.

    A 401 response is retried once after a one-second pause.  A 404
    response means the tunnel does not exist.  Any other error status
    raises ``VXLANQueryException``.
    """
    folder = str(folder).replace('/', '')
    request_url = (self.bigip.icr_url + '/net/tunnels/tunnel/' +
                   '~' + folder + '~' + name)

    for attempt in range(2):
        if attempt > 0:
            Log.error('VXLAN', 'Attempting REST retry after 401 error')
        response = self.bigip.icr_session.get(
            request_url, timeout=const.CONNECTION_TIMEOUT)
        status = response.status_code

        if status < 400:
            return True
        if status == 404:
            return False
        if status == 401 and attempt < 1:
            greenthread.sleep(1)
            continue
        # A second 401, or any other error status.
        Log.error('VXLAN', response.text)
        raise exceptions.VXLANQueryException(response.text)
    return False
def consume(self, limit=None):
    """Wait for one item on the current queue and hand it to the callback.

    Yields to other greenthreads while the queue is empty.  After the
    callback has run, ``StopIteration`` is raised.  ``limit`` is not
    used.
    """
    while True:
        message = self.get(self.current_queue)
        if not message:
            greenthread.sleep(0)
            continue
        self.current_callback(message)
        raise StopIteration()
def test_run_instances(self):
    """Boot one instance, wait until it is running, then terminate it.

    Skipped (with a debug message) for the 'fake' connection type.
    """
    if FLAGS.connection_type == 'fake':
        LOG.debug(_("Can't test instances without a real virtual env."))
        return

    rv = self.cloud.run_instances(
        self.context,
        image_id=FLAGS.default_image,
        instance_type=FLAGS.default_instance_type,
        max_count=1)
    # TODO: check for proper response
    first_reservation = rv['reservationSet'][0]
    reservation_id = first_reservation.keys()[0]
    booted = first_reservation[reservation_id][0]
    LOG.debug(_("Need to watch instance %s until it's running..."),
              booted['instance_id'])

    running = False
    while not running:
        greenthread.sleep(1)
        info = self.cloud._get_instance(booted['instance_id'])
        LOG.debug(info['state'])
        running = info['state'] == power_state.RUNNING
    self.assert_(rv)

    if FLAGS.connection_type != 'fake':
        time.sleep(45)  # Should use boto for polling here

    for reservation in rv['reservationSet']:
        for member in reservation[reservation.keys()[0]]:
            member_id = member['instance_id']
            LOG.debug(_("Terminating instance %s"), member_id)
            self.compute.terminate_instance(member_id)
def execute(cmd, root_helper=None, process_input=None, addl_env=None,
            check_exit_code=True, return_stderr=False):
    """Run a command in a subprocess and return its output.

    :param cmd: list of command words.
    :param root_helper: optional command prefix, split with ``shlex``.
    :param process_input: data sent to the process's stdin, if any.
    :param addl_env: extra environment variables for the process.
    :param check_exit_code: raise ``RuntimeError`` on a non-zero exit.
    :param return_stderr: return ``(stdout, stderr)`` instead of stdout.
    """
    if root_helper:
        cmd = shlex.split(root_helper) + cmd
    cmd = map(str, cmd)
    LOG.debug(_("Running command: %s"), cmd)

    env = os.environ.copy()
    if addl_env:
        env.update(addl_env)

    try:
        pipe = subprocess.PIPE
        obj = utils.subprocess_popen(cmd, shell=False,
                                     stdin=pipe,
                                     stdout=pipe,
                                     stderr=pipe,
                                     env=env)
        if process_input:
            _stdout, _stderr = obj.communicate(process_input)
        else:
            _stdout, _stderr = obj.communicate()
        obj.stdin.close()

        details = {'cmd': cmd, 'code': obj.returncode,
                   'stdout': _stdout, 'stderr': _stderr}
        m = _("\nCommand: %(cmd)s\nExit code: %(code)s\nStdout: %(stdout)r\n"
              "Stderr: %(stderr)r") % details
        LOG.debug(m)
        if obj.returncode and check_exit_code:
            raise RuntimeError(m)
    finally:
        # NOTE(termie): this appears to be necessary to let the subprocess
        #               call clean something up in between calls, without
        #               it two execute calls in a row hangs the second one
        greenthread.sleep(0)

    if return_stderr:
        return (_stdout, _stderr)
    return _stdout
def _run_ssh(self, cmd_list, check_exit=True, attempts=2):
    """Run a command over the SSH connection, retrying on failure.

    ``cmd_list`` is checked with ``check_ssh_injection`` and joined with
    spaces.  After a failed attempt the method pauses for a random 0.2-5
    seconds if another attempt remains, and re-creates the connection
    when its transport is no longer alive.

    :returns: whatever ``_ssh_execute`` returns.
    :raises exceptions.SSHException: when every attempt failed.
    """
    self.check_ssh_injection(cmd_list)
    command = ' '.join(cmd_list)

    try:
        total_attempts = attempts
        while attempts > 0:
            attempts -= 1
            try:
                return self._ssh_execute(command,
                                         check_exit_code=check_exit)
            except Exception as failure:
                self._logger.error(failure)
                retry_pending = attempts > 0
                if retry_pending:
                    greenthread.sleep(randint(20, 500) / 100.0)
                transport = self.ssh.get_transport()
                if not transport.is_alive():
                    self._create_ssh()

        details = {'total_attempts': total_attempts, 'command': command}
        msg = ("SSH Command failed after '%(total_attempts)r' "
               "attempts : '%(command)s'" % details)
        self._logger.error(msg)
        raise exceptions.SSHException(message=msg)
    except Exception:
        self._logger.error("Error running ssh command: %s" % command)
        raise
def execute(cmd, root_helper=None, process_input=None, addl_env=None,
            check_exit_code=True, return_stderr=False):
    """Run a command through ``create_process`` and return its output.

    A non-zero exit code is logged as an error and, when
    ``check_exit_code`` is true, raised as ``RuntimeError``.  Returns
    stdout, or ``(stdout, stderr)`` when ``return_stderr`` is true.
    """
    try:
        obj, cmd = create_process(cmd, root_helper=root_helper,
                                  addl_env=addl_env)
        if process_input:
            _stdout, _stderr = obj.communicate(process_input)
        else:
            _stdout, _stderr = obj.communicate()
        obj.stdin.close()

        details = {'cmd': cmd, 'code': obj.returncode,
                   'stdout': _stdout, 'stderr': _stderr}
        m = _("\nCommand: %(cmd)s\nExit code: %(code)s\nStdout: %(stdout)r\n"
              "Stderr: %(stderr)r") % details
        if not obj.returncode:
            LOG.debug(m)
        else:
            LOG.error(m)
            if check_exit_code:
                raise RuntimeError(m)
    finally:
        # NOTE(termie): this appears to be necessary to let the subprocess
        #               call clean something up in between calls, without
        #               it two execute calls in a row hangs the second one
        greenthread.sleep(0)

    if return_stderr:
        return (_stdout, _stderr)
    return _stdout
def _bind_security_rules(self, port, sg_rules):
    """Create ACLs on the port for the rules that have none yet.

    Rules already matched by an existing ACL are skipped.  The new ACLs
    are added to the port in a single call.
    """
    known_acls = port.associators(
        wmi_result_class=self._PORT_EXT_ACL_SET_DATA)
    weights = self._get_new_weights(sg_rules, known_acls)

    # Only rules without a matching ACL get a new one.
    pending_acls = []
    for rule in sg_rules:
        if self._filter_security_acls(rule, known_acls):
            # ACL already exists.
            continue

        # The next unused weight is the one at index len(pending_acls).
        new_acl = self._create_security_acl(rule,
                                            weights[len(pending_acls)])
        pending_acls.append(new_acl)

        # Remember the rule so that the same rule is not processed twice.
        known_acls.append(rule)

        # yielding to other threads that must run (like state reporting)
        greenthread.sleep()

    if pending_acls:
        self._jobutils.add_multiple_virt_features(pending_acls, port)
def test_proxycallback_handles_exceptions(self):
    """A failure while unpacking a message must not hang later calls."""
    real_unpack = rpc_amqp.unpack_context
    state = {'unpacked': False}

    def failing_unpack(*args, **kwargs):
        state['unpacked'] = True
        raise test.TestingException('moo')

    self.stubs.Set(rpc_amqp, 'unpack_context', failing_unpack)

    cast_msg = {"method": "echo", "args": {"value": 41}}
    self.rpc.cast(FLAGS, self.context, 'test', cast_msg)

    # Wait for the cast to complete.
    for _poll in xrange(50):
        if state['unpacked']:
            break
        greenthread.sleep(0.1)
    else:
        self.fail("Timeout waiting for message to be consued")

    # Now see if we get a response even though we raised an
    # exception for the cast above.
    self.stubs.Set(rpc_amqp, 'unpack_context', real_unpack)

    expected = 42
    call_msg = {"method": "echo", "args": {"value": expected}}
    result = self.rpc.call(FLAGS, self.context, 'test', call_msg)
    self.assertEqual(expected, result)
def _run_ssh(self, command, check_exit=True, attempts=1):
    """Run a command on the SAN over a pooled SSH connection.

    The pool is created on first use.  The command is tried up to
    ``attempts`` times, with a random 0.2-5 second pause after each
    failed attempt.

    :param command: the command line to execute.
    :param check_exit: passed to ``_ssh_execute`` as ``check_exit_code``.
    :param attempts: maximum number of tries.
    :returns: whatever ``_ssh_execute`` returns.
    :raises paramiko.SSHException: when every attempt failed.
    """
    if not self.sshpool:
        self.sshpool = utils.SSHPool(
            self.config.san_ip,
            self.config.san_ssh_port,
            self.config.ssh_conn_timeout,
            self.config.san_login,
            password=self.config.san_password,
            privatekey=self.config.san_private_key,
            min_size=self.config.ssh_min_pool_conn,
            max_size=self.config.ssh_max_pool_conn)
    try:
        total_attempts = attempts
        with self.sshpool.item() as ssh:
            while attempts > 0:
                attempts -= 1
                try:
                    return self._ssh_execute(ssh, command,
                                             check_exit_code=check_exit)
                except Exception as e:
                    LOG.error(e)
                    greenthread.sleep(randint(20, 500) / 100.0)
            # Interpolate the message here.  Passing the format string
            # and a dict as two exception arguments leaves the
            # placeholders unexpanded.
            msg = (_("SSH Command failed after "
                     "'%(total_attempts)r' attempts"
                     ": '%(command)s'") %
                   {'total_attempts': total_attempts,
                    'command': command})
            raise paramiko.SSHException(msg)
    except Exception:
        LOG.error(_("Error running ssh command: %s") % command)
        # A bare raise keeps the original traceback.
        raise
def _await_port_status(self, context, port_id, ip_address):
    """Wait until the address behind a port can be reached.

    Makes ``CONF.port_allocate_retries + 1`` connection checks (a single
    check when the option is zero or negative), pausing
    ``CONF.port_allocate_retries_interval`` seconds between failed
    checks.

    :param context: request context (not used here).
    :param port_id: port identifier, used for logging and in the error.
    :param ip_address: address passed to ``_check_connect_sucess``.
    :returns: the 1-based number of the check that succeeded.
    :raises exception.PortNotattach: when every check failed.
    """
    # TODO(yamahata): creating volume simultaneously
    #                 reduces creation time?
    # TODO(yamahata): eliminate dumb polling
    start = time.time()
    retries = CONF.port_allocate_retries
    if retries < 0:
        LOG.warn(_LW("Treating negative config value (%(retries)s) for "
                     "'port_allocate_retries' as 0."),
                 {'retries': retries})
    # (1) treat negative config value as 0
    # (2) the configured value is 0, one attempt should be made
    # (3) the configured value is > 0, then the total number attempts
    #     is (retries + 1)
    attempts = 1
    if retries >= 1:
        attempts = retries + 1
    for attempt in range(1, attempts + 1):
        LOG.debug("Port id: %s finished being attached", port_id)
        if self._check_connect_sucess(ip_address):
            return attempt
        # Pause before the next check.  There is nothing to wait for
        # after the final one.
        if attempt < attempts:
            greenthread.sleep(CONF.port_allocate_retries_interval)
    # NOTE(harlowja): Should only happen if we ran out of attempts
    raise exception.PortNotattach(port_id=port_id,
                                  seconds=int(time.time() - start),
                                  attempts=attempts)
def unfilter_instance(self, instance, network_info):
    """Clear out the nwfilter rules.

    Each VIF's instance filter is looked up and undefined.  A filter that
    is still in use is retried once a second, up to
    ``CONF.live_migration_retry_count`` tries; the error from the last
    try is re-raised.  Any other libvirt error on an earlier try is
    treated as "filter not found" and the filter is skipped.
    """
    for vif in network_info:
        nic_id = vif['address'].replace(':', '')
        filter_name = self._instance_filter_name(instance, nic_id)

        # nwfilters may be defined in a separate thread in the case
        # of libvirt non-blocking mode, so we wait for completion
        max_retry = CONF.live_migration_retry_count
        last_try = max_retry - 1
        for cnt in range(max_retry):
            try:
                nwfilter = self._conn.nwfilterLookupByName(filter_name)
                nwfilter.undefine()
                break
            except libvirt.libvirtError as err:
                if cnt == last_try:
                    raise

                if err.get_error_code() != libvirt.VIR_ERR_OPERATION_INVALID:
                    LOG.debug('The nwfilter(%s) is not found.',
                              filter_name, instance=instance)
                    break

                # This happens when the instance filter is still in use
                # (ie. when the instance has not terminated properly)
                LOG.info('Failed to undefine network filter '
                         '%(name)s. Try %(cnt)d of %(max_retry)d.',
                         {'name': filter_name,
                          'cnt': cnt + 1,
                          'max_retry': max_retry},
                         instance=instance)
                greenthread.sleep(1)
def _inner():
    """Fixed-interval loop body that names the task in overrun warnings.

    Calls ``self.f`` until the loop is stopped, sleeping for the unused
    part of ``interval`` after each call (zero when the call overran).
    The outcome is reported through ``done``.
    """
    if initial_delay:
        greenthread.sleep(initial_delay)

    try:
        while self._running:
            began = timeutils.utcnow()
            self.f(*self.args, **self.kw)
            finished = timeutils.utcnow()
            if not self._running:
                break
            remaining = interval - timeutils.delta_seconds(began, finished)
            if remaining <= 0:
                warn_args = {"func_name": repr(self.f),
                             "delay": -remaining}
                LOG.warn(_LW("task %(func_name)s run outlasted "
                             "interval by %(delay)s sec"),
                         warn_args)
            pause = remaining if remaining > 0 else 0
            greenthread.sleep(pause)
    except LoopingCallDone as finished_exc:
        self.stop()
        done.send(finished_exc.retvalue)
    except Exception:
        LOG.exception(_LE("in fixed duration looping call"))
        done.send_exception(*sys.exc_info())
        return
    else:
        done.send(True)
def wait_creation(self, context, volume):
    """Block until the volume's status is no longer 'creating'.

    The volume is re-read once a second by its id.  There is no timeout.
    """
    volume_id = volume['id']
    current = self.get(context, volume_id)
    while current['status'] == 'creating':
        greenthread.sleep(1)
        current = self.get(context, volume_id)
def _ssh_execute(self, cmd_list, check_exit_code=True, attempts=1):
    """Execute cli with status update.

    Executes CLI commands such as cfgsave where status return is expected.

    The command runs over a pooled SSH connection (the pool is created on
    first use).  After a random 0.2-5 second pause, ``ZoneConstant.YES``
    is written to the command's stdin, and the exit status is then read
    from the channel.

    :param cmd_list: command words; checked with
        ``utils.check_ssh_injection`` and joined with single spaces.
    :param check_exit_code: when true, an exit status other than 0 or -1
        counts as a failed attempt.
    :param attempts: maximum number of times to run the command.
    :returns: True as soon as one attempt succeeds.
    :raises processutils.ProcessExecutionError: after the last attempt
        has failed.
    """
    utils.check_ssh_injection(cmd_list)
    command = ' '.join(cmd_list)

    if not self.sshpool:
        self.sshpool = ssh_utils.SSHPool(self.switch_ip,
                                         self.switch_port,
                                         None,
                                         self.switch_user,
                                         self.switch_pwd,
                                         min_size=1,
                                         max_size=5)
    stdin, stdout, stderr = None, None, None
    LOG.debug("Executing command via ssh: %s" % command)
    # Holds the most recent failure so it can be reported after the loop.
    last_exception = None
    try:
        with self.sshpool.item() as ssh:
            while attempts > 0:
                attempts -= 1
                try:
                    stdin, stdout, stderr = ssh.exec_command(command)
                    # Pause before answering; presumably this gives the
                    # remote CLI time to show its confirmation prompt.
                    # TODO confirm.
                    greenthread.sleep(random.randint(20, 500) / 100.0)
                    stdin.write("%s\n" % ZoneConstant.YES)
                    channel = stdout.channel
                    exit_status = channel.recv_exit_status()
                    LOG.debug("Exit Status from ssh:%s", exit_status)
                    # exit_status == -1 if no exit code was returned
                    if exit_status != -1:
                        LOG.debug('Result was %s' % exit_status)
                        if check_exit_code and exit_status != 0:
                            raise processutils.ProcessExecutionError(
                                exit_code=exit_status,
                                stdout=stdout,
                                stderr=stderr,
                                cmd=command)
                        else:
                            return True
                    else:
                        return True
                except Exception as e:
                    # This also catches the ProcessExecutionError raised
                    # just above, so a bad exit status is retried too.
                    LOG.error(e)
                    last_exception = e
                    greenthread.sleep(random.randint(20, 500) / 100.0)
            LOG.debug("Handling error case after "
                      "SSH:%s", last_exception)
            try:
                raise processutils.ProcessExecutionError(
                    exit_code=last_exception.exit_code,
                    stdout=last_exception.stdout,
                    stderr=last_exception.stderr,
                    cmd=last_exception.cmd)
            except AttributeError:
                # The last failure lacks those attributes (or no attempt
                # was made at all), so report a generic error.
                raise processutils.ProcessExecutionError(
                    exit_code=-1,
                    stdout="",
                    stderr="Error running SSH command",
                    cmd=command)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            LOG.error(_LE("Error executing command via ssh: %s"), e)
    finally:
        # Close whichever streams the last exec_command call produced.
        if stdin:
            stdin.flush()
            stdin.close()
        if stdout:
            stdout.close()
        if stderr:
            stderr.close()
def f(id):
    """Record ``id`` ten times, yielding to other greenthreads each time."""
    remaining = 10
    while remaining:
        seen_threads.append(id)
        greenthread.sleep(0)
        remaining -= 1
def execute(*cmd, **kwargs):
    """
    Helper method to shell out and execute a command through subprocess with
    optional retry.

    :param cmd:             Passed to subprocess.Popen.
    :type cmd:              string
    :param process_input:   Send to opened process.
    :type process_input:    string
    :param check_exit_code: Single bool, int, or list of allowed exit
                            codes.  Defaults to [0].  Raise
                            :class:`ProcessExecutionError` unless
                            program exits with one of these codes.
                            ``False`` disables the check; ``True`` is
                            the same as [0].
    :type check_exit_code:  boolean, int, or [int]
    :param delay_on_retry:  True | False. Defaults to True. If set to True,
                            wait a short amount of time before retrying.
    :type delay_on_retry:   boolean
    :param attempts:        How many times to retry cmd.
    :type attempts:         int
    :param run_as_root:     True | False. Defaults to False. If set to True,
                            the command is prefixed by the command specified
                            in the root_helper kwarg.
    :type run_as_root:      boolean
    :param root_helper:     command to prefix to commands called with
                            run_as_root=True
    :type root_helper:      string
    :param shell:           whether or not there should be a shell used to
                            execute this command. Defaults to false.
    :type shell:            boolean
    :returns:               (stdout, stderr) from process execution
    :raises:                :class:`UnknownArgumentError` on
                            receiving unknown arguments
    :raises:                :class:`ProcessExecutionError`
    :raises:                :class:`NoRootWrapSpecified` when root is
                            requested without a root helper
    """

    process_input = kwargs.pop('process_input', None)
    check_exit_code = kwargs.pop('check_exit_code', [0])
    ignore_exit_code = False
    delay_on_retry = kwargs.pop('delay_on_retry', True)
    attempts = kwargs.pop('attempts', 1)
    run_as_root = kwargs.pop('run_as_root', False)
    root_helper = kwargs.pop('root_helper', '')
    shell = kwargs.pop('shell', False)

    # bool must be tested before int: bool is a subclass of int.
    if isinstance(check_exit_code, bool):
        ignore_exit_code = not check_exit_code
        check_exit_code = [0]
    elif isinstance(check_exit_code, int):
        check_exit_code = [check_exit_code]

    # Anything left in kwargs was not recognized above.
    if kwargs:
        raise UnknownArgumentError(_('Got unknown keyword args '
                                     'to utils.execute: %r') % kwargs)

    # The root helper is only needed when not already running as root.
    if run_as_root and os.geteuid() != 0:
        if not root_helper:
            raise NoRootWrapSpecified(
                message=('Command requested root, but did not specify a root '
                         'helper.'))
        cmd = shlex.split(root_helper) + list(cmd)

    cmd = [str(c) for c in cmd]

    while attempts > 0:
        attempts -= 1
        try:
            LOG.debug(_('Running cmd (subprocess): %s'), ' '.join(cmd))
            _PIPE = subprocess.PIPE  # pylint: disable=E1101

            if os.name == 'nt':
                preexec_fn = None
                close_fds = False
            else:
                preexec_fn = _subprocess_setup
                close_fds = True

            obj = subprocess.Popen(cmd,
                                   stdin=_PIPE,
                                   stdout=_PIPE,
                                   stderr=_PIPE,
                                   close_fds=close_fds,
                                   preexec_fn=preexec_fn,
                                   shell=shell,
                                   universal_newlines=True)
            result = None
            if process_input is not None:
                result = obj.communicate(process_input)
            else:
                result = obj.communicate()
            obj.stdin.close()  # pylint: disable=E1101
            _returncode = obj.returncode  # pylint: disable=E1101
            if _returncode:
                LOG.debug(_('Result was %s') % _returncode)
                if not ignore_exit_code and _returncode not in check_exit_code:
                    (stdout, stderr) = result
                    raise ProcessExecutionError(exit_code=_returncode,
                                                stdout=stdout,
                                                stderr=stderr,
                                                cmd=' '.join(cmd))
            return result
        except ProcessExecutionError:
            # Re-raise on the last attempt; otherwise go round again.
            if not attempts:
                raise
            else:
                LOG.debug(_('%r failed. Retrying.'), cmd)
                if delay_on_retry:
                    greenthread.sleep(random.randint(20, 200) / 100.0)
        finally:
            # NOTE(termie): this appears to be necessary to let the subprocess
            #               call clean something up in between calls, without
            #               it two execute calls in a row hangs the second one
            greenthread.sleep(0)
def execute(*cmd, **kwargs):
    """Helper method to execute command with optional retry.

    If you add a run_as_root=True command, don't forget to add the
    corresponding filter to etc/sm_api/rootwrap.d !

    :param cmd:                Passed to subprocess.Popen.
    :param process_input:      Send to opened process.
    :param check_exit_code:    Single bool, int, or list of allowed exit
                               codes.  Defaults to [0].  Raise
                               exception.ProcessExecutionError unless
                               program exits with one of these codes.
                               False disables the check; True is the
                               same as [0].
    :param delay_on_retry:     True | False. Defaults to True. If set to
                               True, wait a short amount of time
                               before retrying.
    :param attempts:           How many times to retry cmd.
    :param run_as_root:        True | False. Defaults to False. If set to True,
                               the command is run with rootwrap.
    :param shell:              Passed to subprocess.Popen as ``shell``.
                               Defaults to False.

    :raises exception.SmApiException: on receiving unknown arguments
    :raises exception.ProcessExecutionError:

    :returns: a tuple, (stdout, stderr) from the spawned process, or None if
             the command fails.
    """
    process_input = kwargs.pop('process_input', None)
    check_exit_code = kwargs.pop('check_exit_code', [0])
    ignore_exit_code = False
    # bool must be tested before int: bool is a subclass of int.
    if isinstance(check_exit_code, bool):
        ignore_exit_code = not check_exit_code
        check_exit_code = [0]
    elif isinstance(check_exit_code, int):
        check_exit_code = [check_exit_code]
    delay_on_retry = kwargs.pop('delay_on_retry', True)
    attempts = kwargs.pop('attempts', 1)
    run_as_root = kwargs.pop('run_as_root', False)
    shell = kwargs.pop('shell', False)

    # Anything left in kwargs was not recognized above.
    if len(kwargs):
        raise exception.SmApiException(_('Got unknown keyword args '
                                         'to utils.execute: %r') % kwargs)

    # The rootwrap prefix is only added when not already running as root.
    if run_as_root and os.geteuid() != 0:
        cmd = ['sudo', 'sm_api-rootwrap', CONF.rootwrap_config] + list(cmd)

    cmd = [str(c) for c in cmd]

    while attempts > 0:
        attempts -= 1
        try:
            LOG.debug(_('Running cmd (subprocess): %s'), ' '.join(cmd))
            _PIPE = subprocess.PIPE  # pylint: disable=E1101

            if os.name == 'nt':
                preexec_fn = None
                close_fds = False
            else:
                preexec_fn = _subprocess_setup
                close_fds = True

            obj = subprocess.Popen(cmd,
                                   stdin=_PIPE,
                                   stdout=_PIPE,
                                   stderr=_PIPE,
                                   close_fds=close_fds,
                                   preexec_fn=preexec_fn,
                                   shell=shell,
                                   universal_newlines=True)
            result = None
            if process_input is not None:
                result = obj.communicate(process_input)
            else:
                result = obj.communicate()
            obj.stdin.close()  # pylint: disable=E1101
            _returncode = obj.returncode  # pylint: disable=E1101
            LOG.debug(_('Result was %s') % _returncode)
            if not ignore_exit_code and _returncode not in check_exit_code:
                (stdout, stderr) = result
                raise exception.ProcessExecutionError(
                    exit_code=_returncode,
                    stdout=stdout,
                    stderr=stderr,
                    cmd=' '.join(cmd))
            return result
        except exception.ProcessExecutionError:
            # Re-raise on the last attempt; otherwise go round again.
            if not attempts:
                raise
            else:
                LOG.debug(_('%r failed. Retrying.'), cmd)
                if delay_on_retry:
                    greenthread.sleep(random.randint(20, 200) / 100.0)
        finally:
            # NOTE(termie): this appears to be necessary to let the subprocess
            #               call clean something up in between calls, without
            #               it two execute calls in a row hangs the second one
            greenthread.sleep(0)
def execute(*cmd, **kwargs):
    """Helper method to shell out and execute a command through subprocess.

    Allows optional retry.

    :param cmd:             Passed to subprocess.Popen.
    :type cmd:              string
    :param process_input:   Send to opened process.
    :type process_input:    string
    :param env_variables:   Environment variables and their values that
                            will be set for the process.
    :type env_variables:    dict
    :param check_exit_code: Single bool, int, or list of allowed exit
                            codes.  Defaults to [0].  Raise
                            :class:`ProcessExecutionError` unless
                            program exits with one of these code.
    :type check_exit_code:  boolean, int, or [int]
    :param delay_on_retry:  True | False. Defaults to True. If set to True,
                            wait a short amount of time before retrying.
    :type delay_on_retry:   boolean
    :param attempts:        How many times to run cmd at most.
    :type attempts:         int
    :param run_as_root:     True | False. Defaults to False. If set to True,
                            the command is prefixed by the command specified
                            in the root_helper kwarg.
    :type run_as_root:      boolean
    :param root_helper:     command to prefix to commands called with
                            run_as_root=True
    :type root_helper:      string
    :param shell:           whether or not there should be a shell used to
                            execute this command. Defaults to false.
    :type shell:            boolean
    :param loglevel:        log level for execute commands.
    :type loglevel:         int.  (Should be logging.DEBUG or logging.INFO)
    :returns:               (stdout, stderr) from process execution
    :raises:                :class:`UnknownArgumentError` on
                            receiving unknown arguments
    :raises:                :class:`NoRootWrapSpecified` when run_as_root is
                            requested without a root_helper
    :raises:                :class:`ProcessExecutionError`
    """

    process_input = kwargs.pop('process_input', None)
    env_variables = kwargs.pop('env_variables', None)
    check_exit_code = kwargs.pop('check_exit_code', [0])
    ignore_exit_code = False
    delay_on_retry = kwargs.pop('delay_on_retry', True)
    attempts = kwargs.pop('attempts', 1)
    run_as_root = kwargs.pop('run_as_root', False)
    root_helper = kwargs.pop('root_helper', '')
    shell = kwargs.pop('shell', False)
    loglevel = kwargs.pop('loglevel', logging.DEBUG)

    # bool must be tested before int: bool is a subclass of int.
    if isinstance(check_exit_code, bool):
        ignore_exit_code = not check_exit_code
        check_exit_code = [0]
    elif isinstance(check_exit_code, int):
        check_exit_code = [check_exit_code]

    if kwargs:
        raise UnknownArgumentError(_('Got unknown keyword args: %r') % kwargs)

    if run_as_root and hasattr(os, 'geteuid') and os.geteuid() != 0:
        if not root_helper:
            raise NoRootWrapSpecified(
                message=_('Command requested root, but did not '
                          'specify a root helper.'))
        cmd = shlex.split(root_helper) + list(cmd)

    # Build a real list: on Python 3 map() returns a one-shot iterator that
    # the join below would exhaust, leaving Popen with an empty command.
    cmd = [str(c) for c in cmd]
    sanitized_cmd = strutils.mask_password(' '.join(cmd))

    while attempts > 0:
        attempts -= 1
        try:
            LOG.log(loglevel, _('Running cmd (subprocess): %s'), sanitized_cmd)
            _PIPE = subprocess.PIPE  # pylint: disable=E1101

            if os.name == 'nt':
                preexec_fn = None
                close_fds = False
            else:
                preexec_fn = _subprocess_setup
                close_fds = True

            obj = subprocess.Popen(cmd,
                                   stdin=_PIPE,
                                   stdout=_PIPE,
                                   stderr=_PIPE,
                                   close_fds=close_fds,
                                   preexec_fn=preexec_fn,
                                   shell=shell,
                                   env=env_variables)
            result = None
            for _i in six.moves.range(20):
                # NOTE(russellb) 20 is an arbitrary number of retries to
                # prevent any chance of looping forever here.
                try:
                    if process_input is not None:
                        result = obj.communicate(process_input)
                    else:
                        result = obj.communicate()
                except OSError as e:
                    # Interrupted or would-block: try communicate() again.
                    if e.errno in (errno.EAGAIN, errno.EINTR):
                        continue
                    raise
                break
            obj.stdin.close()  # pylint: disable=E1101
            _returncode = obj.returncode  # pylint: disable=E1101
            LOG.log(loglevel, 'Result was %s', _returncode)
            if not ignore_exit_code and _returncode not in check_exit_code:
                (stdout, stderr) = result
                # Mask passwords before they end up in the exception text.
                sanitized_stdout = strutils.mask_password(stdout)
                sanitized_stderr = strutils.mask_password(stderr)
                raise ProcessExecutionError(exit_code=_returncode,
                                            stdout=sanitized_stdout,
                                            stderr=sanitized_stderr,
                                            cmd=sanitized_cmd)
            return result
        except ProcessExecutionError:
            if not attempts:
                raise
            else:
                LOG.log(loglevel, _('%r failed. Retrying.'), sanitized_cmd)
                if delay_on_retry:
                    greenthread.sleep(random.randint(20, 200) / 100.0)
        finally:
            # NOTE(termie): this appears to be necessary to let the subprocess
            #               call clean something up in between calls, without
            #               it two execute calls in a row hangs the second one
            greenthread.sleep(0)
def execute(*cmd, **kwargs):
    """Helper method to execute command with optional retry.

    If you add a run_as_root=True command, don't forget to add the
    corresponding filter to etc/cinder/rootwrap.d !

    :param cmd:                Passed to subprocess.Popen.
    :param process_input:      Send to opened process.
    :param check_exit_code:    Single bool, int, or list of allowed exit
                               codes.  Defaults to [0].  Raise
                               exception.ProcessExecutionError unless
                               program exits with one of these code.
    :param delay_on_retry:     True | False. Defaults to True. If set to
                               True, wait a short amount of time
                               before retrying.
    :param attempts:           How many times to run cmd at most.
    :param run_as_root:        True | False. Defaults to False. If set to True,
                               the command is prefixed by the command specified
                               in the root_helper FLAG (or by cinder-rootwrap
                               when rootwrap_config is set).
    :param shell:              Passed through to subprocess.Popen.
                               Defaults to False.

    :raises exception.Error: on receiving unknown arguments
    :raises exception.ProcessExecutionError:

    :returns: a tuple, (stdout, stderr) from the spawned process, or None if
             attempts is not positive and nothing is run.
    """

    process_input = kwargs.pop('process_input', None)
    check_exit_code = kwargs.pop('check_exit_code', [0])
    ignore_exit_code = False
    # bool must be tested before int: bool is a subclass of int.
    if isinstance(check_exit_code, bool):
        ignore_exit_code = not check_exit_code
        check_exit_code = [0]
    elif isinstance(check_exit_code, int):
        check_exit_code = [check_exit_code]
    delay_on_retry = kwargs.pop('delay_on_retry', True)
    attempts = kwargs.pop('attempts', 1)
    run_as_root = kwargs.pop('run_as_root', False)
    shell = kwargs.pop('shell', False)

    if len(kwargs):
        raise exception.Error(_('Got unknown keyword args '
                                'to utils.execute: %r') % kwargs)

    if run_as_root:

        if FLAGS.rootwrap_config is None or FLAGS.root_helper != 'sudo':
            deprecated.warn(_('The root_helper option (which lets you specify '
                              'a root wrapper different from cinder-rootwrap, '
                              'and defaults to using sudo) is now deprecated. '
                              'You should use the rootwrap_config option '
                              'instead.'))

        if (FLAGS.rootwrap_config is not None):
            cmd = ['sudo', 'cinder-rootwrap',
                   FLAGS.rootwrap_config] + list(cmd)
        else:
            cmd = shlex.split(FLAGS.root_helper) + list(cmd)

    # Build a real list: on Python 3 map() returns a one-shot iterator that
    # the ' '.join(cmd) in the log call below would exhaust, leaving Popen
    # with an empty command.
    cmd = [str(c) for c in cmd]

    while attempts > 0:
        attempts -= 1
        try:
            LOG.debug(_('Running cmd (subprocess): %s'), ' '.join(cmd))
            _PIPE = subprocess.PIPE  # pylint: disable=E1101
            obj = subprocess.Popen(cmd,
                                   stdin=_PIPE,
                                   stdout=_PIPE,
                                   stderr=_PIPE,
                                   close_fds=True,
                                   preexec_fn=_subprocess_setup,
                                   shell=shell)
            result = None
            if process_input is not None:
                result = obj.communicate(process_input)
            else:
                result = obj.communicate()
            obj.stdin.close()  # pylint: disable=E1101
            _returncode = obj.returncode  # pylint: disable=E1101
            # Only non-zero results are logged.
            if _returncode:
                LOG.debug(_('Result was %s') % _returncode)
                if not ignore_exit_code and \
                        _returncode not in check_exit_code:
                    (stdout, stderr) = result
                    raise exception.ProcessExecutionError(
                        exit_code=_returncode,
                        stdout=stdout,
                        stderr=stderr,
                        cmd=' '.join(cmd))
            return result
        except exception.ProcessExecutionError:
            if not attempts:
                raise
            else:
                LOG.debug(_('%r failed. Retrying.'), cmd)
                if delay_on_retry:
                    greenthread.sleep(random.randint(20, 200) / 100.0)
        finally:
            # NOTE(termie): this appears to be necessary to let the subprocess
            #               call clean something up in between calls, without
            #               it two execute calls in a row hangs the second one
            greenthread.sleep(0)
def _create_or_update_agent(self, context, agent_state):
    """Refresh the agent's DB row, creating it when none exists yet.

    The row is looked up by the agent type and host found in agent_state.
    An existing row gets a new heartbeat timestamp; a missing row is
    created with created/started/heartbeat timestamps set to now.
    A registry notification is sent afterwards in both cases.

    :param context: request context whose session carries the transaction
    :param agent_state: dict reported by the agent; may be extended with
                        'resource_versions' read back from the DB
    :returns: agent status from the server's point of view: alive, new or
              revived. The agent can use it to decide whether to resync.
    """
    status = n_const.AGENT_ALIVE
    with context.session.begin(subtransactions=True):
        row = {key: agent_state[key]
               for key in ('agent_type', 'binary', 'host', 'topic')}
        if 'availability_zone' in agent_state:
            row['availability_zone'] = agent_state['availability_zone']
        configurations = agent_state.get('configurations', {})
        row['configurations'] = jsonutils.dumps(configurations)
        resource_versions = agent_state.get('resource_versions')
        if resource_versions:
            row['resource_versions'] = jsonutils.dumps(resource_versions)
        row['load'] = self._get_agent_load(agent_state)
        now = timeutils.utcnow()
        try:
            agent_db = self._get_agent_by_type_and_host(
                context, agent_state['agent_type'], agent_state['host'])
            if not agent_db.is_active:
                status = n_const.AGENT_REVIVED
                if 'resource_versions' not in agent_state:
                    # Feed agent_state with the resource_versions stored in
                    # the db so that
                    # _update_local_agent_resource_versions() will call
                    # version_manager and bring it up to date
                    agent_state['resource_versions'] = self._get_dict(
                        agent_db, 'resource_versions', ignore_missing=True)
            row['heartbeat_timestamp'] = now
            if agent_state.get('start_flag'):
                row['started_at'] = now
            greenthread.sleep(0)
            self._log_heartbeat(agent_state, agent_db, configurations)
            agent_db.update(row)
            event_type = events.AFTER_UPDATE
        except ext_agent.AgentNotFoundByTypeHost:
            greenthread.sleep(0)
            row['created_at'] = now
            row['started_at'] = now
            row['heartbeat_timestamp'] = now
            row['admin_state_up'] = cfg.CONF.enable_new_agents
            agent_db = Agent(**row)
            greenthread.sleep(0)
            context.session.add(agent_db)
            event_type = events.AFTER_CREATE
            self._log_heartbeat(agent_state, agent_db, configurations)
            status = n_const.AGENT_NEW
        greenthread.sleep(0)

    registry.notify(resources.AGENT, event_type, self,
                    context=context,
                    host=agent_state['host'],
                    plugin=self,
                    agent=agent_state)
    return status
def restore(self, context, backup_id, volume_id=None):
    """Make the RPC call to restore a volume backup.

    When no volume is given, a volume of the backup's size is created and
    polled once per second until it leaves the 'creating' status.

    :param context: request context
    :param backup_id: id of the backup to restore
    :param volume_id: id of the volume to overwrite, or None to create one
    :raises exception.InvalidBackup: backup is not 'available' or has no
                                     size
    :raises exception.InvalidVolume: volume is not 'available' or is
                                     smaller than the backup
    :returns: dict with the 'backup_id' and 'volume_id' used
    """
    backup = self.get(context, backup_id)
    if backup['status'] != 'available':
        msg = _('Backup status must be available')
        raise exception.InvalidBackup(reason=msg)

    size = backup['size']
    if size is None:
        msg = _('Backup to be restored has invalid size')
        raise exception.InvalidBackup(reason=msg)

    # Create a volume if none specified. If a volume is specified check
    # it is large enough for the backup
    if volume_id is None:
        name = 'restore_backup_%s' % backup_id
        description = 'auto-created_from_restore_from_swift'

        # Explicit mappings instead of locals(): only the values the
        # message needs are handed to the formatter.
        LOG.audit(_("Creating volume of %(size)s GB for restore of "
                    "backup %(backup_id)s"),
                  {'size': size, 'backup_id': backup_id},
                  context=context)
        volume = self.volume_api.create(context, size, name, description)
        volume_id = volume['id']

        # Wait for the new volume to leave the 'creating' status.
        while True:
            volume = self.volume_api.get(context, volume_id)
            if volume['status'] != 'creating':
                break
            greenthread.sleep(1)
    else:
        volume = self.volume_api.get(context, volume_id)
        volume_size = volume['size']
        if volume_size < size:
            err = (_('volume size %(volume_size)d is too small to restore '
                     'backup of size %(size)d.') %
                   {'volume_size': volume_size, 'size': size})
            raise exception.InvalidVolume(reason=err)

    if volume['status'] != "available":
        msg = _('Volume to be restored to must be available')
        raise exception.InvalidVolume(reason=msg)

    LOG.debug('Checking backup size %s against volume size %s',
              size, volume['size'])
    if size > volume['size']:
        msg = _('Volume to be restored to is smaller '
                'than the backup to be restored')
        raise exception.InvalidVolume(reason=msg)

    LOG.audit(_("Overwriting volume %(volume_id)s with restore of "
                "backup %(backup_id)s"),
              {'volume_id': volume_id, 'backup_id': backup_id},
              context=context)

    # Setting the status here rather than setting at start and unrolling
    # for each error condition, it should be a very small window
    self.db.backup_update(context, backup_id, {'status': 'restoring'})
    self.db.volume_update(context, volume_id, {'status':
                                               'restoring-backup'})
    self.backup_rpcapi.restore_backup(context,
                                      backup['host'],
                                      backup['id'],
                                      volume_id)

    d = {'backup_id': backup_id,
         'volume_id': volume_id, }

    return d
def _notification_reciever(self, env, start_response):
    """WSGI application: store one notification and start its recovery.

    The request body is parsed as JSON, checked with _check_json_param and
    stored through _create_notification_list_db. The stored row's
    "recover_by" and "progress" values then select what is started:

    * recover_by 0, progress 0: host_maintenance_mode thread, a sleep of
      the configured "node_err_wait" seconds, then an add_failed_host
      thread
    * recover_by 0, progress 3: host_maintenance_mode thread only
    * recover_by 1: add_failed_instance thread
    * recover_by 2: host_maintenance_mode thread with its last argument
      set to True
    * anything else: an "invalid value" message is logged

    :param env: WSGI environ; 'CONTENT_LENGTH' and 'wsgi.input' are read
    :param start_response: WSGI start_response callable
    :returns: a one-element list holding a fixed text body. The status is
              200 on success, 400 when the body is empty or fails the
              parameter check, and 500 when an exception is caught.
    """
    response_body = 'method _notification_reciever returned.\r\n'

    def respond(status):
        # A fresh header list per call, as the original code did.
        start_response(status, [('Content-Type', 'text/plain')])

    def report_failure(message_id):
        # Log the exception being handled by the caller and answer 500.
        error_type, error_value, traceback_ = sys.exc_info()
        tb_list = traceback.format_tb(traceback_)
        self.rc_util.syslogout_ex(message_id, syslog.LOG_ERR)
        self.rc_util.syslogout(error_type, syslog.LOG_ERR)
        self.rc_util.syslogout(error_value, syslog.LOG_ERR)
        for tb in tb_list:
            self.rc_util.syslogout(tb, syslog.LOG_ERR)
        respond('500 Internal Server Error')

    def start_thread(target, *args):
        th = threading.Thread(target=target, args=args)
        th.start()

    try:
        # CONTENT_LENGTH is a string in the WSGI environ; convert it so the
        # comparison below is numeric. The old name "len" also shadowed the
        # builtin.
        content_length = int(env['CONTENT_LENGTH'])
        if content_length <= 0:
            # Nothing to parse: answer explicitly instead of returning
            # without having called start_response.
            respond('400 Bad Request')
            return [response_body]

        body = env['wsgi.input'].read(content_length)
        json_data = json.loads(body)

        self.rc_util.syslogout_ex(
            "RecoveryController_0009", syslog.LOG_INFO)
        msg = "Recieved notification : " + body
        self.rc_util.syslogout(msg, syslog.LOG_INFO)

        ret = self._check_json_param(json_data)
        if ret == 1:
            # Return Response
            respond('400 Bad Request')
            return [response_body]

        # Insert notification into notification_list_db
        notification_list_dic = self._create_notification_list_db(
            json_data)

        # Return Response
        respond('200 OK')

        if notification_list_dic != {}:
            recover_by = notification_list_dic.get("recover_by")
            progress = notification_list_dic.get("progress")

            # Start thread
            if recover_by == 0 and progress == 0:
                start_thread(
                    self.rc_worker.host_maintenance_mode,
                    notification_list_dic.get("notification_id"),
                    notification_list_dic.get("notification_hostname"),
                    False)

                # Sleep until nova recognizes the node down.
                self.rc_util.syslogout_ex(
                    "RecoveryController_0029", syslog.LOG_INFO)
                dic = self.rc_config.get_value('recover_starter')
                node_err_wait = dic.get("node_err_wait")
                msg = "Sleeping " + node_err_wait \
                    + " sec before starting recovery thread," \
                    + " until nova recognizes the node down..."
                self.rc_util.syslogout(msg, syslog.LOG_INFO)
                greenthread.sleep(int(node_err_wait))

                retry_mode = False
                start_thread(
                    self.rc_starter.add_failed_host,
                    notification_list_dic.get("notification_id"),
                    notification_list_dic.get("notification_hostname"),
                    notification_list_dic.get("notification_cluster_port"),
                    retry_mode)
            elif recover_by == 0 and progress == 3:
                start_thread(
                    self.rc_worker.host_maintenance_mode,
                    notification_list_dic.get("notification_id"),
                    notification_list_dic.get("notification_hostname"),
                    False)
            elif recover_by == 1:
                retry_mode = False
                start_thread(
                    self.rc_starter.add_failed_instance,
                    notification_list_dic.get("notification_id"),
                    notification_list_dic.get("notification_uuid"),
                    retry_mode)
            elif recover_by == 2:
                start_thread(
                    self.rc_worker.host_maintenance_mode,
                    notification_list_dic.get("notification_id"),
                    notification_list_dic.get("notification_hostname"),
                    True)
            else:
                self.rc_util.syslogout_ex(
                    "RecoveryController_0010", syslog.LOG_INFO)
                # One-line literal: the old backslash continuation put the
                # source indentation into the logged message.
                self.rc_util.syslogout(
                    "Column \"recover_by\" "
                    "on notification_list DB is invalid value.",
                    syslog.LOG_INFO)

    except MySQLdb.Error:
        report_failure("RecoveryController_0011")
    except KeyError:
        report_failure("RecoveryController_0012")
    except:
        # Deliberately broad, as before: every failure is logged and
        # answered with a 500.
        report_failure("RecoveryController_0013")

    return [response_body]
def restore(self, context, backup_id, volume_id=None, name=None):
    """Validate a backup/volume pair and cast the restore to the backend.

    Without a volume_id a new volume of the backup's size is created
    (named name, or 'restore_backup_<backup_id>' when name is None) and
    polled once per second until it leaves the 'creating' status.

    :param context: request context, checked against the 'restore' policy
    :param backup_id: id of the backup to restore
    :param volume_id: id of the volume to overwrite, or None to create one
    :param name: name for the created volume; unused when volume_id is set
    :raises exception.InvalidBackup: backup is not available or has no size
    :raises exception.InvalidVolume: volume is not available or too small
    :returns: dict with 'backup_id', 'volume_id' and 'volume_name'
    """
    check_policy(context, 'restore')
    backup = self.get(context, backup_id)
    if backup['status'] != fields.BackupStatus.AVAILABLE:
        raise exception.InvalidBackup(
            reason=_('Backup status must be available'))

    backup_size = backup['size']
    if backup_size is None:
        raise exception.InvalidBackup(
            reason=_('Backup to be restored has invalid size'))

    if volume_id is not None:
        # Caller picked the target; it is size-checked further down.
        volume = self.volume_api.get(context, volume_id)
    else:
        if name is None:
            name = 'restore_backup_%s' % backup_id

        description = 'auto-created_from_restore_from_backup'

        LOG.info(_LI("Creating volume of %(size)s GB for restore of "
                     "backup %(backup_id)s."),
                 {'size': backup_size, 'backup_id': backup_id})
        volume = self.volume_api.create(context, backup_size, name,
                                        description)
        volume_id = volume['id']

        # Poll until the new volume leaves the 'creating' status.
        volume = self.volume_api.get(context, volume_id)
        while volume['status'] == 'creating':
            greenthread.sleep(1)
            volume = self.volume_api.get(context, volume_id)

    if volume['status'] != "available":
        raise exception.InvalidVolume(
            reason=_('Volume to be restored to must be available'))

    LOG.debug('Checking backup size %(bs)s against volume size %(vs)s',
              {'bs': backup_size, 'vs': volume['size']})
    if backup_size > volume['size']:
        too_small = (_('volume size %(volume_size)d is too small to restore '
                       'backup of size %(size)d.') %
                     {'volume_size': volume['size'], 'size': backup_size})
        raise exception.InvalidVolume(reason=too_small)

    LOG.info(_LI("Overwriting volume %(volume_id)s with restore of "
                 "backup %(backup_id)s"),
             {'volume_id': volume_id, 'backup_id': backup_id})

    # The statuses are flipped only here, after every check has passed,
    # instead of at the start with a rollback per error path; the window
    # should be very small.
    backup.host = self._get_available_backup_service_host(
        backup.host, backup.availability_zone)
    backup.status = fields.BackupStatus.RESTORING
    backup.restore_volume_id = volume.id
    backup.save()
    self.db.volume_update(context, volume_id,
                          {'status': 'restoring-backup'})

    self.backup_rpcapi.restore_backup(context, backup.host, backup,
                                      volume_id)

    return {
        'backup_id': backup_id,
        'volume_id': volume_id,
        'volume_name': volume['display_name'],
    }
def _ssh_execute(self, cmd_list, check_exit_code=True, attempts=1):
    """Run a batch of CLI commands over SSH and report success.

    cmd_list is a list of commands, each itself a list of parameters.
    Every command is passed to utils.check_ssh_injection first; the
    commands are then joined with " ; " and sent as one command line.

    :param cmd_list: list of commands, each a list of string parameters
    :param check_exit_code: when true, a non-zero exit status counts as a
                            failed attempt
    :param attempts: maximum number of times the command line is sent
    :raises processutils.ProcessExecutionError: when no attempt succeeds
    :returns: True when an attempt succeeds or reports no exit status
    """
    # Check that each command is secure
    for single_cmd in cmd_list:
        utils.check_ssh_injection(single_cmd)

    # Combine into a single command.
    command = ' ; '.join(' '.join(parts) for parts in cmd_list)

    if not self.sshpool:
        self.sshpool = ssh_utils.SSHPool(self.switch_ip,
                                         self.switch_port,
                                         None,
                                         self.switch_user,
                                         self.switch_pwd,
                                         min_size=1,
                                         max_size=5)
    stdin = stdout = stderr = None
    LOG.debug("Executing command via ssh: %s", command)
    last_exception = None
    try:
        with self.sshpool.item() as ssh:
            while attempts > 0:
                attempts -= 1
                try:
                    stdin, stdout, stderr = ssh.exec_command(command)
                    greenthread.sleep(random.randint(20, 500) / 100.0)
                    exit_status = stdout.channel.recv_exit_status()
                    LOG.debug("Exit Status from ssh:%s", exit_status)
                    # exit_status == -1 if no exit code was returned
                    if exit_status == -1:
                        return True
                    LOG.debug('Result was %s', exit_status)
                    if check_exit_code and exit_status != 0:
                        raise processutils.ProcessExecutionError(
                            exit_code=exit_status,
                            stdout=stdout,
                            stderr=stderr,
                            cmd=command)
                    return True
                except Exception as e:
                    # Remember the failure and back off before retrying.
                    msg = _("Exception: %s") % six.text_type(e)
                    LOG.error(msg)
                    last_exception = e
                    greenthread.sleep(random.randint(20, 500) / 100.0)
            LOG.debug("Handling error case after SSH:%s", last_exception)
            try:
                raise processutils.ProcessExecutionError(
                    exit_code=last_exception.exit_code,
                    stdout=last_exception.stdout,
                    stderr=last_exception.stderr,
                    cmd=last_exception.cmd)
            except AttributeError:
                # last_exception lacks those attributes: raise a generic
                # error instead.
                raise processutils.ProcessExecutionError(
                    exit_code=-1,
                    stdout="",
                    stderr="Error running SSH command",
                    cmd=command)
    except Exception as e:
        with excutils.save_and_reraise_exception():
            msg = (_("Error executing command via ssh: %s") %
                   six.text_type(e))
            LOG.error(msg)
    finally:
        if stdin:
            stdin.flush()
            stdin.close()
        if stdout:
            stdout.close()
        if stderr:
            stderr.close()
def restore(self, context, backup_id, volume_id=None):
    """Make the RPC call to restore a volume backup.

    The target volume may be 'available', or 'in-use' provided every
    attached instance reports the 'SHUTOFF' status. When no volume is
    given, one of the backup's size is created and polled once per second
    until it leaves the 'creating' status.

    :param context: request context, checked against the 'restore' policy
    :param backup_id: id of the backup to restore
    :param volume_id: id of the volume to overwrite, or None to create one
    :raises exception.InvalidBackup: backup is not 'available' or has no
                                     size
    :raises exception.InvalidVolume: volume status, attached instance
                                     status or volume size is unsuitable
    :returns: dict with the 'backup_id' and 'volume_id' used
    """
    check_policy(context, 'restore')
    backup = self.get(context, backup_id)
    if backup['status'] != 'available':
        msg = _('Backup status must be available')
        raise exception.InvalidBackup(reason=msg)

    size = backup['size']
    if size is None:
        msg = _('Backup to be restored has invalid size')
        raise exception.InvalidBackup(reason=msg)

    # Create a volume if none specified. If a volume is specified check
    # it is large enough for the backup
    if volume_id is None:
        name = 'restore_backup_%s' % backup_id
        description = 'auto-created_from_restore_from_backup'

        LOG.info(_LI("Creating volume of %(size)s GB for restore of "
                     "backup %(backup_id)s"),
                 {'size': size, 'backup_id': backup_id},
                 context=context)
        volume = self.volume_api.create(context, size, name, description)
        volume_id = volume['id']

        # Wait for the new volume to leave the 'creating' status.
        while True:
            volume = self.volume_api.get(context, volume_id)
            if volume['status'] != 'creating':
                break
            greenthread.sleep(1)
    else:
        volume = self.volume_api.get(context, volume_id)

    # The messages below used to say "backed up"; this is the restore path.
    if volume['status'] not in ["available", "in-use"]:
        msg = (_('Volume to be restored to must be available '
                 'or in-use, but the current status is "%s".')
               % volume['status'])
        raise exception.InvalidVolume(reason=msg)
    elif volume['status'] in ["in-use"]:
        for attachment in volume['volume_attachment']:
            instance_uuid = attachment['instance_uuid']
            instance = nova.API().get_server(context, instance_uuid)
            if instance.status not in ['SHUTOFF']:
                msg = (_('Volume to be restored to can be in-use, but the '
                         'attached vm should be in poweroff status, now vm '
                         'status is "%s".') % instance.status)
                raise exception.InvalidVolume(reason=msg)

    # record volume status in the display_description
    # NOTE(review): applied to every target volume here, not only in-use
    # ones -- confirm against the consumer of display_description.
    self.db.volume_update(context, volume_id,
                          {'display_description': volume['status']})

    LOG.debug('Checking backup size %(bs)s against volume size %(vs)s',
              {'bs': size, 'vs': volume['size']})
    # The volume size (GB) is scaled by 1024 before the comparison, so the
    # backup size is treated as MB here, as the error message states.
    if size > volume['size'] * 1024:
        msg = (_('volume size %(volume_size)d GB is too small to restore '
                 'backup of size %(size)d MB.') %
               {'volume_size': volume['size'], 'size': size})
        raise exception.InvalidVolume(reason=msg)

    LOG.info(_LI("Overwriting volume %(volume_id)s with restore of "
                 "backup %(backup_id)s"),
             {'volume_id': volume_id, 'backup_id': backup_id},
             context=context)

    # Setting the status here rather than setting at start and unrolling
    # for each error condition, it should be a very small window
    self.db.backup_update(context, backup_id, {'status': 'restoring'})
    self.db.volume_update(context, volume_id, {'status':
                                               'restoring-backup'})

    # The restore is sent to the host that owns the target volume.
    volume_host = volume_utils.extract_host(volume['host'], 'host')
    self.backup_rpcapi.restore_backup(context,
                                      volume_host,
                                      backup['id'],
                                      volume_id)

    d = {'backup_id': backup_id,
         'volume_id': volume_id, }

    return d
def restore(self, context, backup_id, volume_id=None,
            availability_zone=None, description=None):
    """Make the RPC call to restore a volume backup.

    When no volume is given, a volume of the backup's size is created in
    availability_zone and polled once per second until it leaves the
    'creating' status.

    :param context: request context, checked against the 'restore' policy
    :param backup_id: id of the backup to restore
    :param volume_id: id of the volume to overwrite, or None to create one
    :param availability_zone: zone for the created volume; unused when
                              volume_id is given
    :param description: description for the created volume; defaults to
                        'auto-created_from_restore_from_backup'. Unused
                        when volume_id is given.
    :raises exception.InvalidBackup: backup is not 'available' or has no
                                     size
    :raises exception.InvalidVolume: volume is not 'available' or is
                                     smaller than the backup
    :returns: dict with the 'backup_id' and 'volume_id' used
    """
    check_policy(context, 'restore')
    backup = self.get(context, backup_id)
    if backup['status'] != 'available':
        msg = _('Backup status must be available')
        raise exception.InvalidBackup(reason=msg)

    size = backup['size']
    if size is None:
        msg = _('Backup to be restored has invalid size')
        raise exception.InvalidBackup(reason=msg)

    # Create a volume if none specified. If a volume is specified check
    # it is large enough for the backup
    if volume_id is None:
        name = 'restore_backup_%s' % backup_id
        # Only fall back to the default text: the caller's description
        # used to be overwritten unconditionally, which made the parameter
        # dead.
        if description is None:
            description = 'auto-created_from_restore_from_backup'

        LOG.info(_("Creating volume of %(size)s GB for restore of "
                   "backup %(backup_id)s"),
                 {'size': size, 'backup_id': backup_id},
                 context=context)
        # code begin by luobin
        # TODO: specify the target volume's volume_type
        volume = self.volume_api.create(
            context, size, name, description,
            availability_zone=availability_zone)
        # code end by luobin
        volume_id = volume['id']

        # Wait for the new volume to leave the 'creating' status.
        while True:
            volume = self.volume_api.get(context, volume_id)
            if volume['status'] != 'creating':
                break
            greenthread.sleep(1)
    else:
        volume = self.volume_api.get(context, volume_id)

    if volume['status'] != "available":
        msg = _('Volume to be restored to must be available')
        raise exception.InvalidVolume(reason=msg)

    LOG.debug('Checking backup size %s against volume size %s',
              size, volume['size'])
    if size > volume['size']:
        msg = (_('volume size %(volume_size)d is too small to restore '
                 'backup of size %(size)d.') %
               {'volume_size': volume['size'], 'size': size})
        raise exception.InvalidVolume(reason=msg)

    LOG.info(_("Overwriting volume %(volume_id)s with restore of "
               "backup %(backup_id)s"),
             {'volume_id': volume_id, 'backup_id': backup_id},
             context=context)

    # Setting the status here rather than setting at start and unrolling
    # for each error condition, it should be a very small window
    self.db.backup_update(context, backup_id, {'status': 'restoring'})
    self.db.volume_update(context, volume_id, {'status':
                                               'restoring-backup'})

    # The restore is sent to the host that owns the target volume.
    volume_host = volume_utils.extract_host(volume['host'], 'host')
    self.backup_rpcapi.restore_backup(context,
                                      volume_host,
                                      backup['id'],
                                      volume_id)

    d = {'backup_id': backup_id,
         'volume_id': volume_id, }

    return d
def masakari(self):
    """Main processing of the RecoveryController class.

    Old notification records are updated, the records that still need
    processing are fetched, and a thread is started for each of them
    according to its "recover_by" value. A handle_pending_instances
    thread is started next, and finally the wsgi server is started with
    _notification_reciever as its application.

    Any exception is logged and ends the process through sys.exit().
    """
    def abort(message_id):
        # Log the exception being handled by the caller, then exit.
        error_type, error_value, traceback_ = sys.exc_info()
        tb_list = traceback.format_tb(traceback_)
        self.rc_util.syslogout_ex(message_id, syslog.LOG_ERR)
        self.rc_util.syslogout(error_type, syslog.LOG_ERR)
        self.rc_util.syslogout(error_value, syslog.LOG_ERR)
        for tb in tb_list:
            self.rc_util.syslogout(tb, syslog.LOG_ERR)
        sys.exit()

    try:
        self.rc_util.syslogout_ex(
            "RecoveryController_0004", syslog.LOG_INFO)
        self.rc_util.syslogout("masakari START.", syslog.LOG_INFO)

        # Get database session
        conn, cursor = self.rc_util_db.connect_database()

        self._update_old_records_notification_list(conn, cursor)
        pending = self._find_reprocessing_records_notification_list(
            conn, cursor)
        self.rc_util_db.disconnect_database(conn, cursor)

        if len(pending) > 0:
            for row in pending:
                recover_by = row.get("recover_by")
                if recover_by == 0:
                    # node recovery event
                    maintenance = threading.Thread(
                        target=self.rc_worker.host_maintenance_mode,
                        args=(row.get("notification_id"),
                              row.get("notification_hostname"),
                              False, ))
                    maintenance.start()

                    # Sleep until updating nova-compute service status
                    # down.
                    self.rc_util.syslogout_ex(
                        "RecoveryController_0035", syslog.LOG_INFO)
                    starter_conf = self.rc_config.get_value(
                        'recover_starter')
                    node_err_wait = starter_conf.get("node_err_wait")
                    msg = ("Sleeping " + node_err_wait
                           + " sec before starting node recovery thread,"
                           + " until updateing nova-compute service status.")
                    self.rc_util.syslogout(msg, syslog.LOG_INFO)
                    greenthread.sleep(int(node_err_wait))

                    # Start add_failed_host thread
                    # TODO(sampath):
                    # Avoid create thread here,
                    # insted call rc_starter.add_failed_host
                    retry_mode = True
                    host_recovery = threading.Thread(
                        target=self.rc_starter.add_failed_host,
                        args=(row.get("notification_id"),
                              row.get("notification_hostname"),
                              row.get("notification_cluster_port"),
                              retry_mode, ))
                    host_recovery.start()
                elif recover_by == 1:
                    # instance recovery event
                    # TODO(sampath):
                    # Avoid create thread here,
                    # insted call rc_starter.add_failed_instance
                    instance_recovery = threading.Thread(
                        target=self.rc_starter.add_failed_instance,
                        args=(row.get("notification_id"),
                              row.get("notification_uuid"), ))
                    instance_recovery.start()
                else:
                    # maintenance mode event
                    maintenance = threading.Thread(
                        target=self.rc_worker.host_maintenance_mode,
                        args=(row.get("notification_id"),
                              row.get("notification_hostname"),
                              True, ))
                    maintenance.start()

        # Start handle_pending_instances thread
        # TODO(sampath):
        # Avoid create thread here,
        # insted call rc_starter.handle_pending_instances()
        pending_handler = threading.Thread(
            target=self.rc_starter.handle_pending_instances)
        pending_handler.start()

        # Start reciever process for notification
        conf_wsgi_dic = self.rc_config.get_value('wsgi')
        listener = eventlet.listen(('', int(conf_wsgi_dic['server_port'])))
        wsgi.server(listener, self._notification_reciever)
    except MySQLdb.Error:
        abort("RecoveryController_0005")
    except KeyError:
        abort("RecoveryController_0006")
    except:
        abort("RecoveryController_0007")
def func():
    """Schedule ``self.lst.pop`` after 0.1 seconds, then sleep 0.2 seconds."""
    pop_delay = 0.1
    nap = 0.2
    greenthread.spawn_after_local(pop_delay, self.lst.pop)
    greenthread.sleep(nap)
def create_or_update_agent(self, context, agent_state,
                           agent_timestamp=None):
    """Refresh the agent's DB object, creating it when none exists yet.

    The agent is looked up by the type and host found in agent_state. An
    existing agent gets a new heartbeat timestamp; a missing one is
    created with created/started/heartbeat timestamps set to now. An
    AFTER_UPDATE or AFTER_CREATE event is published afterwards.

    :param context: request context used for the DB writer transaction
    :param agent_state: dict reported by the agent; it is updated in place
                        with 'agent_status', 'admin_state_up' and 'id'
    :param agent_timestamp: passed through to _log_heartbeat
    :returns: tuple of agent status and state. Status is from the server's
              point of view: alive, new or revived. The agent can use it
              to decide whether to resync.
    """
    status = agent_consts.AGENT_ALIVE
    with db_api.CONTEXT_WRITER.using(context):
        fields_to_set = {
            key: agent_state[key]
            for key in ('agent_type', 'binary', 'host', 'topic')}
        if 'availability_zone' in agent_state:
            fields_to_set['availability_zone'] = (
                agent_state['availability_zone'])
        configurations = agent_state.get('configurations', {})
        fields_to_set['configurations'] = jsonutils.dumps(configurations)
        resource_versions = agent_state.get('resource_versions')
        if resource_versions:
            fields_to_set['resource_versions'] = jsonutils.dumps(
                resource_versions)
        fields_to_set['load'] = self._get_agent_load(agent_state)
        now = timeutils.utcnow()
        try:
            agent = self._get_agent_by_type_and_host(
                context, agent_state['agent_type'], agent_state['host'])
            # Snapshots of the reported and stored state before changes.
            agent_state_orig = copy.deepcopy(agent_state)
            agent_state_previous = copy.deepcopy(agent)
            if not agent.is_active:
                status = agent_consts.AGENT_REVIVED
                if 'resource_versions' not in agent_state:
                    # Feed agent_state with the resource_versions stored in
                    # the db so that
                    # _update_local_agent_resource_versions() will call
                    # version_manager and bring it up to date
                    agent_state['resource_versions'] = self._get_dict(
                        agent, 'resource_versions', ignore_missing=True)
            fields_to_set['heartbeat_timestamp'] = now
            if agent_state.get('start_flag'):
                fields_to_set['started_at'] = now
            greenthread.sleep(0)
            self._log_heartbeat(agent_state, agent, configurations,
                                agent_timestamp)
            agent.update_fields(fields_to_set)
            agent.update()
            event_type = events.AFTER_UPDATE
        except agent_exc.AgentNotFoundByTypeHost:
            agent_state_orig = None
            agent_state_previous = None
            greenthread.sleep(0)
            fields_to_set['created_at'] = now
            fields_to_set['started_at'] = now
            fields_to_set['heartbeat_timestamp'] = now
            fields_to_set['admin_state_up'] = cfg.CONF.enable_new_agents
            agent = agent_obj.Agent(context=context, **fields_to_set)
            greenthread.sleep(0)
            agent.create()
            event_type = events.AFTER_CREATE
            self._log_heartbeat(agent_state, agent, configurations,
                                agent_timestamp)
            status = agent_consts.AGENT_NEW
        greenthread.sleep(0)

    agent_state['agent_status'] = status
    agent_state['admin_state_up'] = agent.admin_state_up
    agent_state['id'] = agent.id
    payload = events.DBEventPayload(
        context=context,
        metadata={
            'host': agent_state['host'],
            'plugin': self,
            'status': status
        },
        states=(agent_state_orig, agent_state_previous),
        desired_state=agent_state,
        resource_id=agent.id
    )
    registry.publish(resources.AGENT, event_type, self, payload=payload)
    return status, agent_state
def test_cancel_already_started(self):
    """wait() must still return 22 when cancel() follows a sleep(0)."""
    started = greenthread.spawn_after(0, waiter, 22)
    # Yield once before cancelling.
    greenthread.sleep(0)
    started.cancel()
    outcome = started.wait()
    self.assertEqual(outcome, 22)
def test_n(self):
    """spawn_n result is alive before a yield and dead after one."""
    spawned = greenthread.spawn_n(passthru, 2, b=3)
    assert not spawned.dead
    # Yield once so the spawned greenlet gets to run.
    greenthread.sleep(0)
    assert spawned.dead
    expected_calls = [((2, ), {'b': 3})]
    self.assertEqual(_g_results, expected_calls)
def wait():
    """Loop forever, sleeping five seconds per iteration; never returns."""
    nap_seconds = 5
    while True:
        greenthread.sleep(nap_seconds)
def test_kill_already_started(self):
    """kill() must leave a greenthread dead even after it has started."""
    victim = greenthread.spawn_after(0, waiter, 22)
    # Yield once so the scheduled waiter gets a chance to start.
    greenthread.sleep(0)
    victim.kill()
    self.assert_dead(victim)
def _inner():
    """Upload the image data, then poll the image until it settles.

    The image service ``update`` call performs the transfer. Afterwards
    the image status is polled every ``IMAGE_SERVICE_POLL_INTERVAL``
    seconds while it is 'saving' or 'queued'. Any other status stops the
    task:

    * 'active'  -> ``True`` is sent on ``self._done``;
    * 'killed' or anything unrecognised -> an ``ImageTransferException``
      is sent on ``self._done``.

    Any exception raised along the way is logged, wrapped in an
    ``ImageTransferException`` and likewise delivered through
    ``self._done`` rather than propagated to the caller.
    """
    LOG.debug("Calling image service update on image: %(image)s "
              "with meta: %(meta)s",
              {'image': self._image_id,
               'meta': self._image_meta})
    try:
        self._image_service.update(self._context,
                                   self._image_id,
                                   self._image_meta,
                                   data=self._input_file)
        self._running = True
        while self._running:
            LOG.debug("Retrieving status of image: %s.", self._image_id)
            current_meta = self._image_service.show(self._context,
                                                    self._image_id)
            state = current_meta.get('status')

            # Still in progress: wait one poll interval and ask again.
            if state in ('saving', 'queued'):
                LOG.debug("Image: %(image)s is in %(state)s state; "
                          "sleeping for %(sleep)d seconds.",
                          {'image': self._image_id,
                           'state': state,
                           'sleep': IMAGE_SERVICE_POLL_INTERVAL})
                greenthread.sleep(IMAGE_SERVICE_POLL_INTERVAL)
                continue

            # Every remaining status is terminal for this task.
            self.stop()
            if state == 'active':
                LOG.debug("Image: %s is now active.", self._image_id)
                self._done.send(True)
                continue

            if state == 'killed':
                failure_msg = (_("Image: %s is in killed state.") %
                               self._image_id)
            else:
                failure_msg = (_("Image: %(image)s is in unknown "
                                 "state: %(state)s.") %
                               {'image': self._image_id,
                                'state': state})
            LOG.error(failure_msg)
            self._done.send_exception(
                exceptions.ImageTransferException(failure_msg))
    except Exception as cause:
        self.stop()
        failure_msg = (_("Error occurred while writing image: %s") %
                       self._image_id)
        LOG.exception(failure_msg)
        wrapped = exceptions.ImageTransferException(failure_msg, cause)
        self._done.send_exception(wrapped)
def waiter(a):
    """Sleep for 0.1 seconds, then hand back ``a`` unchanged."""
    pause_seconds = 0.1
    greenthread.sleep(pause_seconds)
    return a
def _result(task):
    """Yield once to other greenthreads when ``result_wait`` is set.

    ``task`` is accepted but not used; the function always returns None.
    """
    if not result_wait:
        return
    greenthread.sleep(0)
def f(_id):
    """Append ``_id`` to ``seen_threads`` ten times while holding the lock.

    The 'testlock2' lock (prefix 'test-', not external) is held for the
    whole run, and the function yields to other greenthreads after every
    append.
    """
    repetitions = 10
    with lockutils.lock('testlock2', 'test-', external=False):
        for _ in range(repetitions):
            seen_threads.append(_id)
            greenthread.sleep(0)
def _exec(task):
    """Return the PENDING task status, pausing 0.01 s first if ``exec_wait``.

    ``task`` is accepted but not used.
    """
    if exec_wait:
        pause_seconds = 0.01
        greenthread.sleep(pause_seconds)
    outcome = ts_const.TaskStatus.PENDING
    return outcome
def _exec(task):
    """Flag the task as executing until its 'tested' flag becomes true.

    ``task.userdata['executing']`` is set to True on entry and back to
    False once ``task.userdata['tested']`` is truthy. While waiting, the
    function yields to other greenthreads on every pass. Returns the
    COMPLETED task status.
    """
    task.userdata['executing'] = True
    while True:
        if task.userdata['tested']:
            break
        greenthread.sleep(0)
    task.userdata['executing'] = False
    return ts_const.TaskStatus.COMPLETED
def get_delay_server_twice(server_dict):
    """Call ``get_delay_server(server_dict)`` two times in a row.

    After each call the function yields to other greenthreads. The value
    returned by ``get_delay_server`` is discarded and this function
    returns None.
    """
    for _ in range(2):
        # NOTE: an early exit when the result equals "Failed" was disabled
        # here, so both calls are always made.
        get_delay_server(server_dict)
        greenthread.sleep(0)
def _status(task):
    """Pause for 0.01 seconds, then return the PENDING task status.

    ``task`` is accepted but not used.
    """
    pause_seconds = 0.01
    greenthread.sleep(pause_seconds)
    outcome = ts_const.TaskStatus.PENDING
    return outcome
def _get_vm_param(self, uuid): try: # Initalize return values. conf_dic = self.rc_config.get_value('recover_starter') api_max_retry_cnt = conf_dic.get('api_max_retry_cnt') api_retry_interval = conf_dic.get('api_retry_interval') cnt = 0 while cnt < int(api_max_retry_cnt) + 1: # Call nova show API. rc, rbody = self.rc_util_api.do_instance_show(uuid) rbody = json.loads(rbody) if rc == '200': break elif rc == '500': if cnt == int(api_max_retry_cnt): raise EnvironmentError("Failed to nova show API.") else: self.rc_util.syslogout_ex( "RecoveryControllerWorker_0040", syslog.LOG_INFO) msg = "Retry nova show API." self.rc_util.syslogout(msg, syslog.LOG_INFO) greenthread.sleep(int(api_retry_interval)) else: raise EnvironmentError("Failed to nova show API.") cnt += 1 # Set return values. vm_info = rbody.get('server') except EnvironmentError: self.rc_util.syslogout_ex("RecoveryControllerWorker_0004", syslog.LOG_ERR) error_type, error_value, traceback_ = sys.exc_info() tb_list = traceback.format_tb(traceback_) self.rc_util.syslogout(error_type, syslog.LOG_ERR) self.rc_util.syslogout(error_value, syslog.LOG_ERR) for tb in tb_list: self.rc_util.syslogout(tb, syslog.LOG_ERR) raise EnvironmentError except KeyError: self.rc_util.syslogout_ex("RecoveryControllerWorker_0005", syslog.LOG_ERR) error_type, error_value, traceback_ = sys.exc_info() tb_list = traceback.format_tb(traceback_) self.rc_util.syslogout(error_type, syslog.LOG_ERR) self.rc_util.syslogout(error_value, syslog.LOG_ERR) for tb in tb_list: self.rc_util.syslogout(tb, syslog.LOG_ERR) raise KeyError except: self.rc_util.syslogout_ex("RecoveryControllerWorker_0006", syslog.LOG_ERR) error_type, error_value, traceback_ = sys.exc_info() tb_list = traceback.format_tb(traceback_) self.rc_util.syslogout(error_type, syslog.LOG_ERR) self.rc_util.syslogout(error_value, syslog.LOG_ERR) for tb in tb_list: self.rc_util.syslogout(tb, syslog.LOG_ERR) raise return vm_info
def _wait(self, query_fn, timeout=0): end = time.time() + timeout for interval in query_fn(): greenthread.sleep(interval) if timeout > 0 and time.time() >= end: raise v_exc.WaitTimeoutError()