def elapsed(self):
    if self._state == self._STOPPED:
        return max(0.0, float(timeutils.delta_seconds(self._started_at,
                                                       self._stopped_at)))
    elif self._state == self._STARTED:
        return max(0.0, float(timeutils.delta_seconds(self._started_at,
                                                       timeutils.utcnow())))
    else:
        raise RuntimeError("Can not get the elapsed time of a stopwatch"
                           " if it has not been started/stopped")
def get_interval(event_filter, period, groupby, aggregate):
    self.assertIsNotNone(event_filter.start)
    self.assertIsNotNone(event_filter.end)
    if event_filter.start > end or event_filter.end < start:
        return []
    duration_start = max(event_filter.start, start)
    duration_end = min(event_filter.end, end)
    duration = timeutils.delta_seconds(duration_start, duration_end)
    return [
        models.Statistics(
            unit='',
            min=0,
            max=0,
            avg=0,
            sum=0,
            count=0,
            period=None,
            period_start=None,
            period_end=None,
            duration=duration,
            duration_start=duration_start,
            duration_end=duration_end,
            groupby=None,
        )
    ]
def _detach_volume(instance, volume_id):
    volume = cinder.get_volume(volume_id)
    try:
        LOG.debug("Detaching volume %s from instance %s" % (
            volume_id, instance.instance_name))
        nova.client().volumes.delete_server_volume(instance.instance_id,
                                                   volume_id)
    except Exception:
        LOG.exception(_LE("Can't detach volume %s"), volume.id)

    detach_timeout = CONF.detach_volume_timeout
    LOG.debug("Waiting %d seconds to detach %s volume" % (detach_timeout,
                                                          volume_id))
    s_time = tu.utcnow()
    while tu.delta_seconds(s_time, tu.utcnow()) < detach_timeout:
        volume = cinder.get_volume(volume_id)
        if volume.status not in ['available', 'error']:
            context.sleep(2)
        else:
            LOG.debug("Volume %s has been detached" % volume_id)
            return
    else:
        LOG.warn(_LW("Can't detach volume %(volume)s. "
                     "Current status of volume: %(status)s"),
                 {'volume': volume_id, 'status': volume.status})
def convert_image(source, dest, out_format, run_as_root=True):
    """Convert image to other format."""
    cmd = ('qemu-img', 'convert', '-O', out_format, source, dest)

    start_time = timeutils.utcnow()
    utils.execute(*cmd, run_as_root=run_as_root)
    duration = timeutils.delta_seconds(start_time, timeutils.utcnow())

    # NOTE(jdg): use a default of 1, mostly for unit test, but in
    # some incredible event this is 0 (cirros image?) don't barf
    if duration < 1:
        duration = 1
    fsz_mb = os.stat(source).st_size / units.Mi
    mbps = (fsz_mb / duration)
    msg = ("Image conversion details: src %(src)s, size %(sz).2f MB, "
           "duration %(duration).2f sec, destination %(dest)s")
    LOG.debug(msg % {"src": source,
                     "sz": fsz_mb,
                     "duration": duration,
                     "dest": dest})

    msg = _("Converted %(sz).2f MB image at %(mbps).2f MB/s")
    LOG.info(msg % {"sz": fsz_mb, "mbps": mbps})
def _inner():
    if initial_delay:
        greenthread.sleep(initial_delay)

    try:
        while self._running:
            start = timeutils.utcnow()
            self.f(*self.args, **self.kw)
            end = timeutils.utcnow()
            if not self._running:
                break
            delay = interval - timeutils.delta_seconds(start, end)
            if delay <= 0:
                LOG.warn(_LW('task run outlasted interval by %s sec'),
                         -delay)
            greenthread.sleep(delay if delay > 0 else 0)
    except LoopingCallDone as e:
        self.stop()
        done.send(e.retvalue)
    except Exception:
        LOG.exception(_LE('in fixed duration looping call'))
        done.send_exception(*sys.exc_info())
        return
    else:
        done.send(True)
def _start_cloudera_manager(cluster):
    manager = pu.get_manager(cluster)
    with manager.remote() as r:
        cmd.start_cloudera_db(r)
        cmd.start_manager(r)

    timeout = 300
    LOG.debug("Waiting %(timeout)s seconds for Manager to start: "
              % {'timeout': timeout})
    s_time = timeutils.utcnow()
    while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
        try:
            conn = telnetlib.Telnet(manager.management_ip, CM_API_PORT)
            conn.close()
            break
        except IOError:
            context.sleep(2)
    else:
        message = _("Cloudera Manager failed to start in %(timeout)s minutes "
                    "on node '%(node)s' of cluster '%(cluster)s'") % {
                        'timeout': timeout / 60,
                        'node': manager.management_ip,
                        'cluster': cluster.name}
        raise ex.HadoopProvisionError(message)

    LOG.info(_LI("Cloudera Manager has been started"))
def handle_sample(self, context, s):
    """Handle a sample, converting if necessary."""
    LOG.debug(_('handling sample %s'), (s,))
    key = s.name + s.resource_id
    prev = self.cache.get(key)
    timestamp = timeutils.parse_isotime(s.timestamp)
    self.cache[key] = (s.volume, timestamp)

    if prev:
        prev_volume = prev[0]
        prev_timestamp = prev[1]
        time_delta = timeutils.delta_seconds(prev_timestamp, timestamp)
        # we only allow negative deltas for noncumulative samples, whereas
        # for cumulative we assume that a reset has occurred in the interim
        # so that the current volume gives a lower bound on growth
        volume_delta = (s.volume - prev_volume
                        if (prev_volume <= s.volume or
                            s.type != sample.TYPE_CUMULATIVE)
                        else s.volume)
        rate_of_change = ((1.0 * volume_delta / time_delta)
                          if time_delta else 0.0)

        s = self._convert(s, rate_of_change)
        LOG.debug(_('converted to: %s'), (s,))
    else:
        LOG.warn(_('dropping sample with no predecessor: %s'), (s,))
        s = None
    return s
def _check_transient(self):
    pig_job_data = self.edp_info.read_pig_example_script()
    pig_lib_data = self.edp_info.read_pig_example_jar()
    self.edp_testing(job_type=utils_edp.JOB_TYPE_PIG,
                     job_data_list=[{'pig': pig_job_data}],
                     lib_data_list=[{'jar': pig_lib_data}])

    # set timeout in seconds
    timeout = self.common_config.TRANSIENT_CLUSTER_TIMEOUT * 60
    s_time = timeutils.utcnow()
    raise_failure = True
    # wait for cluster deleting
    while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
        try:
            self.sahara.clusters.get(self.cluster_id)
        except sab.APIException as api_ex:
            if 'not found' in api_ex.message:
                raise_failure = False
                break
        time.sleep(2)

    if raise_failure:
        self.fail('Transient cluster has not been deleted within %s '
                  'minutes.' % self.common_config.TRANSIENT_CLUSTER_TIMEOUT)
def _wait_on_task_execution(self):
    """Wait until all the tasks have finished execution and are in
    state of success or failure.
    """
    start = timeutils.utcnow()
    # wait for maximum of 5 seconds
    while timeutils.delta_seconds(start, timeutils.utcnow()) < 5:
        wait = False
        # Verify that no task is in status of pending or processing
        path = "/v2/tasks"
        res, content = self.http.request(path, 'GET',
                                         headers=minimal_task_headers())
        content_dict = json.loads(content)
        self.assertEqual(res.status, 200)
        res_tasks = content_dict['tasks']
        if len(res_tasks) != 0:
            for task in res_tasks:
                if task['status'] in ('pending', 'processing'):
                    wait = True
                    break

        if wait:
            time.sleep(0.05)
            continue
        else:
            break
def oid_ts(oid):
    """Converts an ObjectId to a UNIX timestamp.

    :raises: TypeError if oid isn't an ObjectId
    """
    try:
        return timeutils.delta_seconds(EPOCH, oid.generation_time)
    except AttributeError:
        raise TypeError(u'Expected ObjectId and got %s' % type(oid))
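# A minimal, self-contained sketch of what oid_ts computes above, assuming
# EPOCH in that module is the UTC Unix epoch. It requires the bson package;
# the ObjectId below is created ad hoc purely for illustration.
import datetime

from bson import objectid

EPOCH = datetime.datetime(1970, 1, 1)
oid = objectid.ObjectId()
# generation_time is the UTC creation time embedded in the ObjectId;
# subtracting the epoch yields the same UNIX timestamp oid_ts returns.
generated = oid.generation_time.replace(tzinfo=None)
print((generated - EPOCH).total_seconds())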
def get_meter_statistics(self, sample_filter, period=None, groupby=None,
                         aggregate=None):
    """Return an iterable of api_models.Statistics instances.

    Items are containing meter statistics described by the query
    parameters. The filter must have a meter value set.
    """
    if groupby:
        for group in groupby:
            if group not in ['user_id', 'project_id', 'resource_id']:
                raise ceilometer.NotImplementedError('Unable to group by '
                                                     'these fields')

    if not period:
        for res in self._make_stats_query(sample_filter,
                                          groupby,
                                          aggregate):
            if res.count:
                yield self._stats_result_to_model(res, 0,
                                                  res.tsmin, res.tsmax,
                                                  groupby,
                                                  aggregate)
        return

    if not sample_filter.start or not sample_filter.end:
        res = self._make_stats_query(sample_filter,
                                     None,
                                     aggregate).first()
        if not res:
            # NOTE(liusheng):The 'res' may be NoneType, because no
            # sample has found with sample filter(s).
            return

    query = self._make_stats_query(sample_filter, groupby, aggregate)
    # HACK(jd) This is an awful method to compute stats by period, but
    # since we're trying to be SQL agnostic we have to write portable
    # code, so here it is, admire! We're going to do one request to get
    # stats by period. We would like to use GROUP BY, but there's no
    # portable way to manipulate timestamp in SQL, so we can't.
    for period_start, period_end in base.iter_period(
            sample_filter.start or res.tsmin,
            sample_filter.end or res.tsmax,
            period):
        q = query.filter(models.Sample.timestamp >= period_start)
        q = q.filter(models.Sample.timestamp < period_end)
        for r in q.all():
            if r.count:
                yield self._stats_result_to_model(
                    result=r,
                    period=int(timeutils.delta_seconds(period_start,
                                                       period_end)),
                    period_start=period_start,
                    period_end=period_end,
                    groupby=groupby,
                    aggregate=aggregate
                )
def _group_key(meter):
    # the method to define a key for groupby call
    key = {}
    for y in sort_keys:
        if y == 'timestamp' and period:
            key[y] = (timeutils.delta_seconds(period_start, meter[y]) //
                      period)
        elif y != 'timestamp':
            key[y] = meter[y]
    return key
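# A self-contained sketch of the bucketing arithmetic used by _group_key
# above (the dates and period length below are invented for illustration):
# a sample's timestamp maps to an integer period index by flooring the
# elapsed seconds since period_start divided by the period length.
import datetime

period_start = datetime.datetime(2014, 1, 1, 0, 0, 0)
period = 300  # seconds per bucket
sample_ts = datetime.datetime(2014, 1, 1, 0, 12, 30)

elapsed = (sample_ts - period_start).total_seconds()
bucket = int(elapsed // period)  # 750 // 300 -> bucket index 2
print(bucket)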
def assertTimestampEqual(self, first, second, msg=None):
    """Checks that two timestamps are equal.

    This relies on assertAlmostEqual to avoid rounding problems, and only
    checks down to the microsecond.
    """
    return self.assertAlmostEqual(
        timeutils.delta_seconds(first, second),
        0.0,
        places=5)
def _get_cached_vswitch_existence(self, host):
    """Returns cached existence. Old and non-cached raise ValueError."""
    entry = self.ivs_host_cache.get(host)
    if not entry:
        raise ValueError(_('No cache entry for host %s') % host)
    diff = timeutils.delta_seconds(entry['timestamp'],
                                   datetime.datetime.now())
    if diff > CACHE_VSWITCH_TIME:
        self.ivs_host_cache.pop(host)
        raise ValueError(_('Expired cache entry for host %s') % host)
    return entry['exists']
def _record_poll_time(self):
    """Method records current time as the poll time.

    :return: time in seconds since the last poll time was recorded
    """
    current_time = timeutils.utcnow()
    duration = None
    if hasattr(self, '_last_poll_time'):
        duration = timeutils.delta_seconds(self._last_poll_time,
                                           current_time)
    self._last_poll_time = current_time
    return duration
def inner(self, *args, **kwargs):
    LOG.debug("Entering %(cls)s.%(method)s",
              {'cls': self.__class__.__name__,
               'method': func.__name__})
    start = timeutils.utcnow()

    ret = func(self, *args, **kwargs)

    end = timeutils.utcnow()
    LOG.debug("Exiting %(cls)s.%(method)s. "
              "Spent %(duration)s sec. "
              "Return %(return)s",
              {'cls': self.__class__.__name__,
               'duration': timeutils.delta_seconds(start, end),
               'method': func.__name__,
               'return': ret})
    return ret
def _is_master(self, interval):
    """Determine if the current partition is the master."""
    now = timeutils.utcnow()
    if timeutils.delta_seconds(self.start, now) < interval * 2:
        LOG.debug(_('%s still warming up') % self.this)
        return False
    is_master = True
    for partition, last_heard in self.reports.items():
        delta = timeutils.delta_seconds(last_heard, now)
        LOG.debug(_('last heard from %(report)s %(delta)s seconds ago') %
                  dict(report=partition, delta=delta))
        if delta > interval * 2:
            del self.reports[partition]
            self._record_oldest(partition, stale=True)
            LOG.debug(_('%(this)s detects stale evaluator: %(stale)s') %
                      dict(this=self.this, stale=partition))
            self.presence_changed = True
        elif partition < self.this:
            is_master = False
            LOG.info(_('%(this)s sees older potential master: %(older)s') %
                     dict(this=self.this, older=partition))
    LOG.info(_('%(this)s is master?: %(is_master)s') %
             dict(this=self.this, is_master=is_master))
    return is_master
def _stats_result_to_model(result, period, period_start,
                           period_end, groupby, aggregate):
    stats_args = Connection._stats_result_aggregates(result, aggregate)
    stats_args['unit'] = result.unit
    duration = (timeutils.delta_seconds(result.tsmin, result.tsmax)
                if result.tsmin is not None and result.tsmax is not None
                else None)
    stats_args['duration'] = duration
    stats_args['duration_start'] = result.tsmin
    stats_args['duration_end'] = result.tsmax
    stats_args['period'] = period
    stats_args['period_start'] = period_start
    stats_args['period_end'] = period_end
    stats_args['groupby'] = (dict(
        (g, getattr(result, g)) for g in groupby) if groupby else None)
    return api_models.Statistics(**stats_args)
def copy_volume(srcstr, deststr, size_in_m, blocksize, sync=False,
                execute=utils.execute, ionice=None):
    # Use O_DIRECT to avoid thrashing the system buffer cache
    extra_flags = []
    if check_for_odirect_support(srcstr, deststr, 'iflag=direct'):
        extra_flags.append('iflag=direct')

    if check_for_odirect_support(srcstr, deststr, 'oflag=direct'):
        extra_flags.append('oflag=direct')

    # If the volume is being unprovisioned then
    # request the data is persisted before returning,
    # so that it's not discarded from the cache.
    if sync and not extra_flags:
        extra_flags.append('conv=fdatasync')

    blocksize, count = _calculate_count(size_in_m, blocksize)

    cmd = ['dd', 'if=%s' % srcstr, 'of=%s' % deststr,
           'count=%d' % count, 'bs=%s' % blocksize]
    cmd.extend(extra_flags)

    if ionice is not None:
        cmd = ['ionice', ionice] + cmd

    cgcmd = setup_blkio_cgroup(srcstr, deststr, CONF.volume_copy_bps_limit)
    if cgcmd:
        cmd = cgcmd + cmd

    # Perform the copy
    start_time = timeutils.utcnow()
    execute(*cmd, run_as_root=True)
    duration = timeutils.delta_seconds(start_time, timeutils.utcnow())

    # NOTE(jdg): use a default of 1, mostly for unit test, but in
    # some incredible event this is 0 (cirros image?) don't barf
    if duration < 1:
        duration = 1
    mbps = (size_in_m / duration)
    mesg = ("Volume copy details: src %(src)s, dest %(dest)s, "
            "size %(sz).2f MB, duration %(duration).2f sec")
    LOG.debug(mesg % {"src": srcstr,
                      "dest": deststr,
                      "sz": size_in_m,
                      "duration": duration})
    mesg = _("Volume copy %(size_in_m).2f MB at %(mbps).2f MB/s")
    LOG.info(mesg % {'size_in_m': size_in_m, 'mbps': mbps})
def cancel_job(job_execution_id):
    ctx = context.ctx()
    job_execution = conductor.job_execution_get(ctx, job_execution_id)
    if job_execution.info['status'] in edp.JOB_STATUSES_TERMINATED:
        return job_execution
    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster is None:
        return job_execution
    engine = _get_job_engine(cluster, job_execution)
    if engine is not None:
        job_execution = conductor.job_execution_update(
            ctx, job_execution_id,
            {'info': {'status': edp.JOB_STATUS_TOBEKILLED}})

        timeout = CONF.job_canceling_timeout
        s_time = timeutils.utcnow()
        while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
            if job_execution.info['status'] not in edp.JOB_STATUSES_TERMINATED:
                try:
                    job_info = engine.cancel_job(job_execution)
                except Exception as ex:
                    job_info = None
                    LOG.exception(
                        _LE("Error during cancel of job execution %(job)s: "
                            "%(error)s"), {'job': job_execution.id,
                                           'error': ex})
                if job_info is not None:
                    job_execution = _write_job_status(job_execution, job_info)
                    LOG.info(_LI("Job execution %s was canceled successfully"),
                             job_execution.id)
                    return job_execution
                context.sleep(3)
                job_execution = conductor.job_execution_get(
                    ctx, job_execution_id)
                if not job_execution:
                    LOG.info(_LI("Job execution %(job_exec_id)s was deleted. "
                                 "Canceling current operation."),
                             {'job_exec_id': job_execution_id})
                    return job_execution
            else:
                LOG.info(_LI("Job execution status %(job)s: %(status)s"),
                         {'job': job_execution.id,
                          'status': job_execution.info['status']})
                return job_execution
        else:
            raise e.CancelingFailed(_('Job execution %s was not canceled')
                                    % job_execution.id)
def terminate_unneeded_clusters(self, ctx):
    LOG.debug('Terminating unneeded transient clusters')
    ctx = context.get_admin_context()
    context.set_ctx(ctx)
    for cluster in conductor.cluster_get_all(ctx, status='Active'):
        if not cluster.is_transient:
            continue

        jc = conductor.job_execution_count(ctx,
                                           end_time=None,
                                           cluster_id=cluster.id)

        if jc > 0:
            continue

        cluster_updated_at = timeutils.normalize_time(
            timeutils.parse_isotime(cluster.updated_at))
        current_time = timeutils.utcnow()
        spacing = timeutils.delta_seconds(cluster_updated_at, current_time)
        if spacing < CONF.min_transient_cluster_active_time:
            continue

        if CONF.use_identity_api_v3:
            trusts.use_os_admin_auth_token(cluster)

            LOG.info(_LI('Terminating transient cluster %(cluster)s '
                         'with id %(id)s'),
                     {'cluster': cluster.name, 'id': cluster.id})

            try:
                ops.terminate_cluster(cluster.id)
            except Exception as e:
                LOG.info(_LI('Failed to terminate transient cluster '
                             '%(cluster)s with id %(id)s: %(error)s.'),
                         {'cluster': cluster.name,
                          'id': cluster.id,
                          'error': six.text_type(e)})
        else:
            if cluster.status != 'AwaitingTermination':
                conductor.cluster_update(
                    ctx,
                    cluster,
                    {'status': 'AwaitingTermination'})
    context.set_ctx(None)
def convert_image(source, dest, out_format, bps_limit=None,
                  run_as_root=True):
    """Convert image to other format."""
    cmd = ('qemu-img', 'convert', '-O', out_format, source, dest)

    # Check whether O_DIRECT is supported and set '-t none' if it is
    # This is needed to ensure that all data hit the device before
    # it gets unmapped remotely from the host for some backends
    # Reference Bug: #1363016

    # NOTE(jdg): In the case of file devices qemu does the
    # flush properly and more efficiently than would be done
    # setting O_DIRECT, so check for that and skip the
    # setting for non BLK devs
    if (utils.is_blk_device(dest) and
            volume_utils.check_for_odirect_support(source,
                                                   dest,
                                                   'oflag=direct')):
        cmd = ('qemu-img', 'convert', '-t', 'none',
               '-O', out_format, source, dest)

    start_time = timeutils.utcnow()
    cgcmd = volume_utils.setup_blkio_cgroup(source, dest, bps_limit)
    if cgcmd:
        cmd = tuple(cgcmd) + cmd
    utils.execute(*cmd, run_as_root=run_as_root)
    duration = timeutils.delta_seconds(start_time, timeutils.utcnow())

    # NOTE(jdg): use a default of 1, mostly for unit test, but in
    # some incredible event this is 0 (cirros image?) don't barf
    if duration < 1:
        duration = 1
    fsz_mb = os.stat(source).st_size / units.Mi
    mbps = (fsz_mb / duration)
    msg = ("Image conversion details: src %(src)s, size %(sz).2f MB, "
           "duration %(duration).2f sec, destination %(dest)s")
    LOG.debug(msg % {"src": source,
                     "sz": fsz_mb,
                     "duration": duration,
                     "dest": dest})

    msg = _("Converted %(sz).2f MB image at %(mbps).2f MB/s")
    LOG.info(msg % {"sz": fsz_mb, "mbps": mbps})
def _check_notification_service(self):
    self.srv.start()

    notifier = messaging.get_notifier(self.transport,
                                      "compute.vagrant-precise")
    notifier.info(context.RequestContext(), 'compute.instance.create.end',
                  TEST_NOTICE_PAYLOAD)
    start = timeutils.utcnow()
    while timeutils.delta_seconds(start, timeutils.utcnow()) < 600:
        if len(self.publisher.samples) >= self.expected_samples:
            break
        eventlet.sleep(0)
    self.srv.stop()

    resources = list(set(s.resource_id for s in self.publisher.samples))
    self.assertEqual(self.expected_samples, len(self.publisher.samples))
    self.assertEqual(["9f9d01b9-4a58-4271-9e27-398b21ab20d1"], resources)
def _check_decommission(cluster, instances, check_func, timeout):
    s_time = timeutils.utcnow()
    while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
        statuses = check_func(cluster)
        dec_ok = True
        for instance in instances:
            if statuses[instance.fqdn()] != 'decommissioned':
                dec_ok = False

        if dec_ok:
            return
        else:
            context.sleep(5)
    else:
        # The exception must actually be raised, otherwise a timed-out
        # decommission would pass silently.
        raise ex.DecommissionError(
            _("Cannot finish decommission of cluster %(cluster)s in "
              "%(seconds)d seconds") %
            {"cluster": cluster, "seconds": timeout})
def iter_period(start, end, period):
    """Split a time from start to end in periods of a number of seconds.

    This function yields the (start, end) time for each period composing the
    time passed as argument.

    :param start: When the period set starts.
    :param end: When the period set ends.
    :param period: The duration of the period.
    """
    period_start = start
    increment = datetime.timedelta(seconds=period)
    for i in moves.xrange(int(math.ceil(
            timeutils.delta_seconds(start, end)
            / float(period)))):
        next_start = period_start + increment
        yield (period_start, next_start)
        period_start = next_start
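# A self-contained illustration of the windowing behaviour of iter_period
# above, using only the standard library (the dates are arbitrary). A span
# of 10 minutes split into 180-second periods yields ceil(600 / 180) = 4
# windows; the last window extends past `end`, exactly as iter_period does.
import datetime
import math

start = datetime.datetime(2014, 1, 1, 0, 0, 0)
end = start + datetime.timedelta(minutes=10)
period = 180

period_start = start
increment = datetime.timedelta(seconds=period)
for _ in range(int(math.ceil((end - start).total_seconds() / float(period)))):
    next_start = period_start + increment
    print(period_start, next_start)
    period_start = next_start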
def clear_volume(volume_size, volume_path, volume_clear=None,
                 volume_clear_size=None, volume_clear_ionice=None):
    """Unprovision old volumes to prevent data leaking between users."""
    if volume_clear is None:
        volume_clear = CONF.volume_clear

    if volume_clear_size is None:
        volume_clear_size = CONF.volume_clear_size

    if volume_clear_size == 0:
        volume_clear_size = volume_size

    if volume_clear_ionice is None:
        volume_clear_ionice = CONF.volume_clear_ionice

    LOG.info(_("Performing secure delete on volume: %s") % volume_path)

    if volume_clear == 'zero':
        return copy_volume('/dev/zero', volume_path, volume_clear_size,
                           CONF.volume_dd_blocksize,
                           sync=True, execute=utils.execute,
                           ionice=volume_clear_ionice)
    elif volume_clear == 'shred':
        clear_cmd = ['shred', '-n3']
        if volume_clear_size:
            clear_cmd.append('-s%dMiB' % volume_clear_size)
    else:
        raise exception.InvalidConfigurationValue(
            option='volume_clear',
            value=volume_clear)

    clear_cmd.append(volume_path)
    start_time = timeutils.utcnow()
    utils.execute(*clear_cmd, run_as_root=True)
    duration = timeutils.delta_seconds(start_time, timeutils.utcnow())

    # NOTE(jdg): use a default of 1, mostly for unit test, but in
    # some incredible event this is 0 (cirros image?) don't barf
    if duration < 1:
        duration = 1
    LOG.info(_('Elapsed time for clear volume: %.2f sec') % duration)
def decommission_dn(nn, inst_to_be_deleted, survived_inst):
    with remote.get_remote(nn) as r:
        r.write_file_to('/etc/hadoop/dn.excl',
                        utils.generate_fqdn_host_names(inst_to_be_deleted))
        run.refresh_nodes(remote.get_remote(nn), "dfsadmin")
        context.sleep(3)

        timeout = config_helper.get_decommissioning_timeout(
            nn.node_group.cluster)
        s_time = timeutils.utcnow()
        all_found = False

        while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
            cmd = r.execute_command(
                "sudo su -c 'hadoop dfsadmin -report' hadoop")
            all_found = True
            datanodes_info = parse_dfs_report(cmd[1])
            for i in inst_to_be_deleted:
                for dn in datanodes_info:
                    if (dn["Name"].startswith(i.internal_ip)) and (
                            dn["Decommission Status"] != "Decommissioned"):
                        all_found = False
                        break

            if all_found:
                r.write_files_to({
                    '/etc/hadoop/dn.incl':
                        utils.generate_fqdn_host_names(survived_inst),
                    '/etc/hadoop/dn.excl': "",
                })
                break
            context.sleep(3)

        if not all_found:
            # The exception must actually be raised, otherwise a timed-out
            # decommission would pass silently.
            raise ex.DecommissionError(
                _("Cannot finish decommission of cluster %(cluster)s in "
                  "%(seconds)d seconds") % {
                    "cluster": nn.node_group.cluster,
                    "seconds": timeout})
def _update_meter_stats(stat, meter):
    """Do the stats calculation on a requested time bucket in stats dict

    :param stats: dict where aggregated stats are kept
    :param index: time bucket index in stats
    :param meter: meter record as returned from HBase
    :param start_time: query start time
    :param period: length of the time bucket
    """
    vol = meter['counter_volume']
    ts = meter['timestamp']
    stat.unit = meter['counter_unit']
    stat.min = min(vol, stat.min or vol)
    stat.max = max(vol, stat.max)
    stat.sum = vol + (stat.sum or 0)
    stat.count += 1
    stat.avg = (stat.sum / float(stat.count))
    stat.duration_start = min(ts, stat.duration_start or ts)
    stat.duration_end = max(ts, stat.duration_end or ts)
    stat.duration = (timeutils.delta_seconds(stat.duration_start,
                                             stat.duration_end))
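# A self-contained sketch of the running-aggregate update that
# _update_meter_stats performs above (the Stat class and sample volumes are
# invented for illustration): min/max/sum/count are folded in one sample at a
# time and the average is recomputed from the running sum and count.
class Stat(object):
    def __init__(self):
        self.min = self.max = self.sum = self.avg = None
        self.count = 0


stat = Stat()
for vol in [3.0, 7.0, 5.0]:
    stat.min = min(vol, stat.min if stat.min is not None else vol)
    stat.max = max(vol, stat.max if stat.max is not None else vol)
    stat.sum = vol + (stat.sum or 0)
    stat.count += 1
    stat.avg = stat.sum / float(stat.count)

print(stat.min, stat.max, stat.sum, stat.avg)  # 3.0 7.0 15.0 5.0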
def _handle_expired_request(request):
    """Handle expired request.

    When request has expired it is removed from the requests cache and
    the `RequestTimeout` exception is set as a request result.
    """
    if request.transition_and_log_error(pr.FAILURE, logger=LOG):
        # Raise an exception (and then catch it) so we get a nice
        # traceback that the request will get instead of it getting
        # just an exception with no traceback...
        try:
            request_age = timeutils.delta_seconds(request.created_on,
                                                  timeutils.utcnow())
            raise exc.RequestTimeout(
                "Request '%s' has expired after waiting for %0.2f"
                " seconds for it to transition out of (%s) states"
                % (request, request_age, ", ".join(pr.WAITING_STATES)))
        except exc.RequestTimeout:
            with misc.capture_failure() as fail:
                LOG.debug(fail.exception_str)
                request.set_result(fail)
def _handle_expired_request(request):
    """Handle expired request.

    When request has expired it is removed from the requests cache and
    the `RequestTimeout` exception is set as a request result.
    """
    if request.transition_and_log_error(pr.FAILURE, logger=LOG):
        # Raise an exception (and then catch it) so we get a nice
        # traceback that the request will get instead of it getting
        # just an exception with no traceback...
        try:
            request_age = timeutils.delta_seconds(request.created_on,
                                                  timeutils.utcnow())
            raise exc.RequestTimeout(
                "Request '%s' has expired after waiting for %0.2f"
                " seconds for it to transition out of (%s) states"
                % (request, request_age, ", ".join(pr.WAITING_STATES)))
        except exc.RequestTimeout:
            with misc.capture_failure() as failure:
                LOG.debug(failure.exception_str)
                request.set_result(failure)
def _wait_for_stack_status(self, stack_identifier, status,
                           failure_pattern='^.*_FAILED$',
                           success_on_not_found=False):
    """Waits for a Stack to reach a given status.

    Note this compares the full $action_$status, e.g CREATE_COMPLETE,
    not just COMPLETE which is exposed via the status property of
    Stack in heatclient.
    """
    fail_regexp = re.compile(failure_pattern)
    build_timeout = self.conf.build_timeout
    build_interval = self.conf.build_interval

    start = timeutils.utcnow()
    while timeutils.delta_seconds(start,
                                  timeutils.utcnow()) < build_timeout:
        try:
            stack = self.client.stacks.get(stack_identifier)
        except heat_exceptions.HTTPNotFound:
            if success_on_not_found:
                return
            # ignore this, as the resource may not have
            # been created yet
        else:
            if stack.stack_status == status:
                return
            if fail_regexp.search(stack.stack_status):
                raise exceptions.StackBuildErrorException(
                    stack_identifier=stack_identifier,
                    stack_status=stack.stack_status,
                    stack_status_reason=stack.stack_status_reason)
        time.sleep(build_interval)

    message = ('Stack %s failed to reach %s status within '
               'the required time (%s s).' %
               (stack.stack_name, status, build_timeout))
    raise exceptions.TimeoutException(message)
def decommission_dn(nn, inst_to_be_deleted, survived_inst):
    with remote.get_remote(nn) as r:
        r.write_file_to('/etc/hadoop/dn.excl',
                        utils.generate_fqdn_host_names(
                            inst_to_be_deleted))
        run.refresh_nodes(remote.get_remote(nn), "dfsadmin")
        context.sleep(3)

        timeout = c_helper.get_decommissioning_timeout(
            nn.node_group.cluster)
        s_time = timeutils.utcnow()
        all_found = False

        while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
            cmd = r.execute_command(
                "sudo -u hdfs hadoop dfsadmin -report")
            all_found = True
            datanodes_info = parse_dfs_report(cmd[1])
            for i in inst_to_be_deleted:
                for dn in datanodes_info:
                    if (dn["Name"].startswith(i.internal_ip)) and (
                            dn["Decommission Status"] != "Decommissioned"):
                        all_found = False
                        break

            if all_found:
                r.write_files_to({
                    '/etc/hadoop/dn.incl':
                        utils.generate_fqdn_host_names(survived_inst),
                    '/etc/hadoop/dn.excl': "",
                })
                break
            context.sleep(3)

        if not all_found:
            # The exception must actually be raised, otherwise a timed-out
            # decommission would pass silently.
            raise ex.DecommissionError(
                _("Cannot finish decommission of cluster %(cluster)s in "
                  "%(seconds)d seconds") %
                {"cluster": nn.node_group.cluster, "seconds": timeout})
def is_up(self, service_ref):
    """Moved from nova.utils

    Check whether a service is up based on last heartbeat.
    """
    last_heartbeat = service_ref['updated_at'] or service_ref['created_at']
    if isinstance(last_heartbeat, six.string_types):
        # NOTE(russellb) If this service_ref came in over rpc via
        # conductor, then the timestamp will be a string and needs to be
        # converted back to a datetime.
        last_heartbeat = timeutils.parse_strtime(last_heartbeat)
    else:
        # Objects have proper UTC timezones, but the timeutils comparison
        # below does not (and will fail)
        last_heartbeat = last_heartbeat.replace(tzinfo=None)
    # Timestamps in DB are UTC.
    elapsed = timeutils.delta_seconds(last_heartbeat, timeutils.utcnow())
    is_up = abs(elapsed) <= self.service_down_time
    if not is_up:
        LOG.debug('Seems service is down. Last heartbeat was %(lhb)s. '
                  'Elapsed time is %(el)s',
                  {'lhb': str(last_heartbeat), 'el': str(elapsed)})
    return is_up
def is_up(self, service_ref):
    """Moved from nova.utils

    Check whether a service is up based on last heartbeat.
    """
    last_heartbeat = service_ref['updated_at'] or service_ref['created_at']
    if isinstance(last_heartbeat, six.string_types):
        # NOTE(russellb) If this service_ref came in over rpc via
        # conductor, then the timestamp will be a string and needs to be
        # converted back to a datetime.
        last_heartbeat = timeutils.parse_strtime(last_heartbeat)
    else:
        # Objects have proper UTC timezones, but the timeutils comparison
        # below does not (and will fail)
        last_heartbeat = last_heartbeat.replace(tzinfo=None)
    # Timestamps in DB are UTC.
    elapsed = timeutils.delta_seconds(last_heartbeat, timeutils.utcnow())
    is_up = abs(elapsed) <= self.service_down_time
    if not is_up:
        msg = _('Seems service is down. Last heartbeat was %(lhb)s. '
                'Elapsed time is %(el)s')
        LOG.debug(msg, {'lhb': str(last_heartbeat), 'el': str(elapsed)})
    return is_up
def verify_message_stats(self, message):
    """Verifies the oldest & newest message stats

    :param message: oldest (or) newest message returned by
        queue_name/stats.
    """
    expected_keys = ['age', 'created', 'href']

    response_keys = message.keys()
    response_keys = sorted(response_keys)
    self.assertEqual(response_keys, expected_keys)

    # Verify that age has valid values
    age = message['age']
    self.assertTrue(0 <= age <= self.limits.max_message_ttl,
                    msg='Invalid Age {0}'.format(age))

    # Verify that GET on href returns 200
    path = message['href']
    result = self.client.get(path)
    self.assertEqual(result.status_code, 200)

    # Verify that created time falls within the last 10 minutes
    # NOTE(malini): The messages are created during the test.
    created_time = message['created']
    created_time = timeutils.normalize_time(
        timeutils.parse_isotime(created_time))
    now = timeutils.utcnow()
    delta = timeutils.delta_seconds(before=created_time, after=now)
    # NOTE(malini): The 'int()' below is a work around for the small time
    # difference between julianday & UTC.
    # (needed to pass this test on sqlite driver)
    delta = int(delta)
    msg = ('Invalid Time Delta {0}, Created time {1}, Now {2}'
           .format(delta, created_time, now))
    self.assertTrue(0 <= delta <= 6000, msg)
def _await_agents(instances):
    api = cu.get_api_client(instances[0].node_group.cluster)
    timeout = 300
    LOG.debug("Waiting %(timeout)s seconds for agent connected to manager"
              % {'timeout': timeout})
    s_time = timeutils.utcnow()
    while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
        hostnames = [i.fqdn() for i in instances]
        hostnames_to_manager = [h.hostname for h in
                                api.get_all_hosts('full')]
        is_ok = True
        for hostname in hostnames:
            if hostname not in hostnames_to_manager:
                is_ok = False
                break

        if not is_ok:
            context.sleep(5)
        else:
            break
    else:
        raise ex.HadoopProvisionError(
            _("Cloudera agents failed to connect to"
              " Cloudera Manager"))
def _wait_for_resource_status(self, stack_identifier, resource_name,
                              status, failure_pattern='^.*_FAILED$',
                              success_on_not_found=False):
    """Waits for a Resource to reach a given status."""
    fail_regexp = re.compile(failure_pattern)
    build_timeout = self.conf.build_timeout
    build_interval = self.conf.build_interval

    start = timeutils.utcnow()
    while timeutils.delta_seconds(start,
                                  timeutils.utcnow()) < build_timeout:
        try:
            res = self.client.resources.get(
                stack_identifier, resource_name)
        except heat_exceptions.HTTPNotFound:
            if success_on_not_found:
                return
            # ignore this, as the resource may not have
            # been created yet
        else:
            if res.resource_status == status:
                return
            if fail_regexp.search(res.resource_status):
                raise exceptions.StackResourceBuildErrorException(
                    resource_name=res.resource_name,
                    stack_identifier=stack_identifier,
                    resource_status=res.resource_status,
                    resource_status_reason=res.resource_status_reason)
        time.sleep(build_interval)

    message = ('Resource %s failed to reach %s status within '
               'the required time (%s s).' %
               (res.resource_name, status, build_timeout))
    raise exceptions.TimeoutException(message)
def get_meter_statistics(self, sample_filter, period=None, groupby=None,
                         aggregate=None):
    """Return an iterable of models.Statistics instances.

    Items are containing meter statistics described by the query
    parameters. The filter must have a meter value set.

    .. note::

       Due to HBase limitations the aggregations are implemented
       in the driver itself, therefore this method will be quite slow
       because of all the Thrift traffic it is going to create.
    """
    if groupby:
        raise ceilometer.NotImplementedError("Group by not implemented.")

    if aggregate:
        raise ceilometer.NotImplementedError(
            'Selectable aggregates not implemented')

    with self.conn_pool.connection() as conn:
        meter_table = conn.table(self.METER_TABLE)
        q, start, stop, columns = (
            hbase_utils.make_sample_query_from_filter(sample_filter))
        # These fields are used in statistics' calculating
        columns.extend(['f:timestamp', 'f:counter_volume',
                        'f:counter_unit'])
        meters = map(hbase_utils.deserialize_entry,
                     list(meter for (ignored, meter) in
                          meter_table.scan(filter=q, row_start=start,
                                           row_stop=stop, columns=columns)))

    if sample_filter.start:
        start_time = sample_filter.start
    elif meters:
        start_time = meters[-1][0]['timestamp']
    else:
        start_time = None

    if sample_filter.end:
        end_time = sample_filter.end
    elif meters:
        end_time = meters[0][0]['timestamp']
    else:
        end_time = None

    results = []

    if not period:
        period = 0
        period_start = start_time
        period_end = end_time

    # As our HBase meters are stored as newest-first, we need to iterate
    # in the reverse order
    for meter in meters[::-1]:
        ts = meter[0]['timestamp']
        if period:
            offset = int(timeutils.delta_seconds(
                start_time, ts) / period) * period
            period_start = start_time + datetime.timedelta(0, offset)

        if not results or not results[-1].period_start == period_start:
            if period:
                period_end = period_start + datetime.timedelta(0, period)
            results.append(
                models.Statistics(unit='',
                                  count=0,
                                  min=0,
                                  max=0,
                                  avg=0,
                                  sum=0,
                                  period=period,
                                  period_start=period_start,
                                  period_end=period_end,
                                  duration=None,
                                  duration_start=None,
                                  duration_end=None,
                                  groupby=None))
        self._update_meter_stats(results[-1], meter[0])
    return results
def get_meter_statistics(self, sample_filter, period=None, groupby=None,
                         aggregate=None):
    """Return an iterable of models.Statistics instances.

    Items are containing meter statistics described by the query
    parameters. The filter must have a meter value set.
    """
    if (groupby and set(groupby) -
            set(['user_id', 'project_id', 'resource_id', 'source'])):
        raise ceilometer.NotImplementedError(
            "Unable to group by these fields")

    if aggregate:
        raise ceilometer.NotImplementedError(
            'Selectable aggregates not implemented')

    q = pymongo_utils.make_query_from_filter(sample_filter)

    if period:
        if sample_filter.start:
            period_start = sample_filter.start
        else:
            period_start = self.db.meter.find(
                limit=1,
                sort=[('timestamp', pymongo.ASCENDING)])[0]['timestamp']

    if groupby:
        sort_keys = ['counter_name'] + groupby + ['timestamp']
    else:
        sort_keys = ['counter_name', 'timestamp']

    sort_instructions = self._build_sort_instructions(sort_keys=sort_keys,
                                                      sort_dir='asc')
    meters = self.db.meter.find(q, sort=sort_instructions)

    def _group_key(meter):
        # the method to define a key for groupby call
        key = {}
        for y in sort_keys:
            if y == 'timestamp' and period:
                key[y] = (timeutils.delta_seconds(period_start, meter[y]) //
                          period)
            elif y != 'timestamp':
                key[y] = meter[y]
        return key

    def _to_offset(periods):
        return {'days': (periods * period) // self.SECONDS_IN_A_DAY,
                'seconds': (periods * period) % self.SECONDS_IN_A_DAY}

    for key, grouped_meters in itertools.groupby(meters, key=_group_key):
        stat = models.Statistics(unit=None, min=sys.maxint, max=-sys.maxint,
                                 avg=0, sum=0, count=0, period=0,
                                 period_start=0, period_end=0, duration=0,
                                 duration_start=0, duration_end=0,
                                 groupby=None)

        for meter in grouped_meters:
            stat.unit = meter.get('counter_unit', '')
            m_volume = meter.get('counter_volume')
            if stat.min > m_volume:
                stat.min = m_volume
            if stat.max < m_volume:
                stat.max = m_volume
            stat.sum += m_volume
            stat.count += 1
            if stat.duration_start == 0:
                stat.duration_start = meter['timestamp']
            stat.duration_end = meter['timestamp']
            if groupby and not stat.groupby:
                stat.groupby = {}
                for group_key in groupby:
                    stat.groupby[group_key] = meter[group_key]

        stat.duration = timeutils.delta_seconds(stat.duration_start,
                                                stat.duration_end)
        stat.avg = stat.sum / stat.count
        if period:
            stat.period = period
            periods = key.get('timestamp')
            stat.period_start = (
                period_start + datetime.timedelta(**(_to_offset(periods))))
            stat.period_end = (
                period_start +
                datetime.timedelta(**(_to_offset(periods + 1))))
        else:
            stat.period_start = stat.duration_start
            stat.period_end = stat.duration_end
        yield stat
def test_delta_seconds(self):
    before = timeutils.utcnow()
    after = before + datetime.timedelta(days=7, seconds=59,
                                        microseconds=123456)
    self.assertAlmostEquals(604859.123456,
                            timeutils.delta_seconds(before, after))
def get_agent_uptime(self, agent):
    return timeutils.delta_seconds(agent.started_at,
                                   agent.heartbeat_timestamp)
def test_lifecycle(self):
    doc = '{"ttl": 100, "grace": 60}'

    # First, claim some messages
    body = self.simulate_post(self.claims_path, self.project_id, body=doc)
    self.assertEqual(self.srmock.status, falcon.HTTP_201)

    claimed = jsonutils.loads(body[0])
    claim_href = self.srmock.headers_dict['Location']
    message_href, params = claimed[0]['href'].split('?')

    # No more messages to claim
    self.simulate_post(self.claims_path, self.project_id, body=doc,
                       query_string='limit=3')
    self.assertEqual(self.srmock.status, falcon.HTTP_204)

    headers = {
        'Client-ID': str(uuid.uuid4()),
    }

    # Listing messages, by default, won't include claimed
    body = self.simulate_get(self.messages_path, self.project_id,
                             headers=headers)
    self.assertEqual(self.srmock.status, falcon.HTTP_204)

    # Include claimed messages this time
    body = self.simulate_get(self.messages_path, self.project_id,
                             query_string='include_claimed=true',
                             headers=headers)
    listed = jsonutils.loads(body[0])
    self.assertEqual(self.srmock.status, falcon.HTTP_200)
    self.assertEqual(len(listed['messages']), len(claimed))

    now = timeutils.utcnow() + datetime.timedelta(seconds=10)
    timeutils_utcnow = 'zaqar.openstack.common.timeutils.utcnow'
    with mock.patch(timeutils_utcnow) as mock_utcnow:
        mock_utcnow.return_value = now
        body = self.simulate_get(claim_href, self.project_id)

    claim = jsonutils.loads(body[0])

    self.assertEqual(self.srmock.status, falcon.HTTP_200)
    self.assertEqual(self.srmock.headers_dict['Content-Location'],
                     claim_href)
    self.assertEqual(claim['ttl'], 100)
    # NOTE(cpp-cabrera): verify that claim age is non-negative
    self.assertThat(claim['age'], matchers.GreaterThan(-1))

    # Try to delete the message without submitting a claim_id
    self.simulate_delete(message_href, self.project_id)
    self.assertEqual(self.srmock.status, falcon.HTTP_403)

    # Delete the message and its associated claim
    self.simulate_delete(message_href, self.project_id,
                         query_string=params)
    self.assertEqual(self.srmock.status, falcon.HTTP_204)

    # Try to get it from the wrong project
    self.simulate_get(message_href, 'bogus_project', query_string=params)
    self.assertEqual(self.srmock.status, falcon.HTTP_404)

    # Get the message
    self.simulate_get(message_href, self.project_id, query_string=params)
    self.assertEqual(self.srmock.status, falcon.HTTP_404)

    # Update the claim
    new_claim_ttl = '{"ttl": 60}'
    creation = timeutils.utcnow()
    self.simulate_patch(claim_href, self.project_id, body=new_claim_ttl)
    self.assertEqual(self.srmock.status, falcon.HTTP_204)

    # Get the claimed messages (again)
    body = self.simulate_get(claim_href, self.project_id)
    query = timeutils.utcnow()
    claim = jsonutils.loads(body[0])
    message_href, params = claim['messages'][0]['href'].split('?')

    self.assertEqual(claim['ttl'], 60)
    estimated_age = timeutils.delta_seconds(creation, query)
    self.assertTrue(estimated_age > claim['age'])

    # Delete the claim
    self.simulate_delete(claim['href'], 'bad_id')
    self.assertEqual(self.srmock.status, falcon.HTTP_204)

    self.simulate_delete(claim['href'], self.project_id)
    self.assertEqual(self.srmock.status, falcon.HTTP_204)

    # Try to delete a message with an invalid claim ID
    self.simulate_delete(message_href, self.project_id,
                         query_string=params)
    self.assertEqual(self.srmock.status, falcon.HTTP_400)

    # Make sure it wasn't deleted!
    self.simulate_get(message_href, self.project_id, query_string=params)
    self.assertEqual(self.srmock.status, falcon.HTTP_200)

    # Try to get a claim that doesn't exist
    self.simulate_get(claim['href'])
    self.assertEqual(self.srmock.status, falcon.HTTP_404)

    # Try to update a claim that doesn't exist
    self.simulate_patch(claim['href'], body=doc)
    self.assertEqual(self.srmock.status, falcon.HTTP_404)