Example #1
0
 def elapsed(self):
     if self._state == self._STOPPED:
         return max(0.0, float(timeutils.delta_seconds(self._started_at,
                                                       self._stopped_at)))
     elif self._state == self._STARTED:
         return max(0.0, float(timeutils.delta_seconds(self._started_at,
                                                       timeutils.utcnow())))
     else:
         raise RuntimeError("Can not get the elapsed time of a stopwatch"
                            " if it has not been started/stopped")
 def get_interval(event_filter, period, groupby, aggregate):
     self.assertIsNotNone(event_filter.start)
     self.assertIsNotNone(event_filter.end)
     if event_filter.start > end or event_filter.end < start:
         return []
     duration_start = max(event_filter.start, start)
     duration_end = min(event_filter.end, end)
     duration = timeutils.delta_seconds(duration_start, duration_end)
     return [
         models.Statistics(
             unit='',
             min=0,
             max=0,
             avg=0,
             sum=0,
             count=0,
             period=None,
             period_start=None,
             period_end=None,
             duration=duration,
             duration_start=duration_start,
             duration_end=duration_end,
             groupby=None,
         )
     ]
Example #3
0
def _detach_volume(instance, volume_id):
    volume = cinder.get_volume(volume_id)
    try:
        LOG.debug("Detaching volume %s from instance %s" % (
            volume_id, instance.instance_name))
        nova.client().volumes.delete_server_volume(instance.instance_id,
                                                   volume_id)
    except Exception:
        LOG.exception(_LE("Can't detach volume %s"), volume.id)

    detach_timeout = CONF.detach_volume_timeout
    LOG.debug("Waiting %d seconds to detach %s volume" % (detach_timeout,
                                                          volume_id))
    s_time = tu.utcnow()
    while tu.delta_seconds(s_time, tu.utcnow()) < detach_timeout:
        volume = cinder.get_volume(volume_id)
        if volume.status not in ['available', 'error']:
            context.sleep(2)
        else:
            LOG.debug("Volume %s has been detached" % volume_id)
            return
    else:
        LOG.warn(_LW("Can't detach volume %(volume)s. "
                     "Current status of volume: %(status)s"),
                 {'volume': volume_id, 'status': volume.status})
Example #4
0
def convert_image(source, dest, out_format, run_as_root=True):
    """Convert image to other format."""

    cmd = ('qemu-img', 'convert',
                    '-O', out_format, source, dest)

    start_time = timeutils.utcnow()
    utils.execute(*cmd, run_as_root=run_as_root)
    duration = timeutils.delta_seconds(start_time, timeutils.utcnow())

    # NOTE(jdg): use a default of 1, mostly for unit test, but in
    # some incredible event this is 0 (cirros image?) don't barf
    if duration < 1:
        duration = 1
    fsz_mb = os.stat(source).st_size / units.Mi
    mbps = (fsz_mb / duration)
    msg = ("Image conversion details: src %(src)s, size %(sz).2f MB, "
           "duration %(duration).2f sec, destination %(dest)s")
    LOG.debug(msg % {"src": source,
                     "sz": fsz_mb,
                     "duration": duration,
                     "dest": dest})

    msg = _("Converted %(sz).2f MB image at %(mbps).2f MB/s")
    LOG.info(msg % {"sz": fsz_mb, "mbps": mbps})
Example #5
0
        def _inner():
            if initial_delay:
                greenthread.sleep(initial_delay)

            try:
                while self._running:
                    start = timeutils.utcnow()
                    self.f(*self.args, **self.kw)
                    end = timeutils.utcnow()
                    if not self._running:
                        break
                    delay = interval - timeutils.delta_seconds(start, end)
                    if delay <= 0:
                        LOG.warn(_LW('task run outlasted interval by %s sec'),
                                 -delay)
                    greenthread.sleep(delay if delay > 0 else 0)
            except LoopingCallDone as e:
                self.stop()
                done.send(e.retvalue)
            except Exception:
                LOG.exception(_LE('in fixed duration looping call'))
                done.send_exception(*sys.exc_info())
                return
            else:
                done.send(True)
Example #6
0
def _start_cloudera_manager(cluster):
    manager = pu.get_manager(cluster)
    with manager.remote() as r:
        cmd.start_cloudera_db(r)
        cmd.start_manager(r)

    timeout = 300
    LOG.debug("Waiting %(timeout)s seconds for Manager to start : " % {
        'timeout': timeout})
    s_time = timeutils.utcnow()
    while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
        try:
            conn = telnetlib.Telnet(manager.management_ip, CM_API_PORT)
            conn.close()
            break
        except IOError:
            context.sleep(2)
    else:
        message = _("Cloudera Manager failed to start in %(timeout)s minutes "
                    "on node '%(node)s' of cluster '%(cluster)s'") % {
                        'timeout': timeout / 60,
                        'node': manager.management_ip,
                        'cluster': cluster.name}
        raise ex.HadoopProvisionError(message)

    LOG.info(_LI("Cloudera Manager has been started"))
Example #7
0
    def handle_sample(self, context, s):
        """Handle a sample, converting if necessary."""
        LOG.debug(_('handling sample %s'), (s,))
        key = s.name + s.resource_id
        prev = self.cache.get(key)
        timestamp = timeutils.parse_isotime(s.timestamp)
        self.cache[key] = (s.volume, timestamp)

        if prev:
            prev_volume = prev[0]
            prev_timestamp = prev[1]
            time_delta = timeutils.delta_seconds(prev_timestamp, timestamp)
            # we only allow negative deltas for noncumulative samples, whereas
            # for cumulative we assume that a reset has occurred in the interim
            # so that the current volume gives a lower bound on growth
            volume_delta = (s.volume - prev_volume
                            if (prev_volume <= s.volume or
                                s.type != sample.TYPE_CUMULATIVE)
                            else s.volume)
            rate_of_change = ((1.0 * volume_delta / time_delta)
                              if time_delta else 0.0)

            s = self._convert(s, rate_of_change)
            LOG.debug(_('converted to: %s'), (s,))
        else:
            LOG.warn(_('dropping sample with no predecessor: %s'),
                     (s,))
            s = None
        return s
    def _check_transient(self):
        pig_job_data = self.edp_info.read_pig_example_script()
        pig_lib_data = self.edp_info.read_pig_example_jar()
        self.edp_testing(job_type=utils_edp.JOB_TYPE_PIG,
                         job_data_list=[{'pig': pig_job_data}],
                         lib_data_list=[{'jar': pig_lib_data}])

        # set timeout in seconds
        timeout = self.common_config.TRANSIENT_CLUSTER_TIMEOUT * 60
        s_time = timeutils.utcnow()
        raise_failure = True
        # wait for cluster deleting
        while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
            try:
                self.sahara.clusters.get(self.cluster_id)
            except sab.APIException as api_ex:
                if 'not found' in api_ex.message:
                    raise_failure = False
                    break
            time.sleep(2)

        if raise_failure:
            self.fail('Transient cluster has not been deleted within %s '
                      'minutes.'
                      % self.common_config.TRANSIENT_CLUSTER_TIMEOUT)
Example #9
0
    def _wait_on_task_execution(self):
        """Wait until all the tasks have finished execution and are in
        state of success or failure.
        """

        start = timeutils.utcnow()

        # wait for maximum of 5 seconds
        while timeutils.delta_seconds(start, timeutils.utcnow()) < 5:
            wait = False
            # Verify that no task is in status of pending or processing
            path = "/v2/tasks"
            res, content = self.http.request(path, 'GET',
                                             headers=minimal_task_headers())
            content_dict = json.loads(content)

            self.assertEqual(res.status, 200)
            res_tasks = content_dict['tasks']
            if len(res_tasks) != 0:
                for task in res_tasks:
                    if task['status'] in ('pending', 'processing'):
                        wait = True
                        break

            if wait:
                time.sleep(0.05)
                continue
            else:
                break
Example #10
0
def oid_ts(oid):
    """Converts an ObjectId to a UNIX timestamp.

    :raises: TypeError if oid isn't an ObjectId
    """
    try:
        return timeutils.delta_seconds(EPOCH, oid.generation_time)
    except AttributeError:
        raise TypeError(u'Expected ObjectId and got %s' % type(oid))
Example #11
0
    def get_meter_statistics(self, sample_filter, period=None, groupby=None,
                             aggregate=None):
        """Return an iterable of api_models.Statistics instances.

        Items are containing meter statistics described by the query
        parameters. The filter must have a meter value set.
        """
        if groupby:
            for group in groupby:
                if group not in ['user_id', 'project_id', 'resource_id']:
                    raise ceilometer.NotImplementedError('Unable to group by '
                                                         'these fields')

        if not period:
            for res in self._make_stats_query(sample_filter,
                                              groupby,
                                              aggregate):
                if res.count:
                    yield self._stats_result_to_model(res, 0,
                                                      res.tsmin, res.tsmax,
                                                      groupby,
                                                      aggregate)
            return

        if not sample_filter.start or not sample_filter.end:
            res = self._make_stats_query(sample_filter,
                                         None,
                                         aggregate).first()
            if not res:
                # NOTE(liusheng):The 'res' may be NoneType, because no
                # sample has found with sample filter(s).
                return

        query = self._make_stats_query(sample_filter, groupby, aggregate)
        # HACK(jd) This is an awful method to compute stats by period, but
        # since we're trying to be SQL agnostic we have to write portable
        # code, so here it is, admire! We're going to do one request to get
        # stats by period. We would like to use GROUP BY, but there's no
        # portable way to manipulate timestamp in SQL, so we can't.
        for period_start, period_end in base.iter_period(
                sample_filter.start or res.tsmin,
                sample_filter.end or res.tsmax,
                period):
            q = query.filter(models.Sample.timestamp >= period_start)
            q = q.filter(models.Sample.timestamp < period_end)
            for r in q.all():
                if r.count:
                    yield self._stats_result_to_model(
                        result=r,
                        period=int(timeutils.delta_seconds(period_start,
                                                           period_end)),
                        period_start=period_start,
                        period_end=period_end,
                        groupby=groupby,
                        aggregate=aggregate
                    )
Example #12
0
 def _group_key(meter):
     # the method to define a key for groupby call
     key = {}
     for y in sort_keys:
         if y == 'timestamp' and period:
             key[y] = (timeutils.delta_seconds(period_start,
                                               meter[y]) // period)
         elif y != 'timestamp':
             key[y] = meter[y]
     return key
Example #13
0
    def assertTimestampEqual(self, first, second, msg=None):
        """Checks that two timestamps are equals.

        This relies on assertAlmostEqual to avoid rounding problem, and only
        checks up the first microsecond values.

        """
        return self.assertAlmostEqual(
            timeutils.delta_seconds(first, second),
            0.0,
            places=5)
Example #14
0
 def _get_cached_vswitch_existence(self, host):
     """Returns cached existence. Old and non-cached raise ValueError."""
     entry = self.ivs_host_cache.get(host)
     if not entry:
         raise ValueError(_('No cache entry for host %s') % host)
     diff = timeutils.delta_seconds(entry['timestamp'],
                                    datetime.datetime.now())
     if diff > CACHE_VSWITCH_TIME:
         self.ivs_host_cache.pop(host)
         raise ValueError(_('Expired cache entry for host %s') % host)
     return entry['exists']
Example #15
0
    def _record_poll_time(self):
        """Method records current time as the poll time.

        :return: time in seconds since the last poll time was recorded
        """
        current_time = timeutils.utcnow()
        duration = None
        if hasattr(self, '_last_poll_time'):
            duration = timeutils.delta_seconds(self._last_poll_time,
                                               current_time)
        self._last_poll_time = current_time
        return duration
Example #16
0
 def inner(self, *args, **kwargs):
     LOG.debug("Entering %(cls)s.%(method)s",
               {'cls': self.__class__.__name__,
                'method': func.__name__})
     start = timeutils.utcnow()
     ret = func(self, *args, **kwargs)
     end = timeutils.utcnow()
     LOG.debug("Exiting %(cls)s.%(method)s. "
               "Spent %(duration)s sec. "
               "Return %(return)s",
               {'cls': self.__class__.__name__,
                'duration': timeutils.delta_seconds(start, end),
                'method': func.__name__,
                'return': ret})
     return ret
 def _is_master(self, interval):
     """Determine if the current partition is the master."""
     now = timeutils.utcnow()
     if timeutils.delta_seconds(self.start, now) < interval * 2:
         LOG.debug(_('%s still warming up') % self.this)
         return False
     is_master = True
     for partition, last_heard in self.reports.items():
         delta = timeutils.delta_seconds(last_heard, now)
         LOG.debug(_('last heard from %(report)s %(delta)s seconds ago') %
                   dict(report=partition, delta=delta))
         if delta > interval * 2:
             del self.reports[partition]
             self._record_oldest(partition, stale=True)
             LOG.debug(_('%(this)s detects stale evaluator: %(stale)s') %
                       dict(this=self.this, stale=partition))
             self.presence_changed = True
         elif partition < self.this:
             is_master = False
             LOG.info(_('%(this)s sees older potential master: %(older)s')
                      % dict(this=self.this, older=partition))
     LOG.info(_('%(this)s is master?: %(is_master)s') %
              dict(this=self.this, is_master=is_master))
     return is_master
Example #18
0
 def _stats_result_to_model(result, period, period_start,
                            period_end, groupby, aggregate):
     stats_args = Connection._stats_result_aggregates(result, aggregate)
     stats_args['unit'] = result.unit
     duration = (timeutils.delta_seconds(result.tsmin, result.tsmax)
                 if result.tsmin is not None and result.tsmax is not None
                 else None)
     stats_args['duration'] = duration
     stats_args['duration_start'] = result.tsmin
     stats_args['duration_end'] = result.tsmax
     stats_args['period'] = period
     stats_args['period_start'] = period_start
     stats_args['period_end'] = period_end
     stats_args['groupby'] = (dict(
         (g, getattr(result, g)) for g in groupby) if groupby else None)
     return api_models.Statistics(**stats_args)
Example #19
0
def copy_volume(srcstr, deststr, size_in_m, blocksize, sync=False,
                execute=utils.execute, ionice=None):
    # Use O_DIRECT to avoid thrashing the system buffer cache
    extra_flags = []
    if check_for_odirect_support(srcstr, deststr, 'iflag=direct'):
        extra_flags.append('iflag=direct')

    if check_for_odirect_support(srcstr, deststr, 'oflag=direct'):
        extra_flags.append('oflag=direct')

    # If the volume is being unprovisioned then
    # request the data is persisted before returning,
    # so that it's not discarded from the cache.
    if sync and not extra_flags:
        extra_flags.append('conv=fdatasync')

    blocksize, count = _calculate_count(size_in_m, blocksize)

    cmd = ['dd', 'if=%s' % srcstr, 'of=%s' % deststr,
           'count=%d' % count, 'bs=%s' % blocksize]
    cmd.extend(extra_flags)

    if ionice is not None:
        cmd = ['ionice', ionice] + cmd

    cgcmd = setup_blkio_cgroup(srcstr, deststr, CONF.volume_copy_bps_limit)
    if cgcmd:
        cmd = cgcmd + cmd

    # Perform the copy
    start_time = timeutils.utcnow()
    execute(*cmd, run_as_root=True)
    duration = timeutils.delta_seconds(start_time, timeutils.utcnow())

    # NOTE(jdg): use a default of 1, mostly for unit test, but in
    # some incredible event this is 0 (cirros image?) don't barf
    if duration < 1:
        duration = 1
    mbps = (size_in_m / duration)
    mesg = ("Volume copy details: src %(src)s, dest %(dest)s, "
            "size %(sz).2f MB, duration %(duration).2f sec")
    LOG.debug(mesg % {"src": srcstr,
                      "dest": deststr,
                      "sz": size_in_m,
                      "duration": duration})
    mesg = _("Volume copy %(size_in_m).2f MB at %(mbps).2f MB/s")
    LOG.info(mesg % {'size_in_m': size_in_m, 'mbps': mbps})
Example #20
0
def cancel_job(job_execution_id):
    ctx = context.ctx()
    job_execution = conductor.job_execution_get(ctx, job_execution_id)
    if job_execution.info['status'] in edp.JOB_STATUSES_TERMINATED:
        return job_execution
    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster is None:
        return job_execution
    engine = _get_job_engine(cluster, job_execution)
    if engine is not None:
        job_execution = conductor.job_execution_update(
            ctx, job_execution_id,
            {'info': {'status': edp.JOB_STATUS_TOBEKILLED}})

        timeout = CONF.job_canceling_timeout
        s_time = timeutils.utcnow()
        while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
            if job_execution.info['status'] not in edp.JOB_STATUSES_TERMINATED:
                try:
                    job_info = engine.cancel_job(job_execution)
                except Exception as ex:
                    job_info = None
                    LOG.exception(
                        _LE("Error during cancel of job execution %(job)s: "
                            "%(error)s"), {'job': job_execution.id,
                                           'error': ex})
                if job_info is not None:
                    job_execution = _write_job_status(job_execution, job_info)
                    LOG.info(_LI("Job execution %s was canceled successfully"),
                             job_execution.id)
                    return job_execution
                context.sleep(3)
                job_execution = conductor.job_execution_get(
                    ctx, job_execution_id)
                if not job_execution:
                    LOG.info(_LI("Job execution %(job_exec_id)s was deleted. "
                                 "Canceling current operation."),
                             {'job_exec_id': job_execution_id})
                    return job_execution
            else:
                LOG.info(_LI("Job execution status %(job)s: %(status)s"),
                         {'job': job_execution.id,
                          'status': job_execution.info['status']})
                return job_execution
        else:
            raise e.CancelingFailed(_('Job execution %s was not canceled')
                                    % job_execution.id)
Example #21
0
        def terminate_unneeded_clusters(self, ctx):
            LOG.debug('Terminating unneeded transient clusters')
            ctx = context.get_admin_context()
            context.set_ctx(ctx)
            for cluster in conductor.cluster_get_all(ctx, status='Active'):
                if not cluster.is_transient:
                    continue

                jc = conductor.job_execution_count(ctx,
                                                   end_time=None,
                                                   cluster_id=cluster.id)

                if jc > 0:
                    continue

                cluster_updated_at = timeutils.normalize_time(
                    timeutils.parse_isotime(cluster.updated_at))
                current_time = timeutils.utcnow()
                spacing = timeutils.delta_seconds(cluster_updated_at,
                                                  current_time)
                if spacing < CONF.min_transient_cluster_active_time:
                    continue

                if CONF.use_identity_api_v3:
                    trusts.use_os_admin_auth_token(cluster)

                    LOG.info(_LI('Terminating transient cluster %(cluster)s '
                                 'with id %(id)s'),
                             {'cluster': cluster.name, 'id': cluster.id})

                    try:
                        ops.terminate_cluster(cluster.id)
                    except Exception as e:
                        LOG.info(_LI('Failed to terminate transient cluster '
                                 '%(cluster)s with id %(id)s: %(error)s.'),
                                 {'cluster': cluster.name,
                                  'id': cluster.id,
                                  'error': six.text_type(e)})

                else:
                    if cluster.status != 'AwaitingTermination':
                        conductor.cluster_update(
                            ctx,
                            cluster,
                            {'status': 'AwaitingTermination'})
            context.set_ctx(None)
Example #22
0
def convert_image(source, dest, out_format, bps_limit=None, run_as_root=True):
    """Convert image to other format."""

    cmd = ('qemu-img', 'convert',
           '-O', out_format, source, dest)

    # Check whether O_DIRECT is supported and set '-t none' if it is
    # This is needed to ensure that all data hit the device before
    # it gets unmapped remotely from the host for some backends
    # Reference Bug: #1363016

    # NOTE(jdg): In the case of file devices qemu does the
    # flush properly and more efficiently than would be done
    # setting O_DIRECT, so check for that and skip the
    # setting for non BLK devs
    if (utils.is_blk_device(dest) and
            volume_utils.check_for_odirect_support(source,
                                                   dest,
                                                   'oflag=direct')):
        cmd = ('qemu-img', 'convert',
               '-t', 'none',
               '-O', out_format, source, dest)

    start_time = timeutils.utcnow()
    cgcmd = volume_utils.setup_blkio_cgroup(source, dest, bps_limit)
    if cgcmd:
        cmd = tuple(cgcmd) + cmd
    utils.execute(*cmd, run_as_root=run_as_root)

    duration = timeutils.delta_seconds(start_time, timeutils.utcnow())

    # NOTE(jdg): use a default of 1, mostly for unit test, but in
    # some incredible event this is 0 (cirros image?) don't barf
    if duration < 1:
        duration = 1
    fsz_mb = os.stat(source).st_size / units.Mi
    mbps = (fsz_mb / duration)
    msg = ("Image conversion details: src %(src)s, size %(sz).2f MB, "
           "duration %(duration).2f sec, destination %(dest)s")
    LOG.debug(msg % {"src": source,
                     "sz": fsz_mb,
                     "duration": duration,
                     "dest": dest})

    msg = _("Converted %(sz).2f MB image at %(mbps).2f MB/s")
    LOG.info(msg % {"sz": fsz_mb, "mbps": mbps})
    def _check_notification_service(self):
        self.srv.start()

        notifier = messaging.get_notifier(self.transport,
                                          "compute.vagrant-precise")
        notifier.info(context.RequestContext(), 'compute.instance.create.end',
                      TEST_NOTICE_PAYLOAD)
        start = timeutils.utcnow()
        while timeutils.delta_seconds(start, timeutils.utcnow()) < 600:
            if len(self.publisher.samples) >= self.expected_samples:
                break
            eventlet.sleep(0)

        self.srv.stop()

        resources = list(set(s.resource_id for s in self.publisher.samples))
        self.assertEqual(self.expected_samples, len(self.publisher.samples))
        self.assertEqual(["9f9d01b9-4a58-4271-9e27-398b21ab20d1"], resources)
Example #24
0
def _check_decommission(cluster, instances, check_func, timeout):
    s_time = timeutils.utcnow()
    while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
        statuses = check_func(cluster)
        dec_ok = True
        for instance in instances:
            if statuses[instance.fqdn()] != 'decommissioned':
                dec_ok = False

        if dec_ok:
            return
        else:
            context.sleep(5)
    else:
        ex.DecommissionError(
            _("Cannot finish decommission of cluster %(cluster)s in "
              "%(seconds)d seconds") %
            {"cluster": cluster, "seconds": timeout})
Example #25
0
def iter_period(start, end, period):
    """Split a time from start to end in periods of a number of seconds.

    This function yields the (start, end) time for each period composing the
    time passed as argument.

    :param start: When the period set start.
    :param end: When the period end starts.
    :param period: The duration of the period.
    """
    period_start = start
    increment = datetime.timedelta(seconds=period)
    for i in moves.xrange(int(math.ceil(
            timeutils.delta_seconds(start, end)
            / float(period)))):
        next_start = period_start + increment
        yield (period_start, next_start)
        period_start = next_start
Example #26
0
def clear_volume(volume_size, volume_path, volume_clear=None,
                 volume_clear_size=None, volume_clear_ionice=None):
    """Unprovision old volumes to prevent data leaking between users."""
    if volume_clear is None:
        volume_clear = CONF.volume_clear

    if volume_clear_size is None:
        volume_clear_size = CONF.volume_clear_size

    if volume_clear_size == 0:
        volume_clear_size = volume_size

    if volume_clear_ionice is None:
        volume_clear_ionice = CONF.volume_clear_ionice

    LOG.info(_("Performing secure delete on volume: %s") % volume_path)

    if volume_clear == 'zero':
        return copy_volume('/dev/zero', volume_path, volume_clear_size,
                           CONF.volume_dd_blocksize,
                           sync=True, execute=utils.execute,
                           ionice=volume_clear_ionice)
    elif volume_clear == 'shred':
        clear_cmd = ['shred', '-n3']
        if volume_clear_size:
            clear_cmd.append('-s%dMiB' % volume_clear_size)
    else:
        raise exception.InvalidConfigurationValue(
            option='volume_clear',
            value=volume_clear)

    clear_cmd.append(volume_path)
    start_time = timeutils.utcnow()
    utils.execute(*clear_cmd, run_as_root=True)
    duration = timeutils.delta_seconds(start_time, timeutils.utcnow())

    # NOTE(jdg): use a default of 1, mostly for unit test, but in
    # some incredible event this is 0 (cirros image?) don't barf
    if duration < 1:
        duration = 1
    LOG.info(_('Elapsed time for clear volume: %.2f sec') % duration)
Example #27
0
def decommission_dn(nn, inst_to_be_deleted, survived_inst):
    with remote.get_remote(nn) as r:
        r.write_file_to('/etc/hadoop/dn.excl',
                        utils.generate_fqdn_host_names(inst_to_be_deleted))
        run.refresh_nodes(remote.get_remote(nn), "dfsadmin")
        context.sleep(3)

        timeout = config_helper.get_decommissioning_timeout(
            nn.node_group.cluster)
        s_time = timeutils.utcnow()
        all_found = False

        while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
            cmd = r.execute_command(
                "sudo su -c 'hadoop dfsadmin -report' hadoop")
            all_found = True
            datanodes_info = parse_dfs_report(cmd[1])
            for i in inst_to_be_deleted:
                for dn in datanodes_info:
                    if (dn["Name"].startswith(i.internal_ip)) and (
                            dn["Decommission Status"] != "Decommissioned"):
                        all_found = False
                        break

            if all_found:
                r.write_files_to({
                    '/etc/hadoop/dn.incl':
                    utils.generate_fqdn_host_names(survived_inst),
                    '/etc/hadoop/dn.excl':
                    "",
                })
                break
            context.sleep(3)

        if not all_found:
            ex.DecommissionError(
                _("Cannot finish decommission of cluster %(cluster)s in "
                  "%(seconds)d seconds") % {
                      "cluster": nn.node_group.cluster,
                      "seconds": timeout
                  })
Example #28
0
    def _update_meter_stats(stat, meter):
        """Do the stats calculation on a requested time bucket in stats dict

        :param stats: dict where aggregated stats are kept
        :param index: time bucket index in stats
        :param meter: meter record as returned from HBase
        :param start_time: query start time
        :param period: length of the time bucket
        """
        vol = meter['counter_volume']
        ts = meter['timestamp']
        stat.unit = meter['counter_unit']
        stat.min = min(vol, stat.min or vol)
        stat.max = max(vol, stat.max)
        stat.sum = vol + (stat.sum or 0)
        stat.count += 1
        stat.avg = (stat.sum / float(stat.count))
        stat.duration_start = min(ts, stat.duration_start or ts)
        stat.duration_end = max(ts, stat.duration_end or ts)
        stat.duration = (timeutils.delta_seconds(stat.duration_start,
                                                 stat.duration_end))
Example #29
0
    def _handle_expired_request(request):
        """Handle expired request.

        When request has expired it is removed from the requests cache and
        the `RequestTimeout` exception is set as a request result.
        """
        if request.transition_and_log_error(pr.FAILURE, logger=LOG):
            # Raise an exception (and then catch it) so we get a nice
            # traceback that the request will get instead of it getting
            # just an exception with no traceback...
            try:
                request_age = timeutils.delta_seconds(request.created_on,
                                                      timeutils.utcnow())
                raise exc.RequestTimeout(
                    "Request '%s' has expired after waiting for %0.2f"
                    " seconds for it to transition out of (%s) states"
                    % (request, request_age, ", ".join(pr.WAITING_STATES)))
            except exc.RequestTimeout:
                with misc.capture_failure() as fail:
                    LOG.debug(fail.exception_str)
                    request.set_result(fail)
Example #30
0
    def _handle_expired_request(request):
        """Handle expired request.

        When request has expired it is removed from the requests cache and
        the `RequestTimeout` exception is set as a request result.
        """
        if request.transition_and_log_error(pr.FAILURE, logger=LOG):
            # Raise an exception (and then catch it) so we get a nice
            # traceback that the request will get instead of it getting
            # just an exception with no traceback...
            try:
                request_age = timeutils.delta_seconds(request.created_on,
                                                      timeutils.utcnow())
                raise exc.RequestTimeout(
                    "Request '%s' has expired after waiting for %0.2f"
                    " seconds for it to transition out of (%s) states"
                    % (request, request_age, ", ".join(pr.WAITING_STATES)))
            except exc.RequestTimeout:
                with misc.capture_failure() as failure:
                    LOG.debug(failure.exception_str)
                    request.set_result(failure)
Example #31
0
    def _update_meter_stats(stat, meter):
        """Do the stats calculation on a requested time bucket in stats dict

        :param stats: dict where aggregated stats are kept
        :param index: time bucket index in stats
        :param meter: meter record as returned from HBase
        :param start_time: query start time
        :param period: length of the time bucket
        """
        vol = meter['counter_volume']
        ts = meter['timestamp']
        stat.unit = meter['counter_unit']
        stat.min = min(vol, stat.min or vol)
        stat.max = max(vol, stat.max)
        stat.sum = vol + (stat.sum or 0)
        stat.count += 1
        stat.avg = (stat.sum / float(stat.count))
        stat.duration_start = min(ts, stat.duration_start or ts)
        stat.duration_end = max(ts, stat.duration_end or ts)
        stat.duration = (timeutils.delta_seconds(stat.duration_start,
                                                 stat.duration_end))
Example #32
0
    def _wait_for_stack_status(self,
                               stack_identifier,
                               status,
                               failure_pattern='^.*_FAILED$',
                               success_on_not_found=False):
        """
        Waits for a Stack to reach a given status.

        Note this compares the full $action_$status, e.g
        CREATE_COMPLETE, not just COMPLETE which is exposed
        via the status property of Stack in heatclient
        """
        fail_regexp = re.compile(failure_pattern)
        build_timeout = self.conf.build_timeout
        build_interval = self.conf.build_interval

        start = timeutils.utcnow()
        while timeutils.delta_seconds(start,
                                      timeutils.utcnow()) < build_timeout:
            try:
                stack = self.client.stacks.get(stack_identifier)
            except heat_exceptions.HTTPNotFound:
                if success_on_not_found:
                    return
                # ignore this, as the resource may not have
                # been created yet
            else:
                if stack.stack_status == status:
                    return
                if fail_regexp.search(stack.stack_status):
                    raise exceptions.StackBuildErrorException(
                        stack_identifier=stack_identifier,
                        stack_status=stack.stack_status,
                        stack_status_reason=stack.stack_status_reason)
            time.sleep(build_interval)

        message = ('Stack %s failed to reach %s status within '
                   'the required time (%s s).' %
                   (stack.stack_name, status, build_timeout))
        raise exceptions.TimeoutException(message)
Example #33
0
def decommission_dn(nn, inst_to_be_deleted, survived_inst):
    with remote.get_remote(nn) as r:
        r.write_file_to('/etc/hadoop/dn.excl',
                        utils.generate_fqdn_host_names(
                            inst_to_be_deleted))
        run.refresh_nodes(remote.get_remote(nn), "dfsadmin")
        context.sleep(3)

        timeout = c_helper.get_decommissioning_timeout(
            nn.node_group.cluster)
        s_time = timeutils.utcnow()
        all_found = False

        while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
            cmd = r.execute_command(
                "sudo -u hdfs hadoop dfsadmin -report")
            all_found = True
            datanodes_info = parse_dfs_report(cmd[1])
            for i in inst_to_be_deleted:
                for dn in datanodes_info:
                    if (dn["Name"].startswith(i.internal_ip)) and (
                            dn["Decommission Status"] != "Decommissioned"):
                        all_found = False
                        break

            if all_found:
                r.write_files_to({'/etc/hadoop/dn.incl':
                                 utils.
                                 generate_fqdn_host_names(survived_inst),
                                  '/etc/hadoop/dn.excl': "",
                                  })
                break
            context.sleep(3)

        if not all_found:
            ex.DecommissionError(
                _("Cannot finish decommission of cluster %(cluster)s in "
                  "%(seconds)d seconds") %
                {"cluster": nn.node_group.cluster,
                 "seconds": timeout})
Example #34
0
 def is_up(self, service_ref):
     """Moved from nova.utils
     Check whether a service is up based on last heartbeat.
     """
     last_heartbeat = service_ref['updated_at'] or service_ref['created_at']
     if isinstance(last_heartbeat, six.string_types):
         # NOTE(russellb) If this service_ref came in over rpc via
         # conductor, then the timestamp will be a string and needs to be
         # converted back to a datetime.
         last_heartbeat = timeutils.parse_strtime(last_heartbeat)
     else:
         # Objects have proper UTC timezones, but the timeutils comparison
         # below does not (and will fail)
         last_heartbeat = last_heartbeat.replace(tzinfo=None)
     # Timestamps in DB are UTC.
     elapsed = timeutils.delta_seconds(last_heartbeat, timeutils.utcnow())
     is_up = abs(elapsed) <= self.service_down_time
     if not is_up:
         LOG.debug('Seems service is down. Last heartbeat was %(lhb)s. '
                   'Elapsed time is %(el)s',
                   {'lhb': str(last_heartbeat), 'el': str(elapsed)})
     return is_up
Example #35
0
 def is_up(self, service_ref):
     """Moved from nova.utils
     Check whether a service is up based on last heartbeat.
     """
     last_heartbeat = service_ref['updated_at'] or service_ref['created_at']
     if isinstance(last_heartbeat, six.string_types):
         # NOTE(russellb) If this service_ref came in over rpc via
         # conductor, then the timestamp will be a string and needs to be
         # converted back to a datetime.
         last_heartbeat = timeutils.parse_strtime(last_heartbeat)
     else:
         # Objects have proper UTC timezones, but the timeutils comparison
         # below does not (and will fail)
         last_heartbeat = last_heartbeat.replace(tzinfo=None)
     # Timestamps in DB are UTC.
     elapsed = timeutils.delta_seconds(last_heartbeat, timeutils.utcnow())
     is_up = abs(elapsed) <= self.service_down_time
     if not is_up:
         msg = _('Seems service is down. Last heartbeat was %(lhb)s. '
                 'Elapsed time is %(el)s')
         LOG.debug(msg, {'lhb': str(last_heartbeat), 'el': str(elapsed)})
     return is_up
Example #36
0
File: base.py Project: rose/zaqar
    def verify_message_stats(self, message):
        """Verifies the oldest & newest message stats

        :param message: oldest (or) newest message returned by
                        queue_name/stats.
        """
        expected_keys = ['age', 'created', 'href']

        response_keys = message.keys()
        response_keys = sorted(response_keys)
        self.assertEqual(response_keys, expected_keys)

        # Verify that age has valid values
        age = message['age']
        self.assertTrue(0 <= age <= self.limits.max_message_ttl,
                        msg='Invalid Age {0}'.format(age))

        # Verify that GET on href returns 200
        path = message['href']
        result = self.client.get(path)
        self.assertEqual(result.status_code, 200)

        # Verify that created time falls within the last 10 minutes
        # NOTE(malini): The messages are created during the test.
        created_time = message['created']
        created_time = timeutils.normalize_time(
            timeutils.parse_isotime(created_time))
        now = timeutils.utcnow()

        delta = timeutils.delta_seconds(before=created_time, after=now)
        # NOTE(malini): The 'int()' below is a work around  for the small time
        # difference between julianday & UTC.
        # (needed to pass this test on sqlite driver)
        delta = int(delta)

        msg = ('Invalid Time Delta {0}, Created time {1}, Now {2}'.format(
            delta, created_time, now))
        self.assertTrue(0 <= delta <= 6000, msg)
Example #37
0
def _await_agents(instances):
    api = cu.get_api_client(instances[0].node_group.cluster)
    timeout = 300
    LOG.debug("Waiting %(timeout)s seconds for agent connected to manager" %
              {'timeout': timeout})
    s_time = timeutils.utcnow()
    while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
        hostnames = [i.fqdn() for i in instances]
        hostnames_to_manager = [h.hostname for h in api.get_all_hosts('full')]
        is_ok = True
        for hostname in hostnames:
            if hostname not in hostnames_to_manager:
                is_ok = False
                break

        if not is_ok:
            context.sleep(5)
        else:
            break
    else:
        raise ex.HadoopProvisionError(
            _("Cloudera agents failed to connect to"
              " Cloudera Manager"))
Example #38
0
    def _wait_for_resource_status(self,
                                  stack_identifier,
                                  resource_name,
                                  status,
                                  failure_pattern='^.*_FAILED$',
                                  success_on_not_found=False):
        """Waits for a Resource to reach a given status."""
        fail_regexp = re.compile(failure_pattern)
        build_timeout = self.conf.build_timeout
        build_interval = self.conf.build_interval

        start = timeutils.utcnow()
        while timeutils.delta_seconds(start,
                                      timeutils.utcnow()) < build_timeout:
            try:
                res = self.client.resources.get(stack_identifier,
                                                resource_name)
            except heat_exceptions.HTTPNotFound:
                if success_on_not_found:
                    return
                # ignore this, as the resource may not have
                # been created yet
            else:
                if res.resource_status == status:
                    return
                if fail_regexp.search(res.resource_status):
                    raise exceptions.StackResourceBuildErrorException(
                        resource_name=res.resource_name,
                        stack_identifier=stack_identifier,
                        resource_status=res.resource_status,
                        resource_status_reason=res.resource_status_reason)
            time.sleep(build_interval)

        message = ('Resource %s failed to reach %s status within '
                   'the required time (%s s).' %
                   (res.resource_name, status, build_timeout))
        raise exceptions.TimeoutException(message)
Example #39
0
    def get_meter_statistics(self,
                             sample_filter,
                             period=None,
                             groupby=None,
                             aggregate=None):
        """Return an iterable of models.Statistics instances.

        Items are containing meter statistics described by the query
        parameters. The filter must have a meter value set.

        .. note::

          Due to HBase limitations the aggregations are implemented
          in the driver itself, therefore this method will be quite slow
          because of all the Thrift traffic it is going to create.
        """
        if groupby:
            raise ceilometer.NotImplementedError("Group by not implemented.")

        if aggregate:
            raise ceilometer.NotImplementedError(
                'Selectable aggregates not implemented')

        with self.conn_pool.connection() as conn:
            meter_table = conn.table(self.METER_TABLE)
            q, start, stop, columns = (
                hbase_utils.make_sample_query_from_filter(sample_filter))
            # These fields are used in statistics' calculating
            columns.extend(
                ['f:timestamp', 'f:counter_volume', 'f:counter_unit'])
            meters = map(
                hbase_utils.deserialize_entry,
                list(meter for (ignored, meter) in meter_table.scan(
                    filter=q, row_start=start, row_stop=stop,
                    columns=columns)))

        if sample_filter.start:
            start_time = sample_filter.start
        elif meters:
            start_time = meters[-1][0]['timestamp']
        else:
            start_time = None

        if sample_filter.end:
            end_time = sample_filter.end
        elif meters:
            end_time = meters[0][0]['timestamp']
        else:
            end_time = None

        results = []

        if not period:
            period = 0
            period_start = start_time
            period_end = end_time

        # As our HBase meters are stored as newest-first, we need to iterate
        # in the reverse order
        for meter in meters[::-1]:
            ts = meter[0]['timestamp']
            if period:
                offset = int(
                    timeutils.delta_seconds(start_time, ts) / period) * period
                period_start = start_time + datetime.timedelta(0, offset)

            if not results or not results[-1].period_start == period_start:
                if period:
                    period_end = period_start + datetime.timedelta(0, period)
                results.append(
                    models.Statistics(unit='',
                                      count=0,
                                      min=0,
                                      max=0,
                                      avg=0,
                                      sum=0,
                                      period=period,
                                      period_start=period_start,
                                      period_end=period_end,
                                      duration=None,
                                      duration_start=None,
                                      duration_end=None,
                                      groupby=None))
            self._update_meter_stats(results[-1], meter[0])
        return results
Example #40
0
    def get_meter_statistics(self,
                             sample_filter,
                             period=None,
                             groupby=None,
                             aggregate=None):
        """Return an iterable of models.Statistics instance.

        Items are containing meter statistics described by the query
        parameters. The filter must have a meter value set.
        """
        if (groupby and set(groupby) -
                set(['user_id', 'project_id', 'resource_id', 'source'])):
            raise ceilometer.NotImplementedError(
                "Unable to group by these fields")

        if aggregate:
            raise ceilometer.NotImplementedError(
                'Selectable aggregates not implemented')

        q = pymongo_utils.make_query_from_filter(sample_filter)

        if period:
            if sample_filter.start:
                period_start = sample_filter.start
            else:
                period_start = self.db.meter.find(limit=1,
                                                  sort=[('timestamp',
                                                         pymongo.ASCENDING)
                                                        ])[0]['timestamp']

        if groupby:
            sort_keys = ['counter_name'] + groupby + ['timestamp']
        else:
            sort_keys = ['counter_name', 'timestamp']

        sort_instructions = self._build_sort_instructions(sort_keys=sort_keys,
                                                          sort_dir='asc')
        meters = self.db.meter.find(q, sort=sort_instructions)

        def _group_key(meter):
            # the method to define a key for groupby call
            key = {}
            for y in sort_keys:
                if y == 'timestamp' and period:
                    key[y] = (
                        timeutils.delta_seconds(period_start, meter[y]) //
                        period)
                elif y != 'timestamp':
                    key[y] = meter[y]
            return key

        def _to_offset(periods):
            return {
                'days': (periods * period) // self.SECONDS_IN_A_DAY,
                'seconds': (periods * period) % self.SECONDS_IN_A_DAY
            }

        for key, grouped_meters in itertools.groupby(meters, key=_group_key):
            stat = models.Statistics(unit=None,
                                     min=sys.maxint,
                                     max=-sys.maxint,
                                     avg=0,
                                     sum=0,
                                     count=0,
                                     period=0,
                                     period_start=0,
                                     period_end=0,
                                     duration=0,
                                     duration_start=0,
                                     duration_end=0,
                                     groupby=None)

            for meter in grouped_meters:
                stat.unit = meter.get('counter_unit', '')
                m_volume = meter.get('counter_volume')
                if stat.min > m_volume:
                    stat.min = m_volume
                if stat.max < m_volume:
                    stat.max = m_volume
                stat.sum += m_volume
                stat.count += 1
                if stat.duration_start == 0:
                    stat.duration_start = meter['timestamp']
                stat.duration_end = meter['timestamp']
                if groupby and not stat.groupby:
                    stat.groupby = {}
                    for group_key in groupby:
                        stat.groupby[group_key] = meter[group_key]

            stat.duration = timeutils.delta_seconds(stat.duration_start,
                                                    stat.duration_end)
            stat.avg = stat.sum / stat.count
            if period:
                stat.period = period
                periods = key.get('timestamp')
                stat.period_start = (
                    period_start + datetime.timedelta(**(_to_offset(periods))))
                stat.period_end = (
                    period_start +
                    datetime.timedelta(**(_to_offset(periods + 1))))
            else:
                stat.period_start = stat.duration_start
                stat.period_end = stat.duration_end
            yield stat
Example #41
0
 def test_delta_seconds(self):
     before = timeutils.utcnow()
     after = before + datetime.timedelta(
         days=7, seconds=59, microseconds=123456)
     self.assertAlmostEquals(604859.123456,
                             timeutils.delta_seconds(before, after))
Example #42
0
 def get_agent_uptime(self, agent):
     return timeutils.delta_seconds(agent.started_at,
                                    agent.heartbeat_timestamp)
Example #43
0
    def test_lifecycle(self):
        doc = '{"ttl": 100, "grace": 60}'

        # First, claim some messages
        body = self.simulate_post(self.claims_path, self.project_id, body=doc)
        self.assertEqual(self.srmock.status, falcon.HTTP_201)

        claimed = jsonutils.loads(body[0])
        claim_href = self.srmock.headers_dict['Location']
        message_href, params = claimed[0]['href'].split('?')

        # No more messages to claim
        self.simulate_post(self.claims_path, self.project_id, body=doc,
                           query_string='limit=3')
        self.assertEqual(self.srmock.status, falcon.HTTP_204)

        headers = {
            'Client-ID': str(uuid.uuid4()),
        }

        # Listing messages, by default, won't include claimed
        body = self.simulate_get(self.messages_path, self.project_id,
                                 headers=headers)
        self.assertEqual(self.srmock.status, falcon.HTTP_204)

        # Include claimed messages this time
        body = self.simulate_get(self.messages_path, self.project_id,
                                 query_string='include_claimed=true',
                                 headers=headers)
        listed = jsonutils.loads(body[0])
        self.assertEqual(self.srmock.status, falcon.HTTP_200)
        self.assertEqual(len(listed['messages']), len(claimed))

        now = timeutils.utcnow() + datetime.timedelta(seconds=10)
        timeutils_utcnow = 'zaqar.openstack.common.timeutils.utcnow'
        with mock.patch(timeutils_utcnow) as mock_utcnow:
            mock_utcnow.return_value = now
            body = self.simulate_get(claim_href, self.project_id)

        claim = jsonutils.loads(body[0])

        self.assertEqual(self.srmock.status, falcon.HTTP_200)
        self.assertEqual(self.srmock.headers_dict['Content-Location'],
                         claim_href)
        self.assertEqual(claim['ttl'], 100)
        # NOTE(cpp-cabrera): verify that claim age is non-negative
        self.assertThat(claim['age'], matchers.GreaterThan(-1))

        # Try to delete the message without submitting a claim_id
        self.simulate_delete(message_href, self.project_id)
        self.assertEqual(self.srmock.status, falcon.HTTP_403)

        # Delete the message and its associated claim
        self.simulate_delete(message_href, self.project_id,
                             query_string=params)
        self.assertEqual(self.srmock.status, falcon.HTTP_204)

        # Try to get it from the wrong project
        self.simulate_get(message_href, 'bogus_project', query_string=params)
        self.assertEqual(self.srmock.status, falcon.HTTP_404)

        # Get the message
        self.simulate_get(message_href, self.project_id, query_string=params)
        self.assertEqual(self.srmock.status, falcon.HTTP_404)

        # Update the claim
        new_claim_ttl = '{"ttl": 60}'
        creation = timeutils.utcnow()
        self.simulate_patch(claim_href, self.project_id, body=new_claim_ttl)
        self.assertEqual(self.srmock.status, falcon.HTTP_204)

        # Get the claimed messages (again)
        body = self.simulate_get(claim_href, self.project_id)
        query = timeutils.utcnow()
        claim = jsonutils.loads(body[0])
        message_href, params = claim['messages'][0]['href'].split('?')

        self.assertEqual(claim['ttl'], 60)
        estimated_age = timeutils.delta_seconds(creation, query)
        self.assertTrue(estimated_age > claim['age'])

        # Delete the claim
        self.simulate_delete(claim['href'], 'bad_id')
        self.assertEqual(self.srmock.status, falcon.HTTP_204)

        self.simulate_delete(claim['href'], self.project_id)
        self.assertEqual(self.srmock.status, falcon.HTTP_204)

        # Try to delete a message with an invalid claim ID
        self.simulate_delete(message_href, self.project_id,
                             query_string=params)
        self.assertEqual(self.srmock.status, falcon.HTTP_400)

        # Make sure it wasn't deleted!
        self.simulate_get(message_href, self.project_id, query_string=params)
        self.assertEqual(self.srmock.status, falcon.HTTP_200)

        # Try to get a claim that doesn't exist
        self.simulate_get(claim['href'])
        self.assertEqual(self.srmock.status, falcon.HTTP_404)

        # Try to update a claim that doesn't exist
        self.simulate_patch(claim['href'], body=doc)
        self.assertEqual(self.srmock.status, falcon.HTTP_404)