    def test_notification_reloaded_event_pipeline(self, fake_publisher_cls):
        fake_publisher_cls.return_value = self.publisher

        ev_pipeline_cfg_file = self.setup_event_pipeline(
            ['compute.instance.create.start'])
        self.CONF.set_override("event_pipeline_cfg_file", ev_pipeline_cfg_file)

        self.CONF.set_override("store_events", True, group="notification")
        self.expected_events = 1
        self.srv.start()

        notifier = messaging.get_notifier(self.transport,
                                          "compute.vagrant-precise")
        notifier.info(context.RequestContext(),
                      'compute.instance.create.start',
                      TEST_NOTICE_PAYLOAD)

        start = timeutils.utcnow()
        while timeutils.delta_seconds(start, timeutils.utcnow()) < 600:
            if len(self.publisher.events) >= self.expected_events:
                break
            eventlet.sleep(0)

        self.assertEqual(self.expected_events, len(self.publisher.events))

        # Flush publisher events to test reloading
        self.publisher.events = []
        # Modify the collection targets
        updated_ev_pipeline_cfg_file = self.setup_event_pipeline(
            ['compute.instance.*'])
        # Move/re-name the updated pipeline file to the original pipeline
        # file path as recorded in oslo config
        shutil.move(updated_ev_pipeline_cfg_file, ev_pipeline_cfg_file)

        self.expected_events = 1
        # Random sleep to let the pipeline poller complete the reloading
        eventlet.sleep(3)
        # Send message again to verify the reload works
        notifier = messaging.get_notifier(self.transport,
                                          "compute.vagrant-precise")
        notifier.info(context.RequestContext(), 'compute.instance.create.end',
                      TEST_NOTICE_PAYLOAD)

        start = timeutils.utcnow()
        while timeutils.delta_seconds(start, timeutils.utcnow()) < 600:
            if len(self.publisher.events) >= self.expected_events:
                break
            eventlet.sleep(0)

        self.assertEqual(self.expected_events, len(self.publisher.events))

        self.assertEqual(self.publisher.events[0].event_type,
                         'compute.instance.create.end')
        self.srv.stop()

    def test_notification_reloaded_pipeline(self, fake_publisher_cls):
        fake_publisher_cls.return_value = self.publisher

        pipeline_cfg_file = self.setup_pipeline(['instance'])
        self.CONF.set_override("pipeline_cfg_file", pipeline_cfg_file)

        self.expected_samples = 1
        self.srv.start()

        notifier = messaging.get_notifier(self.transport,
                                          "compute.vagrant-precise")
        notifier.info(context.RequestContext(), 'compute.instance.create.end',
                      TEST_NOTICE_PAYLOAD)

        start = timeutils.utcnow()
        while timeutils.delta_seconds(start, timeutils.utcnow()) < 600:
            if (len(self.publisher.samples) >= self.expected_samples and
                    len(self.publisher.events) >= self.expected_events):
                break
            eventlet.sleep(0)

        self.assertEqual(self.expected_samples, len(self.publisher.samples))

        # Flush publisher samples to test reloading
        self.publisher.samples = []
        # Modify the collection targets
        updated_pipeline_cfg_file = self.setup_pipeline(['vcpus',
                                                         'disk.root.size'])
        # Move/re-name the updated pipeline file to the original pipeline
        # file path as recorded in oslo config
        shutil.move(updated_pipeline_cfg_file, pipeline_cfg_file)

        self.expected_samples = 2
        # Random sleep to let the pipeline poller complete the reloading
        eventlet.sleep(3)
        # Send message again to verify the reload works
        notifier = messaging.get_notifier(self.transport,
                                          "compute.vagrant-precise")
        notifier.info(context.RequestContext(), 'compute.instance.create.end',
                      TEST_NOTICE_PAYLOAD)

        start = timeutils.utcnow()
        while timeutils.delta_seconds(start, timeutils.utcnow()) < 600:
            if (len(self.publisher.samples) >= self.expected_samples and
                    len(self.publisher.events) >= self.expected_events):
                break
            eventlet.sleep(0)

        self.assertEqual(self.expected_samples, len(self.publisher.samples))

        for sample in self.publisher.samples:
            self.assertIn(sample.name, ['disk.root.size', 'vcpus'])
        self.srv.stop()
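Both tests above rely on the busy-wait idiom that recurs throughout these examples: compare timeutils.delta_seconds(start, timeutils.utcnow()) against a wall-clock budget while polling a condition. A minimal standalone sketch of that pattern, assuming the oslo.utils package is installed (wait_for is an illustrative name; the tests poll with eventlet.sleep(0) rather than time.sleep):

import time

from oslo_utils import timeutils


def wait_for(condition, timeout, interval=0.1):
    """Poll ``condition`` until it returns True or ``timeout`` seconds pass."""
    start = timeutils.utcnow()
    while timeutils.delta_seconds(start, timeutils.utcnow()) < timeout:
        if condition():
            return True
        time.sleep(interval)
    return False


# Trivial usage: a condition that is already true returns immediately.
assert wait_for(lambda: True, timeout=5)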
Example #3
File: test.py Project: aawm/heat
    def _wait_for_stack_status(self, stack_identifier, status,
                               failure_pattern='^.*_FAILED$',
                               success_on_not_found=False):
        """
        Waits for a Stack to reach a given status.

        Note this compares the full $action_$status, e.g
        CREATE_COMPLETE, not just COMPLETE which is exposed
        via the status property of Stack in heatclient
        """
        fail_regexp = re.compile(failure_pattern)
        build_timeout = self.conf.build_timeout
        build_interval = self.conf.build_interval

        start = timeutils.utcnow()
        while timeutils.delta_seconds(start,
                                      timeutils.utcnow()) < build_timeout:
            try:
                stack = self.client.stacks.get(stack_identifier)
            except heat_exceptions.HTTPNotFound:
                if success_on_not_found:
                    return
                # ignore this, as the resource may not have
                # been created yet
            else:
                if self._verify_status(stack, stack_identifier, status,
                                       fail_regexp):
                    return

            time.sleep(build_interval)

        message = ('Stack %s failed to reach %s status within '
                   'the required time (%s s).' %
                   (stack_identifier, status, build_timeout))
        raise exceptions.TimeoutException(message)
 def get_interval(sample_filter, period, groupby, aggregate):
     self.assertIsNotNone(sample_filter.start_timestamp)
     self.assertIsNotNone(sample_filter.end_timestamp)
     if (sample_filter.start_timestamp > end or
             sample_filter.end_timestamp < start):
         return []
     duration_start = max(sample_filter.start_timestamp, start)
     duration_end = min(sample_filter.end_timestamp, end)
     duration = timeutils.delta_seconds(duration_start, duration_end)
     return [
         models.Statistics(
             unit='',
             min=0,
             max=0,
             avg=0,
             sum=0,
             count=0,
             period=None,
             period_start=None,
             period_end=None,
             duration=duration,
             duration_start=duration_start,
             duration_end=duration_end,
             groupby=None,
         )
     ]
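The get_interval stub above clips the query window from sample_filter to the fixed [start, end] range and turns the overlap into a duration in seconds. A standalone sketch of that overlap arithmetic with plain datetimes (overlap_seconds is an illustrative name; oslo.utils assumed):

import datetime

from oslo_utils import timeutils


def overlap_seconds(q_start, q_end, start, end):
    """Length in seconds of the overlap of [q_start, q_end] and [start, end]."""
    if q_start > end or q_end < start:
        return 0.0
    duration_start = max(q_start, start)
    duration_end = min(q_end, end)
    return timeutils.delta_seconds(duration_start, duration_end)


start = datetime.datetime(2015, 1, 1, 0, 0)
end = datetime.datetime(2015, 1, 1, 1, 0)
# A query window covering the last 30 minutes of the range overlaps by 1800 s.
print(overlap_seconds(datetime.datetime(2015, 1, 1, 0, 30),
                      datetime.datetime(2015, 1, 1, 2, 0), start, end))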
Example #5
    def _wait_for_resource_status(self, stack_identifier, resource_name,
                                  status, failure_pattern='^.*_FAILED$',
                                  success_on_not_found=False):
        """Waits for a Resource to reach a given status."""
        fail_regexp = re.compile(failure_pattern)
        build_timeout = self.conf.build_timeout
        build_interval = self.conf.build_interval

        start = timeutils.utcnow()
        while timeutils.delta_seconds(start,
                                      timeutils.utcnow()) < build_timeout:
            try:
                res = self.client.resources.get(
                    stack_identifier, resource_name)
            except heat_exceptions.HTTPNotFound:
                if success_on_not_found:
                    return
                # ignore this, as the resource may not have
                # been created yet
            else:
                if res.resource_status == status:
                    return
                if fail_regexp.search(res.resource_status):
                    raise exceptions.StackResourceBuildErrorException(
                        resource_name=res.resource_name,
                        stack_identifier=stack_identifier,
                        resource_status=res.resource_status,
                        resource_status_reason=res.resource_status_reason)
            time.sleep(build_interval)

        message = ('Resource %s failed to reach %s status within '
                   'the required time (%s s).' %
                   (resource_name, status, build_timeout))
        raise exceptions.TimeoutException(message)
Example #6
        def _inner():
            if initial_delay:
                greenthread.sleep(initial_delay)

            try:
                while self._running:
                    start = timeutils.utcnow()
                    self.f(*self.args, **self.kw)
                    end = timeutils.utcnow()
                    if not self._running:
                        break
                    delay = interval - timeutils.delta_seconds(start, end)
                    if delay <= 0:
                        LOG.warn(_LW('task run outlasted interval by %s sec'),
                                 -delay)
                    greenthread.sleep(delay if delay > 0 else 0)
            except LoopingCallDone as e:
                self.stop()
                done.send(e.retvalue)
            except Exception:
                LOG.exception(_LE('in fixed duration looping call'))
                done.send_exception(*sys.exc_info())
                return
            else:
                done.send(True)
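The looping-call body above holds a fixed cadence by timing each run with delta_seconds and sleeping only for the remainder of the interval. A minimal sketch of that delay computation using plain time.sleep instead of greenthread.sleep (run_at_fixed_interval is an illustrative name):

import time

from oslo_utils import timeutils


def run_at_fixed_interval(task, interval, iterations):
    """Run ``task`` roughly every ``interval`` seconds, ``iterations`` times."""
    for _ in range(iterations):
        start = timeutils.utcnow()
        task()
        end = timeutils.utcnow()
        delay = interval - timeutils.delta_seconds(start, end)
        # If the task outlasted the interval, start the next run immediately.
        time.sleep(delay if delay > 0 else 0)


run_at_fixed_interval(lambda: None, interval=0.2, iterations=3)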
Example #7
    def _wait_on_task_execution(self):
        """Wait until all the tasks have finished execution and are in
        state of success or failure.
        """

        start = timeutils.utcnow()

        # wait for maximum of 5 seconds
        while timeutils.delta_seconds(start, timeutils.utcnow()) < 5:
            wait = False
            # Verify that no task is in status of pending or processing
            path = "/v2/tasks"
            res, content = self.http.request(path, 'GET',
                                             headers=minimal_task_headers())
            content_dict = json.loads(content)

            self.assertEqual(200, res.status)
            res_tasks = content_dict['tasks']
            if len(res_tasks) != 0:
                for task in res_tasks:
                    if task['status'] in ('pending', 'processing'):
                        wait = True
                        break

            if wait:
                time.sleep(0.05)
                continue
            else:
                break
Example #8
    def __call__(self):
        pstart, pend = self._my_range()
        LOG.info(
            "Refreshing zones for shards %(start)s to %(end)s",
            {
                "start": pstart,
                "end": pend
            })

        ctxt = context.DesignateContext.get_admin_context()
        ctxt.all_tenants = True

        # each zone can have a different refresh / expire etc interval defined
        # in the SOA at the source / master servers
        criterion = {
            "type": "SECONDARY"
        }
        for zone in self._iter_zones(ctxt, criterion):
            # NOTE: If the zone isn't transferred yet, ignore it.
            if zone.transferred_at is None:
                continue

            now = timeutils.utcnow(True)

            transferred = timeutils.parse_isotime(zone.transferred_at)
            seconds = timeutils.delta_seconds(transferred, now)
            if seconds > zone.refresh:
                msg = "Zone %(id)s has %(seconds)d seconds since last " \
                      "transfer, executing AXFR"
                LOG.debug(msg, {"id": zone.id, "seconds": seconds})
                self.central_api.xfr_zone(ctxt, zone.id)
Example #9
    def handle_sample(self, context, s):
        """Handle a sample, converting if necessary."""
        LOG.debug(_('handling sample %s'), (s,))
        key = s.name + s.resource_id
        prev = self.cache.get(key)
        timestamp = timeutils.parse_isotime(s.timestamp)
        self.cache[key] = (s.volume, timestamp)

        if prev:
            prev_volume = prev[0]
            prev_timestamp = prev[1]
            time_delta = timeutils.delta_seconds(prev_timestamp, timestamp)
            # we only allow negative deltas for noncumulative samples, whereas
            # for cumulative we assume that a reset has occurred in the interim
            # so that the current volume gives a lower bound on growth
            volume_delta = (s.volume - prev_volume
                            if (prev_volume <= s.volume or
                                s.type != sample.TYPE_CUMULATIVE)
                            else s.volume)
            rate_of_change = ((1.0 * volume_delta / time_delta)
                              if time_delta else 0.0)

            s = self._convert(s, rate_of_change)
            LOG.debug(_('converted to: %s'), (s,))
        else:
            LOG.warn(_('dropping sample with no predecessor: %s'),
                     (s,))
            s = None
        return s
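handle_sample caches the previous (volume, timestamp) pair per meter/resource and divides the volume delta by the time delta to obtain a rate of change. The arithmetic in isolation, keeping the cumulative-reset rule from the comment above (rate_of_change and the TYPE_CUMULATIVE stand-in are illustrative; oslo.utils assumed):

from oslo_utils import timeutils

TYPE_CUMULATIVE = 'cumulative'  # stand-in for ceilometer's sample.TYPE_CUMULATIVE


def rate_of_change(prev_volume, prev_iso, volume, iso, sample_type):
    prev_ts = timeutils.parse_isotime(prev_iso)
    ts = timeutils.parse_isotime(iso)
    time_delta = timeutils.delta_seconds(prev_ts, ts)
    # For cumulative meters a drop in volume is read as a counter reset, so
    # the current volume is taken as a lower bound on growth.
    volume_delta = (volume - prev_volume
                    if (prev_volume <= volume or
                        sample_type != TYPE_CUMULATIVE)
                    else volume)
    return (1.0 * volume_delta / time_delta) if time_delta else 0.0


# 60 units of growth over 60 seconds -> 1.0 per second.
print(rate_of_change(100, '2015-01-01T00:00:00', 160,
                     '2015-01-01T00:01:00', TYPE_CUMULATIVE))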
Example #10
def _start_cloudera_manager(cluster):
    manager = pu.get_manager(cluster)
    with manager.remote() as r:
        cmd.start_cloudera_db(r)
        cmd.start_manager(r)

    timeout = 300
    LOG.debug("Waiting %(timeout)s seconds for Manager to start : " % {
        'timeout': timeout})
    s_time = timeutils.utcnow()
    while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
        try:
            conn = telnetlib.Telnet(manager.management_ip, CM_API_PORT)
            conn.close()
            break
        except IOError:
            context.sleep(2)
    else:
        message = _("Cloudera Manager failed to start in %(timeout)s minutes "
                    "on node '%(node)s' of cluster '%(cluster)s'") % {
                        'timeout': timeout / 60,
                        'node': manager.management_ip,
                        'cluster': cluster.name}
        raise ex.HadoopProvisionError(message)

    LOG.info(_LI("Cloudera Manager has been started"))
Example #11
 def is_up(self, service_ref):
     """Moved from nova.utils
     Check whether a service is up based on last heartbeat.
     """
     last_heartbeat = (service_ref.get('last_seen_up') or
         service_ref['created_at'])
     if isinstance(last_heartbeat, six.string_types):
         # NOTE(russellb) If this service_ref came in over rpc via
         # conductor, then the timestamp will be a string and needs to be
         # converted back to a datetime.
         last_heartbeat = timeutils.parse_strtime(last_heartbeat)
     else:
         # Objects have proper UTC timezones, but the timeutils comparison
         # below does not (and will fail)
         last_heartbeat = last_heartbeat.replace(tzinfo=None)
     # Timestamps in DB are UTC.
     elapsed = timeutils.delta_seconds(last_heartbeat, timeutils.utcnow())
     is_up = abs(elapsed) <= self.service_down_time
     if not is_up:
         LOG.debug('Seems service %(binary)s on host %(host)s is down. '
                   'Last heartbeat was %(lhb)s. Elapsed time is %(el)s',
                   {'binary': service_ref.get('binary'),
                    'host': service_ref.get('host'),
                    'lhb': str(last_heartbeat), 'el': str(elapsed)})
     return is_up
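Stripped of the string and timezone handling, is_up reduces to: how many seconds since the last heartbeat, and is that within service_down_time. A minimal version of that check, assuming the heartbeat is already a naive UTC datetime (heartbeat_is_fresh is an illustrative name); the ovsvapp agent fragment later in this listing uses timeutils.is_older_than for the same test:

from oslo_utils import timeutils


def heartbeat_is_fresh(last_heartbeat, service_down_time):
    """True if ``last_heartbeat`` (naive UTC datetime) is recent enough."""
    elapsed = timeutils.delta_seconds(last_heartbeat, timeutils.utcnow())
    return abs(elapsed) <= service_down_time


print(heartbeat_is_fresh(timeutils.utcnow(), service_down_time=60))  # True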
    def _check_transient(self):
        pig_job_data = self.edp_info.read_pig_example_script()
        pig_lib_data = self.edp_info.read_pig_example_jar()
        job_ids = []
        for cluster_id in self.cluster_ids:
            self.cluster_id = cluster_id
            job_ids.append(self.edp_testing(
                job_type=utils_edp.JOB_TYPE_PIG,
                job_data_list=[{'pig': pig_job_data}],
                lib_data_list=[{'jar': pig_lib_data}]))
        self.poll_jobs_status(job_ids)

        # set timeout in seconds
        timeout = self.common_config.TRANSIENT_CLUSTER_TIMEOUT * 60
        s_time = timeutils.utcnow()
        raise_failure = True
        # wait for cluster deleting
        while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
            try:
                self.sahara.clusters.get(self.cluster_id)
            except sab.APIException as api_ex:
                if 'not found' in api_ex.message:
                    raise_failure = False
                    break
            time.sleep(2)

        if raise_failure:
            self.fail('Transient cluster has not been deleted within %s '
                      'minutes.'
                      % self.common_config.TRANSIENT_CLUSTER_TIMEOUT)
Example #13
def fetch(context, image_service, image_id, path, _user_id, _project_id):
    # TODO(vish): Improve context handling and add owner and auth data
    #             when it is added to glance.  Right now there is no
    #             auth checking in glance, so we assume that access was
    #             checked before we got here.
    start_time = timeutils.utcnow()
    with fileutils.remove_path_on_error(path):
        with open(path, "wb") as image_file:
            try:
                image_service.download(context, image_id, image_file)
            except IOError as e:
                with excutils.save_and_reraise_exception():
                    if e.errno == errno.ENOSPC:
                        # TODO(eharney): Fire an async error message for this
                        LOG.error(_LE("No space left in image_conversion_dir "
                                      "path (%(path)s) while fetching "
                                      "image %(image)s."),
                                  {'path': os.path.dirname(path),
                                   'image': image_id})

    duration = timeutils.delta_seconds(start_time, timeutils.utcnow())

    # NOTE(jdg): use a default of 1, mostly for unit test, but in
    # some incredible event this is 0 (cirros image?) don't barf
    if duration < 1:
        duration = 1
    fsz_mb = os.stat(image_file.name).st_size / units.Mi
    mbps = (fsz_mb / duration)
    msg = ("Image fetch details: dest %(dest)s, size %(sz).2f MB, "
           "duration %(duration).2f sec")
    LOG.debug(msg, {"dest": image_file.name,
                    "sz": fsz_mb,
                    "duration": duration})
    msg = _LI("Image download %(sz).2f MB at %(mbps).2f MB/s")
    LOG.info(msg, {"sz": fsz_mb, "mbps": mbps})
Example #14
def _copy_volume_with_file(src, dest, size_in_m):
    src_handle = src
    if isinstance(src, six.string_types):
        src_handle = _open_volume_with_path(src, 'rb')

    dest_handle = dest
    if isinstance(dest, six.string_types):
        dest_handle = _open_volume_with_path(dest, 'wb')

    if not src_handle:
        raise exception.DeviceUnavailable(
            _("Failed to copy volume, source device unavailable."))

    if not dest_handle:
        raise exception.DeviceUnavailable(
            _("Failed to copy volume, destination device unavailable."))

    start_time = timeutils.utcnow()

    _transfer_data(src_handle, dest_handle, size_in_m * units.Mi, units.Mi * 4)

    duration = max(1, timeutils.delta_seconds(start_time, timeutils.utcnow()))

    if isinstance(src, six.string_types):
        src_handle.close()
    if isinstance(dest, six.string_types):
        dest_handle.close()

    mbps = (size_in_m / duration)
    LOG.info(_LI("Volume copy completed (%(size_in_m).2f MB at "
                 "%(mbps).2f MB/s)."),
             {'size_in_m': size_in_m, 'mbps': mbps})
Example #15
    def _batching_samples(self, expected_samples, call_count):
        pipeline = yaml.dump({
            'sources': [{
                'name': 'test_pipeline',
                'interval': 1,
                'meters': ['testbatch'],
                'resources': ['alpha', 'beta', 'gamma', 'delta'],
                'sinks': ['test_sink']}],
            'sinks': [{
                'name': 'test_sink',
                'transformers': [],
                'publishers': ["test"]}]
        })

        pipeline_cfg_file = self.setup_pipeline_file(pipeline)

        self.CONF.set_override("pipeline_cfg_file", pipeline_cfg_file)

        self.mgr.tg = os_service.threadgroup.ThreadGroup(1000)
        self.mgr.start()
        start = timeutils.utcnow()
        while timeutils.delta_seconds(start, timeutils.utcnow()) < 600:
            if len(self.notified_samples) >= expected_samples:
                break
            eventlet.sleep(0)

        samples = self.notified_samples
        self.assertEqual(expected_samples, len(samples))
        self.assertEqual(call_count, self.notifier.info.call_count)
Example #16
    def _update_duration(self, start_timestamp, end_timestamp):
        # "Clamp" the timestamps we return to the original time
        # range, excluding the offset.
        if (start_timestamp and
                self.duration_start and
                self.duration_start < start_timestamp):
            self.duration_start = start_timestamp
            LOG.debug(_('clamping min timestamp to range'))
        if (end_timestamp and
                self.duration_end and
                self.duration_end > end_timestamp):
            self.duration_end = end_timestamp
            LOG.debug(_('clamping max timestamp to range'))

        # If we got valid timestamps back, compute a duration in seconds.
        #
        # If the min > max after clamping then we know the
        # timestamps on the samples fell outside of the time
        # range we care about for the query, so treat them as
        # "invalid."
        #
        # If the timestamps are invalid, return None as a
        # sentinel indicating that there is something "funny"
        # about the range.
        if (self.duration_start and
                self.duration_end and
                self.duration_start <= self.duration_end):
            self.duration = timeutils.delta_seconds(self.duration_start,
                                                    self.duration_end)
        else:
            self.duration_start = self.duration_end = self.duration = None
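_update_duration clamps the stored duration endpoints to the requested window and only reports a duration when the clamped range is still ordered. The same logic as a standalone function over naive datetimes (clamped_duration is an illustrative name):

import datetime

from oslo_utils import timeutils


def clamped_duration(dur_start, dur_end, win_start=None, win_end=None):
    """Clamp [dur_start, dur_end] to the window and return (start, end, secs).

    Returns (None, None, None) when the clamped range is empty or inverted.
    """
    if win_start and dur_start and dur_start < win_start:
        dur_start = win_start
    if win_end and dur_end and dur_end > win_end:
        dur_end = win_end
    if dur_start and dur_end and dur_start <= dur_end:
        return dur_start, dur_end, timeutils.delta_seconds(dur_start, dur_end)
    return None, None, None


# A two-hour span clamped to a one-hour window yields 3600 seconds.
print(clamped_duration(datetime.datetime(2015, 1, 1, 0, 0),
                       datetime.datetime(2015, 1, 1, 2, 0),
                       win_end=datetime.datetime(2015, 1, 1, 1, 0))[2])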
Example #17
    def _bracketer_calculate(self, request_id):
        """Evaluate the brackelet expression and return a new event if successful."""
        try:
            begin_event = self.cache[request_id][self.begin_event_type]
            end_event = self.cache[request_id][self.end_event_type]

            result = timeutils.delta_seconds(begin_event.generated,
                                             end_event.generated)

            if result < 0:
                LOG.warn(_('bracket result %(result)s '
                           'from %(begin_event)s: %(end_event)s < 0'),
                         {'result': result,
                          'begin_event': begin_event,
                          'end_event': end_event})
                return

            event_type = self.target.get('event_type')
            message_id = uuid.uuid4()
            when = timeutils.utcnow()
            # End_event has resource_id trait
            traits = deepcopy(end_event.traits)
            latency_trait = models.Trait(self.target_trait_name,
                                         self.target_trait_type,
                                         result)
            traits.append(latency_trait)
            raw = {}

            event = models.Event(message_id, event_type, when, traits, raw)
            print event
            return event
        except Exception:
            LOG.warn(_('Unable to evaluate the bracket expression'))
Example #18
    def _check_clock_sync_on_agent_start(self, agent_state, agent_time):
        """Checks if the server and the agent times are in sync.

        Method checks if the agent time is in sync with the server time
        on start up. Ignores it, on subsequent re-connects.
        """
        if agent_state.get('start_flag'):
            time_server_now = timeutils.utcnow()
            diff = abs(timeutils.delta_seconds(time_server_now, agent_time))
            if diff > cfg.CONF.agent_down_time:
                agent_name = agent_state['agent_type']
                time_agent = datetime.datetime.isoformat(agent_time)

                host = agent_state['host']
                log_dict = {'host': host,
                            'agent_name': agent_name,
                            'agent_time': time_agent,
                            'threshold': cfg.CONF.agent_down_time,
                            'serv_time': (datetime.datetime.isoformat
                                          (time_server_now)),
                            'diff': diff}
                LOG.error(_LE("Message received from the host: %(host)s "
                              "during the registration of %(agent_name)s has "
                              "a timestamp: %(agent_time)s. This differs from "
                              "the current server timestamp: %(serv_time)s by "
                              "%(diff)s seconds, which is more than the "
                              "threshold agent down"
                              "time: %(threshold)s."), log_dict)
Example #19
 def _dispatch(self, message):
     if LOG.isEnabledFor(logging.TRACE):
         LOG.trace("Dispatching message %s (it took %s seconds"
                   " for it to arrive for processing after being"
                   " sent)", message,
                   timeutils.delta_seconds(message['sent_on'],
                                           timeutils.utcnow()))
     try:
         kind = message['kind']
         sender = message['sender']
         body = message['body']
     except (KeyError, ValueError, TypeError):
         LOG.warn("Badly formatted message %s received", message,
                  exc_info=True)
         return
     target = self._targets.get(sender['id'])
     if target is None:
         # Must have been removed...
         return
     if kind == _KIND_COMPLETE_ME:
         target.dispatched[kind] += 1
         target.barrier.set()
     elif kind == _KIND_EVENT:
         task = target.task
         target.dispatched[kind] += 1
         task.notifier.notify(body['event_type'], body['details'])
     else:
         LOG.warn("Unknown message '%s' found in message from sender"
                  " %s to target '%s'", kind, sender, target)
Example #20
 def _do_get_members():
     if not os.path.isdir(group_dir):
         raise coordination.GroupNotCreated(group_id)
     members = set()
     try:
         entries = os.listdir(group_dir)
     except EnvironmentError as e:
         # Did someone manage to remove it before we got here...
         if e.errno != errno.ENOENT:
             raise
     else:
         for entry in entries:
             if not entry.endswith('.raw'):
                 continue
             entry_path = os.path.join(group_dir, entry)
             try:
                 m_time = datetime.datetime.fromtimestamp(
                     os.stat(entry_path).st_mtime)
                 current_time = datetime.datetime.now()
                 delta_time = timeutils.delta_seconds(m_time,
                                                      current_time)
                 if delta_time >= 0 and delta_time <= self._timeout:
                     member_id = self._read_member_id(entry_path)
                 else:
                     continue
             except EnvironmentError as e:
                 if e.errno != errno.ENOENT:
                     raise
             else:
                 members.add(member_id)
     return members
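_do_get_members treats a member file as alive when its mtime is no older than the driver timeout; both datetimes are local, so the subtraction is consistent. A small sketch of that freshness test on an arbitrary path (file_is_fresh is an illustrative name):

import datetime
import os
import tempfile

from oslo_utils import timeutils


def file_is_fresh(path, timeout):
    """True if ``path`` was modified within the last ``timeout`` seconds."""
    m_time = datetime.datetime.fromtimestamp(os.stat(path).st_mtime)
    age = timeutils.delta_seconds(m_time, datetime.datetime.now())
    return 0 <= age <= timeout


with tempfile.NamedTemporaryFile() as f:
    print(file_is_fresh(f.name, timeout=30))  # True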
Example #21
    def handle_sample(self, context, s):
        """Handle a sample, converting if necessary."""
        key = s.name + s.resource_id
        prev = self.cache.get(key)
        timestamp = timeutils.parse_isotime(s.timestamp)
        self.cache[key] = (s.volume, timestamp)

        if prev:
            prev_volume = prev[0]
            prev_timestamp = prev[1]
            time_delta = timeutils.delta_seconds(prev_timestamp, timestamp)
            # disallow violations of the arrow of time
            if time_delta < 0:
                LOG.warn(_LW('Dropping out of time order sample: %s'), (s,))
                # Reset the cache to the newer sample.
                self.cache[key] = prev
                return None
            volume_delta = s.volume - prev_volume
            if self.growth_only and volume_delta < 0:
                LOG.warn(_LW('Negative delta detected, dropping value'))
                s = None
            else:
                s = self._convert(s, volume_delta)
                LOG.debug('Converted to: %s', s)
        else:
            LOG.warn(_LW('Dropping sample with no predecessor: %s'),
                     (s,))
            s = None
        return s
Example #22
def convert_image(source, dest, out_format, run_as_root=True):
    """Convert image to other format."""

    cmd = ('qemu-img', 'convert',
           '-O', out_format, source, dest)

    start_time = timeutils.utcnow()
    execute(*cmd, run_as_root=run_as_root)
    duration = timeutils.delta_seconds(start_time, timeutils.utcnow())

    if duration < 1:
        duration = 1
    try:
        image_size = qemu_img_info(source,
                                   run_as_root=run_as_root).virtual_size
    except ValueError as e:
        msg = _LI("The image was successfully converted, but image size "
                  "is unavailable. src %(src)s, dest %(dest)s. %(error)s")
        LOG.info(msg, {"src": source,
                       "dest": dest,
                       "error": e})
        return

    fsz_mb = image_size / units.Mi
    mbps = (fsz_mb / duration)
    msg = ("Image conversion details: src %(src)s, size %(sz).2f MB, "
           "duration %(duration).2f sec, destination %(dest)s")
    LOG.debug(msg, {"src": source,
                    "sz": fsz_mb,
                    "duration": duration,
                    "dest": dest})

    msg = _LI("Converted %(sz).2f MB image at %(mbps).2f MB/s")
    LOG.info(msg, {"sz": fsz_mb, "mbps": mbps})
 def _get_eligible_ovsvapp_agent(self, cluster_id, vcenter_id):
     cluster_agents = []
     agents = self.plugin.get_agents(
         self.context,
         filters={'agent_type': [ovsvapp_const.AGENT_TYPE_OVSVAPP]})
     for agent in agents:
         agent_cluster_id = agent['configurations'].get('cluster_id')
         agent_vcenter_id = agent['configurations'].get('vcenter_id')
         if (cluster_id != agent_cluster_id) or (
             vcenter_id != agent_vcenter_id):
             continue
         cluster_agents.append(agent)
     if not cluster_agents:
         return
     _agent = random.choice(cluster_agents)
     recent_time = _agent['heartbeat_timestamp']
     if not timeutils.is_older_than(recent_time,
                                    cfg.CONF.agent_down_time):
         return _agent
     cluster_agents.remove(_agent)
     for agent in cluster_agents:
         delta = timeutils.delta_seconds(recent_time,
                                         agent['heartbeat_timestamp'])
         if delta > 0:
             if not timeutils.is_older_than(agent['heartbeat_timestamp'],
                                            cfg.CONF.agent_down_time):
                 return agent
    def _check_notifications(self, fake_publisher_cls):
        fake_publisher_cls.side_effect = [self.publisher, self.publisher2]

        self.srv = notification.NotificationService()
        self.srv2 = notification.NotificationService()
        with mock.patch(
            "ceilometer.coordination.PartitionCoordinator._get_members",
            return_value=["harry", "lloyd"]
        ):
            with mock.patch("uuid.uuid4", return_value="harry"):
                self.srv.start()
            with mock.patch("uuid.uuid4", return_value="lloyd"):
                self.srv2.start()

        notifier = messaging.get_notifier(self.transport, "compute.vagrant-precise")
        payload1 = TEST_NOTICE_PAYLOAD.copy()
        payload1["instance_id"] = "0"
        notifier.info(context.RequestContext(), "compute.instance.create.end", payload1)
        payload2 = TEST_NOTICE_PAYLOAD.copy()
        payload2["instance_id"] = "1"
        notifier.info(context.RequestContext(), "compute.instance.create.end", payload2)
        self.expected_samples = 4
        start = timeutils.utcnow()
        with mock.patch("six.moves.builtins.hash", lambda x: int(x)):
            while timeutils.delta_seconds(start, timeutils.utcnow()) < 60:
                if len(self.publisher.samples + self.publisher2.samples) >= self.expected_samples:
                    break
                eventlet.sleep(0)
            self.srv.stop()
            self.srv2.stop()

        self.assertEqual(2, len(self.publisher.samples))
        self.assertEqual(2, len(self.publisher2.samples))
        self.assertEqual(1, len(set(s.resource_id for s in self.publisher.samples)))
        self.assertEqual(1, len(set(s.resource_id for s in self.publisher2.samples)))
 def _await_cldb(self, cluster_context, instances=None, timeout=600):
     instances = instances or cluster_context.get_instances()
     cldb_node = cluster_context.get_instance(mfs.CLDB)
     start_time = timeutils.utcnow()
     retry_count = 0
     with cldb_node.remote() as r:
         LOG.debug("Waiting {count} seconds for CLDB initialization".format(
             count=timeout))
         while timeutils.delta_seconds(start_time,
                                       timeutils.utcnow()) < timeout:
             ec, out = r.execute_command(NODE_LIST_CMD,
                                         raise_when_error=False)
             resp = json.loads(out)
             status = resp['status']
             if str(status).lower() == 'ok':
                 ips = [n['ip'] for n in resp['data']]
                 retry_count += 1
                 for i in instances:
                     if (i.management_ip not in ips
                             and retry_count > DEFAULT_RETRY_COUNT):
                         raise ex.HadoopProvisionError(_(
                             "Node failed to connect to CLDB: %s") %
                             i.management_ip)
                 break
             else:
                 context.sleep(DELAY)
         else:
             raise ex.HadoopProvisionError(_("CLDB failed to start"))
Example #26
 def get_min_interval(self):
     try:
         t1 = self.compute_next_time(datetime.now())
         t2 = self.compute_next_time(t1)
         return timeutils.delta_seconds(t1, t2)
     except Exception:
         return None
Example #27
    def handle_sample(self, s):
        """Handle a sample, converting if necessary."""
        LOG.debug("handling sample %s", s)
        key = s.name + s.resource_id
        prev = self.cache.get(key)
        timestamp = timeutils.parse_isotime(s.timestamp)
        self.cache[key] = (s.volume, timestamp)

        if prev:
            prev_volume = prev[0]
            prev_timestamp = prev[1]
            time_delta = timeutils.delta_seconds(prev_timestamp, timestamp)
            # disallow violations of the arrow of time
            if time_delta < 0:
                LOG.warning(_("dropping out of time order sample: %s"), (s,))
                # Reset the cache to the newer sample.
                self.cache[key] = prev
                return None
            # we only allow negative volume deltas for noncumulative
            # samples, whereas for cumulative we assume that a reset has
            # occurred in the interim so that the current volume gives a
            # lower bound on growth
            volume_delta = (
                s.volume - prev_volume if (prev_volume <= s.volume or s.type != sample.TYPE_CUMULATIVE) else s.volume
            )
            rate_of_change = (1.0 * volume_delta / time_delta) if time_delta else 0.0

            s = self._convert(s, rate_of_change)
            LOG.debug("converted to: %s", s)
        else:
            LOG.warning(_("dropping sample with no predecessor: %s"), (s,))
            s = None
        return s
Example #28
    def get_min_interval(self):
        """Get minimum interval of two adjacent time points

        :return int(seconds) or None
        """
        gen = self.rrule_obj
        kwargs = FREQ_TO_KWARGS[self.min_freq]
        endtime = self.dtstart + timedelta(**kwargs)

        deltas = []
        t0 = None
        for dt in gen:
            if dt > endtime:
                break
            t1 = t0
            t0 = dt
            if t1 is None or t0 is None or dt <= self.dtstart:
                continue
            delta = timeutils.delta_seconds(t1, t0)
            if delta:
                deltas.append(delta)
        if len(deltas):
            return min(deltas)
        else:
            return None
Example #29
    def discover(self, manager, param=None):
        """Discover resources to monitor."""
        secs_from_last_update = 0
        if self.last_run:
            secs_from_last_update = timeutils.delta_seconds(
                self.last_run, timeutils.utcnow(True))

        instances = []
        # NOTE(ityaptin) we only make a nova request if this is the first
        # discovery or the cached resources have expired
        if not self.last_run or secs_from_last_update >= self.expiration_time:
            try:
                utc_now = timeutils.utcnow(True)
                since = self.last_run.isoformat() if self.last_run else None
                instances = self.nova_cli.instance_get_all_by_host(
                    self.conf.host, since)
                self.last_run = utc_now
            except Exception:
                # NOTE(zqfan): instance_get_all_by_host is wrapped and will log
                # exception when there is any error. It is no need to raise it
                # again and print one more time.
                return []

        for instance in instances:
            if getattr(instance, 'OS-EXT-STS:vm_state', None) in ['deleted',
                                                                  'error']:
                self.instances.pop(instance.id, None)
            else:
                self.instances[instance.id] = instance

        return self.instances.values()
Example #30
def get_time_since_last_update(cluster):
    cluster_updated_at = timeutils.normalize_time(
        timeutils.parse_isotime(cluster.updated_at))
    current_time = timeutils.utcnow()
    spacing = timeutils.delta_seconds(cluster_updated_at,
                                      current_time)
    return spacing
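get_time_since_last_update is the compact recipe for "seconds since an ISO 8601 timestamp": parse, normalize to naive UTC, subtract from utcnow(). The same recipe for any ISO string (seconds_since is an illustrative name; oslo.utils assumed):

from oslo_utils import timeutils


def seconds_since(isotime_str):
    """Seconds elapsed since an ISO 8601 timestamp."""
    then = timeutils.normalize_time(timeutils.parse_isotime(isotime_str))
    return timeutils.delta_seconds(then, timeutils.utcnow())


print(seconds_since('2015-01-01T00:00:00Z') > 0)  # True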
Example #31
def iter_period(start, end, period):
    """Split a time from start to end in periods of a number of seconds.

    This function yields the (start, end) time for each period composing the
    time passed as argument.

    :param start: When the period set starts.
    :param end: When the period set ends.
    :param period: The duration of the period.
    """
    period_start = start
    increment = datetime.timedelta(seconds=period)
    for i in moves.xrange(
            int(math.ceil(timeutils.delta_seconds(start, end) /
                          float(period)))):
        next_start = period_start + increment
        yield (period_start, next_start)
        period_start = next_start
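The listing shows iter_period without a call site; a short usage sketch, assuming the function above together with its imports (datetime, math, six.moves and timeutils), splitting one hour into 15-minute windows:

start = datetime.datetime(2015, 1, 1, 0, 0)
end = datetime.datetime(2015, 1, 1, 1, 0)
for p_start, p_end in iter_period(start, end, period=900):
    print("%s -> %s" % (p_start.time(), p_end.time()))
# 00:00:00 -> 00:15:00
# 00:15:00 -> 00:30:00
# 00:30:00 -> 00:45:00
# 00:45:00 -> 01:00:00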
Example #32
 def wait_for_event_with_reason(self, stack_identifier, reason,
                                rsrc_name=None, num_expected=1):
     build_timeout = self.conf.build_timeout
     build_interval = self.conf.build_interval
     start = timeutils.utcnow()
     while timeutils.delta_seconds(start,
                                   timeutils.utcnow()) < build_timeout:
         try:
             rsrc_events = self.client.events.list(stack_identifier,
                                                   resource_name=rsrc_name)
         except heat_exceptions.HTTPNotFound:
             LOG.debug("No events yet found for %s" % rsrc_name)
         else:
             matched = [e for e in rsrc_events
                        if e.resource_status_reason == reason]
             if len(matched) == num_expected:
                 return matched
         time.sleep(build_interval)
Example #33
    def await_samples(self, metric, query):
        """
        This method is to wait for sample to add it to database.
        There are long time delays when using Postgresql (or Mysql)
        database as ceilometer backend
        """
        timeout = CONF.compute.build_timeout
        start = timeutils.utcnow()
        while timeutils.delta_seconds(start, timeutils.utcnow()) < timeout:
            body = self.telemetry_client.list_samples(metric, query)
            if body:
                return body
            time.sleep(CONF.compute.build_interval)

        raise exceptions.TimeoutException(
            'Sample for metric:%s with query:%s has not been added to the '
            'database within %d seconds' %
            (metric, query, CONF.compute.build_timeout))
Example #34
    def _check_notification_service(self):
        self.srv.start()

        notifier = messaging.get_notifier(self.transport,
                                          "compute.vagrant-precise")
        notifier.info({}, 'compute.instance.create.end', TEST_NOTICE_PAYLOAD)
        start = timeutils.utcnow()
        while timeutils.delta_seconds(start, timeutils.utcnow()) < 600:
            if (len(self.publisher.samples) >= self.expected_samples and
                    len(self.publisher.events) >= self.expected_events):
                break
            eventlet.sleep(0)
        self.assertNotEqual(self.srv.listeners, self.srv.pipeline_listeners)
        self.srv.stop()

        resources = list(set(s.resource_id for s in self.publisher.samples))
        self.assertEqual(self.expected_samples, len(self.publisher.samples))
        self.assertEqual(self.expected_events, len(self.publisher.events))
        self.assertEqual(["9f9d01b9-4a58-4271-9e27-398b21ab20d1"], resources)
Example #35
def convert_image(source, dest, out_format, bps_limit=None, run_as_root=True):
    """Convert image to other format."""

    cmd = ('qemu-img', 'convert', '-O', out_format, source, dest)

    # Check whether O_DIRECT is supported and set '-t none' if it is
    # This is needed to ensure that all data hit the device before
    # it gets unmapped remotely from the host for some backends
    # Reference Bug: #1363016

    # NOTE(jdg): In the case of file devices qemu does the
    # flush properly and more efficiently than would be done
    # setting O_DIRECT, so check for that and skip the
    # setting for non BLK devs
    if (utils.is_blk_device(dest) and volume_utils.check_for_odirect_support(
            source, dest, 'oflag=direct')):
        cmd = ('qemu-img', 'convert', '-t', 'none', '-O', out_format, source,
               dest)

    start_time = timeutils.utcnow()
    cgcmd = volume_utils.setup_blkio_cgroup(source, dest, bps_limit)
    if cgcmd:
        cmd = tuple(cgcmd) + cmd
    utils.execute(*cmd, run_as_root=run_as_root)

    duration = timeutils.delta_seconds(start_time, timeutils.utcnow())

    # NOTE(jdg): use a default of 1, mostly for unit test, but in
    # some incredible event this is 0 (cirros image?) don't barf
    if duration < 1:
        duration = 1
    fsz_mb = os.stat(source).st_size / units.Mi
    mbps = (fsz_mb / duration)
    msg = ("Image conversion details: src %(src)s, size %(sz).2f MB, "
           "duration %(duration).2f sec, destination %(dest)s")
    LOG.debug(msg % {
        "src": source,
        "sz": fsz_mb,
        "duration": duration,
        "dest": dest
    })

    msg = _("Converted %(sz).2f MB image at %(mbps).2f MB/s")
    LOG.info(msg % {"sz": fsz_mb, "mbps": mbps})
Example #36
def cancel_job(job_execution_id):
    ctx = context.ctx()
    job_execution = conductor.job_execution_get(ctx, job_execution_id)
    if job_execution.info['status'] in edp.JOB_STATUSES_TERMINATED:
        LOG.info("Job execution is already finished and shouldn't be canceled")
        return job_execution
    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster is None:
        LOG.info("Can not cancel this job on a non-existant cluster.")
        return job_execution
    engine = get_job_engine(cluster, job_execution)
    if engine is not None:
        job_execution = conductor.job_execution_update(
            ctx, job_execution_id,
            {'info': {'status': edp.JOB_STATUS_TOBEKILLED}})

        timeout = CONF.job_canceling_timeout
        s_time = timeutils.utcnow()
        while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
            if job_execution.info['status'] not in edp.JOB_STATUSES_TERMINATED:
                try:
                    job_info = engine.cancel_job(job_execution)
                except Exception as ex:
                    job_info = None
                    LOG.warning("Error during cancel of job execution: "
                                "{error}".format(error=ex))
                if job_info is not None:
                    job_execution = _write_job_status(job_execution, job_info)
                    LOG.info("Job execution was canceled successfully")
                    return job_execution
                context.sleep(3)
                job_execution = conductor.job_execution_get(
                    ctx, job_execution_id)
                if not job_execution:
                    LOG.info("Job execution was deleted. "
                             "Canceling current operation.")
                    return job_execution
            else:
                LOG.info("Job execution status: {status}").format(
                    status=job_execution.info['status'])
                return job_execution
        else:
            raise e.CancelingFailed(_('Job execution %s was not canceled')
                                    % job_execution.id)
Example #37
def check_and_fix_integrity(wf_ex):
    check_after_seconds = CONF.engine.execution_integrity_check_delay

    if check_after_seconds < 0:
        # Never check integrity if it's a negative value.
        return

    # To break cyclic dependency.
    from mistral.engine import task_handler

    running_task_execs = db_api.get_task_executions(
        workflow_execution_id=wf_ex.id, state=states.RUNNING)

    for t_ex in running_task_execs:
        # The idea is that we take the latest known timestamp of the task
        # execution and consider it eligible for checking and fixing only
        # if some minimum period of time elapsed since the last update.
        timestamp = t_ex.updated_at or t_ex.created_at

        delta = timeutils.delta_seconds(timestamp, timeutils.utcnow())

        if delta < check_after_seconds:
            continue

        child_executions = t_ex.executions

        if not child_executions:
            continue

        all_finished = all(
            [states.is_completed(c_ex.state) for c_ex in child_executions])

        if all_finished:
            # We found a task execution in RUNNING state for which all
            # child executions are finished. We need to call
            # "schedule_on_action_complete" on the task handler for any of
            # the child executions so that the task state is calculated and
            # updated properly.
            LOG.warning(
                "Found a task execution that is likely stuck in RUNNING state"
                " because all child executions are finished,"
                " will try to recover [task_execution=%s]", t_ex.id)

            task_handler.schedule_on_action_complete(child_executions[-1])
Example #38
    def _wait_for_stack_status(self, stack_identifier, status,
                               failure_pattern=None,
                               success_on_not_found=False,
                               signal_required=False,
                               resources_to_signal=None):
        """Waits for a Stack to reach a given status.

        Note this compares the full $action_$status, e.g
        CREATE_COMPLETE, not just COMPLETE which is exposed
        via the status property of Stack in heatclient
        """
        if failure_pattern:
            fail_regexp = re.compile(failure_pattern)
        elif 'FAILED' in status:
            # If we're looking for e.g CREATE_FAILED, COMPLETE is unexpected.
            fail_regexp = re.compile('^.*_COMPLETE$')
        else:
            fail_regexp = re.compile('^.*_FAILED$')
        build_timeout = self.conf.build_timeout
        build_interval = self.conf.build_interval

        start = timeutils.utcnow()
        while timeutils.delta_seconds(start,
                                      timeutils.utcnow()) < build_timeout:
            try:
                stack = self.client.stacks.get(stack_identifier,
                                               resolve_outputs=False)
            except heat_exceptions.HTTPNotFound:
                if success_on_not_found:
                    return
                # ignore this, as the resource may not have
                # been created yet
            else:
                if self._verify_status(stack, stack_identifier, status,
                                       fail_regexp):
                    return
            if signal_required:
                self.signal_resources(resources_to_signal)
            time.sleep(build_interval)

        message = ('Stack %s failed to reach %s status within '
                   'the required time (%s s).' %
                   (stack_identifier, status, build_timeout))
        raise exceptions.TimeoutException(message)
Example #39
    def delete_timeout(self,
                       resource_client,
                       resource_id,
                       timeout=CONF.data_processing.cluster_timeout):

        start = timeutils.utcnow()
        while timeutils.delta_seconds(start, timeutils.utcnow()) < timeout:
            try:
                resource_client.get(resource_id)
            except sab.APIException as sahara_api_exception:
                if 'not found' in sahara_api_exception.message:
                    return
                raise sahara_api_exception

            time.sleep(CONF.data_processing.request_timeout)

        raise exceptions.TimeoutException(
            'Failed to delete resource "%s" in %d seconds.' %
            (resource_id, timeout))
Example #40
 def assert_resource_is_a_stack(self, stack_identifier, res_name,
                                wait=False):
     build_timeout = self.conf.build_timeout
     build_interval = self.conf.build_interval
     start = timeutils.utcnow()
     while timeutils.delta_seconds(start,
                                   timeutils.utcnow()) < build_timeout:
         time.sleep(build_interval)
         try:
             nested_identifier = self._get_nested_identifier(
                 stack_identifier, res_name)
         except Exception:
             # We may have to wait, if the create is in-progress
             if wait:
                 time.sleep(build_interval)
             else:
                 raise
         else:
             return nested_identifier
Example #41
    def process_notification(self, message):

        period_start = timeutils.normalize_time(
            timeutils.parse_isotime(
                message['payload']['audit_period_beginning']))
        period_end = timeutils.normalize_time(
            timeutils.parse_isotime(message['payload']['audit_period_ending']))

        period_difference = timeutils.delta_seconds(period_start, period_end)

        yield sample.Sample.from_notification(
            name=message['event_type'],
            type=sample.TYPE_CUMULATIVE,
            unit='s',
            volume=period_difference,
            resource_id=message['payload']['instance_id'],
            user_id=message['payload']['user_id'],
            project_id=message['payload']['tenant_id'],
            message=message)
Example #42
    def wait_for_deploy_physical_id(self, stack):
        build_timeout = self.conf.build_timeout
        build_interval = self.conf.build_interval

        start = timeutils.utcnow()
        while timeutils.delta_seconds(start,
                                      timeutils.utcnow()) < build_timeout:
            created = True
            for res in self.client.resources.list(stack, nested_depth='2'):
                if not res.physical_resource_id:
                    created = False
                    break
            if created:
                return
            time.sleep(build_interval)

        message = ('Deployment resources failed to be created within '
                   'the required time (%s s).' % (build_timeout))
        raise exceptions.TimeoutException(message)
Example #43
def fetch(context, image_service, image_id, path, _user_id, _project_id):
    # TODO(vish): Improve context handling and add owner and auth data
    #             when it is added to glance.  Right now there is no
    #             auth checking in glance, so we assume that access was
    #             checked before we got here.
    start_time = timeutils.utcnow()
    with fileutils.remove_path_on_error(path):
        with open(path, "wb") as image_file:
            try:
                image_service.download(context, image_id,
                                       tpool.Proxy(image_file))
            except IOError as e:
                if e.errno == errno.ENOSPC:
                    params = {'path': os.path.dirname(path),
                              'image': image_id}
                    reason = _("No space left in image_conversion_dir "
                               "path (%(path)s) while fetching "
                               "image %(image)s.") % params
                    LOG.exception(reason)
                    raise exception.ImageTooBig(image_id=image_id,
                                                reason=reason)

                reason = ("IOError: %(errno)s %(strerror)s" %
                          {'errno': e.errno, 'strerror': e.strerror})
                LOG.error(reason)
                raise exception.ImageDownloadFailed(image_href=image_id,
                                                    reason=reason)

    duration = timeutils.delta_seconds(start_time, timeutils.utcnow())

    # NOTE(jdg): use a default of 1, mostly for unit test, but in
    # some incredible event this is 0 (cirros image?) don't barf
    if duration < 1:
        duration = 1
    fsz_mb = os.stat(image_file.name).st_size / units.Mi
    mbps = (fsz_mb / duration)
    msg = ("Image fetch details: dest %(dest)s, size %(sz).2f MB, "
           "duration %(duration).2f sec")
    LOG.debug(msg, {"dest": image_file.name,
                    "sz": fsz_mb,
                    "duration": duration})
    msg = "Image download %(sz).2f MB at %(mbps).2f MB/s"
    LOG.info(msg, {"sz": fsz_mb, "mbps": mbps})
Example #44
def clear_volume(volume_size, volume_path, volume_clear=None,
                 volume_clear_size=None, volume_clear_ionice=None,
                 throttle=None):
    """Unprovision old volumes to prevent data leaking between users."""
    if volume_clear is None:
        volume_clear = CONF.volume_clear

    if volume_clear_size is None:
        volume_clear_size = CONF.volume_clear_size

    if volume_clear_size == 0:
        volume_clear_size = volume_size

    if volume_clear_ionice is None:
        volume_clear_ionice = CONF.volume_clear_ionice

    LOG.info(_LI("Performing secure delete on volume: %s") % volume_path)

    if volume_clear == 'zero':
        return copy_volume('/dev/zero', volume_path, volume_clear_size,
                           CONF.volume_dd_blocksize,
                           sync=True, execute=utils.execute,
                           ionice=volume_clear_ionice,
                           throttle=throttle)
    elif volume_clear == 'shred':
        clear_cmd = ['shred', '-n3']
        if volume_clear_size:
            clear_cmd.append('-s%dMiB' % volume_clear_size)
    else:
        raise exception.InvalidConfigurationValue(
            option='volume_clear',
            value=volume_clear)

    clear_cmd.append(volume_path)
    start_time = timeutils.utcnow()
    utils.execute(*clear_cmd, run_as_root=True)
    duration = timeutils.delta_seconds(start_time, timeutils.utcnow())

    # NOTE(jdg): use a default of 1, mostly for unit test, but in
    # some incredible event this is 0 (cirros image?) don't barf
    if duration < 1:
        duration = 1
    LOG.info(_LI('Elapsed time for clear volume: %.2f sec') % duration)
Example #45
    def _check_cluster_delete(self, cluster_id):
        self.client.clusters.delete(cluster_id)

        # check that cluster moved to deleting state
        cluster = self.client.clusters.get(cluster_id)
        self.assertEqual('Deleting', cluster.status)

        timeout = TEMPEST_CONF.data_processing.cluster_timeout
        s_time = timeutils.utcnow()
        while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
            try:
                self.client.clusters.get(cluster_id)
            except sab.APIException:
                # cluster is deleted
                return
            time.sleep(TEMPEST_CONF.data_processing.request_timeout)

        raise exceptions.TimeoutException('Cluster failed to terminate '
                                          'in %d seconds.' % timeout)
Example #46
    def handle_sample(self, s):
        """Handle a sample, converting if necessary."""
        # LOG.debug('handling sample %s', s)
        key = s.name + s.resource_id
        prev = self.cache.get(key)
        timestamp = timeutils.parse_isotime(s.timestamp)
        self.cache[key] = (s.volume, timestamp, s.monotonic_time)

        if prev:
            prev_volume = prev[0]
            prev_timestamp = prev[1]
            prev_monotonic_time = prev[2]
            if (prev_monotonic_time is not None and
                    s.monotonic_time is not None):
                # NOTE(sileht): Prefer high precision timer
                time_delta = s.monotonic_time - prev_monotonic_time
            else:
                time_delta = timeutils.delta_seconds(prev_timestamp, timestamp)
            # disallow violations of the arrow of time
            if time_delta < 0:
                LOG.warning(_('dropping out of time order sample: %s'), (s,))
                # Reset the cache to the newer sample.
                self.cache[key] = prev
                return None
            # we only allow negative volume deltas for noncumulative
            # samples, whereas for cumulative we assume that a reset has
            # occurred in the interim so that the current volume gives a
            # lower bound on growth
            volume_delta = (s.volume - prev_volume
                            if (prev_volume <= s.volume or
                                s.type != sample.TYPE_CUMULATIVE)
                            else s.volume)
            rate_of_change = ((1.0 * volume_delta / time_delta)
                              if time_delta else 0.0)

            s = self._convert(s, rate_of_change)
            # LOG.debug('converted to: %s', s)
        else:
            # LOG.warning(_('dropping sample with no predecessor: %s'),
            #            (s,))
            s = None
        return s
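The heart of the transformer above is the guarded volume-delta and rate computation. The same arithmetic, pulled out into a stand-alone sketch (CUMULATIVE is a stand-in for sample.TYPE_CUMULATIVE):

# Stand-alone sketch of the delta/rate arithmetic used by handle_sample().
CUMULATIVE = 'cumulative'


def rate_of_change(prev_volume, volume, time_delta, sample_type):
    # Negative deltas only make sense for non-cumulative samples; for a
    # cumulative sample a drop is treated as a counter reset, so the new
    # volume itself is used as a lower bound on growth.
    if prev_volume <= volume or sample_type != CUMULATIVE:
        volume_delta = volume - prev_volume
    else:
        volume_delta = volume
    return (1.0 * volume_delta / time_delta) if time_delta else 0.0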
Example #47
def _check_decommission(cluster, instances, check_func, timeout):
    s_time = timeutils.utcnow()
    while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
        statuses = check_func(cluster)
        dec_ok = True
        for instance in instances:
            if statuses[instance.fqdn()] != 'decommissioned':
                dec_ok = False

        if dec_ok:
            return
        else:
            context.sleep(5)
    else:
        raise ex.DecommissionError(
            _("Cannot finish decommission of cluster %(cluster)s in "
              "%(seconds)d seconds") % {
                  "cluster": cluster,
                  "seconds": timeout
              })
Example #48
def decommission_dn(nn, inst_to_be_deleted, survived_inst):
    with remote.get_remote(nn) as r:
        r.write_file_to('/etc/hadoop/dn.excl',
                        utils.generate_fqdn_host_names(inst_to_be_deleted))
        run.refresh_nodes(remote.get_remote(nn), "dfsadmin")
        context.sleep(3)

        timeout = config_helper.get_decommissioning_timeout(
            nn.node_group.cluster)
        s_time = timeutils.utcnow()
        all_found = False

        while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
            cmd = r.execute_command(
                "sudo su -c 'hadoop dfsadmin -report' hadoop")
            all_found = True
            datanodes_info = parse_dfs_report(cmd[1])
            for i in inst_to_be_deleted:
                for dn in datanodes_info:
                    if (dn["Name"].startswith(i.internal_ip)) and (
                            dn["Decommission Status"] != "Decommissioned"):
                        all_found = False
                        break

            if all_found:
                r.write_files_to({
                    '/etc/hadoop/dn.incl':
                    utils.generate_fqdn_host_names(survived_inst),
                    '/etc/hadoop/dn.excl':
                    "",
                })
                break
            context.sleep(3)

        if not all_found:
            raise ex.DecommissionError(
                _("Cannot finish decommission of cluster %(cluster)s in "
                  "%(seconds)d seconds") % {
                      "cluster": nn.node_group.cluster,
                      "seconds": timeout
                  })
    def _check_notifications(self, fake_publisher_cls):
        fake_publisher_cls.side_effect = [self.publisher, self.publisher2]

        self.srv = notification.NotificationService()
        self.srv2 = notification.NotificationService()
        with mock.patch(
                'ceilometer.coordination.PartitionCoordinator'
                '._get_members',
                return_value=['harry', 'lloyd']):
            with mock.patch('uuid.uuid4', return_value='harry'):
                self.srv.start()
            with mock.patch('uuid.uuid4', return_value='lloyd'):
                self.srv2.start()

        notifier = messaging.get_notifier(self.transport,
                                          "compute.vagrant-precise")
        payload1 = TEST_NOTICE_PAYLOAD.copy()
        payload1['instance_id'] = '0'
        notifier.info(context.RequestContext(), 'compute.instance.create.end',
                      payload1)
        payload2 = TEST_NOTICE_PAYLOAD.copy()
        payload2['instance_id'] = '1'
        notifier.info(context.RequestContext(), 'compute.instance.create.end',
                      payload2)
        self.expected_samples = 4
        start = timeutils.utcnow()
        with mock.patch('six.moves.builtins.hash', lambda x: int(x)):
            while timeutils.delta_seconds(start, timeutils.utcnow()) < 60:
                if (len(self.publisher.samples + self.publisher2.samples) >=
                        self.expected_samples):
                    break
                eventlet.sleep(0)
            self.srv.stop()
            self.srv2.stop()

        self.assertEqual(2, len(self.publisher.samples))
        self.assertEqual(2, len(self.publisher2.samples))
        self.assertEqual(
            1, len(set(s.resource_id for s in self.publisher.samples)))
        self.assertEqual(
            1, len(set(s.resource_id for s in self.publisher2.samples)))
Example #50
    def _wait_for_resource_status(self,
                                  stack_identifier,
                                  resource_name,
                                  status,
                                  failure_pattern='^.*_FAILED$',
                                  success_on_not_found=False):
        """Waits for a Resource to reach a given status."""
        fail_regexp = re.compile(failure_pattern)
        build_timeout = self.conf.build_timeout
        build_interval = self.conf.build_interval
        res = None
        start = timeutils.utcnow()
        while timeutils.delta_seconds(start,
                                      timeutils.utcnow()) < build_timeout:
            try:
                res = self.client.resources.get(stack_identifier,
                                                resource_name)
            except heat_exceptions.HTTPNotFound:
                if success_on_not_found:
                    return
                # ignore this, as the resource may not have
                # been created yet
            else:
                if res.resource_status == status:
                    return
                wait_for_action = status.split('_')[0]
                resource_action = res.resource_status.split('_')[0]
                if (resource_action == wait_for_action
                        and fail_regexp.search(res.resource_status)):
                    raise exceptions.StackResourceBuildErrorException(
                        resource_name=res.resource_name,
                        stack_identifier=stack_identifier,
                        resource_status=res.resource_status,
                        resource_status_reason=res.resource_status_reason)
            time.sleep(build_interval)

        message = ('Resource %s from stack %s failed to reach %s status '
                   'within the required time (%s s). Current resource '
                   'status: %s.' % (resource_name, stack_identifier, status,
                                    build_timeout, res.resource_status))
        raise exceptions.TimeoutException(message)
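A hypothetical call site inside a heat integration test might look like this (the stack identifier and resource name are illustrative only):

# Hypothetical usage of the helper above from a test method.
def test_server_resource_completes(self):
    stack_id = self.stack_identifier  # assumed to be set up by the test
    self._wait_for_resource_status(stack_id, 'my_server', 'CREATE_COMPLETE')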
Example #51
    def _update_meter_stats(stat, meter):
        """Do the stats calculation on a requested time bucket in stats dict

        :param stats: dict where aggregated stats are kept
        :param index: time bucket index in stats
        :param meter: meter record as returned from HBase
        :param start_time: query start time
        :param period: length of the time bucket
        """
        vol = meter['counter_volume']
        ts = meter['timestamp']
        stat.unit = meter['counter_unit']
        stat.min = min(vol, stat.min or vol)
        stat.max = max(vol, stat.max)
        stat.sum = vol + (stat.sum or 0)
        stat.count += 1
        stat.avg = (stat.sum / float(stat.count))
        stat.duration_start = min(ts, stat.duration_start or ts)
        stat.duration_end = max(ts, stat.duration_end or ts)
        stat.duration = (timeutils.delta_seconds(stat.duration_start,
                                                 stat.duration_end))
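To make the running aggregation concrete, here is a small worked example with two made-up meter records, a stand-in statistics object, and the method above invoked as a plain function:

# Worked example of the aggregation in _update_meter_stats(); all values
# below are made up and FakeStat merely mimics the real statistics model.
import datetime


class FakeStat(object):
    unit = None
    min = 0
    max = 0
    sum = 0
    count = 0
    avg = 0
    duration_start = None
    duration_end = None
    duration = 0


stat = FakeStat()
t0 = datetime.datetime(2015, 1, 1, 12, 0, 0)
for meter in [{'counter_volume': 2.0, 'timestamp': t0, 'counter_unit': 'GB'},
              {'counter_volume': 6.0, 'counter_unit': 'GB',
               'timestamp': t0 + datetime.timedelta(seconds=30)}]:
    _update_meter_stats(stat, meter)
# stat.min == 2.0, stat.max == 6.0, stat.sum == 8.0, stat.count == 2,
# stat.avg == 4.0, stat.duration == 30.0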
Example #52
    def _handle_expired_request(request):
        """Handle expired request.

        When request has expired it is removed from the requests cache and
        the `RequestTimeout` exception is set as a request result.
        """
        if request.transition_and_log_error(pr.FAILURE, logger=LOG):
            # Raise an exception (and then catch it) so we get a nice
            # traceback that the request will get instead of it getting
            # just an exception with no traceback...
            try:
                request_age = timeutils.delta_seconds(request.created_on,
                                                      timeutils.utcnow())
                raise exc.RequestTimeout(
                    "Request '%s' has expired after waiting for %0.2f"
                    " seconds for it to transition out of (%s) states" %
                    (request, request_age, ", ".join(pr.WAITING_STATES)))
            except exc.RequestTimeout:
                with misc.capture_failure() as failure:
                    LOG.debug(failure.exception_str)
                    request.set_result(failure)
Example #53
    def _wait_for_stack_status(self,
                               stack_identifier,
                               status,
                               failure_pattern='^.*_FAILED$',
                               success_on_not_found=False):
        """
        Waits for a Stack to reach a given status.

        Note this compares the full $action_$status, e.g.
        CREATE_COMPLETE, not just COMPLETE which is exposed
        via the status property of Stack in heatclient
        """
        fail_regexp = re.compile(failure_pattern)
        build_timeout = self.conf.build_timeout
        build_interval = self.conf.build_interval

        start = timeutils.utcnow()
        while timeutils.delta_seconds(start,
                                      timeutils.utcnow()) < build_timeout:
            try:
                stack = self.client.stacks.get(stack_identifier)
            except heat_exceptions.HTTPNotFound:
                if success_on_not_found:
                    return
                # ignore this, as the resource may not have
                # been created yet
            else:
                if stack.stack_status == status:
                    return
                if fail_regexp.search(stack.stack_status):
                    raise exceptions.StackBuildErrorException(
                        stack_identifier=stack_identifier,
                        stack_status=stack.stack_status,
                        stack_status_reason=stack.stack_status_reason)
            time.sleep(build_interval)

        message = ('Stack %s failed to reach %s status within '
                   'the required time (%s s).' %
                   (stack_identifier, status, build_timeout))
        raise exceptions.TimeoutException(message)
Example #54
    def verify_message_stats(self, message):
        """Verifies the oldest & newest message stats

        :param message: oldest (or) newest message returned by
                        queue_name/stats.
        """
        expected_keys = ['age', 'created', 'href']

        response_keys = message.keys()
        response_keys = sorted(response_keys)
        self.assertEqual(expected_keys, response_keys)

        # Verify that age has valid values
        age = message['age']
        msg = 'Invalid Age {0}'.format(age)
        self.assertLessEqual(0, age, msg)
        self.assertLessEqual(age, self.limits.max_message_ttl, msg)

        # Verify that GET on href returns 200
        path = message['href']
        result = self.client.get(path)
        self.assertEqual(200, result.status_code)

        # Verify that created time falls within the last 10 minutes
        # NOTE(malini): The messages are created during the test.
        created_time = message['created']
        created_time = timeutils.normalize_time(
            timeutils.parse_isotime(created_time))
        now = timeutils.utcnow()

        delta = timeutils.delta_seconds(before=created_time, after=now)
        # NOTE(malini): The 'int()' below is a workaround for the small time
        # difference between julianday & UTC.
        # (needed to pass this test on sqlite driver)
        delta = int(delta)

        msg = ('Invalid Time Delta {0}, Created time {1}, Now {2}'.format(
            delta, created_time, now))
        self.assertLessEqual(0, delta, msg)
        self.assertLessEqual(delta, 6000, msg)
Example #55
def fetch(context, image_service, image_id, path, _user_id, _project_id):
    # TODO(vish): Improve context handling and add owner and auth data
    #             when it is added to glance.  Right now there is no
    #             auth checking in glance, so we assume that access was
    #             checked before we got here.
    start_time = timeutils.utcnow()
    with fileutils.remove_path_on_error(path):
        with open(path, "wb") as image_file:
            try:
                image_service.download(context, image_id, image_file)
            except IOError as e:
                with excutils.save_and_reraise_exception():
                    if e.errno == errno.ENOSPC:
                        # TODO(eharney): Fire an async error message for this
                        LOG.error(
                            _LE("No space left in image_conversion_dir "
                                "path (%(path)s) while fetching "
                                "image %(image)s."), {
                                    'path': os.path.dirname(path),
                                    'image': image_id
                                })

    duration = timeutils.delta_seconds(start_time, timeutils.utcnow())

    # NOTE(jdg): use a default of 1, mostly for unit test, but in
    # some incredible event this is 0 (cirros image?) don't barf
    if duration < 1:
        duration = 1
    fsz_mb = os.stat(image_file.name).st_size / units.Mi
    mbps = (fsz_mb / duration)
    msg = ("Image fetch details: dest %(dest)s, size %(sz).2f MB, "
           "duration %(duration).2f sec")
    LOG.debug(msg, {
        "dest": image_file.name,
        "sz": fsz_mb,
        "duration": duration
    })
    msg = _LI("Image download %(sz).2f MB at %(mbps).2f MB/s")
    LOG.info(msg, {"sz": fsz_mb, "mbps": mbps})
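Unlike the earlier variant of fetch(), this one relies on excutils.save_and_reraise_exception() to log the ENOSPC condition and still propagate the original IOError. A minimal sketch of that behaviour, assuming excutils comes from oslo.utils:

# Minimal sketch of save_and_reraise_exception(): the block runs, then the
# original exception is re-raised automatically when the context exits.
from oslo_utils import excutils


def risky():
    raise IOError('boom')


try:
    try:
        risky()
    except IOError:
        with excutils.save_and_reraise_exception():
            print('logging happens here, before the re-raise')
except IOError as e:
    print('caller still sees the original IOError: %s' % e)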
    def test_multiple_agents(self, fake_publisher_cls):
        fake_publisher_cls.return_value = self.publisher

        self.srv2 = notification.NotificationService()
        with mock.patch(
                'ceilometer.coordination.PartitionCoordinator'
                '._get_members',
                return_value=['harry', 'lloyd']):
            with mock.patch('uuid.uuid4', return_value='harry'):
                self.srv.start()
            with mock.patch('uuid.uuid4', return_value='lloyd'):
                self.srv2.start()

        notifier = messaging.get_notifier(self.transport,
                                          "compute.vagrant-precise")
        payload1 = TEST_NOTICE_PAYLOAD.copy()
        payload1['instance_id'] = '0'
        notifier.info(context.RequestContext(), 'compute.instance.create.end',
                      payload1)
        payload2 = TEST_NOTICE_PAYLOAD.copy()
        payload2['instance_id'] = '1'
        notifier.info(context.RequestContext(), 'compute.instance.create.end',
                      payload2)
        self.expected_samples = 4
        self.expected_events = 2
        start = timeutils.utcnow()
        while timeutils.delta_seconds(start, timeutils.utcnow()) < 60:
            if (len(self.publisher.samples) >= self.expected_samples
                    and len(self.publisher.events) >= self.expected_events):
                break
            eventlet.sleep(0)
        self.srv.stop()
        self.srv2.stop()

        resources = set(s.resource_id for s in self.publisher.samples)
        self.assertEqual(self.expected_samples, len(self.publisher.samples))
        self.assertEqual(self.expected_events, len(self.publisher.events))
        self.assertEqual(set(['1', '0']), resources)
Example #57
    def await_agents(self, instances):
        api = self.get_api_client(instances[0].cluster)
        timeout = 300
        LOG.debug("Waiting %(timeout)s seconds for agent connected to manager"
                  % {'timeout': timeout})
        s_time = timeutils.utcnow()
        while timeutils.delta_seconds(s_time, timeutils.utcnow()) < timeout:
            hostnames = [i.fqdn() for i in instances]
            hostnames_to_manager = [h.hostname for h in
                                    api.get_all_hosts('full')]
            is_ok = True
            for hostname in hostnames:
                if hostname not in hostnames_to_manager:
                    is_ok = False
                    break

            if not is_ok:
                context.sleep(5)
            else:
                break
        else:
            raise ex.HadoopProvisionError(_("Cloudera agents failed to connect"
                                            " to Cloudera Manager"))
Example #58
    def run_periodic_tasks(self, context, raise_on_error=False):
        """Tasks to be run at a periodic interval."""
        idle_for = DEFAULT_INTERVAL
        for task_name, task in self._periodic_tasks:  # pylint: disable=no-member
            full_task_name = '.'.join([self.__class__.__name__, task_name])

            now = timeutils.utcnow()
            spacing = self._periodic_spacing[task_name]  # pylint: disable=no-member
            last_run = self._periodic_last_run[task_name]  # pylint: disable=no-member

            # If a periodic task is _nearly_ due, then we'll run it early
            if spacing is not None and last_run is not None:
                due = last_run + datetime.timedelta(seconds=spacing)
                if not timeutils.is_soon(due, 0.2):
                    idle_for = min(idle_for, timeutils.delta_seconds(now, due))
                    continue

            if spacing is not None:
                idle_for = min(idle_for, spacing)

            LOG.debug(_("Running periodic task %(full_task_name)s"),
                      {"full_task_name": full_task_name})
            self._periodic_last_run[task_name] = timeutils.utcnow()  # pylint: disable=no-member

            try:
                task(self, context)
            except Exception as e:
                if raise_on_error:
                    raise
                LOG.exception(_("Error during %(full_task_name)s: %(e)s"), {
                    "full_task_name": full_task_name,
                    "e": e
                })
            time.sleep(0)

        return idle_for
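run_periodic_tasks() returns the number of seconds the caller can idle before any task is due again. A hypothetical driving loop around it (the manager and context objects are assumed to exist in the surrounding service code):

# Hypothetical loop consuming the idle_for value returned above.
import time


def periodic_loop(manager, ctxt, keep_running=lambda: True):
    while keep_running():
        idle_for = manager.run_periodic_tasks(ctxt, raise_on_error=False)
        time.sleep(idle_for)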
Example #59
def fetch(context, image_service, image_id, path, _user_id, _project_id):
    # TODO(vish): Improve context handling and add owner and auth data
    #             when it is added to glance.  Right now there is no
    #             auth checking in glance, so we assume that access was
    #             checked before we got here.
    start_time = timeutils.utcnow()
    with fileutils.remove_path_on_error(path):
        with open(path, "wb") as image_file:
            image_service.download(context, image_id, image_file)
    duration = timeutils.delta_seconds(start_time, timeutils.utcnow())

    # NOTE(jdg): use a default of 1, mostly for unit test, but in
    # some incredible event this is 0 (cirros image?) don't barf
    if duration < 1:
        duration = 1
    fsz_mb = os.stat(image_file.name).st_size / units.Mi
    mbps = (fsz_mb / duration)
    msg = ("Image fetch details: dest %(dest)s, size %(sz).2f MB, "
           "duration %(duration).2f sec")
    LOG.debug(msg, {"dest": image_file.name,
                    "sz": fsz_mb,
                    "duration": duration})
    msg = _LI("Image download %(sz).2f MB at %(mbps).2f MB/s")
    LOG.info(msg, {"sz": fsz_mb, "mbps": mbps})
Example #60
    def discover(self, manager, param=None):
        """Discover resources to monitor."""
        secs_from_last_update = 0
        if self.last_run:
            secs_from_last_update = timeutils.delta_seconds(
                self.last_run, timeutils.utcnow(True))

        instances = []
        # NOTE(ityaptin) we make a nova request only if this is the first
        # discovery run or the cached resources have expired
        if not self.last_run or secs_from_last_update >= self.expiration_time:
            try:
                utc_now = timeutils.utcnow(True)
                since = self.last_run.isoformat() if self.last_run else None
                instances = self.nova_cli.instance_get_all_by_host(
                    cfg.CONF.host, since)
                self.last_run = utc_now
            except Exception:
                # NOTE(zqfan): instance_get_all_by_host is wrapped and will log
                # exception when there is any error. It is no need to raise it
                # again and print one more time.
                # NOTE(idegtiarov) it could happen in deployment process
                # that nova-client is initialized with incorrect endpoint
                # parameters, to avoid using broken client we initialized
                # a new one.
                self.nova_cli = nova_client.Client()
                return []

        for instance in instances:
            if getattr(instance, 'OS-EXT-STS:vm_state',
                       None) in ['deleted', 'error']:
                self.instances.pop(instance.id, None)
            else:
                self.instances[instance.id] = instance

        return self.instances.values()