Example #1
0
    def _receive_blocking(self, timeout_in_secs=5):
        """
        Receive a message from the TCP connection (blocking)

        Waits until _receive_non_blocking yields a message or until
        timeout_in_secs has elapsed since this call was made.

        :param timeout_in_secs: overall time budget for the call, in seconds
        :returns: the received message, or None on timeout or when the
                  socket is no longer set
        """
        start_ms = timers.get_monotonic_timestamp_in_ms()

        # Time left for the next select call.  It is always derived from the
        # original budget and the total elapsed time, so the elapsed time is
        # never subtracted more than once.
        remaining_secs = timeout_in_secs

        while self._socket is not None:
            read_objs = [self._socket.fileno()]
            try:
                readable, _, _ = select.select(read_objs, [], [],
                                               remaining_secs)

                for selobj in readable:
                    if selobj == self._socket.fileno():
                        msg = self._receive_non_blocking()
                        if msg is not None:
                            return msg

            except (OSError, socket.error, select.error):
                # Best effort: an interrupted select (EINTR) or any other
                # socket error is tolerated and the wait continues until the
                # time budget is used up.
                pass

            now_ms = timers.get_monotonic_timestamp_in_ms()
            secs_expired = (now_ms - start_ms) / 1000
            if timeout_in_secs <= secs_expired:
                DLOG.info("Timed out waiting for a message.")
                break

            remaining_secs = timeout_in_secs - secs_expired

        return None
Example #2
0
 def auto_commit(self):
     """
     Coroutine body for the auto-commit timer.

     Receives the identifier of the timer that fired.  When it matches the
     pending commit timer the session is committed, the commit duration is
     recorded in the histogram and the pending timer identifier is cleared.
     """
     fired_timer_id = (yield)
     if fired_timer_id != self._commit_timer_id:
         # Some other timer fired, nothing to commit
         return

     began_ms = timers.get_monotonic_timestamp_in_ms()
     self._session.commit()
     finished_ms = timers.get_monotonic_timestamp_in_ms()
     duration_ms = finished_ms - began_ms

     # 100 milliseconds per decisecond
     histogram.add_histogram_data("database-commits (periodic)",
                                  duration_ms / 100, "decisecond")
     self._commit_timer_id = None
Example #3
0
 def commit(self):
     """
     Commit the session, either immediately or through the commit timer.

     In inline mode the session is committed right away and the duration is
     recorded in the histogram.  Otherwise a 'db-auto-commit' timer is
     created, unless one is already pending.
     """
     if not self._commit_inline:
         if self._commit_timer_id is None:
             self._commit_timer_id = timers.timers_create_timer(
                 'db-auto-commit', 1, 1, self.auto_commit)
         return

     began_ms = timers.get_monotonic_timestamp_in_ms()
     self._session.commit()
     duration_ms = timers.get_monotonic_timestamp_in_ms() - began_ms

     # 100 milliseconds per decisecond
     histogram.add_histogram_data("database-commits (inline)",
                                  duration_ms / 100, "decisecond")
Example #4
0
    def handle_event(self, host, event, event_data=None):
        """
        Handle event while in the deleting state

        :param host: host that is being deleted
        :param event: HOST_EVENT value to process
        :param event_data: optional event payload (not used by this state)
        :returns: name of the state the host should be in after the event
        """
        def _start_or_retry_delete():
            # Start a fresh delete task when none is in progress, otherwise
            # restart the current one if it failed or timed out.
            if not host.task.inprogress():
                host.task = DeleteHostTask(host)
                host.task.start()
            elif host.task.is_failed() or host.task.timed_out():
                host.task.start()

        if HOST_EVENT.DELETE == event:
            _start_or_retry_delete()

        elif HOST_EVENT.TASK_COMPLETED == event:
            return HOST_STATE.DELETED

        elif HOST_EVENT.TASK_FAILED == event:
            DLOG.info("Delete failed for %s." % host.name)
            return HOST_STATE.DELETING_FAILED

        elif HOST_EVENT.AUDIT == event:
            if config.section_exists('host-configuration'):
                section = config.CONF['host-configuration']
                max_wait = int(section.get('max_host_deleting_wait_in_secs',
                                           60))
            else:
                max_wait = 60

            if not host.fsm_start_time:
                host.fsm_start_time = timers.get_monotonic_timestamp_in_ms()

            now_ms = timers.get_monotonic_timestamp_in_ms()
            secs_expired = (now_ms - host.fsm_start_time) / 1000

            if max_wait > secs_expired:
                _start_or_retry_delete()
            else:
                # Giving up on the delete is a delete failure, the same
                # outcome as a failed delete task.
                DLOG.info("Timed out waiting for delete completion of %s."
                          % host.name)
                return HOST_STATE.DELETING_FAILED

        elif HOST_EVENT.TASK_TIMEOUT == event:
            DLOG.info("Delete timed out for %s." % host.name)

        else:
            DLOG.verbose("Ignoring %s event for %s." % (event, host.name))

        return self.name
Example #5
0
def _task_coroutine_with_timer(future, arg1, callback):
    """
    Test coroutine: arms a 2 second timer on the future and reports
    "FUNCTION PASSED" through the callback only when that timer is what
    completed the future after more than 2 seconds; otherwise reports None.
    """
    assert (arg1 == 'arg1')
    timer_id = future.timer('timer-test', 2)
    start_ms = timers.get_monotonic_timestamp_in_ms()
    future.result = (yield)
    end_ms = timers.get_monotonic_timestamp_in_ms()

    timer_fired = (future.result.is_complete() and
                   future.result.is_timer and
                   future.result.data == timer_id)

    if timer_fired and 2 < (end_ms - start_ms) / 1000:
        callback.send("FUNCTION PASSED")
    else:
        callback.send(None)
Example #6
0
    def _state_change_callback(self, prev_state, state, event):
        """
        Host state change callback

        Logs the transition, restarts the time-in-state tracking, clears a
        lock or unlock action once the NFVI admin state reflects it,
        persists the host and notifies the host director.
        """
        from nfv_vim import directors

        log_args = (self.name, prev_state, state, event)
        DLOG.info(
            "Host %s FSM State-Change: prev_state=%s, state=%s, event=%s." %
            log_args)

        self._elapsed_time_in_state = 0
        self._last_state_timestamp = timers.get_monotonic_timestamp_in_ms()

        # A lock action is done once the host is disabled and reported locked
        if self.is_locking() \
                and host_fsm.HOST_STATE.DISABLED == self.state \
                and nfvi.objects.v1.HOST_ADMIN_STATE.LOCKED \
                == self.nfvi_host.admin_state:
            self._action = self._ACTION_NONE

        # An unlock action is done once the host is reported unlocked
        if self.is_unlocking() \
                and nfvi.objects.v1.HOST_ADMIN_STATE.UNLOCKED \
                == self.nfvi_host.admin_state:
            self._action = self._ACTION_NONE

        self._persist()

        directors.get_host_director().host_state_change_notify(self)
Example #7
0
 def stall_elapsed_secs(self):
     """
     Returns the whole number of seconds the thread has been stalled, or 0
     when no stall timestamp is recorded
     """
     if self._stall_timestamp_ms is None:
         return 0

     stalled_ms = (timers.get_monotonic_timestamp_in_ms() -
                   self._stall_timestamp_ms)
     return int(stalled_ms / 1000)
Example #8
0
 def enter(self, instance):
     """
     Entering cold migrate state

     Records the start time and originating host on the instance action
     state machine, then creates and starts the cold migrate task.
     """
     DLOG.info("Entering state (%s) for %s." % (self.name, instance.name))

     entered_ms = timers.get_monotonic_timestamp_in_ms()
     instance.action_fsm.start_time = entered_ms
     instance.action_fsm.wait_time = 0
     instance.action_fsm.from_host_name = instance.host_name

     cold_migrate_task = ColdMigrateTask(instance)
     instance.task = cold_migrate_task
     instance.task.start()
def _audit_dump_debug_info(do_dump=True):
    """
    Dump Audit Debug Information

    With do_dump set, the histogram data is displayed at most once every
    30 seconds plus the current back-off, and each dump grows the back-off
    by 20 seconds up to 600 seconds.  With do_dump cleared, the back-off
    shrinks by 20 seconds down to zero.
    """
    global _audit_debug_dump_back_off_ms, _last_audit_debug_dump_ms

    elapsed_ms = timers.get_monotonic_timestamp_in_ms() - _last_audit_debug_dump_ms

    if not do_dump:
        _audit_debug_dump_back_off_ms = max(
            0, _audit_debug_dump_back_off_ms - 20000)
        return

    if elapsed_ms < 30000 + _audit_debug_dump_back_off_ms:
        # Too soon since the last dump
        return

    histogram.display_histogram_data(pretty_format=False)
    _last_audit_debug_dump_ms = timers.get_monotonic_timestamp_in_ms()
    _audit_debug_dump_back_off_ms = min(
        600000, _audit_debug_dump_back_off_ms + 20000)
Example #10
0
    def enter(self, host):
        """
        Entering deleting state

        Records the state entry time on the host, clears its reason, then
        creates and starts the delete host task.
        """
        DLOG.info("Entering state (%s) for %s." % (self.name, host.name))

        entered_ms = timers.get_monotonic_timestamp_in_ms()
        host.fsm_start_time = entered_ms
        host.clear_reason()

        delete_task = DeleteHostTask(host)
        host.task = delete_task
        host.task.start()
Example #11
0
    def elapsed_time_in_state(self):
        """
        Returns the number of seconds this host has been in the current
        state: the stored elapsed time plus the whole seconds since the
        last state timestamp, when one is set
        """
        total_secs = self._elapsed_time_in_state

        if 0 == self._last_state_timestamp:
            return total_secs

        now_ms = timers.get_monotonic_timestamp_in_ms()
        since_last_change_secs = (now_ms - self._last_state_timestamp) / 1000
        return total_secs + int(since_last_change_secs)
Example #12
0
    def __init__(self,
                 nfvi_host,
                 initial_state=None,
                 action=None,
                 elapsed_time_in_state=0,
                 upgrade_inprogress=False,
                 recover_instances=True,
                 host_services_locked=False):
        """
        Create a host object around the given NFVI host

        The state machine starts in initial_state (the INITIAL host state
        when not given) and the action defaults to no action.  Each
        configured host service is recorded as enabled or disabled
        according to whether the host is enabled.
        """
        super(Host, self).__init__('1.0.0')

        fsm_state = (host_fsm.HOST_STATE.INITIAL
                     if initial_state is None else initial_state)
        host_action = self._ACTION_NONE if action is None else action

        self._elapsed_time_in_state = int(elapsed_time_in_state)
        self._task = state_machine.StateTask('EmptyTask', list())
        self._action = host_action
        self._reason = ''
        self._upgrade_inprogress = upgrade_inprogress
        self._recover_instances = recover_instances
        self._host_services_locked = host_services_locked
        self._nfvi_host = nfvi_host
        self._fsm = host_fsm.HostStateMachine(self, fsm_state)
        self._fsm.register_state_change_callback(self._state_change_callback)
        self._last_state_timestamp = timers.get_monotonic_timestamp_in_ms()
        self._fail_notification_required = False
        self._fsm_start_time = None
        self._host_service_state = dict()

        # Same service order as the individual checks this loop replaces
        for service in (HOST_SERVICES.COMPUTE, HOST_SERVICES.NETWORK,
                        HOST_SERVICES.GUEST, HOST_SERVICES.CONTAINER):
            if self.host_service_configured(service):
                if self.is_enabled():
                    service_state = HOST_SERVICE_STATE.ENABLED
                else:
                    service_state = HOST_SERVICE_STATE.DISABLED
                self._host_service_state[service] = service_state

        self._alarms = list()
        self._events = list()
Example #13
0
def _dor_timer():
    """
    DOR timer

    On each resume, DOR is marked completed when the DOR complete file
    exists or when the local uptime exceeds the complete threshold (the
    file is created in that case).  Until then, DOR is marked stabilized
    once the process has run longer than the stabilize threshold, and a
    system state query is issued unless one is in progress or the state
    has already been gathered.
    """
    global _alarm_data
    global _dor_stabilized, _dor_completed
    global _system_state_get_inprogress

    while not _dor_completed:
        (yield)

        if _dor_completed:
            break

        completed_by_file = os.path.exists(NFV_VIM_DOR_COMPLETE_FILE)
        completed_by_uptime = (
            not completed_by_file and
            local_uptime_in_secs() > _dor_complete_uptime)

        if completed_by_uptime:
            # Leave the marker file behind for later checks
            open(NFV_VIM_DOR_COMPLETE_FILE, 'w').close()

        if completed_by_file or completed_by_uptime:
            _dor_stabilized = True
            _dor_completed = True
            if _alarm_data is not None:
                alarm.clear_general_alarm(_alarm_data)
                event_log.issue_general_log(
                    event_log.EVENT_ID.MULTI_NODE_RECOVERY_MODE_EXIT)
                _alarm_data = None
            DLOG.info("DOR completed.")
            break

        running_ms = (timers.get_monotonic_timestamp_in_ms() -
                      _process_start_timestamp_ms)
        elapsed_secs = running_ms / 1000

        if not _dor_stabilized and elapsed_secs > _dor_stabilize_uptime:
            _dor_stabilized = True
            DLOG.info("DOR stabilized.")

        if not _system_state_get_inprogress and not _system_state_gathered:
            nfvi.nfvi_get_system_state(_system_state_query_callback())
            _system_state_get_inprogress = True
Example #14
0
    def get_task_work_result(self):
        """
        Returns the result of task work completed

        Records two histogram samples on the way: the worker execution time
        when the result carries one, and the time since the result's create
        timestamp.
        """
        result = self._worker.get_result()

        ancillary = result.ancillary_result_data
        if hasattr(ancillary, 'execution_time'):
            histogram.add_histogram_data(
                result.name + ' [worker-execution-time]',
                ancillary.execution_time, 'secs')

        since_create_ms = (timers.get_monotonic_timestamp_in_ms() -
                           result.create_timestamp_ms)
        histogram.add_histogram_data(result.name + ' [execution-time]',
                                     since_create_ms / 1000, 'secs')

        return result
Example #15
0
    def __init__(self, timeout, target, *args, **kwargs):
        """
        Create task work

        Takes the next identifier from the class-wide counter, names the
        work after the target callable and stamps the creation time.
        """
        self._id = TaskWork._id
        self._task_id = None

        self._target = target
        self._name = target.__name__
        self._args = list(args)
        self._kwargs = dict(kwargs)
        self._timeout_in_secs = timeout

        self._result = None
        self._ancillary_result_data = None
        self._create_timestamp_ms = timers.get_monotonic_timestamp_in_ms()

        created_details = (self._id, self._name, self._timeout_in_secs)
        DLOG.debug("TaskWork created, id=%s, name=%s, timeout_in_secs=%i." %
                   created_details)

        # Advance the class-wide counter for the next task work
        TaskWork._id += 1
Example #16
0
    def do_check(self):
        """
        Check the Thread for progress

        On each resume the progress marker is compared with the value seen
        on the previous resume.  An unchanged marker is logged as a stall
        (the stall timestamp is set on first detection); a changed marker
        clears the stall timestamp.
        """
        while True:
            (yield)
            previous_value = self._last_marker_value

            if previous_value is not None:
                if previous_value != self._progress_marker.value:
                    # Progress was made since the previous check
                    self._stall_timestamp_ms = None
                else:
                    if self._stall_timestamp_ms is None:
                        self._stall_timestamp_ms = \
                            timers.get_monotonic_timestamp_in_ms()

                    stall_details = (self._name, self._progress_marker.value,
                                     self.stall_elapsed_secs)
                    DLOG.error("Thread %s stalled, progress_marker=%s, "
                               "elapsed_secs=%s." % stall_details)

            self._last_marker_value = self._progress_marker.value
Example #17
0
def selobj_dispatch(timeout_in_ms):
    """
    Dispatch selection objects that have become readable or writeable
    within the given time period

    Each ready selection object is sent to its registered coroutine and the
    time spent is recorded in the histogram.  A coroutine that finishes is
    unregistered.
    """
    from nfv_common import histogram
    from nfv_common import timers

    global _read_callbacks, _write_callbacks, _error_callbacks

    def _timed_send(callbacks, selobj, label):
        # Send selobj to its coroutine in callbacks, if any, and record the
        # duration under the given histogram label prefix.
        callback = callbacks.get(selobj, None)
        if callback is None:
            return

        began_ms = timers.get_monotonic_timestamp_in_ms()
        try:
            callback.send(selobj)
        except StopIteration:
            callbacks.pop(selobj)
        spent_ms = timers.get_monotonic_timestamp_in_ms() - began_ms
        histogram.add_histogram_data(
            label + callback.__name__, spent_ms / 100, "decisecond")

    read_objs = _read_callbacks.keys()
    write_objs = _write_callbacks.keys()

    try:
        ready = select.select(read_objs, write_objs, [],
                              timeout_in_ms / 1000.0)
        readable, writeable, in_error = ready

        for selobj in readable:
            _timed_send(_read_callbacks, selobj, "selobj read: ")

        for selobj in writeable:
            _timed_send(_write_callbacks, selobj, "selobj write: ")

        for selobj in in_error:
            _timed_send(_error_callbacks, selobj, "selobj error: ")

            # An object in error is no longer dispatched for read or write
            _read_callbacks.pop(selobj, None)
            _write_callbacks.pop(selobj, None)

    except (OSError, socket.error, select.error) as e:
        if errno.EINTR == e.args[0]:
            pass
Example #18
0
    def handle_event(self, instance, event, event_data=None):
        """
        Handle event while in the cold migrate state

        :param instance: instance being cold migrated
        :param event: INSTANCE_EVENT value to process
        :param event_data: optional dict; its 'reason' entry is used as the
                           failure reason
        :returns: name of the state the instance should be in after the
                  event
        """
        from nfv_vim import directors
        instance_director = directors.get_instance_director()

        def _fail_on_timeout():
            # Fail the action and report the migrate as timed out.  Every
            # timeout path of this state reports through the migrate
            # completion handler.
            instance.fail_action(instance.action_fsm_action_type, 'timeout')
            instance_director.instance_migrate_complete(
                instance,
                instance.action_fsm.from_host_name,
                failed=False,
                timed_out=True)
            return INSTANCE_STATE.INITIAL

        if event_data is not None:
            reason = event_data.get('reason', '')
        else:
            reason = ''

        # Give the running task first refusal on the event
        if instance.task.inprogress():
            if instance.task.handle_event(event, event_data):
                return self.name

        if INSTANCE_EVENT.TASK_STOP == event:
            return INSTANCE_STATE.INITIAL

        elif INSTANCE_EVENT.NFVI_RESIZED == event:
            from_host_name = instance.action_fsm.from_host_name
            instance_director.instance_migrate_complete(
                instance, from_host_name)
            return INSTANCE_STATE.COLD_MIGRATE_CONFIRM

        elif INSTANCE_EVENT.TASK_COMPLETED == event:
            if instance.action_fsm is not None:
                action_data = instance.action_fsm_data
                if action_data is not None:
                    if action_data.initiated_from_cli():
                        DLOG.debug("Cold-Migrate complete for %s, initiated "
                                   "from cli." % instance.name)
                        return INSTANCE_STATE.INITIAL

            DLOG.debug("Cold-Migrate inprogress for %s." % instance.name)

        elif INSTANCE_EVENT.TASK_FAILED == event:
            DLOG.info("Cold-Migrate failed for %s." % instance.name)
            instance.fail_action(instance.action_fsm_action_type, reason)
            from_host_name = instance.action_fsm.from_host_name
            instance_director.instance_migrate_complete(instance,
                                                        from_host_name,
                                                        failed=True)
            return INSTANCE_STATE.INITIAL

        elif INSTANCE_EVENT.TASK_TIMEOUT == event:
            DLOG.info("Cold-Migrate timed out for %s." % instance.name)

        elif INSTANCE_EVENT.AUDIT == event:
            if not (instance.task.inprogress() or instance.is_resizing()):
                # Neither the task nor a resize is active: allow 60 seconds
                # from the first audit that observes this before giving up.
                if 0 == instance.action_fsm.wait_time:
                    instance.action_fsm.wait_time \
                        = timers.get_monotonic_timestamp_in_ms()

                now_ms = timers.get_monotonic_timestamp_in_ms()
                secs_expired = (now_ms - instance.action_fsm.wait_time) / 1000
                if 60 <= secs_expired:
                    return _fail_on_timeout()

            else:
                now_ms = timers.get_monotonic_timestamp_in_ms()
                secs_expired = (now_ms - instance.action_fsm.start_time) / 1000
                if instance.max_cold_migrate_wait_in_secs <= secs_expired \
                        or instance.task.timed_out():
                    return _fail_on_timeout()

        else:
            DLOG.verbose("Ignoring %s event for %s." % (event, instance.name))

        return self.name
Example #19
0
def _rest_api_request(token_id,
                      method,
                      api_cmd,
                      api_cmd_headers=None,
                      api_cmd_payload=None):
    """
    Internal: make a rest-api request

    :param token_id: value sent in the X-Auth-Token header
    :param method: HTTP method name used for the request
    :param api_cmd: URL of the request
    :param api_cmd_headers: optional dict of additional request headers
    :param api_cmd_payload: optional request body
    :returns: Result holding the decoded JSON response (an empty dict for
              an empty body) and an Object with status_code, headers, the
              raw response and, on success, execution_time in seconds.  A
              302 response is returned as a Result holding the raw body.
    :raises OpenStackRestAPIException: on any other HTTP error
    :raises OpenStackException: on a URL error
    """
    headers_per_hop = frozenset([
        'connection', 'keep-alive', 'proxy-authenticate',
        'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade'
    ])

    def _end_to_end_headers(message):
        # Return (Capitalized-Name, value) tuples for every header that is
        # not hop-by-hop.  Header names are case-insensitive, so compare in
        # lower case.
        kept = list()  # list of tuples
        for key, value in message.items():
            if key.lower() not in headers_per_hop:
                cap_key = '-'.join((ck.capitalize() for ck in key.split('-')))
                kept.append((cap_key, value))
        return kept

    start_ms = timers.get_monotonic_timestamp_in_ms()

    try:
        request_info = urllib.request.Request(api_cmd)
        request_info.get_method = lambda: method
        request_info.add_header("X-Auth-Token", token_id)
        request_info.add_header("Accept", "application/json")

        if api_cmd_headers is not None:
            for header_type, header_value in api_cmd_headers.items():
                request_info.add_header(header_type, header_value)

        if api_cmd_payload is not None:
            # Request.add_data() no longer exists in Python 3.4+; assigning
            # the data attribute works there and on older versions.  Text
            # payloads are encoded because the body must be bytes.
            request_data = api_cmd_payload
            if isinstance(request_data, type(u'')):
                request_data = request_data.encode('utf-8')
            request_info.data = request_data

        DLOG.verbose("Rest-API method=%s, api_cmd=%s, api_cmd_headers=%s, "
                     "api_cmd_payload=%s" %
                     (method, api_cmd, api_cmd_headers, api_cmd_payload))

        # Enable Debug
        # handler = urllib.request.HTTPHandler(debuglevel=1)
        # opener = urllib.request.build_opener(handler)
        # urllib.request.install_opener(opener)

        request = urllib.request.urlopen(request_info)
        try:
            headers = _end_to_end_headers(request.info())
            response_raw = request.read()
        finally:
            # Release the connection even when reading the response fails
            request.close()

        # read() may return bytes, so test for emptiness instead of
        # comparing against an empty text string
        if not response_raw:
            response = dict()
        else:
            response = json.loads(response_raw)

        now_ms = timers.get_monotonic_timestamp_in_ms()
        elapsed_ms = now_ms - start_ms
        elapsed_secs = elapsed_ms / 1000

        DLOG.verbose("Rest-API code=%s, headers=%s, response=%s" %
                     (request.code, headers, response))

        log_info(
            "Rest-API status=%s, %s, %s, hdrs=%s, payload=%s, elapsed_ms=%s" %
            (request.code, method, api_cmd, api_cmd_headers, api_cmd_payload,
             int(elapsed_ms)))

        return Result(
            response,
            Object(status_code=request.code,
                   headers=headers,
                   response=response_raw,
                   execution_time=elapsed_secs))

    except urllib.error.HTTPError as e:
        headers = list()
        response_raw = dict()

        if e.fp is not None:
            headers = _end_to_end_headers(e.fp.info())
            response_raw = e.fp.read()

        now_ms = timers.get_monotonic_timestamp_in_ms()
        elapsed_ms = now_ms - start_ms

        log_error(
            "Rest-API status=%s, %s, %s, hdrs=%s, payload=%s, elapsed_ms=%s" %
            (e.code, method, api_cmd, api_cmd_headers, api_cmd_payload,
             int(elapsed_ms)))

        if httplib.FOUND == e.code:
            return Result(
                response_raw,
                Object(status_code=e.code,
                       headers=headers,
                       response=response_raw))

        # Attempt to get the reason for the http error from the response
        reason = ''
        for header, value in headers:
            if 'Content-Type' == header:
                if 'application/json' == value.split(';')[0]:
                    try:
                        response = json.loads(response_raw)

                        compute_fault = response.get('computeFault', None)
                        if compute_fault is not None:
                            message = compute_fault.get('message', None)
                            if message is not None:
                                reason = str(message.lower().rstrip('.'))

                        if not reason:
                            bad_request = response.get('badRequest', None)
                            if bad_request is not None:
                                message = bad_request.get('message', None)
                                if message is not None:
                                    reason = str(message.lower().rstrip('.'))

                        if not reason:
                            error_message = response.get('error_message', None)
                            if error_message is not None:
                                # error_message is itself a JSON document
                                error_message = json.loads(error_message)
                                message = error_message.get(
                                    'faultstring', None)
                                if message is not None:
                                    reason = str(message.lower().rstrip('.'))

                    except ValueError:
                        # Body is not valid JSON; leave the reason empty
                        pass

        raise OpenStackRestAPIException(method, api_cmd, api_cmd_headers,
                                        api_cmd_payload, e.code, str(e),
                                        str(e), headers, response_raw, reason)

    except urllib.error.URLError as e:
        now_ms = timers.get_monotonic_timestamp_in_ms()
        elapsed_ms = now_ms - start_ms

        log_error(
            "Rest-API status=ERR, %s, %s, hdrs=%s, payload=%s, elapsed_ms=%s" %
            (method, api_cmd, api_cmd_headers, api_cmd_payload,
             int(elapsed_ms)))

        raise OpenStackException(method, api_cmd, api_cmd_headers,
                                 api_cmd_payload, str(e), str(e))
Example #20
0
from nfv_vim import tables

# Module logger, registered under the 'nfv_vim.dor' name.
DLOG = debug.debug_get_logger('nfv_vim.dor')

# Module-level state, initialised to its defaults at import time.
# NOTE(review): the uptime, percentage and minimum-hosts values are
# presumably filled in from configuration elsewhere in this module - confirm.
_alarm_data = None
_minimum_hosts = 0
_dor_stabilized = False
_dor_completed = False
_dor_process_uptime = 0
_dor_stabilize_uptime = 0
_dor_complete_uptime = 0
_dor_complete_percentage = 0
_system_state_get_inprogress = False
_system_state_gathered = False

# Monotonic timestamp (in milliseconds) taken when this module is imported.
_process_start_timestamp_ms = timers.get_monotonic_timestamp_in_ms()

# NOTE(review): presumably a marker file whose presence records that DOR
# has completed - confirm against the code that reads and writes it.
NFV_VIM_DOR_COMPLETE_FILE = '/var/run/.nfv-vim.dor_complete'


@coroutine
def _system_state_query_callback():
    """
    System state query callback

    Generator-based callback: execution pauses at the yield expression and
    resumes when a value is sent in; that value is bound to ``response``.
    """
    # Module-level state this callback declares as global so that it can
    # rebind it.
    global _alarm_data
    global _minimum_hosts, _dor_stabilized, _dor_completed
    global _dor_complete_percentage
    global _system_state_get_inprogress, _system_state_gathered

    # Wait for the query result to be delivered to the generator.
    response = (yield)
Example #21
0
    def handle_event(self, instance, event, event_data=None):
        """
        Handle event while in the evacuate state

        :param instance: the instance being evacuated
        :param event: the INSTANCE_EVENT value to process
        :param event_data: optional mapping; only its 'reason' entry is
            read here (used as the failure reason on TASK_FAILED)
        :returns: the name of the state to be in next, either this state's
            own name (no transition) or INSTANCE_STATE.INITIAL
        """
        from nfv_vim import directors
        instance_director = directors.get_instance_director()

        if event_data is not None:
            reason = event_data.get('reason', '')
        else:
            reason = ''

        # An in-progress task gets the first chance at the event. A truthy
        # result from the task ends processing and keeps the current state.
        if instance.task.inprogress():
            if instance.task.handle_event(event, event_data):
                return self.name

        if INSTANCE_EVENT.TASK_STOP == event:
            return INSTANCE_STATE.INITIAL

        elif event in [INSTANCE_EVENT.NFVI_ENABLED, INSTANCE_EVENT.NFVI_DISABLED,
                       INSTANCE_EVENT.NFVI_HOST_CHANGED]:
            # The instance is on a different host than the one recorded
            # when the action started and is no longer rebuilding: the
            # evacuate is reported as complete.
            if instance.action_fsm.from_host_name != instance.host_name and \
                    not instance.is_rebuilding():
                instance_director.instance_evacuate_complete(
                    instance, instance.action_fsm.from_host_name)
                return INSTANCE_STATE.INITIAL
            elif INSTANCE_EVENT.NFVI_DISABLED == event:
                if instance.is_rebuilding():
                    if not instance._evacuate_started:
                        DLOG.info("Evacuate starting for %s." % instance.name)
                        # Evacuate has started
                        instance._evacuate_started = True
                elif instance._evacuate_started and \
                        instance.action_fsm.from_host_name == instance.host_name:
                    DLOG.info("Evacuate no longer in progress for %s." %
                              instance.name)
                    # Evacuate was in progress once, but is no longer and
                    # the host has not changed. Nova does this (for example) if
                    # it fails to schedule a destination host for the evacuate.
                    # Look at me - I'm evacuating. Oh - guess I decided not to.
                    # Stupid nova.
                    # Tell the instance director that the evacuate failed so it
                    # can update any host operation that may be in progress.
                    instance_director.instance_evacuate_complete(
                        instance, instance.action_fsm.from_host_name,
                        failed=True)
                    return INSTANCE_STATE.INITIAL

        elif INSTANCE_EVENT.TASK_COMPLETED == event:
            # Logged only; the state is left on a later event.
            DLOG.debug("Evacuate inprogress for %s." % instance.name)

        elif INSTANCE_EVENT.TASK_FAILED == event:
            DLOG.info("Evacuate failed for %s." % instance.name)
            instance.fail_action(instance.action_fsm_action_type, reason)
            instance_director.instance_evacuate_complete(
                instance, instance.action_fsm.from_host_name, failed=True)
            return INSTANCE_STATE.INITIAL

        elif INSTANCE_EVENT.TASK_TIMEOUT == event:
            # Logged only; the timeouts are enforced by the AUDIT branch.
            DLOG.info("Evacuate timed out for %s." % instance.name)

        elif INSTANCE_EVENT.AUDIT == event:
            # Same completion test as for the host-related events above.
            if instance.action_fsm.from_host_name != instance.host_name and \
                    not instance.is_rebuilding():
                instance_director.instance_evacuate_complete(
                    instance, instance.action_fsm.from_host_name)
                return INSTANCE_STATE.INITIAL

            elif not (instance.task.inprogress() or instance.is_rebuilding()):
                # Neither the task nor a rebuild is active. The first audit
                # that observes this records a timestamp in wait_time (0
                # means "not yet recorded").
                if 0 == instance.action_fsm.wait_time:
                    instance.action_fsm.wait_time \
                        = timers.get_monotonic_timestamp_in_ms()

                # Give up once 120 seconds have passed since wait_time.
                now_ms = timers.get_monotonic_timestamp_in_ms()
                secs_expired = (now_ms - instance.action_fsm.wait_time) / 1000
                if 120 <= secs_expired:
                    instance.fail_action(instance.action_fsm_action_type, 'timeout')
                    instance_director.instance_evacuate_complete(
                        instance, instance.action_fsm.from_host_name,
                        failed=False, timed_out=True)
                    return INSTANCE_STATE.INITIAL

            else:
                # The task is still in progress or the instance is
                # rebuilding: bound the total time since start_time
                # (assumed to be a monotonic timestamp in ms - TODO confirm)
                # by max_evacuate_wait_in_secs, otherwise honour the task's
                # own timeout.
                now_ms = timers.get_monotonic_timestamp_in_ms()
                secs_expired = (now_ms - instance.action_fsm.start_time) / 1000
                if instance.max_evacuate_wait_in_secs <= secs_expired:
                    instance.fail_action(instance.action_fsm_action_type, 'timeout')
                    instance_director.instance_evacuate_complete(
                        instance, instance.action_fsm.from_host_name,
                        failed=False, timed_out=True)
                    return INSTANCE_STATE.INITIAL

                elif instance.task.timed_out():
                    instance.fail_action(instance.action_fsm_action_type, 'timeout')
                    instance_director.instance_evacuate_complete(
                        instance, instance.action_fsm.from_host_name,
                        failed=False, timed_out=True)
                    return INSTANCE_STATE.INITIAL

        else:
            DLOG.verbose("Ignoring %s event for %s." % (event, instance.name))

        # No transition requested: remain in this state.
        return self.name
Example #22
0
    def nfvi_host_state_change(self,
                               nfvi_admin_state,
                               nfvi_oper_state,
                               nfvi_avail_status,
                               nfvi_data=None):
        """
        Process a host state report received from the NFVI layer.

        When nfvi_data is supplied it is stored on the NFVI host and
        persisted. Reports carrying an UNKNOWN administrative or operational
        state are then ignored. If any of the three reported values differs
        from what is recorded, the record is updated and persisted, the
        state change is handled, and the host director is told when the host
        has just become offline. Otherwise the host state machine is sent an
        ADD, DISABLE or ENABLE event according to its current state, or an
        AUDIT event once at least 30 seconds have passed since the last one.
        """
        if nfvi_data is not None:
            self._nfvi_host.nfvi_data = nfvi_data
            self._persist()

        if nfvi.objects.v1.HOST_ADMIN_STATE.UNKNOWN == nfvi_admin_state:
            DLOG.info("Ignoring unknown administrative state change for %s."
                      % self._nfvi_host.name)
            return

        if nfvi.objects.v1.HOST_OPER_STATE.UNKNOWN == nfvi_oper_state:
            DLOG.info("Ignoring unknown operation state change for %s."
                      % self._nfvi_host.name)
            return

        state_changed = (
            nfvi_admin_state != self._nfvi_host.admin_state or
            nfvi_oper_state != self._nfvi_host.oper_state or
            nfvi_avail_status != self._nfvi_host.avail_status)

        if state_changed:
            DLOG.debug("Host State-Change detected: nfvi_admin_state=%s "
                       "host_admin_state=%s, nfvi_oper_state=%s "
                       "host_oper_state=%s, nfvi_avail_state=%s "
                       "host_avail_status=%s, locking=%s unlocking=%s "
                       "fsm current_state=%s for %s." %
                       (nfvi_admin_state, self._nfvi_host.admin_state,
                        nfvi_oper_state, self._nfvi_host.oper_state,
                        nfvi_avail_status, self._nfvi_host.avail_status,
                        self.is_locking(), self.is_unlocking(),
                        self._fsm.current_state.name, self._nfvi_host.name))

            # Decide on the offline notification before the recorded
            # availability status is overwritten below.
            offline = nfvi.objects.v1.HOST_AVAIL_STATUS.OFFLINE
            notify_offline = (offline == nfvi_avail_status and
                              offline != self._nfvi_host.avail_status)

            self._nfvi_host.admin_state = nfvi_admin_state
            self._nfvi_host.oper_state = nfvi_oper_state
            self._nfvi_host.avail_status = nfvi_avail_status
            self._persist()
            self._nfvi_host_handle_state_change()

            if notify_offline:
                from nfv_vim import directors

                directors.get_host_director().host_offline(self)
            return

        # Nothing changed: nudge the state machine based on where it is.
        fsm_state = self._fsm.current_state.name

        if host_fsm.HOST_STATE.INITIAL == fsm_state or \
                host_fsm.HOST_STATE.CONFIGURE == fsm_state:
            self._fsm.handle_event(host_fsm.HOST_EVENT.ADD)
            return

        if host_fsm.HOST_STATE.ENABLED == fsm_state and \
                nfvi.objects.v1.HOST_OPER_STATE.DISABLED == nfvi_oper_state:
            self._fsm.handle_event(host_fsm.HOST_EVENT.DISABLE)
            return

        if host_fsm.HOST_STATE.DISABLED == fsm_state and \
                nfvi.objects.v1.HOST_OPER_STATE.ENABLED == nfvi_oper_state:
            self._fsm.handle_event(host_fsm.HOST_EVENT.ENABLE)
            return

        # Periodic audit, at most once every 30 seconds.
        now_ms = timers.get_monotonic_timestamp_in_ms()
        secs_expired = (now_ms - self._last_state_timestamp) / 1000
        if secs_expired < 30:
            return

        # A zero timestamp means no previous sample exists, so there is no
        # elapsed time to accumulate yet.
        if 0 != self._last_state_timestamp:
            self._elapsed_time_in_state += int(secs_expired)
        self._last_state_timestamp = now_ms
        self._persist()
        self._fsm.handle_event(host_fsm.HOST_EVENT.AUDIT)
Example #23
0
    def handle_event(self, instance, event, event_data=None):
        """
        Handle event while in the live migrate state

        :param instance: the instance being live migrated
        :param event: the INSTANCE_EVENT value to process
        :param event_data: optional mapping; only its 'reason' entry is
            read here (used as the failure reason on TASK_FAILED)
        :returns: the name of the state to be in next: this state's own
            name (no transition), INSTANCE_STATE.LIVE_MIGRATE_FINISH or
            INSTANCE_STATE.INITIAL
        """
        from nfv_vim import directors
        instance_director = directors.get_instance_director()

        if event_data is not None:
            reason = event_data.get('reason', '')
        else:
            reason = ''

        # An in-progress task gets the first chance at the event. A truthy
        # result from the task ends processing and keeps the current state.
        if instance.task.inprogress():
            if instance.task.handle_event(event, event_data):
                return self.name

        if INSTANCE_EVENT.TASK_STOP == event:
            return INSTANCE_STATE.INITIAL

        elif INSTANCE_EVENT.NFVI_HOST_CHANGED == event:
            # The instance is now on a different host than the one recorded
            # when the action started: the migration is reported complete.
            if instance.action_fsm.from_host_name != instance.host_name:
                DLOG.info("Live-Migrate for %s from host %s to host %s." %
                          (instance.name, instance.action_fsm.from_host_name,
                           instance.host_name))

                instance_director.instance_migrate_complete(
                    instance, instance.action_fsm.from_host_name)

                # The finish state is only entered when guest services are
                # provisioned for the instance.
                guest_services = instance.guest_services
                if guest_services.are_provisioned():
                    return INSTANCE_STATE.LIVE_MIGRATE_FINISH
                else:
                    return INSTANCE_STATE.INITIAL

        elif INSTANCE_EVENT.LIVE_MIGRATE_ROLLBACK == event:
            DLOG.info("Live-Migrate rollback for %s." % instance.name)

            guest_services = instance.guest_services
            # Tell the instance director that the live migrate failed so it
            # can update any host operation that may be in progress.
            instance_director.instance_migrate_complete(
                instance, instance.action_fsm.from_host_name, failed=True)
            if guest_services.are_provisioned():
                return INSTANCE_STATE.LIVE_MIGRATE_FINISH
            else:
                return INSTANCE_STATE.INITIAL

        elif INSTANCE_EVENT.TASK_COMPLETED == event:
            # Logged only; the state is left on a later event.
            DLOG.debug("Live-Migrate inprogress for %s." % instance.name)

        elif INSTANCE_EVENT.TASK_FAILED == event:
            DLOG.info("Live-Migrate failed for %s." % instance.name)
            instance.fail_action(instance.action_fsm_action_type, reason)
            instance_director.instance_migrate_complete(
                instance, instance.action_fsm.from_host_name, failed=True)
            return INSTANCE_STATE.INITIAL

        elif INSTANCE_EVENT.TASK_TIMEOUT == event:
            # Logged only; the timeouts are enforced by the AUDIT branch.
            DLOG.info("Live-Migrate timed out for %s." % instance.name)

        elif INSTANCE_EVENT.NFVI_ENABLED == event:
            if instance.is_migrating():
                if not instance._live_migration_started:
                    DLOG.info("Live-Migrate starting for %s." % instance.name)
                    # Live migration has started
                    instance._live_migration_started = True
            elif instance._live_migration_started and \
                    instance.action_fsm.from_host_name == instance.host_name:
                DLOG.info("Live-Migrate no longer in progress for %s." %
                          instance.name)
                # Live migration was in progress once, but is no longer and
                # the host has not changed. Nova does this (for example) if it
                # fails to schedule a destination host for the live migration.
                # Look at me - I'm migrating. Oh - guess I decided not to.
                # Stupid nova.
                # Tell the instance director that the live migrate failed so it
                # can update any host operation that may be in progress.
                guest_services = instance.guest_services
                instance_director.instance_migrate_complete(
                    instance, instance.action_fsm.from_host_name, failed=True)
                if guest_services.are_provisioned():
                    return INSTANCE_STATE.LIVE_MIGRATE_FINISH
                else:
                    return INSTANCE_STATE.INITIAL

        elif INSTANCE_EVENT.AUDIT == event:
            # Same completion test as for NFVI_HOST_CHANGED above.
            if instance.action_fsm.from_host_name != instance.host_name:
                instance_director.instance_migrate_complete(
                    instance, instance.action_fsm.from_host_name)

                guest_services = instance.guest_services
                if guest_services.are_provisioned():
                    return INSTANCE_STATE.LIVE_MIGRATE_FINISH
                else:
                    return INSTANCE_STATE.INITIAL

            elif not (instance.task.inprogress() or instance.is_migrating()):
                # Neither the task nor a migration is active. The first
                # audit that observes this records a timestamp in wait_time
                # (0 means "not yet recorded").
                if 0 == instance.action_fsm.wait_time:
                    instance.action_fsm.wait_time \
                        = timers.get_monotonic_timestamp_in_ms()

                # Give up once 60 seconds have passed since wait_time.
                now_ms = timers.get_monotonic_timestamp_in_ms()
                secs_expired = (now_ms - instance.action_fsm.wait_time) / 1000
                if 60 <= secs_expired:
                    instance.fail_action(instance.action_fsm_action_type,
                                         'timeout')
                    instance_director.instance_migrate_complete(
                        instance,
                        instance.action_fsm.from_host_name,
                        failed=False,
                        timed_out=True)
                    return INSTANCE_STATE.INITIAL

            else:
                # The task is still in progress or the instance is
                # migrating: bound the total time since start_time (assumed
                # to be a monotonic timestamp in ms - TODO confirm).
                now_ms = timers.get_monotonic_timestamp_in_ms()
                secs_expired = (now_ms - instance.action_fsm.start_time) / 1000
                max_live_migrate_wait_in_secs = \
                    instance.max_live_migrate_wait_in_secs
                if 0 != max_live_migrate_wait_in_secs:
                    # Add 60 seconds buffer on top of nova timeout value
                    max_wait = max_live_migrate_wait_in_secs + 60
                    if max_wait <= secs_expired:
                        instance.fail_action(instance.action_fsm_action_type,
                                             'timeout')
                        instance_director.instance_migrate_complete(
                            instance,
                            instance.action_fsm.from_host_name,
                            failed=False,
                            timed_out=True)
                        return INSTANCE_STATE.INITIAL

                # NOTE(review): this elif pairs with the
                # "0 != max_live_migrate_wait_in_secs" test above, so the
                # task's own timeout is only consulted when the maximum
                # wait is 0 - confirm this is intended.
                elif instance.task.timed_out():
                    instance.fail_action(instance.action_fsm_action_type,
                                         'timeout')
                    instance_director.instance_migrate_complete(
                        instance,
                        instance.action_fsm.from_host_name,
                        failed=False,
                        timed_out=True)
                    return INSTANCE_STATE.INITIAL

        else:
            DLOG.verbose("Ignoring %s event for %s." % (event, instance.name))

        # No transition requested: remain in this state.
        return self.name
Example #24
0
    def handle_event(self, instance, event, event_data=None):
        """
        Process an event received while the instance is in the start state.

        :param instance: the instance being started
        :param event: the INSTANCE_EVENT value to process
        :param event_data: optional mapping; only its 'reason' entry is
            read here (used as the failure reason on TASK_FAILED)
        :returns: INSTANCE_STATE.INITIAL when the start has finished
            (successfully or not), otherwise this state's own name
        """
        from nfv_vim import directors
        instance_director = directors.get_instance_director()

        def _start_complete(**outcome):
            # Report the outcome of the start to the instance director.
            instance_director.instance_start_complete(
                instance, instance.host_name, **outcome)

        reason = '' if event_data is None else event_data.get('reason', '')

        # An in-progress task gets the first chance at the event.
        if instance.task.inprogress() and \
                instance.task.handle_event(event, event_data):
            return self.name

        if INSTANCE_EVENT.TASK_STOP == event:
            _start_complete(failed=False, timed_out=False, cancelled=True)
            return INSTANCE_STATE.INITIAL

        if INSTANCE_EVENT.TASK_COMPLETED == event:
            DLOG.debug("Start inprogress for %s." % instance.name)
            # Reference point for the audit's 60 second wait below.
            instance.action_fsm.wait_time = \
                timers.get_monotonic_timestamp_in_ms()
            return self.name

        if INSTANCE_EVENT.TASK_FAILED == event:
            DLOG.info("Start failed for %s." % instance.name)
            instance.fail_action(instance.action_fsm_action_type, reason)
            _start_complete(failed=True)
            return INSTANCE_STATE.INITIAL

        if INSTANCE_EVENT.TASK_TIMEOUT == event:
            DLOG.info("Start timed out for %s." % instance.name)
            instance.fail_action(instance.action_fsm_action_type, 'timeout')
            _start_complete(failed=False, timed_out=True)
            return INSTANCE_STATE.INITIAL

        if INSTANCE_EVENT.NFVI_ENABLED == event:
            _start_complete()
            return INSTANCE_STATE.INITIAL

        if INSTANCE_EVENT.AUDIT == event:
            if instance.task.inprogress():
                return self.name

            if instance.is_enabled():
                _start_complete()
                return INSTANCE_STATE.INITIAL

            # Only wait 60 seconds for the instance to start.
            now_ms = timers.get_monotonic_timestamp_in_ms()
            waited_secs = (now_ms - instance.action_fsm.wait_time) / 1000
            if 60 <= waited_secs:
                instance.fail_action(instance.action_fsm_action_type,
                                     'timeout')
                _start_complete(failed=False, timed_out=True)
                return INSTANCE_STATE.INITIAL

            return self.name

        DLOG.verbose("Ignoring %s event for %s." % (event, instance.name))
        return self.name
Example #25
0
def process_main():
    """
    Virtual Infrastructure Manager - Main

    Installs the signal handlers, parses the command line, loads the
    configuration and initializes the process, then runs the dispatch loop
    until ``stay_on`` becomes false or a subsystem sanity check fails. If
    the initial initialization did not complete, it is retried every 10
    seconds for up to 3 minutes, after which the process exits with status
    200. On the way out the not-running marker file is created and the
    process is finalized under a 10 second watchdog.
    """
    def _force_exit(signum=None, frame=None):
        # Signal handlers are invoked with (signum, frame). Without these
        # parameters the SIGALRM watchdog would raise a TypeError instead
        # of performing the hard exit.
        os._exit(-1)

    global do_reload, dump_data_captured, reset_data_captured

    process_start_time = timers.get_monotonic_timestamp_in_ms()

    try:
        # signal.signal(signal.SIGTERM, process_signal_handler)
        signal.signal(signal.SIGINT, process_signal_handler)
        signal.signal(signal.SIGHUP, process_signal_handler)
        signal.signal(signal.SIGUSR1, process_signal_handler)
        signal.signal(signal.SIGUSR2, process_signal_handler)

        parser = argparse.ArgumentParser()
        parser.add_argument('-c', '--config', help='configuration file')
        parser.add_argument('-t', '--tox', action="store_true",
                            help='tox test environment')
        args = parser.parse_args()
        config.load(args.config)

        if args.tox:
            # Append the tox root directory to the system path to get
            # the config.ini and debug.ini files.
            debug_ini = sys.prefix + '/' + config.CONF['debug']['config_file']
            config.CONF['debug']['config_file'] = debug_ini

        init_complete = process_initialize()
        last_init_time = timers.get_monotonic_timestamp_in_ms()

        DLOG.info("Started")

        while stay_on:
            selobj.selobj_dispatch(PROCESS_TICK_INTERVAL_IN_MS)
            timers.timers_schedule()

            if not alarm.alarm_subsystem_sane():
                DLOG.error("Alarm subsystem is not sane, exiting")
                break

            if not event_log.event_log_subsystem_sane():
                DLOG.error("Event-Log subsystem is not sane, exiting")
                break

            # The flags below are module globals; each one is cleared here
            # once its request has been serviced.
            if do_reload:
                DLOG.info("Reload signalled.")
                debug.debug_reload_config()
                DLOG.info("Reload complete.")
                do_reload = False

            if dump_data_captured:
                DLOG.info("Dump captured data signalled.")
                histogram.display_histogram_data()
                profiler.profile_memory_dump()
                DLOG.info("Dump captured data complete.")
                dump_data_captured = False

            if reset_data_captured:
                DLOG.info("Reset captured data signalled.")
                histogram.reset_histogram_data()
                profiler.profile_memory_set_reference()
                DLOG.info("Reset captured data complete.")
                reset_data_captured = False

            if not init_complete:
                # Retry initialization for up to 3 minutes.
                now_ms = timers.get_monotonic_timestamp_in_ms()
                secs_expired = (now_ms - process_start_time) / 1000
                if secs_expired < 180:
                    time_since_init = (now_ms - last_init_time) / 1000
                    # Reattempt initialization every 10 seconds.
                    if time_since_init > 10:
                        init_complete = process_reinitialize()
                        last_init_time = timers.get_monotonic_timestamp_in_ms()
                else:
                    DLOG.warn("Initialization failed - exiting.")
                    sys.exit(200)

    except KeyboardInterrupt:
        print("Keyboard Interrupt received.")

    except Exception as e:
        DLOG.exception("%s" % e)
        sys.exit(200)

    finally:
        open(PROCESS_NOT_RUNNING_FILE, 'w').close()
        # Allow up to 10 seconds for the process to shut down. If the
        # process_finalize hangs, we will do a hard exit.
        signal.signal(signal.SIGALRM, _force_exit)
        signal.alarm(10)
        process_finalize()