def put_vm_status(self, status_blob, sas_url, config_blob_type=None):
        """
        Try to upload the VM status via the host plugin /status channel
        :param sas_url: the blob SAS url to pass to the host plugin
        :param config_blob_type: the blob type from the extension config
        :type status_blob: StatusBlob
        """
        if not self.ensure_initialized():
            raise ProtocolError("HostGAPlugin: HostGAPlugin is not available")

        if status_blob is None or status_blob.vm_status is None:
            raise ProtocolError("HostGAPlugin: Status blob was not provided")

        logger.verbose("HostGAPlugin: Posting VM status")
        try:
            blob_type = status_blob.type if status_blob.type else config_blob_type

            if blob_type == "BlockBlob":
                self._put_block_blob_status(sas_url, status_blob)
            else:
                self._put_page_blob_status(sas_url, status_blob)

            if not HostPluginProtocol.is_default_channel():
                logger.info("HostGAPlugin: Setting host plugin as default channel")
                HostPluginProtocol.set_default_channel(True)
        except Exception as e:
            message = "HostGAPlugin: Exception Put VM status: {0}".format(e)
            logger.error(message)
            from azurelinuxagent.common.event import WALAEventOperation, report_event
            report_event(op=WALAEventOperation.ReportStatus,
                         is_success=False,
                         message=message)
            logger.warn("HostGAPlugin: resetting default channel")
            HostPluginProtocol.set_default_channel(False)
Exemple #2
0
 def _load_error(self):
     try:
         self.error = GuestAgentError(self.get_agent_error_file())
         self.error.load()
         logger.verbose(u"Agent {0} error state: {1}", self.name, ustr(self.error))
     except Exception as e:
         logger.warn(u"Agent {0} failed loading error state: {1}", self.name, ustr(e))
Exemple #3
0
    def test_console_appender(self):
        logger.add_logger_appender(logger.AppenderType.CONSOLE,
                                   logger.LogLevel.WARNING,
                                   path=self.log_file)

        logger.verbose("test-verbose")
        with open(self.log_file) as logfile:
            logcontent = logfile.readlines()
            # Levels are honored and Verbose should not be written.
            self.assertEqual(0, len(logcontent))

        logger.info("test-info")
        with open(self.log_file) as logfile:
            logcontent = logfile.readlines()
            # Levels are honored and Info should not be written.
            self.assertEqual(0, len(logcontent))

        # As console has a mode of w, it'll always only have 1 line only.
        logger.warn("test-warn")
        with open(self.log_file) as logfile:
            logcontent = logfile.readlines()
            self.assertEqual(1, len(logcontent))
            self.assertRegex(logcontent[0], r"(.*WARNING\s\w+\s*test-warn.*)")

        logger.error("test-error")
        with open(self.log_file) as logfile:
            logcontent = logfile.readlines()
            # Levels are honored and Info, Verbose should not be written.
            self.assertEqual(1, len(logcontent))
            self.assertRegex(logcontent[0], r"(.*ERROR\s\w+\s*test-error.*)")
Exemple #4
0
    def __init__(self, path=None, pkg=None, host=None):
        self.pkg = pkg
        self.host = host
        version = None
        if path is not None:
            m = AGENT_DIR_PATTERN.match(path)
            if m == None:
                raise UpdateError(u"Illegal agent directory: {0}".format(path))
            version = m.group(1)
        elif self.pkg is not None:
            version = pkg.version

        if version == None:
            raise UpdateError(u"Illegal agent version: {0}".format(version))
        self.version = FlexibleVersion(version)

        location = u"disk" if path is not None else u"package"
        logger.verbose(u"Instantiating Agent {0} from {1}", self.name, location)

        self.error = None
        self.supported = None

        self._load_error()
        self._load_supported()

        self._ensure_downloaded()
        return
Exemple #5
0
    def send_host_plugin_heartbeat(self):
        """
        Send a health signal every HOST_PLUGIN_HEARTBEAT_PERIOD. The signal is 'Healthy' when we have been able to
        communicate with HostGAPlugin at least once in the last HOST_PLUGIN_HEALTH_PERIOD.
        """
        if self.last_host_plugin_heartbeat is None:
            self.last_host_plugin_heartbeat = datetime.datetime.utcnow() - MonitorHandler.HOST_PLUGIN_HEARTBEAT_PERIOD

        if datetime.datetime.utcnow() >= (self.last_host_plugin_heartbeat + MonitorHandler.HOST_PLUGIN_HEARTBEAT_PERIOD):
            try:
                host_plugin = self.protocol.client.get_host_plugin()
                host_plugin.ensure_initialized()
                is_currently_healthy = host_plugin.get_health()

                if is_currently_healthy:
                    self.host_plugin_errorstate.reset()
                else:
                    self.host_plugin_errorstate.incr()

                is_healthy = self.host_plugin_errorstate.is_triggered() is False
                logger.verbose("HostGAPlugin health: {0}", is_healthy)

                self.health_service.report_host_plugin_heartbeat(is_healthy)

            except Exception as e:
                msg = "Exception sending host plugin heartbeat: {0}".format(ustr(e))
                add_event(
                    name=AGENT_NAME,
                    version=CURRENT_VERSION,
                    op=WALAEventOperation.HostPluginHeartbeat,
                    is_success=False,
                    message=msg,
                    log_event=False)

            self.last_host_plugin_heartbeat = datetime.datetime.utcnow()
Exemple #6
0
def run_get_output(cmd, chk_err=True, log_cmd=True, expected_errors=[]):
    """
    Wrapper for subprocess.check_output.
    Execute 'cmd'.  Returns return code and STDOUT, trapping expected
    exceptions.
    Reports exceptions to Error if chk_err parameter is True
    """
    if log_cmd:
        logger.verbose(u"Command: [{0}]", cmd)
    try:
        output = subprocess.check_output(cmd,
                                         stderr=subprocess.STDOUT,
                                         shell=True)
        output = ustr(output,
                      encoding='utf-8',
                      errors="backslashreplace")
    except subprocess.CalledProcessError as e:
        output = ustr(e.output,
                      encoding='utf-8',
                      errors="backslashreplace")
        if chk_err:
            msg = u"Command: [{0}], " \
                  u"return code: [{1}], " \
                  u"result: [{2}]".format(cmd, e.returncode, output)
            if e.returncode in expected_errors:
                logger.info(msg)
            else:
                logger.error(msg)
        return e.returncode, output
    except Exception as e:
        if chk_err:
            logger.error(u"Command [{0}] raised unexpected exception: [{1}]"
                         .format(cmd, ustr(e)))
        return -1, ustr(e)
    return 0, output
Exemple #7
0
def run_get_output(cmd, chk_err=True, log_cmd=True):
    """
    Wrapper for subprocess.check_output.
    Execute 'cmd'.  Returns return code and STDOUT, trapping expected
    exceptions.
    Reports exceptions to Error if chk_err parameter is True
    """
    if log_cmd:
        logger.verbose(u"Run '{0}'", cmd)
    try:
        output = subprocess.check_output(cmd,
                                         stderr=subprocess.STDOUT,
                                         shell=True)
        output = ustr(output,
                      encoding='utf-8',
                      errors="backslashreplace")
    except Exception as e:
        if type(e) is subprocess.CalledProcessError:
            output = ustr(e.output,
                        encoding='utf-8',
                        errors="backslashreplace")
            if chk_err:
                if log_cmd:
                    logger.error(u"Command: '{0}'", e.cmd)
                logger.error(u"Return code: {0}", e.returncode)
                logger.error(u"Result: {0}", output)
            return e.returncode, output
        else:
            logger.error(
                u"'{0}' raised unexpected exception: '{1}'".format(
                    cmd, ustr(e)))
            return -1, ustr(e)
    return 0, output
Exemple #8
0
def validate_dhcp_resp(request, response):
    bytes_recv = len(response)
    if bytes_recv < 0xF6:
        logger.error("HandleDhcpResponse: Too few bytes received:{0}",
                     bytes_recv)
        return False

    logger.verbose("BytesReceived:{0}", hex(bytes_recv))
    logger.verbose("DHCP response:{0}", hex_dump(response, bytes_recv))

    # check transactionId, cookie, MAC address cookie should never mismatch
    # transactionId and MAC address may mismatch if we see a response
    # meant from another machine
    if not compare_bytes(request, response, 0xEC, 4):
        logger.verbose("Cookie not match:\nsend={0},\nreceive={1}",
                       hex_dump3(request, 0xEC, 4),
                       hex_dump3(response, 0xEC, 4))
        raise DhcpError("Cookie in dhcp respones doesn't match the request")

    if not compare_bytes(request, response, 4, 4):
        logger.verbose("TransactionID not match:\nsend={0},\nreceive={1}",
                       hex_dump3(request, 4, 4),
                       hex_dump3(response, 4, 4))
        raise DhcpError("TransactionID in dhcp respones "
                        "doesn't match the request")

    if not compare_bytes(request, response, 0x1C, 6):
        logger.verbose("Mac Address not match:\nsend={0},\nreceive={1}",
                       hex_dump3(request, 0x1C, 6),
                       hex_dump3(response, 0x1C, 6))
        raise DhcpError("Mac Addr in dhcp respones "
                        "doesn't match the request")
def run_get_output(cmd, chk_err=True, log_cmd=True):
    """
    Wrapper for subprocess.check_output.
    Execute 'cmd'.  Returns return code and STDOUT, trapping expected
    exceptions.
    Reports exceptions to Error if chk_err parameter is True
    """
    if log_cmd:
        logger.verbose(u"Run '{0}'", cmd)
    try:
        output = subprocess.check_output(cmd,
                                         stderr=subprocess.STDOUT,
                                         shell=True)
        output = ustr(output,
                      encoding='utf-8',
                      errors="backslashreplace")
    except subprocess.CalledProcessError as e:
        output = ustr(e.output,
                      encoding='utf-8',
                      errors="backslashreplace")
        if chk_err:
            if log_cmd:
                logger.error(u"Command: '{0}'", e.cmd)
            logger.error(u"Return code: {0}", e.returncode)
            logger.error(u"Result: {0}", output)
        return e.returncode, output
    return 0, output
Exemple #10
0
    def report_ext_handlers_status(self):
        """Go through handler_state dir, collect and report status"""
        vm_status = VMStatus(status="Ready", message="Guest Agent is running")
        if self.ext_handlers is not None:
            for ext_handler in self.ext_handlers.extHandlers:
                try:
                    self.report_ext_handler_status(vm_status, ext_handler)
                except ExtensionError as e:
                    add_event(AGENT_NAME,
                              version=CURRENT_VERSION,
                              op=WALAEventOperation.ExtensionProcessing,
                              is_success=False,
                              message=ustr(e))

        logger.verbose("Report vm agent status")
        try:
            self.protocol.report_vm_status(vm_status)
            if self.log_report:
                logger.verbose("Completed vm agent status report")
        except ProtocolError as e:
            message = "Failed to report vm agent status: {0}".format(e)
            add_event(AGENT_NAME,
                      version=CURRENT_VERSION,
                      op=WALAEventOperation.ExtensionProcessing,
                      is_success=False,
                      message=message)
Exemple #11
0
    def report_ext_handlers_status(self):
        """Go thru handler_state dir, collect and report status"""
        vm_status = VMStatus()
        vm_status.vmAgent.version = str(CURRENT_VERSION)
        vm_status.vmAgent.status = "Ready"
        vm_status.vmAgent.message = "Guest Agent is running"

        if self.ext_handlers is not None:
            for ext_handler in self.ext_handlers.extHandlers:
                try:
                    self.report_ext_handler_status(vm_status, ext_handler)
                except ExtensionError as e:
                    add_event(
                        AGENT_NAME,
                        version=CURRENT_VERSION,
                        is_success=False,
                        message=ustr(e))
        
        logger.verbose("Report vm agent status")
        try:
            self.protocol.report_vm_status(vm_status)
            if self.log_report:
                logger.verbose("Successfully reported vm agent status")
        except ProtocolError as e:
            message = "Failed to report vm agent status: {0}".format(e)
            add_event(AGENT_NAME, version=CURRENT_VERSION, is_success=False, message=message)
    def _parse_extensions_config(self, xml_text):
        xml_doc = parse_doc(xml_text)

        ga_families_list = find(xml_doc, "GAFamilies")
        ga_families = findall(ga_families_list, "GAFamily")

        for ga_family in ga_families:
            family = findtext(ga_family, "Name")
            uris_list = find(ga_family, "Uris")
            uris = findall(uris_list, "Uri")
            manifest = VMAgentManifest()
            manifest.family = family
            for uri in uris:
                manifest_uri = VMAgentManifestUri(uri=gettext(uri))
                manifest.versionsManifestUris.append(manifest_uri)
            self.vmagent_manifests.vmAgentManifests.append(manifest)

        self.__parse_plugins_and_settings_and_populate_ext_handlers(xml_doc)

        required_features_list = find(xml_doc, "RequiredFeatures")
        if required_features_list is not None:
            self._parse_required_features(required_features_list)

        self._status_upload_blob = findtext(xml_doc, "StatusUploadBlob")
        self.artifacts_profile_blob = findtext(xml_doc,
                                               "InVMArtifactsProfileBlob")

        status_upload_node = find(xml_doc, "StatusUploadBlob")
        self._status_upload_blob_type = getattrib(status_upload_node,
                                                  "statusBlobType")
        logger.verbose("Extension config shows status blob type as [{0}]",
                       self._status_upload_blob_type)

        self.in_vm_gs_metadata.parse_node(
            find(xml_doc, "InVMGoalStateMetaData"))
Exemple #13
0
    def set_cpu_limit(self, limit=None):
        """
        Limit this cgroup to a percentage of a single core. limit=10 means 10% of one core; 150 means 150%, which
        is useful only in multi-core systems.
        To limit a cgroup to utilize 10% of a single CPU, use the following commands:
            # echo 10000 > /cgroup/cpu/red/cpu.cfs_quota_us
            # echo 100000 > /cgroup/cpu/red/cpu.cfs_period_us

        :param limit:
        """
        if not CGroups.enabled():
            return

        if limit is None:
            return

        if 'cpu' in self.cgroups:
            total_units = float(self.get_parameter('cpu', 'cpu.cfs_period_us'))
            limit_units = int(
                self._convert_cpu_limit_to_fraction(limit) * total_units)
            cpu_shares_file = self._get_cgroup_file('cpu', 'cpu.cfs_quota_us')
            logger.verbose("writing {0} to {1}".format(limit_units,
                                                       cpu_shares_file))
            fileutil.write_file(cpu_shares_file, '{0}\n'.format(limit_units))
        else:
            raise CGroupsException(
                "CPU hierarchy not available in this cgroup")
Exemple #14
0
    def put_vm_status(self, status_blob, sas_url, config_blob_type=None):
        """
        Try to upload the VM status via the host plugin /status channel
        :param sas_url: the blob SAS url to pass to the host plugin
        :param config_blob_type: the blob type from the extension config
        :type status_blob: StatusBlob
        """
        if not self.ensure_initialized():
            raise ProtocolError("HostGAPlugin: HostGAPlugin is not available")

        if status_blob is None or status_blob.vm_status is None:
            raise ProtocolError("HostGAPlugin: Status blob was not provided")

        logger.verbose("HostGAPlugin: Posting VM status")
        try:

            blob_type = status_blob.type if status_blob.type else config_blob_type

            if blob_type == "BlockBlob":
                self._put_block_blob_status(sas_url, status_blob)
            else:
                self._put_page_blob_status(sas_url, status_blob)

        except Exception as e:
            # If the HostPlugin rejects the request,
            # let the error continue, but set to use the HostPlugin
            if isinstance(e, ResourceGoneError):
                logger.verbose(
                    "HostGAPlugin: Setting host plugin as default channel")
                HostPluginProtocol.set_default_channel(True)

            raise
Exemple #15
0
    def __init__(self, path=None, pkg=None, host=None):
        self.pkg = pkg
        self.host = host
        version = None
        if path is not None:
            m = AGENT_DIR_PATTERN.match(path)
            if m == None:
                raise UpdateError(u"Illegal agent directory: {0}".format(path))
            version = m.group(1)
        elif self.pkg is not None:
            version = pkg.version

        if version == None:
            raise UpdateError(u"Illegal agent version: {0}".format(version))
        self.version = FlexibleVersion(version)

        location = u"disk" if path is not None else u"package"
        logger.verbose(u"Instantiating Agent {0} from {1}", self.name,
                       location)

        self.error = None
        self.supported = None

        self._load_error()
        self._load_supported()

        self._ensure_downloaded()
        return
Exemple #16
0
 def _load_error(self):
     try:
         self.error = GuestAgentError(self.get_agent_error_file())
         self.error.load()
         logger.verbose(u"Agent {0} error state: {1}", self.name, ustr(self.error))
     except Exception as e:
         logger.warn(u"Agent {0} failed loading error state: {1}", self.name, ustr(e))
Exemple #17
0
    def _download(self):
        for uri in self.pkg.uris:
            if not HostPluginProtocol.is_default_channel() and self._fetch(
                    uri.uri):
                break
            elif self.host is not None and self.host.ensure_initialized():
                if not HostPluginProtocol.is_default_channel():
                    logger.warn(
                        "Download unsuccessful, falling back to host plugin")
                else:
                    logger.verbose("Using host plugin as default channel")

                uri, headers = self.host.get_artifact_request(
                    uri.uri, self.host.manifest_uri)
                if self._fetch(uri, headers=headers):
                    if not HostPluginProtocol.is_default_channel():
                        logger.verbose(
                            "Setting host plugin as default channel")
                        HostPluginProtocol.set_default_channel(True)
                    break
                else:
                    logger.warn("Host plugin download unsuccessful")
            else:
                logger.error("No download channels available")

        if not os.path.isfile(self.get_agent_pkg_path()):
            msg = u"Unable to download Agent {0} from any URI".format(
                self.name)
            add_event(AGENT_NAME,
                      op=WALAEventOperation.Download,
                      version=CURRENT_VERSION,
                      is_success=False,
                      message=msg)
            raise UpdateError(msg)
        return
Exemple #18
0
def validate_dhcp_resp(request, response):  # pylint: disable=R1710
    bytes_recv = len(response)
    if bytes_recv < 0xF6:
        logger.error("HandleDhcpResponse: Too few bytes received:{0}",
                     bytes_recv)
        return False

    logger.verbose("BytesReceived:{0}", hex(bytes_recv))
    logger.verbose("DHCP response:{0}", hex_dump(response, bytes_recv))

    # check transactionId, cookie, MAC address cookie should never mismatch
    # transactionId and MAC address may mismatch if we see a response
    # meant from another machine
    if not compare_bytes(request, response, 0xEC, 4):
        logger.verbose("Cookie not match:\nsend={0},\nreceive={1}",
                       hex_dump3(request, 0xEC, 4),
                       hex_dump3(response, 0xEC, 4))
        raise DhcpError("Cookie in dhcp respones doesn't match the request")

    if not compare_bytes(request, response, 4, 4):
        logger.verbose("TransactionID not match:\nsend={0},\nreceive={1}",
                       hex_dump3(request, 4, 4), hex_dump3(response, 4, 4))
        raise DhcpError("TransactionID in dhcp respones "
                        "doesn't match the request")

    if not compare_bytes(request, response, 0x1C, 6):
        logger.verbose("Mac Address not match:\nsend={0},\nreceive={1}",
                       hex_dump3(request, 0x1C, 6),
                       hex_dump3(response, 0x1C, 6))
        raise DhcpError("Mac Addr in dhcp respones "
                        "doesn't match the request")
Exemple #19
0
    def put_vm_status(self, status_blob, sas_url, config_blob_type=None):
        """
        Try to upload the VM status via the host plugin /status channel
        :param sas_url: the blob SAS url to pass to the host plugin
        :param config_blob_type: the blob type from the extension config
        :type status_blob: StatusBlob
        """
        if not self.ensure_initialized():
            raise ProtocolError("HostGAPlugin: HostGAPlugin is not available")

        if status_blob is None or status_blob.vm_status is None:
            raise ProtocolError("HostGAPlugin: Status blob was not provided")

        logger.verbose("HostGAPlugin: Posting VM status")
        try:
            blob_type = status_blob.type if status_blob.type else config_blob_type

            if blob_type == "BlockBlob":
                self._put_block_blob_status(sas_url, status_blob)
            else:
                self._put_page_blob_status(sas_url, status_blob)

            if not HostPluginProtocol.is_default_channel():
                logger.info(
                    "HostGAPlugin: Setting host plugin as default channel")
                HostPluginProtocol.set_default_channel(True)
        except Exception as e:
            message = "HostGAPlugin: Exception Put VM status: {0}, {1}".format(
                e, traceback.format_exc())
            from azurelinuxagent.common.event import WALAEventOperation, report_event
            report_event(op=WALAEventOperation.ReportStatus,
                         is_success=False,
                         message=message)
            logger.warn("HostGAPlugin: resetting default channel")
            HostPluginProtocol.set_default_channel(False)
Exemple #20
0
    def _purge_agents(self):
        """
        Remove from disk all directories and .zip files of unknown agents
        (without removing the current, running agent).
        """
        path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME))

        known_versions = [agent.version for agent in self.agents]
        if CURRENT_VERSION not in known_versions:
            logger.verbose(
                u"Running Agent {0} was not found in the agent manifest - adding to list",
                CURRENT_VERSION)
            known_versions.append(CURRENT_VERSION)

        for agent_path in glob.iglob(path):
            try:
                name = fileutil.trim_ext(agent_path, "zip")
                m = AGENT_DIR_PATTERN.match(name)
                if m is not None and FlexibleVersion(m.group(1)) not in known_versions:
                    if os.path.isfile(agent_path):
                        logger.info(u"Purging outdated Agent file {0}", agent_path)
                        os.remove(agent_path)
                    else:
                        logger.info(u"Purging outdated Agent directory {0}", agent_path)
                        shutil.rmtree(agent_path)
            except Exception as e:
                logger.warn(u"Purging {0} raised exception: {1}", agent_path, ustr(e))
        return
Exemple #21
0
    def report_ext_handlers_status(self):
        """Go thru handler_state dir, collect and report status"""
        vm_status = VMStatus()
        vm_status.vmAgent.version = str(CURRENT_VERSION)
        vm_status.vmAgent.status = "Ready"
        vm_status.vmAgent.message = "Guest Agent is running"

        if self.ext_handlers is not None:
            for ext_handler in self.ext_handlers.extHandlers:
                try:
                    self.report_ext_handler_status(vm_status, ext_handler)
                except ExtensionError as e:
                    add_event(
                        AGENT_NAME,
                        version=CURRENT_VERSION,
                        is_success=False,
                        message=ustr(e))
        
        logger.verbose("Report vm agent status")
        try:
            self.protocol.report_vm_status(vm_status)
            if self.log_report:
                logger.verbose("Successfully reported vm agent status")
        except ProtocolError as e:
            message = "Failed to report vm agent status: {0}".format(e)
            add_event(AGENT_NAME, version=CURRENT_VERSION, is_success=False, message=message)
Exemple #22
0
    def get_nic_state(self):
        """
        Capture NIC state (IPv4 and IPv6 addresses plus link state).

        :return: Dictionary of NIC state objects, with the NIC name as key
        :rtype: dict(str,NetworkInformationCard)
        """
        state = {}
        status, output = shellutil.run_get_output("ip -o link", chk_err=False, log_cmd=False)

        if status != 0:
            logger.verbose("Could not fetch NIC link info; status {0}, {1}".format(status, output))
            return {}

        for entry in output.splitlines():
            result = self.ip_command_output.match(entry)
            if result:
                name = result.group(1)
                state[name] = NetworkInterfaceCard(name, result.group(2))


        self._update_nic_state(state, "ip -o -f inet address", NetworkInterfaceCard.add_ipv4, "an IPv4 address")
        self._update_nic_state(state, "ip -o -f inet6 address", NetworkInterfaceCard.add_ipv6, "an IPv6 address")

        return state
Exemple #23
0
    def get_api_versions(self):
        url = URI_FORMAT_GET_API_VERSIONS.format(self.endpoint,
                                                 HOST_PLUGIN_PORT)
        logger.verbose(
            "HostGAPlugin: Getting API versions at [{0}]".format(url))
        return_val = []
        error_response = ''
        is_healthy = False
        try:
            headers = {HEADER_CONTAINER_ID: self.container_id}
            response = restutil.http_get(url, headers)
            if restutil.request_failed(response):
                error_response = restutil.read_response_error(response)
                logger.error(
                    "HostGAPlugin: Failed Get API versions: {0}".format(
                        error_response))
            else:
                return_val = ustr(remove_bom(response.read()),
                                  encoding='utf-8')
                is_healthy = True
        except HttpError as e:
            logger.error(
                "HostGAPlugin: Exception Get API versions: {0}".format(e))

        self.health_service.report_host_plugin_versions(
            is_healthy=is_healthy, response=error_response)

        return return_val
Exemple #24
0
def setup_rdma_device():
    logger.verbose("Parsing SharedConfig XML contents for RDMA details")
    xml_doc = parse_doc(
        fileutil.read_file(os.path.join(conf.get_lib_dir(), SHARED_CONF_FILE_NAME)))
    if xml_doc is None:
        logger.error("Could not parse SharedConfig XML document")
        return
    instance_elem = find(xml_doc, "Instance")
    if not instance_elem:
        logger.error("Could not find <Instance> in SharedConfig document")
        return

    rdma_ipv4_addr = getattrib(instance_elem, "rdmaIPv4Address")
    if not rdma_ipv4_addr:
        logger.error(
            "Could not find rdmaIPv4Address attribute on Instance element of SharedConfig.xml document")
        return

    rdma_mac_addr = getattrib(instance_elem, "rdmaMacAddress")
    if not rdma_mac_addr:
        logger.error(
            "Could not find rdmaMacAddress attribute on Instance element of SharedConfig.xml document")
        return

    # add colons to the MAC address (e.g. 00155D33FF1D ->
    # 00:15:5D:33:FF:1D)
    rdma_mac_addr = ':'.join([rdma_mac_addr[i:i+2]
                              for i in range(0, len(rdma_mac_addr), 2)])
    logger.info("Found RDMA details. IPv4={0} MAC={1}".format(
        rdma_ipv4_addr, rdma_mac_addr))

    # Set up the RDMA device with collected informatino
    RDMADeviceHandler(rdma_ipv4_addr, rdma_mac_addr).start()
    logger.info("RDMA: device is set up")
    return
Exemple #25
0
def setup_rdma_device(nd_version, shared_conf):
    logger.verbose("Parsing SharedConfig XML contents for RDMA details")
    xml_doc = parse_doc(shared_conf.xml_text)
    if xml_doc is None:
        logger.error("Could not parse SharedConfig XML document")
        return
    instance_elem = find(xml_doc, "Instance")
    if not instance_elem:
        logger.error("Could not find <Instance> in SharedConfig document")
        return

    rdma_ipv4_addr = getattrib(instance_elem, "rdmaIPv4Address")
    if not rdma_ipv4_addr:
        logger.error(
            "Could not find rdmaIPv4Address attribute on Instance element of SharedConfig.xml document")
        return

    rdma_mac_addr = getattrib(instance_elem, "rdmaMacAddress")
    if not rdma_mac_addr:
        logger.error(
            "Could not find rdmaMacAddress attribute on Instance element of SharedConfig.xml document")
        return

    # add colons to the MAC address (e.g. 00155D33FF1D ->
    # 00:15:5D:33:FF:1D)
    rdma_mac_addr = ':'.join([rdma_mac_addr[i:i + 2]
                              for i in range(0, len(rdma_mac_addr), 2)])
    logger.info("Found RDMA details. IPv4={0} MAC={1}".format(
        rdma_ipv4_addr, rdma_mac_addr))

    # Set up the RDMA device with collected informatino
    RDMADeviceHandler(rdma_ipv4_addr, rdma_mac_addr, nd_version).start()
    logger.info("RDMA: device is set up")
    return
Exemple #26
0
 def add_user(self, username, encrypted_password, account_expiration):
     try:
         expiration_date = (account_expiration +
                            timedelta(days=1)).strftime(DATE_FORMAT)
         logger.verbose("Adding user {0} with expiration date {1}".format(
             username, expiration_date))
         self.os_util.useradd(username, expiration_date,
                              REMOTE_ACCESS_ACCOUNT_COMMENT)
     except Exception as e:
         raise RemoteAccessError("Error adding user {0}. {1}".format(
             username, ustr(e)))
     try:
         prv_key = os.path.join(conf.get_lib_dir(), TRANSPORT_PRIVATE_CERT)
         pwd = self.cryptUtil.decrypt_secret(encrypted_password, prv_key)
         self.os_util.chpasswd(username, pwd, conf.get_password_cryptid(),
                               conf.get_password_crypt_salt_len())
         self.os_util.conf_sudoer(username)
         logger.info(
             "User '{0}' added successfully with expiration in {1}".format(
                 username, expiration_date))
     except Exception as e:
         error = "Error adding user {0}. {1} ".format(username, str(e))
         try:
             self.handle_failed_create(username)
             error += "cleanup successful"
         except RemoteAccessError as rae:
             error += "and error cleaning up {0}".format(str(rae))
         raise RemoteAccessError(
             "Error adding user {0} cleanup successful".format(username),
             ustr(e))
Exemple #27
0
    def _purge_agents(self):
        """
        Remove from disk all directories and .zip files of unknown agents
        (without removing the current, running agent).
        """
        path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME))

        known_versions = [agent.version for agent in self.agents]
        if CURRENT_VERSION not in known_versions:
            logger.verbose(
                u"Running Agent {0} was not found in the agent manifest - adding to list",
                CURRENT_VERSION)
            known_versions.append(CURRENT_VERSION)

        for agent_path in glob.iglob(path):
            try:
                name = fileutil.trim_ext(agent_path, "zip")
                m = AGENT_DIR_PATTERN.match(name)
                if m is not None and FlexibleVersion(
                        m.group(1)) not in known_versions:
                    if os.path.isfile(agent_path):
                        logger.info(u"Purging outdated Agent file {0}",
                                    agent_path)
                        os.remove(agent_path)
                    else:
                        logger.info(u"Purging outdated Agent directory {0}",
                                    agent_path)
                        shutil.rmtree(agent_path)
            except Exception as e:
                logger.warn(u"Purging {0} raised exception: {1}", agent_path,
                            ustr(e))
        return
Exemple #28
0
    def send_host_plugin_heartbeat(self):
        """
        Send a health signal every HOST_PLUGIN_HEARTBEAT_PERIOD. The signal is 'Healthy' when we have been able to
        communicate with HostGAPlugin at least once in the last HOST_PLUGIN_HEALTH_PERIOD.
        """
        if self.last_host_plugin_heartbeat is None:
            self.last_host_plugin_heartbeat = datetime.datetime.utcnow() - MonitorHandler.HOST_PLUGIN_HEARTBEAT_PERIOD

        if datetime.datetime.utcnow() >= (self.last_host_plugin_heartbeat + MonitorHandler.HOST_PLUGIN_HEARTBEAT_PERIOD):
            try:
                host_plugin = self.protocol.client.get_host_plugin()
                host_plugin.ensure_initialized()
                is_currently_healthy = host_plugin.get_health()

                if is_currently_healthy:
                    self.host_plugin_errorstate.reset()
                else:
                    self.host_plugin_errorstate.incr()

                is_healthy = self.host_plugin_errorstate.is_triggered() is False
                logger.verbose("HostGAPlugin health: {0}", is_healthy)

                self.health_service.report_host_plugin_heartbeat(is_healthy)

            except Exception as e:
                msg = "Exception sending host plugin heartbeat: {0}".format(ustr(e))
                add_event(
                    name=AGENT_NAME,
                    version=CURRENT_VERSION,
                    op=WALAEventOperation.HostPluginHeartbeat,
                    is_success=False,
                    message=msg,
                    log_event=False)

            self.last_host_plugin_heartbeat = datetime.datetime.utcnow()
Exemple #29
0
    def _get_net_info():
        """
        There is no SIOCGIFCONF
        on freeBSD - just parse ifconfig.
        Returns strings: iface, inet4_addr, and mac
        or 'None,None,None' if unable to parse.
        We will sleep and retry as the network must be up.
        """
        iface = ''
        inet = ''
        mac = ''

        err, output = shellutil.run_get_output('ifconfig -l ether',
                                               chk_err=False)
        if err:
            raise OSUtilError("Can't find ether interface:{0}".format(output))
        ifaces = output.split()
        if not ifaces:
            raise OSUtilError("Can't find ether interface.")
        iface = ifaces[0]

        err, output = shellutil.run_get_output('ifconfig ' + iface,
                                               chk_err=False)
        if err:
            raise OSUtilError("Can't get info for interface:{0}".format(iface))

        for line in output.split('\n'):
            if line.find('inet ') != -1:
                inet = line.split()[1]
            elif line.find('ether ') != -1:
                mac = line.split()[1]
        logger.verbose("Interface info: ({0},{1},{2})", iface, inet, mac)

        return iface, inet, mac
Exemple #30
0
    def send_imds_heartbeat(self):
        """
        Send a health signal every IMDS_HEARTBEAT_PERIOD. The signal is 'Healthy' when we have
        successfully called and validated a response in the last IMDS_HEALTH_PERIOD.
        """
        try:
            is_currently_healthy, response = self.imds_client.validate()

            if is_currently_healthy:
                self.imds_errorstate.reset()
            else:
                self.imds_errorstate.incr()

            is_healthy = self.imds_errorstate.is_triggered() is False
            logger.verbose("IMDS health: {0} [{1}]", is_healthy, response)

            self.health_service.report_imds_status(is_healthy, response)

        except Exception as e:
            msg = "Exception sending imds heartbeat: {0}".format(ustr(e))
            add_event(name=AGENT_NAME,
                      version=CURRENT_VERSION,
                      op=WALAEventOperation.ImdsHeartbeat,
                      is_success=False,
                      message=msg,
                      log_event=False)
 def add_user(self, username, encrypted_password, account_expiration):
     try:
         expiration_date = (account_expiration + timedelta(days=1)).strftime(DATE_FORMAT)
         logger.verbose("Adding user {0} with expiration date {1}"
                        .format(username, expiration_date))
         self.os_util.useradd(username, expiration_date, REMOTE_ACCESS_ACCOUNT_COMMENT)
     except OSError as oe:
         logger.error("Error adding user {0}. {1}"
                      .format(username, oe.strerror))
         return
     except Exception as e:
         logger.error("Error adding user {0}. {1}".format(username, ustr(e)))
         return
     try:
         prv_key = os.path.join(conf.get_lib_dir(), TRANSPORT_PRIVATE_CERT)
         pwd = self.cryptUtil.decrypt_secret(encrypted_password, prv_key)
         self.os_util.chpasswd(username, pwd, conf.get_password_cryptid(), conf.get_password_crypt_salt_len())
         self.os_util.conf_sudoer(username)
         logger.info("User '{0}' added successfully with expiration in {1}"
                     .format(username, expiration_date))
         return
     except OSError as oe:
         self.handle_failed_create(username, oe.strerror)
     except Exception as e:
         self.handle_failed_create(username, ustr(e))
Exemple #32
0
    def _get_net_info():
        """
        There is no SIOCGIFCONF
        on freeBSD - just parse ifconfig.
        Returns strings: iface, inet4_addr, and mac
        or 'None,None,None' if unable to parse.
        We will sleep and retry as the network must be up.
        """
        iface = ''
        inet = ''
        mac = ''

        err, output = shellutil.run_get_output('ifconfig -l ether', chk_err=False)
        if err:
            raise OSUtilError("Can't find ether interface:{0}".format(output))
        ifaces = output.split()
        if not ifaces:
            raise OSUtilError("Can't find ether interface.")
        iface = ifaces[0]

        err, output = shellutil.run_get_output('ifconfig ' + iface, chk_err=False)
        if err:
            raise OSUtilError("Can't get info for interface:{0}".format(iface))

        for line in output.split('\n'):
            if line.find('inet ') != -1:
                inet = line.split()[1]
            elif line.find('ether ') != -1:
                mac = line.split()[1]
        logger.verbose("Interface info: ({0},{1},{2})", iface, inet, mac)

        return iface, inet, mac
Exemple #33
0
    def _unpack(self):
        try:
            if os.path.isdir(self.get_agent_dir()):
                shutil.rmtree(self.get_agent_dir())

            zipfile.ZipFile(self.get_agent_pkg_path()).extractall(
                self.get_agent_dir())

        except Exception as e:
            fileutil.clean_ioerror(
                e, paths=[self.get_agent_dir(),
                          self.get_agent_pkg_path()])

            msg = u"Exception unpacking Agent {0} from {1}: {2}".format(
                self.name, self.get_agent_pkg_path(), ustr(e))
            raise UpdateError(msg)

        if not os.path.isdir(self.get_agent_dir()):
            msg = u"Unpacking Agent {0} failed to create directory {1}".format(
                self.name, self.get_agent_dir())
            raise UpdateError(msg)

        logger.verbose(u"Agent {0} unpacked successfully to {1}", self.name,
                       self.get_agent_dir())
        return
Exemple #34
0
    def _download(self):
        for uri in self.pkg.uris:
            if not HostPluginProtocol.is_default_channel() and self._fetch(uri.uri):
                break
            elif self.host is not None and self.host.ensure_initialized():
                if not HostPluginProtocol.is_default_channel():
                    logger.warn("Download unsuccessful, falling back to host plugin")
                else:
                    logger.verbose("Using host plugin as default channel")

                uri, headers = self.host.get_artifact_request(uri.uri, self.host.manifest_uri)
                if self._fetch(uri, headers=headers):
                    if not HostPluginProtocol.is_default_channel():
                        logger.verbose("Setting host plugin as default channel")
                        HostPluginProtocol.set_default_channel(True)
                    break
                else:
                    logger.warn("Host plugin download unsuccessful")
            else:
                logger.error("No download channels available")

        if not os.path.isfile(self.get_agent_pkg_path()):
            msg = u"Unable to download Agent {0} from any URI".format(self.name)
            add_event(
                AGENT_NAME,
                op=WALAEventOperation.Download,
                version=CURRENT_VERSION,
                is_success=False,
                message=msg)
            raise UpdateError(msg)
        return
Exemple #35
0
    def _unpack(self):
        try:
            if os.path.isdir(self.get_agent_dir()):
                shutil.rmtree(self.get_agent_dir())

            zipfile.ZipFile(self.get_agent_pkg_path()).extractall(self.get_agent_dir())

        except Exception as e:
            msg = u"Exception unpacking Agent {0} from {1}: {2}".format(
                self.name,
                self.get_agent_pkg_path(),
                ustr(e))
            raise UpdateError(msg)

        if not os.path.isdir(self.get_agent_dir()):
            msg = u"Unpacking Agent {0} failed to create directory {1}".format(
                self.name,
                self.get_agent_dir())
            raise UpdateError(msg)

        logger.verbose(
            u"Agent {0} unpacked successfully to {1}",
            self.name,
            self.get_agent_dir())
        return
Exemple #36
0
    def send_imds_heartbeat(self):
        """
        Send a health signal every IMDS_HEARTBEAT_PERIOD. The signal is 'Healthy' when we have
        successfully called and validated a response in the last IMDS_HEALTH_PERIOD.
        """

        if self.last_imds_heartbeat is None:
            self.last_imds_heartbeat = datetime.datetime.utcnow() - MonitorHandler.IMDS_HEARTBEAT_PERIOD

        if datetime.datetime.utcnow() >= (self.last_imds_heartbeat + MonitorHandler.IMDS_HEARTBEAT_PERIOD):
            try:
                is_currently_healthy, response = self.imds_client.validate()

                if is_currently_healthy:
                    self.imds_errorstate.reset()
                else:
                    self.imds_errorstate.incr()

                is_healthy = self.imds_errorstate.is_triggered() is False
                logger.verbose("IMDS health: {0} [{1}]", is_healthy, response)

                self.health_service.report_imds_status(is_healthy, response)

            except Exception as e:
                msg = "Exception sending imds heartbeat: {0}".format(ustr(e))
                add_event(
                    name=AGENT_NAME,
                    version=CURRENT_VERSION,
                    op=WALAEventOperation.ImdsHeartbeat,
                    is_success=False,
                    message=msg,
                    log_event=False)

            self.last_imds_heartbeat = datetime.datetime.utcnow()
Exemple #37
0
    def get_installed_version(self):
        lastest_version = None

        for path in glob.iglob(os.path.join(conf.get_lib_dir(), self.ext_handler.name + "-*")):
            if not os.path.isdir(path):
                continue

            separator = path.rfind('-')
            version = FlexibleVersion(path[separator+1:])
            existing_state = os.path.join(path, 'config', 'HandlerState')
            should_remove = False

            if not os.path.exists(existing_state):
                should_remove = True
            else:
                with open(existing_state) as fh:
                    existing_state_text = fh.read()
                    if existing_state_text is None or \
                       existing_state == ExtHandlerState.NotInstalled:
                        should_remove = True

            if should_remove:
                logger.warn("Extension directory does not contain a valid "
                            "status, removing [{0}]".format(path))
                shutil.rmtree(path, ignore_errors=True)
                continue
            else:
                logger.verbose("Extension directory contains valid status "
                               "[{0}]".format(path))

            if lastest_version is None or lastest_version < version:
                lastest_version = version

        return str(lastest_version) if lastest_version is not None else None
Exemple #38
0
    def _upgrade_available(self, protocol, base_version=CURRENT_VERSION):
        # Ignore new agents if updating is disabled
        if not conf.get_autoupdate_enabled():
            return False

        now = time.time()
        if self.last_attempt_time is not None:
            next_attempt_time = self.last_attempt_time + \
                                    conf.get_autoupdate_frequency()
        else:
            next_attempt_time = now
        if next_attempt_time > now:
            return False

        family = conf.get_autoupdate_gafamily()
        logger.info("Checking for agent updates (family: {0})", family)

        self.last_attempt_time = now

        try:
            manifest_list, etag = protocol.get_vmagent_manifests()

            manifests = [m for m in manifest_list.vmAgentManifests \
                            if m.family == family and len(m.versionsManifestUris) > 0]
            if len(manifests) == 0:
                logger.verbose(u"Incarnation {0} has no {1} agent updates",
                               etag, family)
                return False

            pkg_list = protocol.get_vmagent_pkgs(manifests[0])

            # Set the agents to those available for download at least as
            # current as the existing agent and remove from disk any agent
            # no longer reported to the VM.
            # Note:
            #  The code leaves on disk available, but blacklisted, agents
            #  so as to preserve the state. Otherwise, those agents could be
            #  again downloaded and inappropriately retried.
            host = self._get_host_plugin(protocol=protocol)
            self._set_agents(
                [GuestAgent(pkg=pkg, host=host) for pkg in pkg_list.versions])

            self._purge_agents()
            self._filter_blacklisted_agents()

            # Return True if current agent is no longer available or an
            # agent with a higher version number is available
            return not self._is_version_eligible(base_version) \
                or (len(self.agents) > 0 and self.agents[0].version > base_version)

        except Exception as e:
            msg = u"Exception retrieving agent manifests: {0}".format(
                ustr(traceback.format_exc()))
            add_event(AGENT_NAME,
                      op=WALAEventOperation.Download,
                      version=CURRENT_VERSION,
                      is_success=False,
                      message=msg)
            return False
Exemple #39
0
    def enable_firewall(self, dst_ip=None, uid=None):
        # If a previous attempt failed, do not retry
        global _enable_firewall
        if not _enable_firewall:
            return False

        try:
            if dst_ip is None or uid is None:
                msg = "Missing arguments to enable_firewall"
                logger.warn(msg)
                raise Exception(msg)

            wait = self.get_firewall_will_wait()

            # If the DROP rule exists, make no changes
            drop_rule = FIREWALL_DROP.format(wait, "C", dst_ip)
            rc = shellutil.run(drop_rule, chk_err=False)
            if rc == 0:
                logger.verbose("Firewall appears established")
                return True
            elif rc == 2:
                self.remove_firewall(dst_ip, uid)
                msg = "please upgrade iptables to a version that supports the -C option"
                logger.warn(msg)
                raise Exception(msg)

            # Otherwise, append both rules
            accept_rule = FIREWALL_ACCEPT.format(wait, "A", dst_ip, uid)
            drop_rule = FIREWALL_DROP.format(wait, "A", dst_ip)

            if shellutil.run(accept_rule) != 0:
                msg = "Unable to add ACCEPT firewall rule '{0}'".format(
                    accept_rule)
                logger.warn(msg)
                raise Exception(msg)

            if shellutil.run(drop_rule) != 0:
                msg = "Unable to add DROP firewall rule '{0}'".format(
                    drop_rule)
                logger.warn(msg)
                raise Exception(msg)

            logger.info("Successfully added Azure fabric firewall rules")

            rc, output = shellutil.run_get_output(FIREWALL_LIST.format(wait))
            if rc == 0:
                logger.info("Firewall rules:\n{0}".format(output))
            else:
                logger.warn(
                    "Listing firewall rules failed: {0}".format(output))

            return True

        except Exception as e:
            _enable_firewall = False
            logger.info("Unable to establish firewall -- "
                        "no further attempts will be made: "
                        "{0}".format(ustr(e)))
            return False
Exemple #40
0
    def send_cgroup_telemetry(self):
        if self.last_cgroup_telemetry is None:
            self.last_cgroup_telemetry = datetime.datetime.utcnow()

        if datetime.datetime.utcnow() >= (
                self.last_telemetry_heartbeat +
                MonitorHandler.CGROUP_TELEMETRY_PERIOD):
            try:
                metric_reported, metric_threshold = CGroupsTelemetry.collect_all_tracked(
                )
                for cgroup_name, metrics in metric_reported.items():
                    thresholds = metric_threshold[cgroup_name]

                    for metric_group, metric_name, value in metrics:
                        if value > 0:
                            report_metric(metric_group, metric_name,
                                          cgroup_name, value)

                        if metric_group == "Memory":
                            if value >= thresholds["memory"]:
                                msg = "CGroup {0}: Crossed the Memory Threshold. Current Value:{1}, Threshold:{2}.".format(
                                    cgroup_name, value, thresholds["memory"])
                                add_event(
                                    name=AGENT_NAME,
                                    version=CURRENT_VERSION,
                                    op=WALAEventOperation.CGroupsLimitsCrossed,
                                    is_success=True,
                                    message=msg,
                                    log_event=True)

                        if metric_group == "Process":
                            if value >= thresholds["cpu"]:
                                msg = "CGroup {0}: Crossed the Processor Threshold. Current Value:{1}, Threshold:{2}.".format(
                                    cgroup_name, value, thresholds["cpu"])
                                add_event(
                                    name=AGENT_NAME,
                                    version=CURRENT_VERSION,
                                    op=WALAEventOperation.CGroupsLimitsCrossed,
                                    is_success=True,
                                    message=msg,
                                    log_event=True)

            except Exception as e:
                logger.warn(
                    "Monitor: failed to collect cgroups performance metrics: {0}",
                    ustr(e))
                logger.verbose(traceback.format_exc())

            # Look for extension cgroups we're not already tracking and track them
            try:
                CGroupsTelemetry.update_tracked(
                    self.protocol.client.get_current_handlers())
            except Exception as e:
                logger.warn(
                    "Monitor: failed to update cgroups tracked extensions: {0}",
                    ustr(e))
                logger.verbose(traceback.format_exc())

            self.last_cgroup_telemetry = datetime.datetime.utcnow()
Exemple #41
0
 def set_memory_limit(self, limit=None, unit='megabytes'):
     if 'memory' in self.cgroups:
         value = self._format_memory_value(unit, limit)
         memory_limit_file = self._get_cgroup_file('memory', 'memory.limit_in_bytes')
         logger.verbose("writing {0} to {1}".format(value, memory_limit_file))
         fileutil.write_file(memory_limit_file, '{0}\n'.format(value))
     else:
         raise CGroupsException("Memory hierarchy not available in this cgroup")
Exemple #42
0
    def test_telemetry_appender(self, mock_add_log_event, *_):
        logger.add_logger_appender(logger.AppenderType.TELEMETRY, logger.LogLevel.WARNING, path=add_log_event)
        logger.verbose("test-verbose")
        logger.info("test-info")
        logger.warn("test-warn")
        logger.error("test-error")

        self.assertEqual(2, mock_add_log_event.call_count)
Exemple #43
0
 def add_sysinfo(self, event):
     sysinfo_names = [v.name for v in self.sysinfo]
     for param in event.parameters:
         if param.name in sysinfo_names:
             logger.verbose("Remove existing event parameter: [{0}:{1}]",
                            param.name, param.value)
             event.parameters.remove(param)
     event.parameters.extend(self.sysinfo)
Exemple #44
0
    def enable_firewall(self, dst_ip=None, uid=None):
        # If a previous attempt failed, do not retry
        global _enable_firewall
        if not _enable_firewall:
            return False

        try:
            if dst_ip is None or uid is None:
                msg = "Missing arguments to enable_firewall"
                logger.warn(msg)
                raise Exception(msg)

            wait = self.get_firewall_will_wait()

            # If the DROP rule exists, make no changes
            drop_rule = FIREWALL_DROP.format(wait, "C", dst_ip)
            rc = shellutil.run(drop_rule, chk_err=False)
            if rc == 0:
                logger.verbose("Firewall appears established")
                return True
            elif rc == 2:
                self.remove_firewall(dst_ip, uid)
                msg = "please upgrade iptables to a version that supports the -C option"
                logger.warn(msg)
                raise Exception(msg)

            # Otherwise, append both rules
            accept_rule = FIREWALL_ACCEPT.format(wait, "A", dst_ip, uid)
            drop_rule = FIREWALL_DROP.format(wait, "A", dst_ip)

            if shellutil.run(accept_rule) != 0:
                msg = "Unable to add ACCEPT firewall rule '{0}'".format(
                    accept_rule)
                logger.warn(msg)
                raise Exception(msg)

            if shellutil.run(drop_rule) != 0:
                msg = "Unable to add DROP firewall rule '{0}'".format(
                    drop_rule)
                logger.warn(msg)
                raise Exception(msg)

            logger.info("Successfully added Azure fabric firewall rules")

            rc, output = shellutil.run_get_output(FIREWALL_LIST.format(wait))
            if rc == 0:
                logger.info("Firewall rules:\n{0}".format(output))
            else:
                logger.warn("Listing firewall rules failed: {0}".format(output))

            return True

        except Exception as e:
            _enable_firewall = False
            logger.info("Unable to establish firewall -- "
                        "no further attempts will be made: "
                        "{0}".format(ustr(e)))
            return False
Exemple #45
0
    def _put_page_blob_status(self, sas_url, status_blob):
        url = URI_FORMAT_PUT_VM_STATUS.format(self.endpoint, HOST_PLUGIN_PORT)

        # Convert the status into a blank-padded string whose length is modulo 512
        status = bytearray(status_blob.data, encoding='utf-8')
        status_size = int((len(status) + 511) / 512) * 512
        status = bytearray(status_blob.data.ljust(status_size), encoding='utf-8')

        # First, initialize an empty blob
        response = restutil.http_put(url,
                                     data=self._build_status_data(
                                         sas_url,
                                         status_blob.get_page_blob_create_headers(status_size)),
                                     headers=self._build_status_headers())

        if restutil.request_failed(response):
            error_response = restutil.read_response_error(response)
            is_healthy = not restutil.request_failed_at_hostplugin(response)
            self.report_status_health(is_healthy=is_healthy, response=error_response)
            raise HttpError("HostGAPlugin: Failed PageBlob clean-up: {0}"
                            .format(error_response))
        else:
            self.report_status_health(is_healthy=True)
            logger.verbose("HostGAPlugin: PageBlob clean-up succeeded")
        
        # Then, upload the blob in pages
        if sas_url.count("?") <= 0:
            sas_url = "{0}?comp=page".format(sas_url)
        else:
            sas_url = "{0}&comp=page".format(sas_url)

        start = 0
        end = 0
        while start < len(status):
            # Create the next page
            end = start + min(len(status) - start, MAXIMUM_PAGEBLOB_PAGE_SIZE)
            page_size = int((end - start + 511) / 512) * 512
            buf = bytearray(page_size)
            buf[0: end - start] = status[start: end]

            # Send the page
            response = restutil.http_put(url,
                                         data=self._build_status_data(
                                             sas_url,
                                             status_blob.get_page_blob_page_headers(start, end),
                                             buf),
                                         headers=self._build_status_headers())

            if restutil.request_failed(response):
                error_response = restutil.read_response_error(response)
                is_healthy = not restutil.request_failed_at_hostplugin(response)
                self.report_status_health(is_healthy=is_healthy, response=error_response)
                raise HttpError(
                    "HostGAPlugin Error: Put PageBlob bytes "
                    "[{0},{1}]: {2}".format(start, end, error_response))

            # Advance to the next page (if any)
            start = end
Exemple #46
0
    def _put_page_blob_status(self, sas_url, status_blob):
        url = URI_FORMAT_PUT_VM_STATUS.format(self.endpoint, HOST_PLUGIN_PORT)

        # Convert the status into a blank-padded string whose length is modulo 512
        status = bytearray(status_blob.data, encoding='utf-8')
        status_size = int((len(status) + 511) / 512) * 512
        status = bytearray(status_blob.data.ljust(status_size), encoding='utf-8')

        # First, initialize an empty blob
        response = restutil.http_put(url,
                                     data=self._build_status_data(
                                         sas_url,
                                         status_blob.get_page_blob_create_headers(status_size)),
                                     headers=self._build_status_headers())

        if restutil.request_failed(response):
            error_response = restutil.read_response_error(response)
            is_healthy = not restutil.request_failed_at_hostplugin(response)
            self.report_status_health(is_healthy=is_healthy, response=error_response)
            raise HttpError("HostGAPlugin: Failed PageBlob clean-up: {0}"
                            .format(error_response))
        else:
            self.report_status_health(is_healthy=True)
            logger.verbose("HostGAPlugin: PageBlob clean-up succeeded")
        
        # Then, upload the blob in pages
        if sas_url.count("?") <= 0:
            sas_url = "{0}?comp=page".format(sas_url)
        else:
            sas_url = "{0}&comp=page".format(sas_url)

        start = 0
        end = 0
        while start < len(status):
            # Create the next page
            end = start + min(len(status) - start, MAXIMUM_PAGEBLOB_PAGE_SIZE)
            page_size = int((end - start + 511) / 512) * 512
            buf = bytearray(page_size)
            buf[0: end - start] = status[start: end]

            # Send the page
            response = restutil.http_put(url,
                                         data=self._build_status_data(
                                             sas_url,
                                             status_blob.get_page_blob_page_headers(start, end),
                                             buf),
                                         headers=self._build_status_headers())

            if restutil.request_failed(response):
                error_response = restutil.read_response_error(response)
                is_healthy = not restutil.request_failed_at_hostplugin(response)
                self.report_status_health(is_healthy=is_healthy, response=error_response)
                raise HttpError(
                    "HostGAPlugin Error: Put PageBlob bytes "
                    "[{0},{1}]: {2}".format(start, end, error_response))

            # Advance to the next page (if any)
            start = end
Exemple #47
0
    def cleanup_outdated_handlers(self):
        handlers = []
        pkgs = []

        # Build a collection of uninstalled handlers and orphaned packages
        # Note:
        # -- An orphaned package is one without a corresponding handler
        #    directory
        for item in os.listdir(conf.get_lib_dir()):
            path = os.path.join(conf.get_lib_dir(), item)

            if version.is_agent_package(path) or version.is_agent_path(path):
                continue

            if os.path.isdir(path):
                if re.match(HANDLER_NAME_PATTERN, item) is None:
                    continue
                try:
                    eh = ExtHandler()

                    separator = item.rfind('-')

                    eh.name = item[0:separator]
                    eh.properties.version = str(FlexibleVersion(item[separator+1:]))

                    handler = ExtHandlerInstance(eh, self.protocol)
                except Exception:
                    continue
                if handler.get_handler_state() != ExtHandlerState.NotInstalled:
                    continue
                handlers.append(handler)

            elif os.path.isfile(path) and \
                    not os.path.isdir(path[0:-len(HANDLER_PKG_EXT)]):
                if not re.match(HANDLER_PKG_PATTERN, item):
                    continue
                pkgs.append(path)

        # Then, remove the orphaned packages
        for pkg in pkgs:
            try:
                os.remove(pkg)
                logger.verbose("Removed orphaned extension package {0}".format(pkg))
            except OSError as e:
                logger.warn("Failed to remove orphaned package {0}: {1}".format(pkg, e.strerror))

        # Finally, remove the directories and packages of the
        # uninstalled handlers
        for handler in handlers:
            handler.rm_ext_handler_dir()
            pkg = os.path.join(conf.get_lib_dir(), handler.get_full_name() + HANDLER_PKG_EXT)
            if os.path.isfile(pkg):
                try:
                    os.remove(pkg)
                    logger.verbose("Removed extension package {0}".format(pkg))
                except OSError as e:
                    logger.warn("Failed to remove extension package {0}: {1}".format(pkg, e.strerror))
    def cleanup_outdated_handlers(self):
        handlers = []
        pkgs = []

        # Build a collection of uninstalled handlers and orphaned packages
        # Note:
        # -- An orphaned package is one without a corresponding handler
        #    directory
        for item in os.listdir(conf.get_lib_dir()):
            path = os.path.join(conf.get_lib_dir(), item)

            if version.is_agent_package(path) or version.is_agent_path(path):
                continue

            if os.path.isdir(path):
                if re.match(HANDLER_NAME_PATTERN, item) is None:
                    continue
                try:
                    eh = ExtHandler()

                    separator = item.rfind('-')

                    eh.name = item[0:separator]
                    eh.properties.version = str(FlexibleVersion(item[separator+1:]))

                    handler = ExtHandlerInstance(eh, self.protocol)
                except Exception:
                    continue
                if handler.get_handler_state() != ExtHandlerState.NotInstalled:
                    continue
                handlers.append(handler)

            elif os.path.isfile(path) and \
                    not os.path.isdir(path[0:-len(HANDLER_PKG_EXT)]):
                if not re.match(HANDLER_PKG_PATTERN, item):
                    continue
                pkgs.append(path)

        # Then, remove the orphaned packages
        for pkg in pkgs:
            try:
                os.remove(pkg)
                logger.verbose("Removed orphaned extension package {0}".format(pkg))
            except OSError as e:
                logger.warn("Failed to remove orphaned package {0}: {1}".format(pkg, e.strerror))

        # Finally, remove the directories and packages of the
        # uninstalled handlers
        for handler in handlers:
            handler.rm_ext_handler_dir()
            pkg = os.path.join(conf.get_lib_dir(), handler.get_full_name() + HANDLER_PKG_EXT)
            if os.path.isfile(pkg):
                try:
                    os.remove(pkg)
                    logger.verbose("Removed extension package {0}".format(pkg))
                except OSError as e:
                    logger.warn("Failed to remove extension package {0}: {1}".format(pkg, e.strerror))
Exemple #49
0
 def get_health(self):
     """
     Call the /health endpoint
     :return: True if 200 received, False otherwise
     """
     url = URI_FORMAT_HEALTH.format(self.endpoint, HOST_PLUGIN_PORT)
     logger.verbose("HostGAPlugin: Getting health from [{0}]", url)
     response = restutil.http_get(url, max_retry=1)
     return restutil.request_succeeded(response)
Exemple #50
0
 def add_sysinfo(self, event):
     sysinfo_names = [v.name for v in self.sysinfo]
     for param in event.parameters:
         if param.name in sysinfo_names:
             logger.verbose("Remove existing event parameter: [{0}:{1}]",
                            param.name,
                            param.value)
             event.parameters.remove(param)
     event.parameters.extend(self.sysinfo)
Exemple #51
0
 def get_health(self):
     """
     Call the /health endpoint
     :return: True if 200 received, False otherwise
     """
     url = URI_FORMAT_HEALTH.format(self.endpoint,
                                    HOST_PLUGIN_PORT)
     logger.verbose("HostGAPlugin: Getting health from [{0}]", url)
     response = restutil.http_get(url, max_retry=1)
     return restutil.request_succeeded(response)
 def _report(self):
     logger.verbose('HealthService: report observations')
     try:
         restutil.http_post(self.endpoint, self.as_json, headers={'Content-Type': 'application/json'})
         logger.verbose('HealthService: Reported observations to {0}: {1}', self.endpoint, self.as_json)
     except HttpError as e:
         logger.warn("HealthService: could not report observations: {0}", ustr(e))
     finally:
         # these signals are not timestamped, so there is no value in persisting data
         del self.observations[:]
Exemple #53
0
    def send_cgroup_telemetry(self):
        if self.last_cgroup_telemetry is None:
            self.last_cgroup_telemetry = datetime.datetime.utcnow()

        if datetime.datetime.utcnow() >= (self.last_telemetry_heartbeat + MonitorHandler.CGROUP_TELEMETRY_PERIOD):
            try:
                metric_reported, metric_threshold = CGroupsTelemetry.collect_all_tracked()
                for cgroup_name, metrics in metric_reported.items():
                    thresholds = metric_threshold[cgroup_name]

                    for metric_group, metric_name, value in metrics:
                        if value > 0:
                            report_metric(metric_group, metric_name, cgroup_name, value)

                        if metric_group == "Memory":
                            # Memory is collected in bytes, and limit is set in megabytes.
                            if value >= CGroups._format_memory_value('megabytes', thresholds.memory_limit):
                                msg = "CGroup {0}: Crossed the Memory Threshold. " \
                                      "Current Value: {1} bytes, Threshold: {2} megabytes." \
                                       .format(cgroup_name, value, thresholds.memory_limit)

                                logger.warn(msg)
                                add_event(name=AGENT_NAME,
                                          version=CURRENT_VERSION,
                                          op=WALAEventOperation.CGroupsLimitsCrossed,
                                          is_success=True,
                                          message=msg,
                                          log_event=True)

                        if metric_group == "Process":
                            if value >= thresholds.cpu_limit:
                                msg = "CGroup {0}: Crossed the Processor Threshold. " \
                                      "Current Value: {1}, Threshold: {2}." \
                                       .format(cgroup_name, value, thresholds.cpu_limit)

                                logger.warn(msg)
                                add_event(name=AGENT_NAME,
                                          version=CURRENT_VERSION,
                                          op=WALAEventOperation.CGroupsLimitsCrossed,
                                          is_success=True,
                                          message=msg,
                                          log_event=True)

            except Exception as e:
                logger.warn("Monitor: failed to collect cgroups performance metrics: {0}", ustr(e))
                logger.verbose(traceback.format_exc())

            # Look for extension cgroups we're not already tracking and track them
            try:
                CGroupsTelemetry.update_tracked(self.protocol.client.get_current_handlers())
            except Exception as e:
                logger.warn("Monitor: failed to update cgroups tracked extensions: {0}", ustr(e))
                logger.verbose(traceback.format_exc())

            self.last_cgroup_telemetry = datetime.datetime.utcnow()
Exemple #54
0
 def init_cgroups():
     # Track metrics for the roll-up cgroup and for the agent cgroup
     try:
         CGroupsTelemetry.track_cgroup(CGroups.for_extension(""))
         CGroupsTelemetry.track_agent()
     except Exception as e:
         # when a hierarchy is not mounted, we raise an exception
         # and we should therefore only issue a warning, since this
         # is not unexpected
         logger.warn("Monitor: cgroups not initialized: {0}", ustr(e))
         logger.verbose(traceback.format_exc())
Exemple #55
0
 def __init__(self, xml_text):
     if xml_text is None:
         raise ValueError("ovf-env is None")
     logger.verbose("Load ovf-env.xml")
     self.hostname = None
     self.username = None
     self.user_password = None
     self.customdata = None
     self.disable_ssh_password_auth = True
     self.ssh_pubkeys = []
     self.ssh_keypairs = []
     self.parse(xml_text)
Exemple #56
0
 def collect_event(self, evt_file_name):
     try:
         logger.verbose("Found event file: {0}", evt_file_name)
         with open(evt_file_name, "rb") as evt_file:
             # if fail to open or delete the file, throw exception
             data_str = evt_file.read().decode("utf-8", 'ignore')
         logger.verbose("Processed event file: {0}", evt_file_name)
         os.remove(evt_file_name)
         return data_str
     except IOError as e:
         msg = "Failed to process {0}, {1}".format(evt_file_name, e)
         raise EventError(msg)
Exemple #57
0
 def _report_failures(self):
     try:
         logger.verbose("HealthService: report failures as telemetry")
         from azurelinuxagent.common.event import add_event, WALAEventOperation
         for o in self.observations:
             if not o.is_healthy:
                 add_event(AGENT_NAME,
                           version=CURRENT_VERSION,
                           op=WALAEventOperation.HealthObservation,
                           is_success=False,
                           message=json.dumps(o.as_obj))
     except Exception as e:
         logger.verbose("HealthService: could not report failures: {0}".format(ustr(e)))
Exemple #58
0
    def __init__(self, path=None, pkg=None, host=None):
        self.pkg = pkg
        self.host = host
        version = None
        if path is not None:
            m = AGENT_DIR_PATTERN.match(path)
            if m == None:
                raise UpdateError(u"Illegal agent directory: {0}".format(path))
            version = m.group(1)
        elif self.pkg is not None:
            version = pkg.version

        if version == None:
            raise UpdateError(u"Illegal agent version: {0}".format(version))
        self.version = FlexibleVersion(version)

        location = u"disk" if path is not None else u"package"
        logger.verbose(u"Loading Agent {0} from {1}", self.name, location)

        self.error = GuestAgentError(self.get_agent_error_file())
        self.error.load()

        try:
            self._ensure_downloaded()
            self._ensure_loaded()
        except Exception as e:
            if isinstance(e, ResourceGoneError):
                raise

            # The agent was improperly blacklisting versions due to a timeout
            # encountered while downloading a later version. Errors of type
            # socket.error are IOError, so this should provide sufficient
            # protection against a large class of I/O operation failures.
            if isinstance(e, IOError):
                raise

            # Note the failure, blacklist the agent if the package downloaded
            # - An exception with a downloaded package indicates the package
            #   is corrupt (e.g., missing the HandlerManifest.json file)
            self.mark_failure(is_fatal=os.path.isfile(self.get_agent_pkg_path()))

            msg = u"Agent {0} install failed with exception: {1}".format(
                        self.name, ustr(e))
            logger.warn(msg)
            add_event(
                AGENT_NAME,
                version=self.version,
                op=WALAEventOperation.Install,
                is_success=False,
                message=msg)
    def run(self):
        self.ext_handlers, etag = None, None
        try:
            self.protocol = self.protocol_util.get_protocol()
            self.ext_handlers, etag = self.protocol.get_ext_handlers()
            self.get_artifact_error_state.reset()
        except Exception as e:
            msg = u"Exception retrieving extension handlers: {0}".format(ustr(e))
            detailed_msg = '{0} {1}'.format(msg, traceback.format_exc())

            self.get_artifact_error_state.incr()

            if self.get_artifact_error_state.is_triggered():
                add_event(AGENT_NAME,
                          version=CURRENT_VERSION,
                          op=WALAEventOperation.GetArtifactExtended,
                          is_success=False,
                          message="Failed to get extension artifact for over "
                                  "{0): {1}".format(self.get_artifact_error_state.min_timedelta, msg))
                self.get_artifact_error_state.reset()
            else:
                logger.warn(msg)

            add_event(AGENT_NAME,
                      version=CURRENT_VERSION,
                      op=WALAEventOperation.ExtensionProcessing,
                      is_success=False,
                      message=detailed_msg)
            return

        try:
            msg = u"Handle extensions updates for incarnation {0}".format(etag)
            logger.verbose(msg)
            # Log status report success on new config
            self.log_report = True
            self.handle_ext_handlers(etag)
            self.last_etag = etag

            self.report_ext_handlers_status()
            self.cleanup_outdated_handlers()
        except Exception as e:
            msg = u"Exception processing extension handlers: {0}".format(
                ustr(e))
            logger.warn(msg)
            add_event(AGENT_NAME,
                      version=CURRENT_VERSION,
                      op=WALAEventOperation.ExtensionProcessing,
                      is_success=False,
                      message=msg)
            return