Example 1
    def copy_ovf_env(self):
        """
        Copy ovf env file from dvd to hard disk.
        Remove password before save it to the disk
        """
        dvd_mount_point = conf.get_dvd_mount_point()
        ovf_file_path_on_dvd = os.path.join(dvd_mount_point, OVF_FILE_NAME)
        tag_file_path_on_dvd = os.path.join(dvd_mount_point, TAG_FILE_NAME)
        try:
            self.osutil.mount_dvd()
            ovfxml = fileutil.read_file(ovf_file_path_on_dvd, remove_bom=True)
            ovfenv = OvfEnv(ovfxml)
            ovfxml = re.sub("<UserPassword>.*?<", "<UserPassword>*<", ovfxml)
            ovf_file_path = os.path.join(conf.get_lib_dir(), OVF_FILE_NAME)
            fileutil.write_file(ovf_file_path, ovfxml)
            
            if os.path.isfile(tag_file_path_on_dvd):
                logger.info("Found {0} in provisioning ISO", TAG_FILE_NAME)
                tag_file_path = os.path.join(conf.get_lib_dir(), TAG_FILE_NAME)
                shutil.copyfile(tag_file_path_on_dvd, tag_file_path) 

        except (OSUtilError, IOError) as e:
            raise ProtocolError(ustr(e))

        try:
            self.osutil.umount_dvd()
            self.osutil.eject_dvd()
        except OSUtilError as e:
            logger.warn(ustr(e))

        return ovfenv
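A note on the password scrub above: the non-greedy ".*?" stops at the first "<", so only the password text between the tags is replaced and the closing tag survives. A minimal sketch of the substitution on a made-up fragment:

import re

# Made-up fragment; the real ovf-env.xml is read from the provisioning DVD.
sample = "<UserPassword>S3cr3t!</UserPassword>"
print(re.sub("<UserPassword>.*?<", "<UserPassword>*<", sample))
# <UserPassword>*</UserPassword>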
Example 2
    def _write_pid_file(self):
        pid_files = self._get_pid_files()

        pid_dir, pid_name, pid_re = self._get_pid_parts()

        previous_pid_file = None \
                        if len(pid_files) <= 0 \
                        else pid_files[-1]
        pid_index = -1 \
                    if previous_pid_file is None \
                    else int(pid_re.match(os.path.basename(previous_pid_file)).group(1))
        pid_file = os.path.join(pid_dir, "{0}_{1}".format(pid_index+1, pid_name))

        try:
            fileutil.write_file(pid_file, ustr(os.getpid()))
            logger.info(u"{0} running as process {1}", CURRENT_AGENT, ustr(os.getpid()))
        except Exception as e:
            logger.warn(
                u"Exception writing goal state agent {0} pid to {1}: {2}",
                CURRENT_AGENT,
                pid_file,
                ustr(e))
            pid_file = None

        return pid_files, pid_file
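The next pid file name is easier to follow with concrete values. A small sketch, assuming (purely for illustration) that _get_pid_parts returns a directory, a base name such as "waagent.pid", and a regex whose first group captures the numeric prefix:

import os
import re

pid_dir, pid_name = "/var/run", "waagent.pid"  # illustrative values
pid_re = re.compile(r"(\d+)_{0}".format(re.escape(pid_name)))

pid_files = ["/var/run/0_waagent.pid", "/var/run/1_waagent.pid"]
previous_pid_file = pid_files[-1]
pid_index = int(pid_re.match(os.path.basename(previous_pid_file)).group(1))
# The new file continues the sequence:
print(os.path.join(pid_dir, "{0}_{1}".format(pid_index + 1, pid_name)))  # /var/run/2_waagent.pid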
Example 3
    def run(self):
        # If provision is enabled, run default provision handler
        if conf.get_provision_enabled():
            logger.warn("Provisioning flag is enabled; this is not typical "
                        "in Ubuntu, please ensure your config is correct.")
            super(UbuntuProvisionHandler, self).run()
            return

        provisioned = os.path.join(conf.get_lib_dir(), "provisioned")
        if os.path.isfile(provisioned):
            logger.info("Provisioning already completed, skipping.")
            return

        logger.info("Running Ubuntu provisioning handler")
        self.wait_for_ovfenv()
        self.protocol_util.get_protocol()
        self.report_not_ready("Provisioning", "Starting")
        try:
            thumbprint = self.wait_for_ssh_host_key()
            fileutil.write_file(provisioned, "")
            logger.info("Finished provisioning")
        except ProvisionError as e:
            logger.error("Provisioning failed: {0}", ustr(e))
            self.report_not_ready("ProvisioningFailed", ustr(e))
            self.report_event(ustr(e))
            return

        self.report_ready(thumbprint)
        self.report_event("Provisioning succeeded", is_success=True)
Example 4
    def run(self, child_args=None):
        logger.info("{0} Version:{1}", AGENT_LONG_NAME, AGENT_VERSION)
        logger.info("OS: {0} {1}", DISTRO_NAME, DISTRO_VERSION)
        logger.info("Python: {0}.{1}.{2}", PY_VERSION_MAJOR, PY_VERSION_MINOR,
                    PY_VERSION_MICRO)

        self.check_pid()
        self.initialize_environment()

        CGroups.setup()

        # If FIPS is enabled, set the OpenSSL environment variable
        # Note:
        # -- Subprocesses inherit the current environment
        if conf.get_fips_enabled():
            os.environ[OPENSSL_FIPS_ENVIRONMENT] = '1'

        while self.running:
            try:
                self.daemon(child_args)
            except Exception as e:
                err_msg = traceback.format_exc()
                add_event(name=AGENT_NAME, is_success=False, message=ustr(err_msg),
                          op=WALAEventOperation.UnhandledError)
                logger.warn("Daemon ended with exception -- Sleep 15 seconds and restart daemon")
                time.sleep(15)
Example 5
    def remove_firewall(self, dst_ip=None, uid=None):
        # If a previous attempt failed, do not retry
        global _enable_firewall
        if not _enable_firewall:
            return False

        try:
            if dst_ip is None or uid is None:
                msg = "Missing arguments to remove_firewall"
                logger.warn(msg)
                raise Exception(msg)

            wait = self.get_firewall_will_wait()

            # This rule was <= 2.2.25 only, and may still exist on some VMs.  Until 2.2.25
            # has aged out, keep this cleanup in place.
            self._delete_rule(FIREWALL_DELETE_CONNTRACK_ACCEPT.format(wait, dst_ip))

            self._delete_rule(FIREWALL_DELETE_OWNER_ACCEPT.format(wait, dst_ip, uid))
            self._delete_rule(FIREWALL_DELETE_CONNTRACK_DROP.format(wait, dst_ip))

            return True

        except Exception as e:
            _enable_firewall = False
            logger.info("Unable to remove firewall -- "
                        "no further attempts will be made: "
                        "{0}".format(ustr(e)))
            return False
Example 6
    def _get_all_interfaces(self):
        """
        Return a dictionary mapping from interface name to IPv4 address.
        Interfaces without a name are ignored.
        """
        expected=16 # how many devices should I expect...
        struct_size = DefaultOSUtil._get_struct_ifconf_size()
        array_size = expected * struct_size

        buff = array.array('B', b'\0' * array_size)
        param = struct.pack('iL', array_size, buff.buffer_info()[0])

        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP)
        ret = fcntl.ioctl(sock.fileno(), IOCTL_SIOCGIFCONF, param)
        retsize = (struct.unpack('iL', ret)[0])
        sock.close()

        if retsize == array_size:
            logger.warn(('SIOCGIFCONF returned more than {0} up '
                         'network interfaces.'), expected)

        ifconf_buff = buff.tostring()

        ifaces = {}
        for i in range(0, array_size, struct_size):
            iface = ifconf_buff[i:i+IFNAMSIZ].split(b'\0', 1)[0]
            if len(iface) > 0:
                iface_name = iface.decode('latin-1')
                if iface_name not in ifaces:
                    ifaces[iface_name] = socket.inet_ntoa(ifconf_buff[i+20:i+24])
        return ifaces
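The offsets above (the first IFNAMSIZ bytes for the name, bytes 20-23 for the IPv4 address) follow from the layout of struct ifreq on 64-bit Linux. A small sketch that decodes one hand-built 40-byte record under that assumed layout:

import socket
import struct

IFNAMSIZ = 16

def parse_ifreq(record):
    # Name fills the first IFNAMSIZ bytes; the sockaddr_in that follows holds
    # the IPv4 address at offset 20 (16 name + 2 family + 2 port).
    name = record[:IFNAMSIZ].split(b'\0', 1)[0].decode('latin-1')
    return name, socket.inet_ntoa(record[20:24])

record = (b'eth0'.ljust(IFNAMSIZ, b'\0')
          + struct.pack('=HH', socket.AF_INET, 0)
          + socket.inet_aton('10.0.0.4')
          + b'\0' * 16)
print(parse_ifreq(record))  # ('eth0', '10.0.0.4')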
Example 7
    def get_firewall_dropped_packets(self, dst_ip=None):
        # If a previous attempt failed, do not retry
        global _enable_firewall
        if not _enable_firewall:
            return 0

        try:
            wait = self.get_firewall_will_wait()

            rc, output = shellutil.run_get_output(FIREWALL_PACKETS.format(wait), log_cmd=False)
            if rc == 3:
                # Transient error  that we ignore.  This code fires every loop
                # of the daemon (60m), so we will get the value eventually.
                return 0

            if rc != 0:
                return -1

            pattern = re.compile(PACKET_PATTERN.format(dst_ip))
            for line in output.split('\n'):
                m = pattern.match(line)
                if m is not None:
                    return int(m.group(1))
            
            return 0

        except Exception as e:
            _enable_firewall = False
            logger.warn("Unable to retrieve firewall packets dropped: "
                        "{0}".format(ustr(e)))
            return -1
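FIREWALL_PACKETS and PACKET_PATTERN are constants defined elsewhere in this module; the sketch below only illustrates the kind of counter line the loop scans for, using made-up iptables-style output and an illustrative pattern rather than the real constants:

import re

output = (
    "Chain OUTPUT (policy ACCEPT 0 packets, 0 bytes)\n"
    "    pkts      bytes target     prot opt in     out     source               destination\n"
    "      12     1440 DROP       tcp  --  *      *       0.0.0.0/0            168.63.129.16\n"
)
# First column of a DROP rule whose destination is the given address.
pattern = re.compile(r"^\s*(\d+)\s+\d+\s+DROP\s+.*\s+{0}\s*$".format(re.escape("168.63.129.16")))
for line in output.split('\n'):
    m = pattern.match(line)
    if m is not None:
        print(int(m.group(1)))  # 12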
Example 8
 def device_for_ide_port(self, port_id):
     """
     Return device name attached to ide port 'n'.
     """
     if port_id > 3:
         return None
     g0 = "00000000"
     if port_id > 1:
         g0 = "00000001"
         port_id = port_id - 2
     device = None
     path = "/sys/bus/vmbus/devices/"
     if os.path.exists(path):
         try:
             for vmbus in os.listdir(path):
                 deviceid = fileutil.read_file(os.path.join(path, vmbus, "device_id"))
                 guid = deviceid.lstrip('{').split('-')
                 if guid[0] == g0 and guid[1] == "000" + ustr(port_id):
                     for root, dirs, files in os.walk(path + vmbus):
                         if root.endswith("/block"):
                             device = dirs[0]
                             break
                         else:
                             # older distros
                             for d in dirs:
                                 if ':' in d and "block" == d.split(':')[0]:
                                     device = d.split(':')[1]
                                     break
                     break
         except OSError as oe:
             logger.warn('Could not obtain device for IDE port {0}: {1}', port_id, ustr(oe))
     return device
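The conditionals above encode a two-controller mapping: IDE ports 0-1 hang off the vmbus GUID group "00000000" and ports 2-3 off "00000001", with the second GUID field carrying the port number within its controller. A compact restatement of just that mapping:

def ide_port_guid_parts(port_id):
    # Ports 0-1 -> controller "00000000", ports 2-3 -> controller "00000001";
    # the per-controller port index becomes the "000<n>" second GUID field.
    controller = "00000001" if port_id > 1 else "00000000"
    port = port_id - 2 if port_id > 1 else port_id
    return controller, "000" + str(port)

print(ide_port_guid_parts(1))  # ('00000000', '0001')
print(ide_port_guid_parts(3))  # ('00000001', '0001')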
Example 9
    def get_installed_version(self):
        latest_version = None

        for path in glob.iglob(os.path.join(conf.get_lib_dir(), self.ext_handler.name + "-*")):
            if not os.path.isdir(path):
                continue

            separator = path.rfind('-')
            version = FlexibleVersion(path[separator+1:])
            existing_state = os.path.join(path, 'config', 'HandlerState')
            should_remove = False

            if not os.path.exists(existing_state):
                should_remove = True
            else:
                with open(existing_state) as fh:
                    existing_state_text = fh.read()
                    if existing_state_text is None or \
                       existing_state_text == ExtHandlerState.NotInstalled:
                        should_remove = True

            if should_remove:
                logger.warn("Extension directory does not contain a valid "
                            "status, removing [{0}]".format(path))
                shutil.rmtree(path, ignore_errors=True)
                continue
            else:
                logger.verbose("Extension directory contains valid status "
                               "[{0}]".format(path))

            if latest_version is None or latest_version < version:
                latest_version = version

        return str(latest_version) if latest_version is not None else None
Example 10
    def run(self):
        # If provision is enabled, run default provision handler
        if conf.get_provision_enabled():
            logger.warn("Provisioning flag is enabled, which overrides using "
                        "cloud-init; running the default provisioning code")
            super(CloudInitProvisionHandler, self).run()
            return

        try:
            if super(CloudInitProvisionHandler, self).is_provisioned():
                logger.info("Provisioning already completed, skipping.")
                return

            utc_start = datetime.utcnow()
            logger.info("Running CloudInit provisioning handler")
            self.wait_for_ovfenv()
            self.protocol_util.get_protocol()
            self.report_not_ready("Provisioning", "Starting")

            thumbprint = self.wait_for_ssh_host_key()
            self.write_provisioned()
            logger.info("Finished provisioning")

            self.report_ready(thumbprint)
            self.report_event("Provisioning with cloud-init succeeded ({0}s)".format(self._get_uptime_seconds()),
                is_success=True,
                duration=elapsed_milliseconds(utc_start))

        except ProvisionError as e:
            msg = "Provisioning with cloud-init failed: {0} ({1}s)".format(ustr(e), self._get_uptime_seconds())
            logger.error(msg)
            self.report_not_ready("ProvisioningFailed", ustr(e))
            self.report_event(msg)
            return
Example 11
 def wait_for_ovfenv(self, max_retry=1800, sleep_time=1):
     """
     Wait for cloud-init to copy ovf-env.xml file from provision ISO
     """
     ovf_file_path = os.path.join(conf.get_lib_dir(), OVF_FILE_NAME)
     for retry in range(0, max_retry):
         if os.path.isfile(ovf_file_path):
             try:
                 ovf_env = OvfEnv(fileutil.read_file(ovf_file_path))
                 self.handle_provision_guest_agent(ovf_env.provision_guest_agent)
                 return
             except ProtocolError as pe:
                 raise ProvisionError("OVF xml could not be parsed "
                                      "[{0}]: {1}".format(ovf_file_path,
                                                          ustr(pe)))
         else:
             if retry < max_retry - 1:
                 logger.info(
                     "Waiting for cloud-init to copy ovf-env.xml to {0} "
                     "[{1} retries remaining, "
                     "sleeping {2}s]".format(ovf_file_path,
                                             max_retry - retry,
                                             sleep_time))
                 if not self.validate_cloud_init():
                     logger.warn("cloud-init does not appear to be running")
                 time.sleep(sleep_time)
     raise ProvisionError("Giving up, ovf-env.xml was not copied to {0} "
                          "after {1}s".format(ovf_file_path,
                                              max_retry * sleep_time))
Example 12
 def wait_for_ssh_host_key(self, max_retry=1800, sleep_time=1):
     """
     Wait for cloud-init to generate ssh host key
     """
     keypair_type = conf.get_ssh_host_keypair_type()
     path = conf.get_ssh_key_public_path()
     for retry in range(0, max_retry):
         if os.path.isfile(path):
             logger.info("ssh host key found at: {0}".format(path))
             try:
                 thumbprint = self.get_ssh_host_key_thumbprint(chk_err=False)
                 logger.info("Thumbprint obtained from : {0}".format(path))
                 return thumbprint
             except ProvisionError:
                 logger.warn("Could not get thumbprint from {0}".format(path))
         if retry < max_retry - 1:
             logger.info("Waiting for ssh host key to be generated at {0} "
                         "[{1} attempts remaining, "
                         "sleeping {2}s]".format(path,
                                                 max_retry - retry,
                                                 sleep_time))
             if not self.validate_cloud_init():
                 logger.warn("cloud-init does not appear to be running")
             time.sleep(sleep_time)
     raise ProvisionError("Giving up, ssh host key was not found at {0} "
                          "after {1}s".format(path,
                                              max_retry * sleep_time))
Example 13
 def download_ext_handler_pkg(self, uri, headers=None):
     try:
         resp = restutil.http_get(uri, chk_proxy=True, headers=headers)
         if resp.status == restutil.httpclient.OK:
             return resp.read()
     except Exception as e:
         logger.warn("Failed to download from: {0}".format(uri), e)
Example 14
def migrate_handler_state():
    handler_state_path = os.path.join(conf.get_lib_dir(), "handler_state")
    if not os.path.isdir(handler_state_path):
        return

    for handler_path in glob.iglob(os.path.join(handler_state_path, "*")):
        handler = os.path.basename(handler_path)
        handler_config_path = os.path.join(conf.get_lib_dir(), handler, "config")
        if os.path.isdir(handler_config_path):
            for file in ("State", "Status"):
                from_path = os.path.join(handler_state_path, handler, file.lower())
                to_path = os.path.join(handler_config_path, "Handler" + file)
                if os.path.isfile(from_path) and not os.path.isfile(to_path):
                    try:
                        shutil.move(from_path, to_path)
                    except Exception as e:
                        logger.warn(
                            "Exception occurred migrating {0} {1} file: {2}",
                            handler,
                            file,
                            str(e))

    try:
        shutil.rmtree(handler_state_path)
    except Exception as e:
        logger.warn("Exception occurred removing {0}: {1}", handler_state_path, str(e))
    return
Example 15
    def mount_dvd(self,
                  max_retry=6,
                  chk_err=True,
                  dvd_device=None,
                  mount_point=None,
                  sleep_time=5):
        if dvd_device is None:
            dvd_device = self.get_dvd_device()
        if mount_point is None:
            mount_point = conf.get_dvd_mount_point()
        if not os.path.isdir(mount_point):
            os.makedirs(mount_point)

        for retry in range(0, max_retry):
            retcode = self.mount(dvd_device,
                                mount_point,
                                option="-o ro -t udf",
                                chk_err=False)
            if retcode == 0:
                logger.info("Successfully mounted DVD")
                return
            if retry < max_retry - 1:
                mountlist = shellutil.run_get_output("/sbin/mount")[1]
                existing = self.get_mount_point(mountlist, dvd_device)
                if existing is not None:
                    logger.info("{0} is mounted at {1}", dvd_device, existing)
                    return
                logger.warn("Mount DVD failed: retry={0}, ret={1}", retry,
                            retcode)
                time.sleep(sleep_time)
        if chk_err:
            raise OSUtilError("Failed to mount DVD.")
Example 16
    def mount_resource_disk(self, mount_point, fs):
        device = self.osutil.device_for_ide_port(1)
        if device is None:
            raise ResourceDiskError("unable to detect disk topology")

        device = "/dev/" + device
        mountlist = shellutil.run_get_output("mount")[1]
        existing = self.osutil.get_mount_point(mountlist, device)

        if existing:
            logger.info("Resource disk {0}1 is already mounted", device)
            return existing

        fileutil.mkdir(mount_point, mode=0o755)

        logger.info("Detect GPT...")
        partition = device + "1"
        ret = shellutil.run_get_output("parted {0} print".format(device))
        if ret[0]:
            raise ResourceDiskError("({0}) {1}".format(device, ret[1]))

        if "gpt" in ret[1]:
            logger.info("GPT detected")
            logger.info("Get GPT partitions")
            parts = [x for x in ret[1].split("\n") if re.match(r"^\s*[0-9]+", x)]
            logger.info("Found {0} GPT partitions.", len(parts))
            if len(parts) > 1:
                logger.info("Remove old GPT partitions")
                for i in range(1, len(parts) + 1):
                    logger.info("Remove partition: {0}", i)
                    shellutil.run("parted {0} rm {1}".format(device, i))

                logger.info("Create a new GPT partition using entire disk space")
                shellutil.run("parted {0} mkpart primary 0% 100%".format(device))

                logger.info("Format partition: {0} with fstype {1}",partition,fs)
                shellutil.run("mkfs." + fs + " " + partition + " -F")
        else:
            logger.info("GPT not detected")
            logger.info("Check fstype")
            ret = shellutil.run_get_output("sfdisk -q -c {0} 1".format(device))
            if ret[1].rstrip() == "7" and fs != "ntfs":
                logger.info("The partition is formatted with ntfs")
                logger.info("Format partition: {0} with fstype {1}",partition,fs)
                shellutil.run("sfdisk -c {0} 1 83".format(device))
                shellutil.run("mkfs." + fs + " " + partition + " -F")

        logger.info("Mount resource disk")
        ret = shellutil.run("mount {0} {1}".format(partition, mount_point),
                                chk_err=False)
        if ret:
            logger.warn("Failed to mount resource disk. Retry mounting")
            shellutil.run("mkfs." + fs + " " + partition + " -F")
            ret = shellutil.run("mount {0} {1}".format(partition, mount_point))
            if ret:
                raise ResourceDiskError("({0}) {1}".format(partition, ret))

        logger.info("Resource disk ({0}) is mounted at {1} with fstype {2}",
                    device, mount_point, fs)
        return mount_point
Example 17
    def process(self):
        try:
            RDMADeviceHandler.update_dat_conf(dapl_config_paths, self.ipv4_addr)

            skip_rdma_device = False
            retcode,out = shellutil.run_get_output("modinfo hv_network_direct")
            if retcode == 0:
                version = re.search("version:\s+(\d+)\.(\d+)\.(\d+)\D", out, re.IGNORECASE)
                if version:
                    v1 = int(version.groups(0)[0])
                    v2 = int(version.groups(0)[1])
                    if v1>4 or v1==4 and v2>0:
                        logger.info("Skip setting /dev/hvnd_rdma on 4.1 or later")
                        skip_rdma_device = True
                else:
                    logger.info("RDMA: hv_network_direct driver version not present, assuming 4.0.x or older.")
            else:
                logger.warn("RDMA: failed to get module info on hv_network_direct.")

            if not skip_rdma_device:
                RDMADeviceHandler.wait_rdma_device(
                    self.rdma_dev, self.device_check_timeout_sec, self.device_check_interval_sec)
                RDMADeviceHandler.write_rdma_config_to_device(
                    self.rdma_dev, self.ipv4_addr, self.mac_addr)

            RDMADeviceHandler.update_network_interface(self.mac_addr, self.ipv4_addr)
        except Exception as e:
            logger.error("RDMA: device processing failed: {0}".format(e))
Example 18
    def mount_dvd(self, max_retry=6, chk_err=True, dvd_device=None, mount_point=None):
        if dvd_device is None:
            dvd_device = self.get_dvd_device()
        if mount_point is None:
            mount_point = conf.get_dvd_mount_point()
        mountlist = shellutil.run_get_output("mount")[1]
        existing = self.get_mount_point(mountlist, dvd_device)
        if existing is not None: #Already mounted
            logger.info("{0} is already mounted at {1}", dvd_device, existing)
            return
        if not os.path.isdir(mount_point):
            os.makedirs(mount_point)

        for retry in range(0, max_retry):
            retcode = self.mount(dvd_device, mount_point, option="-o ro -t udf,iso9660",
                                 chk_err=chk_err)
            if retcode == 0:
                logger.info("Successfully mounted dvd")
                return
            if retry < max_retry - 1:
                logger.warn("Mount dvd failed: retry={0}, ret={1}", retry,
                            retcode)
                time.sleep(5)
        if chk_err:
            raise OSUtilError("Failed to mount dvd.")
Example 19
    def get_first_if(self):
        """
        Return the interface name, and ip addr of the
        first active non-loopback interface.
        """
        iface=''
        expected=16 # how many devices should I expect...
        struct_size=40 # for 64bit the size is 40 bytes
        sock = socket.socket(socket.AF_INET,
                             socket.SOCK_DGRAM,
                             socket.IPPROTO_UDP)
        buff=array.array('B', b'\0' * (expected * struct_size))
        param = struct.pack('iL',
                            expected*struct_size,
                            buff.buffer_info()[0])
        ret = fcntl.ioctl(sock.fileno(), 0x8912, param)
        retsize=(struct.unpack('iL', ret)[0])
        if retsize == (expected * struct_size):
            logger.warn(('SIOCGIFCONF returned more than {0} up '
                         'network interfaces.'), expected)
        sock = buff.tostring()
        primary = bytearray(self.get_primary_interface(), encoding='utf-8')
        for i in range(0, struct_size * expected, struct_size):
            iface=sock[i:i+16].split(b'\0', 1)[0]
            if len(iface) == 0 or self.is_loopback(iface) or iface != primary:
                # test the next one
                if len(iface) != 0 and not self.disable_route_warning:
                    logger.info('interface [{0}] skipped'.format(iface))
                continue
            else:
                # use this one
                logger.info('interface [{0}] selected'.format(iface))
                break

        return iface.decode('latin-1'), socket.inet_ntoa(sock[i+20:i+24])
Example 20
    def _purge_agents(self):
        """
        Remove from disk all directories and .zip files of unknown agents
        (without removing the current, running agent).
        """
        path = os.path.join(conf.get_lib_dir(), "{0}-*".format(AGENT_NAME))

        known_versions = [agent.version for agent in self.agents]
        if not is_current_agent_installed() and CURRENT_VERSION not in known_versions:
            logger.warn(
                u"Running Agent {0} was not found in the agent manifest - adding to list",
                CURRENT_VERSION)
            known_versions.append(CURRENT_VERSION)

        for agent_path in glob.iglob(path):
            try:
                name = fileutil.trim_ext(agent_path, "zip")
                m = AGENT_DIR_PATTERN.match(name)
                if m is not None and FlexibleVersion(m.group(1)) not in known_versions:
                    if os.path.isfile(agent_path):
                        logger.info(u"Purging outdated Agent file {0}", agent_path)
                        os.remove(agent_path)
                    else:
                        logger.info(u"Purging outdated Agent directory {0}", agent_path)
                        shutil.rmtree(agent_path)
            except Exception as e:
                logger.warn(u"Purging {0} raised exception: {1}", agent_path, ustr(e))
        return
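AGENT_DIR_PATTERN is defined elsewhere; a pattern along the following lines is assumed here purely to show how the version is pulled out of a (made-up) agent path before the known_versions check:

import re

AGENT_NAME = "WALinuxAgent"                                        # assumed value
AGENT_DIR_PATTERN = re.compile(r'.*/{0}-(.*)'.format(AGENT_NAME))  # assumed shape

name = "/var/lib/waagent/WALinuxAgent-2.2.45"  # made-up path, ".zip" already trimmed
m = AGENT_DIR_PATTERN.match(name)
print(m.group(1))  # 2.2.45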
Example 21
    def init_sysinfo(self):
        osversion = "{0}:{1}-{2}-{3}:{4}".format(platform.system(),
                                                 DISTRO_NAME,
                                                 DISTRO_VERSION,
                                                 DISTRO_CODE_NAME,
                                                 platform.release())
        self.sysinfo.append(TelemetryEventParam("OSVersion", osversion))
        self.sysinfo.append(
            TelemetryEventParam("GAVersion", CURRENT_AGENT))

        try:
            ram = self.osutil.get_total_mem()
            processors = self.osutil.get_processor_cores()
            self.sysinfo.append(TelemetryEventParam("RAM", ram))
            self.sysinfo.append(TelemetryEventParam("Processors", processors))
        except OSUtilError as e:
            logger.warn("Failed to get system info: {0}", e)

        try:
            protocol = self.protocol_util.get_protocol()
            vminfo = protocol.get_vminfo()
            self.sysinfo.append(TelemetryEventParam("VMName",
                                                    vminfo.vmName))
            self.sysinfo.append(TelemetryEventParam("TenantName",
                                                    vminfo.tenantName))
            self.sysinfo.append(TelemetryEventParam("RoleName",
                                                    vminfo.roleName))
            self.sysinfo.append(TelemetryEventParam("RoleInstanceName",
                                                    vminfo.roleInstanceName))
            self.sysinfo.append(TelemetryEventParam("ContainerId",
                                                    vminfo.containerId))
        except ProtocolError as e:
            logger.warn("Failed to get system info: {0}", e)
Example 22
    def update_tracked(ext_handlers):
        """
        Track CGroups for all enabled extensions.
        Track CGroups for services created by enabled extensions.
        Stop tracking CGroups for not-enabled extensions.

        :param List(ExtHandler) ext_handlers:
        """
        if not CGroups.enabled():
            return

        not_enabled_extensions = set()
        for extension in ext_handlers:
            if extension.properties.state == u"enabled":
                CGroupsTelemetry.track_extension(extension.name)
            else:
                not_enabled_extensions.add(extension.name)

        names_now_tracked = set(CGroupsTelemetry._tracked.keys())
        if CGroupsTelemetry.tracked_names != names_now_tracked:
            now_tracking = " ".join("[{0}]".format(name) for name in sorted(names_now_tracked))
            if len(now_tracking):
                logger.info("After updating cgroup telemetry, tracking {0}".format(now_tracking))
            else:
                logger.warn("After updating cgroup telemetry, tracking no cgroups.")
            CGroupsTelemetry.tracked_names = names_now_tracked
Example 23
    def wireserver_route_exists(self):
        """
        Determine whether a route to the known wireserver
        ip already exists, and if so use that as the endpoint.
        This is true when running in a virtual network.
        :return: True if a route to KNOWN_WIRESERVER_IP exists.
        """
        route_exists = False
        logger.info("Test for route to {0}".format(KNOWN_WIRESERVER_IP))
        try:
            route_file = '/proc/net/route'
            if os.path.exists(route_file) and \
                    KNOWN_WIRESERVER_IP_ENTRY in open(route_file).read():
                # reset self.gateway and self.routes
                # we do not need to alter the routing table
                self.endpoint = KNOWN_WIRESERVER_IP
                self.gateway = None
                self.routes = None
                route_exists = True
                logger.info("Route to {0} exists".format(KNOWN_WIRESERVER_IP))
            else:
                logger.warn("No route exists to {0}".format(KNOWN_WIRESERVER_IP))
        except Exception as e:
            logger.error(
                "Could not determine whether route exists to {0}: {1}".format(
                    KNOWN_WIRESERVER_IP, e))

        return route_exists
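KNOWN_WIRESERVER_IP_ENTRY is presumably the destination format used by /proc/net/route, i.e. the 32-bit address rendered as little-endian hex. A small sketch of that conversion for the well-known wire server address 168.63.129.16:

import socket
import struct

def route_entry_for(ip):
    # /proc/net/route prints destinations as the 32-bit address in
    # little-endian hex, so 168.63.129.16 shows up as "10813FA8".
    packed = socket.inet_aton(ip)
    return "{0:08X}".format(struct.unpack("<I", packed)[0])

print(route_entry_for("168.63.129.16"))  # 10813FA8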
Example 24
    def save_event(self, data):
        if self.event_dir is None:
            logger.warn("Event reporter is not initialized.")
            return

        if not os.path.exists(self.event_dir):
            os.mkdir(self.event_dir)
            os.chmod(self.event_dir, 0o700)

        existing_events = os.listdir(self.event_dir)
        if len(existing_events) >= 1000:
            existing_events.sort()
            oldest_files = existing_events[:-999]
            logger.warn("Too many files under: {0}, removing oldest".format(self.event_dir))
            try:
                for f in oldest_files:
                    os.remove(os.path.join(self.event_dir, f))
            except IOError as e:
                raise EventError(e)

        filename = os.path.join(self.event_dir,
                                ustr(int(time.time() * 1000000)))
        try:
            with open(filename + ".tmp", 'wb+') as hfile:
                hfile.write(data.encode("utf-8"))
            os.rename(filename + ".tmp", filename + ".tld")
        except IOError as e:
            raise EventError("Failed to write events to file:{0}", e)
Example 25
    def _download(self):
        for uri in self.pkg.uris:
            if not HostPluginProtocol.is_default_channel() and self._fetch(uri.uri):
                break
            elif self.host is not None and self.host.ensure_initialized():
                if not HostPluginProtocol.is_default_channel():
                    logger.warn("Download unsuccessful, falling back to host plugin")
                else:
                    logger.verbose("Using host plugin as default channel")

                uri, headers = self.host.get_artifact_request(uri.uri, self.host.manifest_uri)
                if self._fetch(uri, headers=headers):
                    if not HostPluginProtocol.is_default_channel():
                        logger.verbose("Setting host plugin as default channel")
                        HostPluginProtocol.set_default_channel(True)
                    break
                else:
                    logger.warn("Host plugin download unsuccessful")
            else:
                logger.error("No download channels available")

        if not os.path.isfile(self.get_agent_pkg_path()):
            msg = u"Unable to download Agent {0} from any URI".format(self.name)
            add_event(
                AGENT_NAME,
                op=WALAEventOperation.Download,
                version=CURRENT_VERSION,
                is_success=False,
                message=msg)
            raise UpdateError(msg)
        return
Example 26
    def collect_and_send_events(self):
        event_list = TelemetryEventList()
        event_dir = os.path.join(conf.get_lib_dir(), "events")
        event_files = os.listdir(event_dir)
        for event_file in event_files:
            if not event_file.endswith(".tld"):
                continue
            event_file_path = os.path.join(event_dir, event_file)
            try:
                data_str = self.collect_event(event_file_path)
            except EventError as e:
                logger.error("{0}", e)
                continue

            try:
                event = parse_event(data_str)
                self.add_sysinfo(event)
                event_list.events.append(event)
            except (ValueError, ProtocolError) as e:
                logger.warn("Failed to decode event file: {0}", e)
                continue

        if len(event_list.events) == 0:
            return

        try:
            protocol = self.protocol_util.get_protocol()
            protocol.report_event(event_list)
        except ProtocolError as e:
            logger.error("{0}", e)
Example 27
    def _ensure_no_orphans(self, orphan_wait_interval=ORPHAN_WAIT_INTERVAL):
        previous_pid_file, pid_file = self._write_pid_file()
        if previous_pid_file is not None:
            try:
                pid = fileutil.read_file(previous_pid_file)
                wait_interval = orphan_wait_interval
                while self.osutil.check_pid_alive(pid):
                    wait_interval -= GOAL_STATE_INTERVAL
                    if wait_interval <= 0:
                        logger.warn(
                            u"{0} forcibly terminated orphan process {1}",
                            CURRENT_AGENT,
                            pid)
                        os.kill(int(pid), signal.SIGKILL)
                        break
                    
                    logger.info(
                        u"{0} waiting for orphan process {1} to terminate",
                        CURRENT_AGENT,
                        pid)
                    time.sleep(GOAL_STATE_INTERVAL)

            except Exception as e:
                logger.warn(
                    u"Exception occurred waiting for orphan agent to terminate: {0}",
                    ustr(e))
        return
Example 28
 def _load_error(self):
     try:
         self.error = GuestAgentError(self.get_agent_error_file())
         self.error.load()
         logger.verbose(u"Agent {0} error state: {1}", self.name, ustr(self.error))
     except Exception as e:
         logger.warn(u"Agent {0} failed loading error state: {1}", self.name, ustr(e))
Example 29
    def put_vm_status(self, status_blob, sas_url, config_blob_type=None):
        """
        Try to upload the VM status via the host plugin /status channel
        :param sas_url: the blob SAS url to pass to the host plugin
        :param config_blob_type: the blob type from the extension config
        :type status_blob: StatusBlob
        """
        if not self.ensure_initialized():
            raise ProtocolError("HostGAPlugin: HostGAPlugin is not available")

        if status_blob is None or status_blob.vm_status is None:
            raise ProtocolError("HostGAPlugin: Status blob was not provided")

        logger.verbose("HostGAPlugin: Posting VM status")
        try:
            blob_type = status_blob.type if status_blob.type else config_blob_type

            if blob_type == "BlockBlob":
                self._put_block_blob_status(sas_url, status_blob)
            else:
                self._put_page_blob_status(sas_url, status_blob)

            if not HostPluginProtocol.is_default_channel():
                logger.info("HostGAPlugin: Setting host plugin as default channel")
                HostPluginProtocol.set_default_channel(True)
        except Exception as e:
            message = "HostGAPlugin: Exception Put VM status: {0}".format(e)
            logger.error(message)
            from azurelinuxagent.common.event import WALAEventOperation, report_event
            report_event(op=WALAEventOperation.ReportStatus,
                         is_success=False,
                         message=message)
            logger.warn("HostGAPlugin: resetting default channel")
            HostPluginProtocol.set_default_channel(False)
Example 30
    def is_provisioned(self):
        '''
        A VM is considered provisioned *anytime* the provisioning
        sentinel file exists and not provisioned *anytime* the file
        is absent.

        If the VM was provisioned using an agent that did not record
        the VM unique identifier, the provisioning file will be re-written
        to include the identifier.

        A warning is logged *if* the VM unique identifier has changed
        since the VM was provisioned.
        '''
        if not os.path.isfile(self.provisioned_file_path()):
            return False

        s = fileutil.read_file(self.provisioned_file_path()).strip()
        if not self.osutil.is_current_instance_id(s):
            if len(s) > 0:
                logger.warn("VM is provisioned, "
                            "but the VM unique identifier has changed -- "
                            "clearing cached state")
                from azurelinuxagent.pa.deprovision \
                    import get_deprovision_handler
                deprovision_handler = get_deprovision_handler()
                deprovision_handler.run_changed_unique_id()

            self.write_provisioned()
            self.report_ready()

        return True
Example 31
    def add_to_extension_cgroup(name, pid=int(os.getpid())):
        """
        Create cgroup directories for this extension in each of the hierarchies and add this process to the new cgroup.
        Should only be called when creating sub-processes and invoked inside the fork/exec window. As a result,
        there's no point in returning the CGroups object itself; the goal is to move the child process into the
        cgroup before the new code even starts running.

        :param str name: Short name of extension, suitable for naming directories in the filesystem
        :param int pid: Process id of extension to be added to the cgroup
        """
        if not CGroups.enabled():
            return
        if name == AGENT_NAME:
            logger.warn('Extension cgroup name cannot match agent cgroup name ({0})'.format(AGENT_NAME))
            return

        try:
            logger.info("Move process {0} into cgroups for extension {1}".format(pid, name))
            CGroups.for_extension(name).add(pid)
        except Exception as ex:
            logger.warn("Unable to move process {0} into cgroups for extension {1}: {2}".format(pid, name, ex))
Example 32
    def _operation(self):

        if self._send_telemetry_events_handler.stopped():
            logger.warn(
                "{0} service is not running, skipping current iteration".
                format(self._send_telemetry_events_handler.get_thread_name()))
            return

        delete_all_event_files = True
        extension_handler_with_event_dirs = []

        try:
            extension_handler_with_event_dirs = self._get_extension_events_dir_with_handler_name(
                conf.get_ext_log_dir())

            if not extension_handler_with_event_dirs:
                logger.verbose("No extension events directories exist")
                return

            for extension_handler_with_event_dir in extension_handler_with_event_dirs:
                handler_name = extension_handler_with_event_dir[0]
                handler_event_dir_path = extension_handler_with_event_dir[1]
                self._capture_extension_events(handler_name,
                                               handler_event_dir_path)
        except ServiceStoppedError:
            # Since the service stopped, we should not delete the extension files and retry sending them whenever
            # the telemetry service comes back up
            delete_all_event_files = False
        except Exception as error:
            msg = "Unknown error occurred when trying to collect extension events: {0}".format(
                textutil.format_exception(error))
            add_event(op=WALAEventOperation.ExtensionTelemetryEventProcessing,
                      message=msg,
                      is_success=False)
        finally:
            # Always ensure that the event directories are deleted on each run, except when the telemetry
            # service is stopped, even if we run into an error and don't process them this run.
            if delete_all_event_files:
                self._ensure_all_events_directories_empty(
                    extension_handler_with_event_dirs)
Example 33
def http_request(method, url, data, headers=None, max_retry=3,
                 chk_proxy=False):
    """
    Send an HTTP request to the server.
    On error, sleep RETRY_WAITING_INTERVAL seconds and retry up to max_retry times.
    """
    logger.verbose("HTTP Req: {0} {1}", method, url)
    logger.verbose("    Data={0}", data)
    logger.verbose("    Header={0}", headers)
    host, port, secure, rel_uri = _parse_url(url)

    # Check proxy
    proxy_host, proxy_port = (None, None)
    if chk_proxy:
        proxy_host, proxy_port = get_http_proxy()

    # If the httplib module is not built with ssl support, fall back to http
    if secure and not hasattr(httpclient, "HTTPSConnection"):
        logger.warn("httplib is not built with ssl support")
        secure = False

    # If the httplib module doesn't support https tunnelling, fall back to http
    if secure and proxy_host is not None and proxy_port is not None \
            and not hasattr(httpclient.HTTPSConnection, "set_tunnel"):
        logger.warn("httplib does not support https tunnelling "
                    "(new in python 2.7)")
        secure = False

    for retry in range(0, max_retry):
        try:
            resp = _http_request(method, host, rel_uri, port=port, data=data,
                                 secure=secure, headers=headers,
                                 proxy_host=proxy_host, proxy_port=proxy_port)
            logger.verbose("HTTP Resp: Status={0}", resp.status)
            logger.verbose("    Header={0}", resp.getheaders())
            return resp
        except httpclient.HTTPException as e:
            logger.warn('HTTPException {0}, args:{1}', e, repr(e.args))
        except IOError as e:
            logger.warn('Socket IOError {0}, args:{1}', e, repr(e.args))

        if retry < max_retry - 1:
            logger.info("Retry={0}, {1} {2}", retry, method, url)
            time.sleep(RETRY_WAITING_INTERVAL)

    if url is not None and len(url) > 100:
        url_log = url[0: 100]  # In case the url is too long
    else:
        url_log = url
    raise HttpError("HTTP Err: {0} {1}".format(method, url_log))
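A hypothetical call, just to show the shape of the API; the URL and header below are illustrative and not taken from this module:

resp = http_request("GET",
                    "http://168.63.129.16/machine?comp=goalstate",
                    data=None,
                    headers={"x-ms-version": "2012-11-30"},
                    chk_proxy=False)
logger.verbose("Response status: {0}", resp.status)
body = resp.read()
# On persistent failure http_request raises HttpError instead of returning.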
Example 34
    def mount_dvd(self,
                  max_retry=6,
                  chk_err=True,
                  dvd_device=None,
                  mount_point=None,
                  sleep_time=5):
        if dvd_device is None:
            dvd_device = self.get_dvd_device()
        if mount_point is None:
            mount_point = conf.get_dvd_mount_point()
        mount_list = shellutil.run_get_output("mount")[1]
        existing = self.get_mount_point(mount_list, dvd_device)

        if existing is not None:
            # already mounted
            logger.info("{0} is already mounted at {1}", dvd_device, existing)
            return

        if not os.path.isdir(mount_point):
            os.makedirs(mount_point)

        err = ''
        for retry in range(1, max_retry):
            return_code, err = self.mount(dvd_device,
                                          mount_point,
                                          option="-o ro -t udf,iso9660",
                                          chk_err=False)
            if return_code == 0:
                logger.info("Successfully mounted dvd")
                return
            else:
                logger.warn(
                    "Mounting dvd failed [retry {0}/{1}, sleeping {2} sec]",
                    retry,
                    max_retry - 1,
                    sleep_time)
                if retry < max_retry:
                    time.sleep(sleep_time)
        if chk_err:
            raise OSUtilError("Failed to mount dvd device", inner=err)
Example 35
    def get_primary_interface(self):
        """
        Get the name of the primary interface, which is the one with the
        default route attached to it; if there are multiple default routes,
        the primary has the lowest Metric.
        :return: the interface which has the default route
        """
        # from linux/route.h
        RTF_GATEWAY = 0x02
        DEFAULT_DEST = "00000000"

        hdr_iface = "Iface"
        hdr_dest = "Destination"
        hdr_flags = "Flags"
        hdr_metric = "Metric"

        idx_iface = -1
        idx_dest = -1
        idx_flags = -1
        idx_metric = -1
        primary = None
        primary_metric = None

        if not self.disable_route_warning:
            logger.info("Examine /proc/net/route for primary interface")
        with open('/proc/net/route') as routing_table:
            idx = 0
            for header in filter(lambda h: len(h) > 0, routing_table.readline().strip(" \n").split("\t")):
                if header == hdr_iface:
                    idx_iface = idx
                elif header == hdr_dest:
                    idx_dest = idx
                elif header == hdr_flags:
                    idx_flags = idx
                elif header == hdr_metric:
                    idx_metric = idx
                idx = idx + 1
            for entry in routing_table.readlines():
                route = entry.strip(" \n").split("\t")
                if route[idx_dest] == DEFAULT_DEST and int(route[idx_flags]) & RTF_GATEWAY == RTF_GATEWAY:
                    metric = int(route[idx_metric])
                    iface = route[idx_iface]
                    if primary is None or metric < primary_metric:
                        primary = iface
                        primary_metric = metric

        if primary is None:
            primary = ''
            if not self.disable_route_warning:
                with open('/proc/net/route') as routing_table_fh:
                    routing_table_text = routing_table_fh.read()
                    logger.warn('Could not determine primary interface, '
                                'please ensure /proc/net/route is correct')
                    logger.warn('Contents of /proc/net/route:\n{0}'.format(routing_table_text))
                    logger.warn('Primary interface examination will retry silently')
                    self.disable_route_warning = True
        else:
            logger.info('Primary interface is [{0}]'.format(primary))
            self.disable_route_warning = False
        return primary
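For reference, a made-up /proc/net/route excerpt and the core of the default-route test above (destination "00000000" with the RTF_GATEWAY bit set); the handler resolves column positions from the header names, which this sketch hard-codes for brevity:

RTF_GATEWAY = 0x02
sample = (
    "Iface\tDestination\tGateway\tFlags\tRefCnt\tUse\tMetric\tMask\tMTU\tWindow\tIRTT\n"
    "eth0\t00000000\t0101A8C0\t0003\t0\t0\t100\t00000000\t0\t0\t0\n"
    "eth0\t0001A8C0\t00000000\t0001\t0\t0\t0\t00FFFFFF\t0\t0\t0\n"
)
for entry in sample.splitlines()[1:]:
    route = entry.split("\t")
    if route[1] == "00000000" and int(route[3]) & RTF_GATEWAY == RTF_GATEWAY:
        print("default route on", route[0], "metric", route[6])  # eth0, 100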
Example 36
    def provision_network_direct_rdma(self):
        RDMADeviceHandler.update_dat_conf(dapl_config_paths, self.ipv4_addr)

        if not conf.enable_check_rdma_driver():
            logger.info("RDMA: skip checking RDMA driver version")
            RDMADeviceHandler.update_network_interface(self.mac_addr, self.ipv4_addr)
            return

        skip_rdma_device = False
        module_name = "hv_network_direct"
        retcode, out = shellutil.run_get_output("modprobe -R %s" % module_name, chk_err=False)
        if retcode == 0:
            module_name = out.strip()
        else:
            logger.info("RDMA: failed to resolve module name. Use original name")
        retcode, out = shellutil.run_get_output("modprobe %s" % module_name)
        if retcode != 0:
            logger.error("RDMA: failed to load module %s" % module_name)
            return
        retcode, out = shellutil.run_get_output("modinfo %s" % module_name)
        if retcode == 0:
            version = re.search("version:\s+(\d+)\.(\d+)\.(\d+)\D", out, re.IGNORECASE)  # pylint: disable=W1401
            if version:
                v1 = int(version.groups(0)[0])
                v2 = int(version.groups(0)[1])
                if v1 > 4 or v1 == 4 and v2 > 0:
                    logger.info("Skip setting /dev/hvnd_rdma on 4.1 or later")
                    skip_rdma_device = True
            else:
                logger.info("RDMA: hv_network_direct driver version not present, assuming 4.0.x or older.")
        else:
            logger.warn("RDMA: failed to get module info on hv_network_direct.")

        if not skip_rdma_device:
            RDMADeviceHandler.wait_rdma_device(
                self.rdma_dev, self.device_check_timeout_sec, self.device_check_interval_sec)
            RDMADeviceHandler.write_rdma_config_to_device(
                self.rdma_dev, self.ipv4_addr, self.mac_addr)

        RDMADeviceHandler.update_network_interface(self.mac_addr, self.ipv4_addr)
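The version gate above is easiest to see with a concrete value; the modinfo output below is made up, but shows why a driver reporting 4.1 or later skips the /dev/hvnd_rdma setup:

import re

out = "version:        4.1.3\nlicense:        GPL\n"
version = re.search(r"version:\s+(\d+)\.(\d+)\.(\d+)\D", out, re.IGNORECASE)
v1, v2 = int(version.group(1)), int(version.group(2))
print(v1 > 4 or (v1 == 4 and v2 > 0))  # True -> skip_rdma_device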
Example 37
def migrate_handler_state():
    """
    Migrate handler state and status (if they exist) from an agent-owned directory into the
    handler-owned config directory

    Notes:
     - The v2.0.x branch wrote all handler-related state into the handler-owned config
       directory (e.g., /var/lib/waagent/Microsoft.Azure.Extensions.LinuxAsm-2.0.1/config).
     - The v2.1.x branch originally moved that state into an agent-owned handler
       state directory (e.g., /var/lib/waagent/handler_state).
     - This move can cause v2.1.x agents to invoke a handler's install command multiple times. It also
       makes clean-up more difficult since the agent must remove the state as well as the handler directory.
    """
    handler_state_path = os.path.join(conf.get_lib_dir(), "handler_state")
    if not os.path.isdir(handler_state_path):
        return

    for handler_path in glob.iglob(os.path.join(handler_state_path, "*")):
        handler = os.path.basename(handler_path)
        handler_config_path = os.path.join(conf.get_lib_dir(), handler,
                                           "config")
        if os.path.isdir(handler_config_path):
            for file in ("State", "Status"):
                from_path = os.path.join(handler_state_path, handler,
                                         file.lower())
                to_path = os.path.join(handler_config_path, "Handler" + file)
                if os.path.isfile(from_path) and not os.path.isfile(to_path):
                    try:
                        shutil.move(from_path, to_path)
                    except Exception as e:
                        logger.warn(
                            "Exception occurred migrating {0} {1} file: {2}",
                            handler, file, str(e))

    try:
        shutil.rmtree(handler_state_path)
    except Exception as e:
        logger.warn("Exception occurred removing {0}: {1}", handler_state_path,
                    str(e))
    return
Example 38
    def _download(self):
        for uri in self.pkg.uris:
            if not HostPluginProtocol.is_default_channel() and self._fetch(uri.uri):
                break

            elif self.host is not None and self.host.ensure_initialized():
                if not HostPluginProtocol.is_default_channel():
                    logger.warn("Download failed, switching to host plugin")
                else:
                    logger.verbose("Using host plugin as default channel")

                uri, headers = self.host.get_artifact_request(uri.uri, self.host.manifest_uri)
                try:
                    if self._fetch(uri, headers=headers, use_proxy=False):
                        if not HostPluginProtocol.is_default_channel():
                            logger.verbose("Setting host plugin as default channel")
                            HostPluginProtocol.set_default_channel(True)
                        break
                    else:
                        logger.warn("Host plugin download failed")

                # If the HostPlugin rejects the request, let the error
                # propagate, but switch to using the HostPlugin as the default channel
                except ResourceGoneError:
                    HostPluginProtocol.set_default_channel(True)
                    raise

            else:
                logger.error("No download channels available")

        if not os.path.isfile(self.get_agent_pkg_path()):
            msg = u"Unable to download Agent {0} from any URI".format(self.name)
            add_event(
                AGENT_NAME,
                op=WALAEventOperation.Download,
                version=CURRENT_VERSION,
                is_success=False,
                message=msg)
            raise UpdateError(msg)
Example 39
    def test_telemetry_logger_add_log_event(self, mock_lib_dir, *_):
        mock_lib_dir.return_value = self.lib_dir
        __event_logger__.event_dir = self.event_dir
        prefix = "YoloLogger"

        logger.add_logger_appender(logger.AppenderType.TELEMETRY, logger.LogLevel.WARNING, path=add_log_event)
        logger.set_prefix(prefix)

        logger.warn('Test Log - Warning')

        event_files = os.listdir(__event_logger__.event_dir)
        self.assertEqual(1, len(event_files))

        log_file_event = os.path.join(__event_logger__.event_dir, event_files[0])
        try:
            with open(log_file_event) as logfile:
                logcontent = logfile.read()
                # Checking the contents of the event file.
                self.assertIn("Test Log - Warning", logcontent)
        except Exception as e:
            self.assertFalse(True, "The log file looks like it isn't correctly setup for this test. Take a look. "
                                   "{0}".format(e))
Example 40
    def test_telemetry_logger_check_all_file_logs_written_when_events_gt_MAX_NUMBER_OF_EVENTS(self, mock_lib_dir, *_):
        mock_lib_dir.return_value = self.lib_dir
        __event_logger__.event_dir = self.event_dir
        no_of_log_statements = MAX_NUMBER_OF_EVENTS + 100
        exception_caught = False
        prefix = "YoloLogger"

        logger.add_logger_appender(logger.AppenderType.FILE, logger.LogLevel.INFO, path=self.log_file)
        logger.add_logger_appender(logger.AppenderType.TELEMETRY, logger.LogLevel.WARNING, path=add_log_event)
        logger.set_prefix(prefix)

        # Calling logger.warn no_of_log_statements times causes the telemetry appender to write
        # 1000 events into the events dir and then drop the remaining events; it should not raise a RuntimeError.
        try:
            for i in range(0, no_of_log_statements):
                logger.warn('Test Log - {0} - 1 - Warning'.format(i))
        except RuntimeError:
            exception_caught = True

        self.assertFalse(exception_caught, msg="Caught a Runtime Error. This should not have been raised.")
        self.assertEqual(MAX_NUMBER_OF_EVENTS, len(os.listdir(__event_logger__.event_dir)))

        try:
            with open(self.log_file) as logfile:
                logcontent = logfile.readlines()

                # Checking the last log entry.
                # Subtracting 1 as range is exclusive of the upper bound
                self.assertIn("WARNING {1} Test Log - {0} - 1 - Warning".format(no_of_log_statements - 1, prefix),
                              logcontent[-1])

                # Checking the 1001st log entry. We know the 1001st entry generates a PERIODIC message about too
                # many events, which should be captured in the log file as well.
                self.assertRegex(logcontent[1001], r"(.*WARNING\s*{0}\s*\[PERIODIC\]\s*Too many files under:.*{1}, "
                                                   r"current count\:\s*\d+,\s*removing oldest\s*.*)".format(prefix,
                                                                                                            self.event_dir))
        except Exception as e:
            self.assertFalse(True, "The log file looks like it isn't correctly setup for this test. "
                                   "Take a look. {0}".format(e))
Example no. 41
    def send_cgroup_telemetry(self):
        if self.last_cgroup_telemetry is None:
            self.last_cgroup_telemetry = datetime.datetime.utcnow()

        if datetime.datetime.utcnow() >= (self.last_cgroup_telemetry + MonitorHandler.CGROUP_TELEMETRY_PERIOD):
            try:
                for cgroup_name, metrics in CGroupsTelemetry.collect_all_tracked().items():
                    for metric_group, metric_name, value in metrics:
                        if value > 0:
                            report_metric(metric_group, metric_name, cgroup_name, value)
            except Exception as e:
                logger.warn("Monitor: failed to collect cgroups performance metrics: {0}", ustr(e))
                logger.verbose(traceback.format_exc())

            # Look for extension cgroups we're not already tracking and track them
            try:
                CGroupsTelemetry.update_tracked(self.protocol.client.get_current_handlers())
            except Exception as e:
                logger.warn("Monitor: failed to update cgroups tracked extensions: {0}", ustr(e))
                logger.verbose(traceback.format_exc())

            self.last_cgroup_telemetry = datetime.datetime.utcnow()
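A hedged sketch of the data shape the reporting loop above consumes: collect_all_tracked() is assumed to return a mapping from cgroup name to a list of (metric group, metric name, value) tuples, as the loop itself implies; the names and numbers below are illustrative only.

# Illustrative sample of the structure iterated by send_cgroup_telemetry;
# real values come from the agent's CGroupsTelemetry tracker.
sample_metrics = {
    "walinuxagent.service": [
        ("Process", "% Processor Time", 1.2),
        ("Memory", "Total Memory Usage", 104857600),
    ],
}

for cgroup_name, metrics in sample_metrics.items():
    for metric_group, metric_name, value in metrics:
        if value > 0:
            print(metric_group, metric_name, cgroup_name, value)  # stand-in for report_metric(...)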
Example no. 42
    def get_first_if(self):
        """Return the interface name, and ip addr of the management interface.

        We need to add a struct_size check here because, curiously, our 64bit
        platform is identified by python in Azure(Stack) as 32 bit and without
        adjusting the struct_size, we can't get the information we need.

        I believe this may be caused by only python i686 being shipped with
        BIG-IP instead of python x86_64??
        """
        iface = ''
        expected = 16  # how many devices should I expect...

        python_arc = platform.architecture()[0]
        if python_arc == '64bit':
            struct_size = 40  # for 64bit the size is 40 bytes
        else:
            struct_size = 32  # for 32bit the size is 32 bytes
        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM,
                             socket.IPPROTO_UDP)
        buff = array.array('B', b'\0' * (expected * struct_size))
        param = struct.pack('iL', expected * struct_size,
                            buff.buffer_info()[0])
        ret = fcntl.ioctl(sock.fileno(), 0x8912, param)
        retsize = (struct.unpack('iL', ret)[0])
        if retsize == (expected * struct_size):
            logger.warn(('SIOCGIFCONF returned more than {0} up '
                         'network interfaces.'), expected)

        sock = array_to_bytes(buff)
        for i in range(0, struct_size * expected, struct_size):
            iface = self._format_single_interface_name(sock, i)

            # Azure public was returning "lo:1" when deploying WAF
            if b'lo' in iface:
                continue
            else:
                break
        return iface.decode('latin-1'), socket.inet_ntoa(sock[i + 20:i + 24])  # pylint: disable=undefined-loop-variable
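The slicing above follows the Linux ifreq layout returned by SIOCGIFCONF: the first 16 bytes of each record hold the interface name, and for AF_INET the 4-byte IPv4 address starts at byte 20 of the record (after the 2-byte family and 2-byte port fields of the embedded sockaddr_in). A minimal standalone sketch of parsing one such record, using a fabricated buffer:

import socket
import struct

def parse_ifreq(buf, offset, struct_size):
    # Extract (name, ipv4) from one SIOCGIFCONF record starting at `offset`,
    # assuming the layout described above.
    record = buf[offset:offset + struct_size]
    name = record[:16].split(b'\0', 1)[0].decode('latin-1')
    ipv4 = socket.inet_ntoa(record[20:24])
    return name, ipv4

# Fabricated 40-byte record: "eth0" bound to 10.0.0.4.
fake_record = (b'eth0' + b'\0' * 12                     # 16-byte interface name field
               + struct.pack('=HH', socket.AF_INET, 0)  # sa_family, sin_port
               + socket.inet_aton('10.0.0.4')           # sin_addr at bytes 20-23
               + b'\0' * 16)                            # padding up to 40 bytes
print(parse_ifreq(fake_record, 0, 40))                  # ('eth0', '10.0.0.4')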
Example no. 43
        def _invoke_cgroup_operation(self,
                                     operation,
                                     error_message,
                                     on_error=None):
            """
            Ensures the given operation is invoked only if cgroups are enabled and traps any errors on the operation.
            """
            if not self.enabled():
                return None

            try:
                return operation()
            except Exception as exception:
                logger.warn("{0} Error: {1}".format(error_message,
                                                    ustr(exception)))
                if on_error is not None:
                    try:
                        on_error(exception)
                    except Exception as exception:
                        logger.warn(
                            "CGroupConfigurator._invoke_cgroup_operation: {0}".
                            format(ustr(exception)))
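A standalone illustration of the same guard-and-trap pattern (this is not the agent's class; the function, messages, and callbacks below are hypothetical): the risky call is passed as a zero-argument callable, errors are logged rather than propagated, and the optional cleanup callback is itself guarded.

def invoke_guarded(operation, error_message, enabled=True, on_error=None):
    # Mirror of the pattern above: skip when disabled, trap and log all errors.
    if not enabled:
        return None
    try:
        return operation()
    except Exception as exc:
        print("{0} Error: {1}".format(error_message, exc))
        if on_error is not None:
            try:
                on_error(exc)
            except Exception as inner:
                print("error handler failed: {0}".format(inner))

# Usage: wrap a call that may fail, with a cleanup step if it does.
invoke_guarded(lambda: open("/nonexistent/cgroup/path").read(),
               "Failed to read the cgroup file.",
               on_error=lambda exc: print("disabling cgroup tracking"))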
Example no. 44
    def _emit_changes_in_default_configuration():
        try:

            def log_event(msg):
                logger.info(msg)
                add_event(AGENT_NAME,
                          op=WALAEventOperation.ConfigurationChange,
                          message=msg)

            def log_if_int_changed_from_default(name, current):
                default = conf.get_int_default_value(name)
                if default != current:
                    log_event(
                        "{0} changed from its default: {1}. New value: {2}".
                        format(name, default, current))

            def log_if_op_disabled(name, value):
                if not value:
                    log_event(
                        "{0} is set to False, not processing the operation".
                        format(name))

            log_if_int_changed_from_default("Extensions.GoalStatePeriod",
                                            conf.get_goal_state_period())
            log_if_op_disabled("OS.EnableFirewall", conf.enable_firewall())
            log_if_op_disabled("Extensions.Enabled",
                               conf.get_extensions_enabled())

            if conf.enable_firewall():
                log_if_int_changed_from_default(
                    "OS.EnableFirewallPeriod",
                    conf.get_enable_firewall_period())

            if conf.get_lib_dir() != "/var/lib/waagent":
                log_event("lib dir is in an unexpected location: {0}".format(
                    conf.get_lib_dir()))

        except Exception as e:
            logger.warn("Failed to log changes in configuration: {0}", ustr(e))
Example no. 45
    def __setup_binary_file(self):
        binary_file_path = os.path.join(conf.get_lib_dir(),
                                        self.BINARY_FILE_NAME)
        try:
            fileutil.write_file(
                binary_file_path,
                self.__BINARY_CONTENTS.format(
                    egg_path=self._current_agent_executable_path,
                    wire_ip=self._dst_ip,
                    user_id=self._uid,
                    wait=self._wait,
                    py_path=sys.executable))
            logger.info(
                "Successfully updated the Binary file {0} for firewall setup".
                format(binary_file_path))
        except Exception:
            logger.warn(
                "Unable to set up the binary file; removing the service unit file {0} to ensure it is not run on system reboot"
                .format(self.get_service_file_path()))
            self.__remove_file_without_raising(binary_file_path)
            self.__remove_file_without_raising(self.get_service_file_path())
            raise
Example no. 46
    def save_event(self, data):
        if self.event_dir is None:
            logger.warn(
                "Cannot save event -- Event reporter is not initialized.")
            return

        try:
            fileutil.mkdir(self.event_dir, mode=0o700)
        except (IOError, OSError) as e:
            msg = "Failed to create events folder {0}. Error: {1}".format(
                self.event_dir, ustr(e))
            raise EventError(msg)

        try:
            existing_events = os.listdir(self.event_dir)
            if len(existing_events) >= MAX_NUMBER_OF_EVENTS:
                logger.periodic_warn(
                    logger.EVERY_MINUTE,
                    "[PERIODIC] Too many files under: {0}, current count:  {1}, "
                    "removing oldest event files".format(
                        self.event_dir, len(existing_events)))
                existing_events.sort()
                oldest_files = existing_events[:-999]
                for event_file in oldest_files:
                    os.remove(os.path.join(self.event_dir, event_file))
        except (IOError, OSError) as e:
            msg = "Failed to remove old events from events folder {0}. Error: {1}".format(
                self.event_dir, ustr(e))
            raise EventError(msg)

        filename = os.path.join(self.event_dir,
                                ustr(int(time.time() * 1000000)))
        try:
            with open(filename + ".tmp", 'wb+') as hfile:
                hfile.write(data.encode("utf-8"))
            os.rename(filename + ".tmp", filename + AGENT_EVENT_FILE_EXTENSION)
        except (IOError, OSError) as e:
            msg = "Failed to write events to file: {0}".format(e)
            raise EventError(msg)
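Two details above are easy to miss: the directory is trimmed to keep only the newest events once MAX_NUMBER_OF_EVENTS is reached, and each event is written to a ".tmp" file and then renamed, so a collector scanning the directory never sees a partially written event. A minimal standalone sketch of that write-then-rename step; the helper is hypothetical, and the ".tld" suffix is assumed to be AGENT_EVENT_FILE_EXTENSION, matching the filter used in the event-collection example further down.

import os
import time

def save_event_atomically(event_dir, data):
    # Write to a temporary file first; on POSIX the rename is atomic, so
    # readers of event_dir only ever observe complete event files.
    filename = os.path.join(event_dir, str(int(time.time() * 1000000)))
    with open(filename + ".tmp", "wb") as handle:
        handle.write(data.encode("utf-8"))
    os.rename(filename + ".tmp", filename + ".tld")
    return filename + ".tld"

# Usage (hypothetical directory):
# save_event_atomically("/var/lib/waagent/events", '{"eventId": 1}')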
Example no. 47
def add_event(name,
              op=WALAEventOperation.Unknown,
              is_success=True,
              duration=0,
              version=str(CURRENT_VERSION),
              message="",
              log_event=True,
              reporter=__event_logger__):
    if reporter.event_dir is None:
        logger.warn("Cannot add event -- Event reporter is not initialized.")
        _log_event(name, op, message, duration, is_success=is_success)
        return

    if should_emit_event(name, version, op, is_success):
        mark_event_status(name, version, op, is_success)
        reporter.add_event(name,
                           op=op,
                           is_success=is_success,
                           duration=duration,
                           version=str(version),
                           message=message,
                           log_event=log_event)
Example no. 48
    def _send_logs(self):
        msg = None
        success = False
        try:
            with open(COMPRESSED_ARCHIVE_PATH, "rb") as fh:
                archive_content = fh.read()
                self.protocol.upload_logs(archive_content)
                msg = "Successfully uploaded logs."
                logger.info(msg)

            success = True
        except Exception as e:
            msg = "Failed to upload logs. Error: {0}".format(ustr(e))
            logger.warn(msg)
        finally:
            add_event(
                name=AGENT_NAME,
                version=CURRENT_VERSION,
                op=WALAEventOperation.LogCollection,
                is_success=success,
                message=msg,
                log_event=False)
Example no. 49
    def image_origin(self):
        """
        An integer value describing the origin of the image.

          0 -> unknown
          1 -> custom - user created image
          2 -> endorsed - See https://docs.microsoft.com/en-us/azure/virtual-machines/linux/endorsed-distros
          3 -> platform - non-endorsed image that is available in the Azure Marketplace.
        """

        try:
            if self.publisher == "":
                return IMDS_IMAGE_ORIGIN_CUSTOM

            if ComputeInfo.__matcher.is_match(self.publisher, self.offer, self.sku, self.version):
                return IMDS_IMAGE_ORIGIN_ENDORSED
            else:
                return IMDS_IMAGE_ORIGIN_PLATFORM

        except Exception as e:
            logger.warn("Could not determine the image origin from IMDS: {0}", str(e))
            return IMDS_IMAGE_ORIGIN_UNKNOWN
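A sketch of the origin constants implied above; the names appear in the snippet and the numeric values are taken from the docstring's list (assumed to map one-to-one):

IMDS_IMAGE_ORIGIN_UNKNOWN = 0    # origin could not be determined
IMDS_IMAGE_ORIGIN_CUSTOM = 1     # user-created image (empty publisher)
IMDS_IMAGE_ORIGIN_ENDORSED = 2   # endorsed distribution
IMDS_IMAGE_ORIGIN_PLATFORM = 3   # marketplace image that is not endorsed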
Example no. 50
    def fetch_full_goal_state(self, wire_client):
        try:
            logger.info('Fetching goal state [incarnation {0}]', self.incarnation)

            xml_text = wire_client.fetch_config(self._hosting_env_uri, wire_client.get_header())
            self.hosting_env = HostingEnv(xml_text)

            xml_text = wire_client.fetch_config(self._shared_conf_uri, wire_client.get_header())
            self.shared_conf = SharedConfig(xml_text)

            if self._certs_uri is not None:
                xml_text = wire_client.fetch_config(self._certs_uri, wire_client.get_header_for_cert())
                self.certs = Certificates(xml_text)

            if self._remote_access_uri is not None:
                xml_text = wire_client.fetch_config(self._remote_access_uri, wire_client.get_header_for_cert())
                self.remote_access = RemoteAccess(xml_text)
        except Exception as exception:
            logger.warn("Fetching the goal state failed: {0}", ustr(exception))
            raise ProtocolError(msg="Error fetching goal state", inner=exception)
        finally:
            logger.info('Fetch goal state completed')
Example no. 51
def set_properties(name, obj, data):
    if isinstance(obj, DataContract):
        validate_param("Property '{0}'".format(name), data, dict)
        for prob_name, prob_val in data.items():
            prob_full_name = "{0}.{1}".format(name, prob_name)
            try:
                prob = getattr(obj, prob_name)
            except AttributeError:
                logger.warn("Unknown property: {0}", prob_full_name)
                continue
            prob = set_properties(prob_full_name, prob, prob_val)
            setattr(obj, prob_name, prob)
        return obj
    elif isinstance(obj, DataContractList):
        validate_param("List '{0}'".format(name), data, list)
        for item_data in data:
            item = obj.item_cls()
            item = set_properties(name, item, item_data)
            obj.append(item)
        return obj
    else:
        return data
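A hedged usage sketch: set_properties walks a parsed JSON dict and copies matching keys onto a DataContract instance, recursing into nested contracts and lists and logging (then skipping) unknown keys. The DataContract/DataContractList stand-ins, the logger and validate_param stubs, and the VMInfo/Disk classes below are only approximations so the example can run on its own; they are not the agent's real definitions.

class _Logger(object):                              # stub for the agent's logger
    @staticmethod
    def warn(fmt, *args):
        print("WARN: " + fmt.format(*args))

logger = _Logger()

def validate_param(name, val, expected_type):       # stub: the real one raises ProtocolError
    if not isinstance(val, expected_type):
        raise TypeError("{0} must be of type {1}".format(name, expected_type.__name__))

class DataContract(object):
    pass

class DataContractList(list):
    def __init__(self, item_cls):
        super(DataContractList, self).__init__()
        self.item_cls = item_cls

class Disk(DataContract):
    def __init__(self):
        self.lun = None

class VMInfo(DataContract):
    def __init__(self):
        self.vmName = None
        self.disks = DataContractList(Disk)

vm = set_properties("vminfo", VMInfo(),
                    {"vmName": "test-vm",
                     "disks": [{"lun": 0}],
                     "bogus": True})                 # unknown key -> warning, then skipped
print(vm.vmName, vm.disks[0].lun)                    # test-vm 0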
Example no. 52
def read_response_error(resp):
    result = ''
    if resp is not None:
        try:
            result = "[HTTP Failed] [{0}: {1}] {2}".format(
                resp.status, resp.reason, resp.read())

            # this result string is passed upstream to several methods
            # which do a raise HttpError() or a format() of some kind;
            # as a result it cannot have any unicode characters
            if PY_VERSION_MAJOR < 3:
                result = ustr(result, encoding='ascii', errors='ignore')
            else:
                result = result\
                    .encode(encoding='ascii', errors='ignore')\
                    .decode(encoding='ascii', errors='ignore')

            result = textutil.replace_non_ascii(result)

        except Exception:
            logger.warn(traceback.format_exc())
    return result
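A short illustration of the ASCII-sanitizing step on Python 3 (the PY_VERSION_MAJOR branch above); the status, reason, and body are fabricated:

body = "café <html>backend façade error</html>"
result = "[HTTP Failed] [{0}: {1}] {2}".format(502, "Bad Gateway", body)
result = result.encode(encoding="ascii", errors="ignore").decode(encoding="ascii", errors="ignore")
print(result)   # [HTTP Failed] [502: Bad Gateway] caf <html>backend faade error</html>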
Example no. 53
    def run(self):
        self.ext_handlers, etag = None, None
        try:
            self.protocol = self.protocol_util.get_protocol()
            self.ext_handlers, etag = self.protocol.get_ext_handlers()
        except Exception as e:
            msg = u"Exception retrieving extension handlers: {0}".format(
                ustr(e))
            logger.warn(msg)
            add_event(AGENT_NAME,
                      version=CURRENT_VERSION,
                      op=WALAEventOperation.ExtensionProcessing,
                      is_success=False,
                      message=msg)
            return

        try:
            msg = u"Handle extensions updates for incarnation {0}".format(etag)
            logger.verbose(msg)
            # Log status report success on new config
            self.log_report = True
            self.handle_ext_handlers(etag)
            self.last_etag = etag

            self.report_ext_handlers_status()
            self.cleanup_outdated_handlers()
        except RestartError:
            raise
        except Exception as e:
            msg = u"Exception processing extension handlers: {0}".format(
                ustr(e))
            logger.warn(msg)
            add_event(AGENT_NAME,
                      version=CURRENT_VERSION,
                      op=WALAEventOperation.ExtensionProcessing,
                      is_success=False,
                      message=msg)
            return
Example no. 54
    def initialize_vminfo_common_parameters(self, protocol):
        """
        Initializes the common parameters that come from the goal state and IMDS
        """
        # create an index of the event parameters for faster updates
        parameters = {}
        for p in self._common_parameters:
            parameters[p.name] = p

        try:
            vminfo = protocol.get_vminfo()
            parameters[CommonTelemetryEventSchema.
                       TenantName].value = vminfo.tenantName
            parameters[
                CommonTelemetryEventSchema.RoleName].value = vminfo.roleName
            parameters[CommonTelemetryEventSchema.
                       RoleInstanceName].value = vminfo.roleInstanceName
        except Exception as e:
            logger.warn(
                "Failed to get VM info from goal state; will be missing from telemetry: {0}",
                ustr(e))

        try:
            imds_client = get_imds_client(protocol.get_endpoint())
            imds_info = imds_client.get_compute()
            parameters[
                CommonTelemetryEventSchema.Location].value = imds_info.location
            parameters[CommonTelemetryEventSchema.
                       SubscriptionId].value = imds_info.subscriptionId
            parameters[CommonTelemetryEventSchema.
                       ResourceGroupName].value = imds_info.resourceGroupName
            parameters[CommonTelemetryEventSchema.VMId].value = imds_info.vmId
            parameters[CommonTelemetryEventSchema.ImageOrigin].value = int(
                imds_info.image_origin)
        except Exception as e:
            logger.warn(
                "Failed to get IMDS info; will be missing from telemetry: {0}",
                ustr(e))
Example no. 55
    def get_first_if(self):
        """
        Return the interface name, and ip addr of the
        first active non-loopback interface.
        """
        iface = ''
        expected = 16  # how many devices should I expect...

        # for 64bit the size is 40 bytes
        # for 32bit the size is 32 bytes
        python_arc = platform.architecture()[0]
        struct_size = 32 if python_arc == '32bit' else 40

        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM,
                             socket.IPPROTO_UDP)
        buff = array.array('B', b'\0' * (expected * struct_size))
        param = struct.pack('iL', expected * struct_size,
                            buff.buffer_info()[0])
        ret = fcntl.ioctl(sock.fileno(), 0x8912, param)
        retsize = (struct.unpack('iL', ret)[0])
        if retsize == (expected * struct_size):
            logger.warn(('SIOCGIFCONF returned more than {0} up '
                         'network interfaces.'), expected)
        # Note: array.tostring() was removed in Python 3.9; tobytes() is the equivalent there.
        sock = buff.tostring()
        primary = bytearray(self.get_primary_interface(), encoding='utf-8')
        for i in range(0, struct_size * expected, struct_size):
            iface = sock[i:i + 16].split(b'\0', 1)[0]
            if len(iface) == 0 or self.is_loopback(iface) or iface != primary:
                # test the next one
                if len(iface) != 0 and not self.disable_route_warning:
                    logger.info('Interface [{0}] skipped'.format(iface))
                continue
            else:
                # use this one
                logger.info('Interface [{0}] selected'.format(iface))
                break

        return iface.decode('latin-1'), socket.inet_ntoa(sock[i + 20:i + 24])
Example no. 56
        def start_extension_command(self, extension_name, command, shell, cwd,
                                    env, stdout, stderr):
            """
            Starts a command (install/enable/etc) for an extension and adds the command's PID to the extension's cgroup
            :param extension_name: The extension executing the command
            :param command: The command to invoke
            :param cwd: The working directory for the command
            :param env:  The environment to pass to the command's process
            :param stdout: File object to redirect stdout to
            :param stderr: File object to redirect stderr to
            """
            if not self.enabled():
                process = subprocess.Popen(command,
                                           shell=shell,
                                           cwd=cwd,
                                           env=env,
                                           stdout=stdout,
                                           stderr=stderr,
                                           preexec_fn=os.setsid)
            else:
                process, extension_cgroups = self._cgroups_api.start_extension_command(
                    extension_name,
                    command,
                    shell=shell,
                    cwd=cwd,
                    env=env,
                    stdout=stdout,
                    stderr=stderr)

                try:
                    for cgroup in extension_cgroups:
                        CGroupsTelemetry.track_cgroup(cgroup)
                except Exception as e:
                    logger.warn(
                        "Cannot add cgroup '{0}' to tracking list; resource usage will not be tracked. Error: {1}"
                        .format(cgroup.path, ustr(e)))

            return process
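The non-cgroup branch launches the command with preexec_fn=os.setsid, which gives the extension its own session and process group (a common way to keep its children signal-isolated from the parent). A standalone sketch of that effect on Linux; the command is arbitrary:

import os
import subprocess

# The child shell reports its own pid, process group and session id;
# because setsid ran in the child, pgid and sid equal the child's pid.
proc = subprocess.Popen("ps -o pid,pgid,sid -p $$",
                        shell=True,
                        preexec_fn=os.setsid,
                        stdout=subprocess.PIPE)
print(proc.communicate()[0].decode())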
Example no. 57
    def collect_and_send_events(self):
        if self.last_event_collection is None:
            self.last_event_collection = datetime.datetime.utcnow() - MonitorHandler.EVENT_COLLECTION_PERIOD

        if datetime.datetime.utcnow() >= (self.last_event_collection + MonitorHandler.EVENT_COLLECTION_PERIOD):
            try:
                event_list = TelemetryEventList()
                event_dir = os.path.join(conf.get_lib_dir(), "events")
                event_files = os.listdir(event_dir)
                for event_file in event_files:
                    if not event_file.endswith(".tld"):
                        continue
                    event_file_path = os.path.join(event_dir, event_file)
                    try:
                        data_str = self.collect_event(event_file_path)
                    except EventError as e:
                        logger.error("{0}", e)
                        continue

                    try:
                        event = parse_event(data_str)
                        self.add_sysinfo(event)
                        event_list.events.append(event)
                    except (ValueError, ProtocolError) as e:
                        logger.warn("Failed to decode event file: {0}", e)
                        continue

                if len(event_list.events) == 0:
                    return

                try:
                    self.protocol.report_event(event_list)
                except ProtocolError as e:
                    logger.error("{0}", e)
            except Exception as e:
                logger.warn("Failed to send events: {0}", e)

            self.last_event_collection = datetime.datetime.utcnow()
Example no. 58
    def _write_pid_file(self):
        pid_files = self._get_pid_files()

        pid_dir, pid_name, pid_re = self._get_pid_parts()

        previous_pid_file = None if len(pid_files) <= 0 else pid_files[-1]
        pid_index = -1 \
            if previous_pid_file is None \
            else int(pid_re.match(os.path.basename(previous_pid_file)).group(1))
        pid_file = os.path.join(pid_dir,
                                "{0}_{1}".format(pid_index + 1, pid_name))

        try:
            fileutil.write_file(pid_file, ustr(os.getpid()))
            logger.info(u"{0} running as process {1}", CURRENT_AGENT,
                        ustr(os.getpid()))
        except Exception as e:
            logger.warn(
                u"Exception writing goal state agent {0} pid to {1}: {2}",
                CURRENT_AGENT, pid_file, ustr(e))
            pid_file = None

        return pid_files, pid_file
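The index pulled out with pid_re.group(1) implies pid files are named "<index>_<name>", so each run writes the next number in the sequence. A standalone sketch of that naming scheme; the directory, base name, and regex below merely stand in for whatever _get_pid_parts() actually returns:

import os
import re

pid_dir = "/var/run"                        # hypothetical
pid_name = "waagent.pid"                    # hypothetical
pid_re = re.compile(r"(\d+)_waagent\.pid")  # hypothetical

def next_pid_file(existing_files):
    # Mirrors the logic above: start at -1 when no previous file exists,
    # otherwise continue from the last recorded index.
    previous = None if len(existing_files) <= 0 else existing_files[-1]
    index = -1 if previous is None else int(pid_re.match(os.path.basename(previous)).group(1))
    return os.path.join(pid_dir, "{0}_{1}".format(index + 1, pid_name))

print(next_pid_file([]))                          # /var/run/0_waagent.pid
print(next_pid_file(["/var/run/0_waagent.pid"]))  # /var/run/1_waagent.pid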
Example no. 59
def add_event(name,
              op="",
              is_success=True,
              duration=0,
              version=CURRENT_VERSION,
              message="",
              evt_type="",
              is_internal=False,
              reporter=__event_logger__):
    log = logger.info if is_success else logger.error
    log("Event: name={0}, op={1}, message={2}", name, op, message)

    if reporter.event_dir is None:
        logger.warn("Event reporter is not initialized.")
        return
    reporter.add_event(name,
                       op=op,
                       is_success=is_success,
                       duration=duration,
                       version=str(version),
                       message=message,
                       evt_type=evt_type,
                       is_internal=is_internal)
Example no. 60
    def run(self, child_args=None):
        logger.info("{0} Version:{1}", AGENT_LONG_NAME, AGENT_VERSION)
        logger.info("OS: {0} {1}", DISTRO_NAME, DISTRO_VERSION)
        logger.info("Python: {0}.{1}.{2}", PY_VERSION_MAJOR, PY_VERSION_MINOR,
                    PY_VERSION_MICRO)

        self.check_pid()

        # If FIPS is enabled, set the OpenSSL environment variable
        # Note:
        # -- Subprocesses inherit the current environment
        if conf.get_fips_enabled():
            os.environ[OPENSSL_FIPS_ENVIRONMENT] = '1'

        while self.running:
            try:
                self.daemon(child_args)
            except Exception as e:
                err_msg = traceback.format_exc()
                add_event(name=AGENT_NAME, is_success=False, message=ustr(err_msg),
                          op=WALAEventOperation.UnhandledError)
                logger.warn("Daemon ended with exception -- Sleep 15 seconds and restart daemon")
                time.sleep(15)