Example #1
def run_as_another_namespace(pid, namespaces, function, *args, **kwargs):
    hack_to_pre_load_modules()

    context = ProcessContext(pid, namespaces)
    context.attach()
    queue = multiprocessing.Queue(2**15)

    try:
        child_process = multiprocessing.Process(name='crawler-%s' % pid,
                                                target=function_wrapper,
                                                args=(queue, function, args),
                                                kwargs=kwargs)
        child_process.start()
    except OSError:
        queue.close()
        raise CrawlError()

    # Initialize both so a timeout cannot leave 'result' unbound below.
    result, child_exception = None, None
    try:
        (result, child_exception) = queue.get(timeout=IN_CONTAINER_TIMEOUT)
    except Queue.Empty:
        child_exception = CrawlTimeoutError()
    except Exception:
        result = None

    if child_exception:
        result = None

    child_process.join(IN_CONTAINER_TIMEOUT)

    # The join may have timed out and the process might still be alive.

    if child_process.is_alive():
        errmsg = ('Timed out waiting for process %d to exit.' %
                  child_process.pid)
        queue.close()
        os.kill(child_process.pid, 9)
        context.detach()
        logger.error(errmsg)
        raise CrawlTimeoutError(errmsg)

    context.detach()

    if result is None:
        if child_exception:
            raise child_exception
        raise CrawlError('Unknown crawl error.')
    return result
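A minimal usage sketch (the pid, namespace list, and crawl function below are hypothetical, not part of the snippet above): the call runs the function inside the target process's namespaces and either returns its result or raises CrawlError/CrawlTimeoutError.

def list_interfaces():
    # Runs inside the target's namespaces (e.g. its net namespace).
    import psutil
    return sorted(psutil.net_io_counters(pernic=True).keys())

try:
    ifaces = run_as_another_namespace(4242, ['net', 'pid', 'mnt'],
                                      list_interfaces)
except (CrawlError, CrawlTimeoutError):
    ifaces = []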
Example #2
    def _crawl_load(self):

        assert(self.crawl_mode is not Modes.OUTCONTAINER)

        logger.debug('Crawling system load')
        feature_key = 'load'

        try:
            # A single getloadavg() call returns all three load averages.
            (shortterm, midterm, longterm) = os.getloadavg()
        except Exception:
            shortterm = midterm = longterm = 'unknown'

        feature_attributes = LoadFeature(shortterm, midterm, longterm)

        try:
            yield (feature_key, feature_attributes)
        except Exception as e:
            logger.error('Error crawling load', exc_info=True)
            raise CrawlError(e)
Example #3
    def _crawl_packages(self, dbpath=None, root_dir='/'):

        # package attributes: ["installed", "name", "size", "version"]

        logger.debug('Crawling Packages')

        if self.crawl_mode in (Modes.INVM, Modes.MOUNTPOINT):
            reload_needed = False
        elif self.crawl_mode == Modes.OUTCONTAINER:
            reload_needed = True
        else:
            raise NotImplementedError('Unsupported crawl mode')

        installed_since = self.feature_epoch

        pkg_manager = self._get_package_manager(root_dir)

        try:
            if pkg_manager == 'dpkg':
                dbpath = dbpath or 'var/lib/dpkg'
                for (key,
                     feature) in get_dpkg_packages(root_dir, dbpath,
                                                   installed_since):
                    yield (key, feature)
            elif pkg_manager == 'rpm':
                dbpath = dbpath or 'var/lib/rpm'
                for (key, feature) in get_rpm_packages(root_dir, dbpath,
                                                       installed_since,
                                                       reload_needed):
                    yield (key, feature)
            else:
                logger.warning('Unsupported package manager for Linux distro')
        except Exception as e:
            logger.error('Error crawling packages', exc_info=True)
            raise CrawlError(e)
Example #4
    def _crawl_metrics(self):

        assert(self.crawl_mode is not Modes.OUTCONTAINER)

        created_since = 0
        logger.debug('Crawling Metrics')
        for p in psutil.process_iter():
            create_time = (
                p.create_time() if hasattr(
                    p.create_time,
                    '__call__') else p.create_time)
            if create_time <= created_since:
                continue
            # Get the pid first so the error handler below can name it
            # even if a later accessor fails.
            pid = (p.pid() if hasattr(p.pid, '__call__') else p.pid)
            try:
                # The hasattr check bridges old psutil (callable get_*
                # accessors) and newer psutil (plain methods/attributes).
                name = (p.name() if hasattr(p.name, '__call__') else p.name)
                status = (p.status() if hasattr(p.status, '__call__'
                                                ) else p.status)
                if status == psutil.STATUS_ZOMBIE:
                    continue
                username = (
                    p.username() if hasattr(
                        p.username,
                        '__call__') else p.username)
                meminfo = (
                    p.get_memory_info() if hasattr(
                        p.get_memory_info,
                        '__call__') else p.memory_info)
                ioinfo = (
                    p.get_io_counters() if hasattr(
                        p.get_io_counters,
                        '__call__') else p.io_counters)
                cpu_percent = (
                    p.get_cpu_percent(
                        interval=0) if hasattr(
                        p.get_cpu_percent,
                        '__call__') else p.cpu_percent)
                memory_percent = (
                    p.get_memory_percent() if hasattr(
                        p.get_memory_percent,
                        '__call__') else p.memory_percent)

                feature_key = '{0}/{1}'.format(name, pid)
                yield (feature_key, MetricFeature(
                    round(cpu_percent, 2),
                    round(memory_percent, 2),
                    name,
                    pid,
                    ioinfo.read_bytes,
                    meminfo.rss,
                    str(status),
                    username,
                    meminfo.vms,
                    ioinfo.write_bytes,
                ))
            except Exception as e:
                logger.error('Error crawling metric for process %s'
                             % pid, exc_info=True)
                raise CrawlError(e)
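The "x() if hasattr(x, '__call__') else x" pattern repeated above bridges old psutil releases (callable get_* accessors) and newer ones (plain attributes or renamed methods). A small standalone helper, offered here only as a sketch of that idiom, not as part of the crawler:

def call_or_attr(obj, default='unknown'):
    # Return obj() when obj is callable, obj itself otherwise;
    # fall back to 'default' if the access fails for any reason.
    try:
        return obj() if hasattr(obj, '__call__') else obj
    except Exception:
        return default

# e.g. status = call_or_attr(p.status)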
Example #5
def crawl_packages(dbpath=None,
                   root_dir='/',
                   installed_since=0,
                   reload_needed=True):

    # package attributes: ["installed", "name", "size", "version"]

    logger.debug('Crawling Packages')

    pkg_manager = _get_package_manager(root_dir)

    try:
        if pkg_manager == 'dpkg':
            dbpath = dbpath or 'var/lib/dpkg'
            for (key, feature) in get_dpkg_packages(root_dir, dbpath,
                                                    installed_since):
                yield (key, feature, 'package')
        elif pkg_manager == 'rpm':
            dbpath = dbpath or 'var/lib/rpm'
            for (key, feature) in get_rpm_packages(root_dir, dbpath,
                                                   installed_since,
                                                   reload_needed):
                yield (key, feature, 'package')
        else:
            logger.warning('Unsupported package manager for Linux distro')
    except Exception as e:
        logger.error('Error crawling packages', exc_info=True)
        raise CrawlError(e)
Example #6
    def _crawl_files(
        self,
        root_dir='/',
        exclude_dirs=['proc', 'mnt', 'dev', 'tmp'],
        root_dir_alias=None,
    ):

        root_dir = str(root_dir)
        assert(self.crawl_mode is not Modes.OUTCONTAINER)

        accessed_since = self.feature_epoch
        saved_args = locals()
        logger.debug('crawl_files: %s' % (saved_args))
        if self.crawl_mode in [Modes.INVM, Modes.MOUNTPOINT]:
            try:
                assert os.path.isdir(root_dir)
                if root_dir_alias is None:
                    root_dir_alias = root_dir
                exclude_dirs = [os.path.join(root_dir, d) for d in
                                exclude_dirs]
                exclude_regex = r'|'.join([fnmatch.translate(d)
                                           for d in exclude_dirs]) or r'$.'

                # walk the directory hierarchy starting at 'root_dir' in BFS
                # order

                feature = self._crawl_file(root_dir, root_dir,
                                           root_dir_alias)
                if feature and (feature.ctime > accessed_since or
                                feature.atime > accessed_since):
                    yield (feature.path, feature)
                for (root_dirpath, dirs, files) in os.walk(root_dir):
                    dirs[:] = [os.path.join(root_dirpath, d) for d in
                               dirs]
                    dirs[:] = [d for d in dirs
                               if not re.match(exclude_regex, d)]
                    files = [os.path.join(root_dirpath, f) for f in
                             files]
                    files = [f for f in files
                             if not re.match(exclude_regex, f)]
                    for fpath in files:
                        feature = self._crawl_file(root_dir, fpath,
                                                   root_dir_alias)
                        if feature and (feature.ctime > accessed_since or
                                        feature.atime > accessed_since):
                            yield (feature.path, feature)
                    for fpath in dirs:
                        feature = self._crawl_file(root_dir, fpath,
                                                   root_dir_alias)
                        if feature and (feature.ctime > accessed_since or
                                        feature.atime > accessed_since):
                            yield (feature.path, feature)
            except Exception as e:
                logger.error('Error crawling root_dir %s' % root_dir,
                             exc_info=True)
                raise CrawlError(e)
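The exclude_regex built above converts each excluded directory into a shell-style pattern with fnmatch.translate and joins them into one alternation; the r'$.' fallback is a regex that can never match, so an empty exclude list excludes nothing. A standalone sketch of that construction:

import fnmatch
import re

exclude_dirs = ['/proc', '/mnt']
exclude_regex = r'|'.join(fnmatch.translate(d)
                          for d in exclude_dirs) or r'$.'

assert re.match(exclude_regex, '/proc')      # pruned from the walk
assert not re.match(exclude_regex, '/etc')   # crawled normally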
Example #7
    def crawl_dockerinspect(self):
        logger.debug('Crawling docker inspect')

        long_id = self.container.long_id
        try:
            inspect = dockerutils.exec_dockerinspect(long_id)
            yield (long_id, inspect)
        except Exception as e:
            logger.error('Error crawling docker inspect', exc_info=True)
            raise CrawlError(e)
Example #8
    def crawl_dockerhistory(self):
        logger.debug('Crawling docker history')

        long_id = self.container.long_id
        try:
            history = dockerutils.exec_docker_history(long_id)
            image_id = history[0]['Id']
            yield (image_id, {'history': history})
        except Exception as e:
            logger.error('Error crawling docker history', exc_info=True)
            raise CrawlError(e)
Example #9
    def _crawl_interface_counters(self):

        logger.debug('Crawling interface information')

        # Take a single snapshot instead of re-querying psutil for
        # every interface.
        for (ifname, interface) in \
                psutil.net_io_counters(pernic=True).items():

            try:
                bytes_sent = interface.bytes_sent
            except Exception as e:
                bytes_sent = 'unknown'
            try:
                bytes_recv = interface.bytes_recv
            except Exception as e:
                bytes_recv = 'unknown'

            try:
                packets_sent = interface.packets_sent
            except Exception as e:
                packets_sent = 'unknown'
            try:
                packets_recv = interface.packets_recv
            except Exception as e:
                packets_recv = 'unknown'

            try:
                errout = interface.errout
            except Exception as e:
                errout = 'unknown'
            try:
                errin = interface.errin
            except Exception as e:
                errin = 'unknown'

            curr_count = [
                bytes_sent,
                bytes_recv,
                packets_sent,
                packets_recv,
                errout,
                errin,
            ]

            try:
                yield (ifname, curr_count)
            except Exception as e:
                logger.error('Error crawling interface information',
                             exc_info=True)
                raise CrawlError(e)
Example #10
    def crawl_cpu(self, per_cpu=False):

        logger.debug('Crawling cpu information')

        if self.crawl_mode == Modes.INVM:
            for key, feature in self._crawl_cpu_invm():
                yield (key, feature)
        elif self.crawl_mode == Modes.OUTCONTAINER:
            try:
                for key, feature in self._crawl_cpu_outcontainer(per_cpu):
                    yield (key, feature)
            except Exception as e:
                logger.error('Error crawling cpu information', exc_info=True)
                raise CrawlError(e)
        else:
            raise NotImplementedError('Unsupported crawl mode')
Example #11
    def _crawl_file(
        self,
        root_dir,
        fpath,
        root_dir_alias,
    ):

        try:
            lstat = os.lstat(fpath)
            fmode = lstat.st_mode
            fperm = self._fileperm(fmode)
            ftype = self._filetype(fpath, fperm)
            flinksto = None
            if ftype == 'link':
                try:

                    # This has to be an absolute path, not a root-relative path

                    flinksto = os.readlink(fpath)
                except Exception:
                    logger.error('Error reading linksto info for file %s'
                                 % fpath, exc_info=True)
            fgroup = lstat.st_gid
            fuser = lstat.st_uid

            # root_dir relative path

            frelpath = fpath.replace(root_dir, root_dir_alias, 1)
            (_, fname) = os.path.split(frelpath)
            return FileFeature(
                lstat.st_atime,
                lstat.st_ctime,
                fgroup,
                flinksto,
                fmode,
                lstat.st_mtime,
                fname,
                frelpath,
                lstat.st_size,
                ftype,
                fuser,
            )
        except Exception as e:

            logger.error('Error crawling file %s' % fpath,
                         exc_info=True)
            raise CrawlError(e)
Example #12
    def crawl_dockerps(self):
        assert(self.crawl_mode == Modes.INVM)
        logger.debug('Crawling docker ps results')

        try:
            for inspect in dockerutils.exec_dockerps():
                yield (inspect['Id'], DockerPSFeature._make([
                    inspect['State']['Running'],
                    0,
                    inspect['Image'],
                    [],
                    inspect['Config']['Cmd'],
                    inspect['Name'],
                    inspect['Id'],
                ]))
        except Exception as e:
            logger.error('Error crawling docker ps', exc_info=True)
            raise CrawlError(e)
Example #13
    def _crawl_disk_partitions(self):

        assert(self.crawl_mode is not Modes.OUTCONTAINER)

        logger.debug('Crawling Disk partitions')
        for partition in psutil.disk_partitions():
            try:
                pdiskusage = psutil.disk_usage(partition.mountpoint)
                yield (partition.mountpoint, DiskFeature(
                    partition.device,
                    100.0 - pdiskusage.percent,
                    partition.fstype,
                    partition.mountpoint,
                    partition.opts,
                    pdiskusage.total,
                ))
            except Exception as e:
                logger.error('Error crawling disk partition %s'
                             % partition.mountpoint, exc_info=True)
                raise CrawlError(e)
Example #14
    def crawl_memory(self):

        # memory attributes: ["used", "buffered", "cached", "free"]

        logger.debug('Crawling memory')
        feature_key = 'memory'

        if self.crawl_mode == Modes.INVM:

            vm = psutil.virtual_memory()

            if (vm.free + vm.used) > 0:
                util_percentage = float(vm.used) / (vm.free + vm.used) * 100.0
            else:
                util_percentage = 'unknown'

            feature_attributes = MemoryFeature(vm.used, vm.buffers, vm.cached,
                                               vm.free, util_percentage)
        elif self.crawl_mode == Modes.OUTVM:
            if psvmi is None:
                raise NotImplementedError()
            else:
                sysmem = psvmi.system_memory_info(self.get_vm_context())
                feature_attributes = MemoryFeature(
                    sysmem.memory_used, sysmem.memory_buffered,
                    sysmem.memory_cached, sysmem.memory_free,
                    (sysmem.memory_used * 100 /
                     (sysmem.memory_used + sysmem.memory_free)))

        elif self.crawl_mode == Modes.OUTCONTAINER:

            try:
                feature_attributes = self._crawl_memory_outcontainer()
            except Exception as e:

                logger.error('Error crawling memory', exc_info=True)
                raise CrawlError(e)
        else:
            raise NotImplementedError('Unsupported crawl mode')

        yield (feature_key, feature_attributes)
Example #15
def run_as_another_process(function, _args=(), _kwargs={}):
    try:
        queue = multiprocessing.Queue(2**15)
    except OSError:
        # try again with a smaller queue
        queue = multiprocessing.Queue(2**14)

    child_process = multiprocessing.Process(target=_function_wrapper,
                                            args=(queue, function),
                                            kwargs={
                                                '_args': _args,
                                                '_kwargs': _kwargs
                                            })
    child_process.start()

    child_exception, result = None, None
    try:
        (result, child_exception) = queue.get(timeout=IN_PROCESS_TIMEOUT)
    except Queue.Empty:
        child_exception = CrawlTimeoutError()
    except Exception as exc:
        logger.warn(exc)

    child_process.join(IN_PROCESS_TIMEOUT)

    # The join may have timed out and the process might still be alive.

    if child_process.is_alive():
        errmsg = ('Timed out waiting for process %d to exit.' %
                  child_process.pid)
        queue.close()
        os.kill(child_process.pid, 9)
        logger.error(errmsg)
        raise CrawlTimeoutError(errmsg)

    if result is None:
        if child_exception:
            raise child_exception
        raise CrawlError('Unknown crawl error.')
    return result
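A usage sketch, assuming crawl_packages from Example #5 (any picklable, argument-free callable works the same way): the child's return value comes back through the queue, and an exception raised in the child is re-raised in the parent.

def crawl_all_packages():
    # Hypothetical wrapper; materialize the generator so the result
    # can be sent back through the multiprocessing queue.
    return list(crawl_packages(root_dir='/'))

try:
    packages = run_as_another_process(crawl_all_packages)
except CrawlTimeoutError:
    packages = []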
Example #16
    def crawl_dockerps(self):
        assert(self.crawl_mode == Modes.INVM)
        logger.debug('Crawling docker ps results')

        try:
            for inspect in dockerutils.exec_dockerps():
                long_id = inspect['Id']
                state = inspect['State']
                running = state['Running']
                image = inspect['Image']
                names = inspect['Name']
                cmd = inspect['Config']['Cmd']
                yield (long_id, DockerPSFeature._make([
                    running,
                    0,
                    image,
                    [],
                    cmd,
                    names,
                    long_id,
                ]))
        except Exception as e:
            logger.error('Error crawling docker ps', exc_info=True)
            raise CrawlError(e)
Example #17
    def crawl_cpu(self, per_cpu=False):

        logger.debug('Crawling cpu information')

        if self.crawl_mode not in [
                Modes.INVM,
                Modes.OUTCONTAINER,
                Modes.OUTVM]:
            logger.error('Unsupported crawl mode: ' + self.crawl_mode +
                         '. Returning unknown cpu key and attributes.'
                         )
            feature_attributes = CpuFeature(
                'unknown',
                'unknown',
                'unknown',
                'unknown',
                'unknown',
                'unknown',
                'unknown',
                'unknown',
            )
            # Yield the placeholder and stop; the original built this
            # value but never yielded it ('cpu-unknown' is a key chosen
            # here, not one defined elsewhere in the crawler).
            yield ('cpu-unknown', feature_attributes)
            return

        host_cpu_feature = {}
        if self.crawl_mode in [Modes.INVM, Modes.OUTCONTAINER]:
            for (index, cpu) in \
                    enumerate(psutil.cpu_times_percent(percpu=True)):

                try:
                    idle = cpu.idle
                except Exception as e:
                    idle = 'unknown'
                try:
                    nice = cpu.nice
                except Exception as e:
                    nice = 'unknown'
                try:
                    user = cpu.user
                except Exception as e:
                    user = 'unknown'
                try:
                    wait = cpu.iowait
                except Exception as e:
                    wait = 'unknown'
                try:
                    system = cpu.system
                except Exception as e:
                    system = 'unknown'
                try:
                    interrupt = cpu.irq
                except Exception as e:
                    interrupt = 'unknown'
                try:
                    steal = cpu.steal
                except Exception as e:
                    steal = 'unknown'

                used = 100 - int(idle)

                feature_key = '{0}-{1}'.format('cpu', index)
                feature_attributes = CpuFeature(
                    idle,
                    nice,
                    user,
                    wait,
                    system,
                    interrupt,
                    steal,
                    used,
                )
                host_cpu_feature[index] = feature_attributes
                if self.crawl_mode == Modes.INVM:
                    try:
                        yield (feature_key, feature_attributes)
                    except Exception as e:
                        logger.error('Error crawling cpu information',
                                     exc_info=True)
                        raise CrawlError(e)

        if self.crawl_mode == Modes.OUTCONTAINER:

            if per_cpu:
                stat_file_name = 'cpuacct.usage_percpu'
            else:
                stat_file_name = 'cpuacct.usage'

            container = self.container

            try:
                (cpu_usage_t1, prev_time) = \
                    self._get_prev_container_cpu_times(container.long_id)

                if cpu_usage_t1:
                    logger.debug('Using previous cpu times for container %s'
                                 % container.long_id)
                    interval = time.time() - prev_time

                if not cpu_usage_t1 or interval == 0:
                    logger.debug(
                        'There are no previous cpu times for container %s '
                        'so we will be sleeping for 100 milliseconds' %
                        container.long_id)

                    with open(container.get_cpu_cgroup_path(stat_file_name),
                              'r') as f:
                        cpu_usage_t1 = f.readline().strip().split(' ')
                    interval = 0.1  # sleep for 100ms
                    time.sleep(interval)

                with open(container.get_cpu_cgroup_path(stat_file_name),
                          'r') as f:
                    cpu_usage_t2 = f.readline().strip().split(' ')

                # Store the cpu times for the next crawl

                self._save_container_cpu_times(container.long_id,
                                               cpu_usage_t2)
            except Exception as e:
                logger.error('Error crawling cpu information',
                             exc_info=True)
                raise CrawlError(e)

            cpu_user_system = {}
            try:
                path = container.get_cpu_cgroup_path('cpuacct.stat')
                with open(path, 'r') as f:
                    for line in f:
                        m = re.search(r"(system|user)\s+(\d+)", line)
                        if m:
                            cpu_user_system[m.group(1)] = \
                                float(m.group(2))
            except Exception as e:
                logger.error('Error crawling cpu information',
                             exc_info=True)
                raise CrawlError(e)

            for (index, cpu_usage_ns) in enumerate(cpu_usage_t1):
                usage_secs = (float(cpu_usage_t2[index]) -
                              float(cpu_usage_ns)) / float(1e9)

                # Interval is never 0 because of step 0 (forcing a sleep)

                usage_percent = usage_secs / interval * 100.0
                if usage_percent > 100.0:
                    usage_percent = 100.0
                idle = 100.0 - usage_percent

                # Approximation 1

                user_plus_sys_hz = cpu_user_system['user'] \
                    + cpu_user_system['system']
                if user_plus_sys_hz == 0:
                    # Fake value to avoid divide by zero.
                    user_plus_sys_hz = 0.1
                user = usage_percent * (cpu_user_system['user'] /
                                        user_plus_sys_hz)
                system = usage_percent * (cpu_user_system['system'] /
                                          user_plus_sys_hz)

                # Approximation 2

                nice = host_cpu_feature[index][1]
                wait = host_cpu_feature[index][3]
                interrupt = host_cpu_feature[index][5]
                steal = host_cpu_feature[index][6]
                feature_key = '{0}-{1}'.format('cpu', index)
                feature_attributes = CpuFeature(
                    idle,
                    nice,
                    user,
                    wait,
                    system,
                    interrupt,
                    steal,
                    usage_percent,
                )
                try:
                    yield (feature_key, feature_attributes)
                except Exception as e:
                    logger.error('Error crawling cpu information',
                                 exc_info=True)
                    raise CrawlError(e)
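The OUTCONTAINER branch above derives utilization from two readings of the cgroup's cpuacct.usage counter (cumulative CPU time in nanoseconds) taken 'interval' seconds apart. The arithmetic, distilled with hypothetical counter values:

# Two hypothetical cpuacct.usage readings, 0.1 s apart.
t1_ns = 1200000000.0
t2_ns = 1250000000.0
interval = 0.1

usage_secs = (t2_ns - t1_ns) / 1e9                         # 0.05 s of CPU time
usage_percent = min(usage_secs / interval * 100.0, 100.0)  # 50.0
idle = 100.0 - usage_percent                               # 50.0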
Example #18
    def crawl_memory(self):

        # memory attributes: ["used", "buffered", "cached", "free"]

        logger.debug('Crawling memory')
        feature_key = 'memory'

        if self.crawl_mode == Modes.INVM:
            try:
                used = psutil.virtual_memory().used
            except Exception as e:
                used = 'unknown'
            try:
                buffered = psutil.virtual_memory().buffers
            except Exception as e:
                buffered = 'unknown'
            try:
                cached = psutil.virtual_memory().cached
            except Exception as e:
                cached = 'unknown'
            try:
                free = psutil.virtual_memory().free
            except Exception as e:
                free = 'unknown'

            feature_attributes = MemoryFeature(used, buffered, cached,
                                               free)
        elif self.crawl_mode == Modes.OUTVM:

            (domain_name, kernel_version, distro, arch) = self.vm
            from psvmi import system_info
            sys = system_info(domain_name, kernel_version, distro, arch)
            feature_attributes = MemoryFeature(
                sys.memory_used,
                sys.memory_buffered,
                sys.memory_cached,
                sys.memory_free)
        elif self.crawl_mode == Modes.OUTCONTAINER:

            used = buffered = cached = free = 'unknown'
            try:
                with open(self.container.get_memory_cgroup_path('memory.stat'
                                                                ), 'r') as f:
                    for line in f:
                        (key, value) = line.strip().split(' ')
                        if key == 'total_cache':
                            cached = int(value)
                        if key == 'total_active_file':
                            buffered = int(value)

                with open(self.container.get_memory_cgroup_path(
                        'memory.limit_in_bytes'), 'r') as f:
                    limit = int(f.readline().strip())

                with open(self.container.get_memory_cgroup_path(
                        'memory.usage_in_bytes'), 'r') as f:
                    used = int(f.readline().strip())

                host_free = psutil.virtual_memory().free

                container_total = used + min(host_free, limit - used)
                free = container_total - used
                feature_attributes = MemoryFeature(used, buffered,
                                                   cached, free)
            except Exception as e:

                logger.error('Error crawling memory', exc_info=True)
                raise CrawlError(e)
        else:

            logger.error('Unsupported crawl mode: ' + self.crawl_mode +
                         '. Returning unknown memory key and attributes.'
                         )
            feature_attributes = MemoryFeature('unknown', 'unknown',
                                               'unknown', 'unknown')
        try:
            yield (feature_key, feature_attributes)
        except Exception as e:
            logger.error('Error crawling memory', exc_info=True)
            raise CrawlError(e)
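The OUTCONTAINER branch estimates "free" memory for a container, which cgroups do not report directly: the container can still grow by whichever is smaller, the host's free memory or the headroom left under its own limit. A worked sketch with hypothetical numbers:

# Hypothetical values, in bytes.
used = 200 * 1024 ** 2        # memory.usage_in_bytes
limit = 512 * 1024 ** 2       # memory.limit_in_bytes
host_free = 1024 * 1024 ** 2  # psutil.virtual_memory().free

container_total = used + min(host_free, limit - used)
free = container_total - used  # min(host_free, limit - used) = 312 MB here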
Example #19
    def _crawl_packages(self, dbpath=None, root_dir='/'):

        assert(self.crawl_mode is not Modes.OUTCONTAINER)

        # package attributes: ["installed", "name", "size", "version"]

        (installtime, name, version, size) = (None, None, None, None)

        if self.crawl_mode == Modes.INVM:

            logger.debug('Using in-VM state information (crawl mode: ' +
                         self.crawl_mode + ')')
            system_type = platform.system().lower()
            distro = platform.linux_distribution()[0].lower()
        elif self.crawl_mode == Modes.MOUNTPOINT:
            logger.debug('Using disk image information (crawl mode: ' +
                         self.crawl_mode + ')')
            system_type = \
                platform_outofband.system(prefix=root_dir).lower()
            distro = platform_outofband.linux_distribution(prefix=root_dir)[
                0].lower()
        else:
            logger.error('Unsupported crawl mode: ' + self.crawl_mode +
                         '. Skipping package crawl.')
            system_type = 'unknown'
            distro = 'unknown'

        installed_since = self.feature_epoch
        if system_type != 'linux' or distro == '':
            # Distro is blank for FROM scratch images, and the package
            # feature is only valid for Linux platforms. A plain return
            # ends the generator cleanly (PEP 479 makes raising
            # StopIteration inside a generator an error).
            return
        logger.debug('Crawling Packages')

        pkg_manager = 'unknown'
        if distro in ['ubuntu', 'debian']:
            pkg_manager = 'dpkg'
        elif distro.startswith('red hat') or distro in ['redhat',
                                                        'fedora', 'centos']:

            pkg_manager = 'rpm'
        elif os.path.exists(os.path.join(root_dir, 'var/lib/dpkg')):
            pkg_manager = 'dpkg'
        elif os.path.exists(os.path.join(root_dir, 'var/lib/rpm')):
            pkg_manager = 'rpm'

        try:
            if pkg_manager == 'dpkg':
                if not dbpath:
                    dbpath = 'var/lib/dpkg'
                if os.path.isabs(dbpath):
                    logger.warning(
                        'dbpath: ' +
                        dbpath +
                        ' is defined absolute. Ignoring prefix: ' +
                        root_dir +
                        '.')

                # Join dbpath onto root_dir; os.path.join ignores the
                # prefix when dbpath is absolute, hence the warning above.

                dbpath = os.path.join(root_dir, dbpath)
                if installed_since > 0:
                    logger.warning(
                        'dpkg does not provide install-time, defaulting to '
                        'all packages installed since epoch')
                try:
                    dpkg = subprocess.Popen(['dpkg-query', '-W',
                                             '--admindir={0}'.format(dbpath),
                                             '-f=${Package}|${Version}'
                                             '|${Installed-Size}\n'
                                             ], stdout=subprocess.PIPE,
                                            stderr=subprocess.PIPE)
                    dpkglist = dpkg.stdout.read().strip('\n')
                except OSError as e:
                    logger.error(
                        'Failed to launch dpkg query for packages. Check if '
                        'dpkg-query is installed: [Errno: %d] ' %
                        e.errno + e.strerror + ' [Exception: ' +
                        type(e).__name__ + ']')
                    dpkglist = None
                if dpkglist:
                    for dpkginfo in dpkglist.split('\n'):
                        (name, version, size) = dpkginfo.split(r'|')

                        # dpkg does not provide any installtime field.
                        # feature_key used to be
                        # '{0}/{1}'.format(name, version); changed to
                        # just the name per Suriya's request.

                        feature_key = '{0}'.format(name)
                        yield (feature_key, PackageFeature(None, name,
                                                           size, version))
            elif pkg_manager == 'rpm':
                if not dbpath:
                    dbpath = 'var/lib/rpm'
                if os.path.isabs(dbpath):
                    logger.warning(
                        'dbpath: ' +
                        dbpath +
                        ' is defined absolute. Ignoring prefix: ' +
                        root_dir +
                        '.')
                # Join dbpath onto root_dir, as above.
                dbpath = os.path.join(root_dir, dbpath)
                try:
                    rpm = subprocess.Popen([
                        'rpm',
                        '--dbpath',
                        dbpath,
                        '-qa',
                        '--queryformat',
                        '%{installtime}|%{name}|%{version}'
                        '-%{release}|%{size}\n',
                    ], stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE)
                    rpmlist = rpm.stdout.read().strip('\n')
                except OSError as e:
                    logger.error(
                        'Failed to launch rpm query for packages. Check if '
                        'rpm is installed: [Errno: %d] ' %
                        e.errno + e.strerror + ' [Exception: ' +
                        type(e).__name__ + ']')
                    rpmlist = None
                if rpmlist:
                    for rpminfo in rpmlist.split('\n'):
                        (installtime, name, version, size) = \
                            rpminfo.split(r'|')

                        # installtime is a string such as '1376416422';
                        # compare numerically when possible, as the old
                        # comment here suggested.
                        try:
                            if int(installtime) <= installed_since:
                                continue
                        except ValueError:
                            pass

                        # feature_key used to be
                        # '{0}/{1}'.format(name, version); changed to
                        # just the name per Suriya's request.

                        feature_key = '{0}'.format(name)
                        yield (feature_key,
                               PackageFeature(installtime,
                                              name, size, version))
            else:
                raise CrawlError(
                    Exception(
                        'Unsupported package manager for Linux distro %s' %
                        distro))
        except Exception as e:
            logger.error('Error crawling package %s'
                         % ((name if name else 'Unknown')),
                         exc_info=True)
            raise CrawlError(e)
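Both query branches parse one pipe-separated record per line of the tool's stdout. A tiny sketch of the dpkg side, using a hypothetical output line in the ${Package}|${Version}|${Installed-Size} format requested above:

dpkginfo = 'openssl|1.1.1f-1ubuntu2|2000'   # hypothetical dpkg-query line
(name, version, size) = dpkginfo.split('|')
feature_key = '{0}'.format(name)            # 'openssl'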
Example #20
    def crawl_memory(self):

        # memory attributes: ["used", "buffered", "cached", "free"]

        logger.debug('Crawling memory')
        feature_key = 'memory'

        if self.crawl_mode == Modes.INVM:

            vm = psutil.virtual_memory()

            if (vm.free + vm.used) > 0:
                util_percentage = float(vm.used) / (vm.free + vm.used) * 100.0
            else:
                util_percentage = 'unknown'

            feature_attributes = MemoryFeature(vm.used, vm.buffers, vm.cached,
                                               vm.free, util_percentage)
        elif self.crawl_mode == Modes.OUTVM:

            (domain_name, kernel_version, distro, arch) = self.vm
            sys = system_info(domain_name, kernel_version, distro, arch)
            feature_attributes = MemoryFeature(
                sys.memory_used,
                sys.memory_buffered,
                sys.memory_cached,
                sys.memory_free,
                # Utilization percentage, computed as in the INVM branch.
                (sys.memory_used * 100 /
                 (sys.memory_used + sys.memory_free)))
        elif self.crawl_mode == Modes.OUTCONTAINER:

            used = buffered = cached = free = 'unknown'
            try:
                with open(self.container.get_memory_cgroup_path('memory.stat'
                                                                ), 'r') as f:
                    for line in f:
                        (key, value) = line.strip().split(' ')
                        if key == 'total_cache':
                            cached = int(value)
                        if key == 'total_active_file':
                            buffered = int(value)

                with open(self.container.get_memory_cgroup_path(
                        'memory.limit_in_bytes'), 'r') as f:
                    limit = int(f.readline().strip())

                with open(self.container.get_memory_cgroup_path(
                        'memory.usage_in_bytes'), 'r') as f:
                    used = int(f.readline().strip())

                host_free = psutil.virtual_memory().free
                container_total = used + min(host_free, limit - used)
                free = container_total - used

                if 'unknown' not in [used, free] and (free + used) > 0:
                    util_percentage = float(used) / (free + used) * 100.0
                else:
                    util_percentage = 'unknown'

                feature_attributes = MemoryFeature(
                    used, buffered, cached, free, util_percentage)
            except Exception as e:

                logger.error('Error crawling memory', exc_info=True)
                raise CrawlError(e)
        else:
            raise NotImplementedError('Unsupported crawl mode')

        yield (feature_key, feature_attributes)
Example #21
    def _crawl_connections(self):

        assert(self.crawl_mode is not Modes.OUTCONTAINER)

        created_since = 0
        logger.debug('Crawling Connections: since={0}'.format(created_since))

        for p in psutil.process_iter():
            pid = (p.pid() if hasattr(p.pid, '__call__') else p.pid)
            status = (p.status() if hasattr(p.status, '__call__'
                                            ) else p.status)
            if status == psutil.STATUS_ZOMBIE:
                continue

            create_time = (
                p.create_time() if hasattr(
                    p.create_time,
                    '__call__') else p.create_time)
            name = (p.name() if hasattr(p.name, '__call__') else p.name)

            if create_time <= created_since:
                continue
            try:
                for c in p.get_connections():
                    try:
                        (localipaddr, localport) = c.laddr[:]
                    except AttributeError:

                        # Older version of psutil uses local_address instead of
                        # laddr.

                        (localipaddr, localport) = c.local_address[:]
                    try:
                        if c.raddr:
                            (remoteipaddr, remoteport) = c.raddr[:]
                        else:
                            (remoteipaddr, remoteport) = (None, None)
                    except AttributeError:

                        # Older version of psutil uses remote_address instead
                        # of raddr.

                        if c.remote_address:
                            (remoteipaddr, remoteport) = \
                                c.remote_address[:]
                        else:
                            (remoteipaddr, remoteport) = (None, None)
                    feature_key = '{0}/{1}/{2}'.format(pid,
                                                       localipaddr, localport)
                    yield (feature_key, ConnectionFeature(
                        localipaddr,
                        localport,
                        name,
                        pid,
                        remoteipaddr,
                        remoteport,
                        str(c.status),
                    ))
            except Exception as e:
                logger.error('Error crawling connection for process %s'
                             % pid, exc_info=True)
                raise CrawlError(e)
Example #22
    def _crawl_processes(self):

        created_since = 0
        logger.debug('Crawling Processes: since={0}'.format(created_since))

        for p in psutil.process_iter():
            create_time = (
                p.create_time() if hasattr(
                    p.create_time,
                    '__call__') else p.create_time)
            if create_time > created_since:
                name = (p.name() if hasattr(p.name, '__call__'
                                            ) else p.name)
                cmdline = (p.cmdline() if hasattr(p.cmdline, '__call__'
                                                  ) else p.cmdline)
                pid = (p.pid() if hasattr(p.pid, '__call__') else p.pid)
                status = (p.status() if hasattr(p.status, '__call__'
                                                ) else p.status)
                if status == psutil.STATUS_ZOMBIE:
                    cwd = 'unknown'  # invalid
                else:
                    try:
                        cwd = (p.cwd() if hasattr(p, 'cwd') and
                               hasattr(p.cwd, '__call__') else p.getcwd())
                    except Exception as e:
                        logger.error('Error crawling process %s for cwd'
                                     % pid, exc_info=True)
                        cwd = 'unknown'
                ppid = (p.ppid() if hasattr(p.ppid, '__call__'
                                            ) else p.ppid)
                if (hasattr(p, 'num_threads') and
                        hasattr(p.num_threads, '__call__')):
                    num_threads = p.num_threads()
                else:
                    num_threads = p.get_num_threads()
                try:
                    username = (p.username() if hasattr(p, 'username') and
                                hasattr(p.username, '__call__') else
                                p.username)
                except Exception as e:
                    logger.error('Error crawling process %s for username'
                                 % pid, exc_info=True)
                    username = 'unknown'

                try:
                    openfiles = []
                    for f in p.get_open_files():
                        openfiles.append(f.path)
                    openfiles.sort()
                    feature_key = '{0}/{1}'.format(name, pid)
                    yield (feature_key, ProcessFeature(
                        str(' '.join(cmdline)),
                        create_time,
                        cwd,
                        name,
                        openfiles,
                        pid,
                        ppid,
                        num_threads,
                        username,
                    ))
                except Exception as e:
                    logger.error('Error crawling process %s' % pid,
                                 exc_info=True)
                    raise CrawlError(e)
Example #23
    def _crawl_config_files(
        self,
        root_dir='/',
        exclude_dirs=['proc', 'mnt', 'dev', 'tmp'],
        root_dir_alias=None,
        known_config_files=[],
        discover_config_files=False,
    ):

        assert(self.crawl_mode is not Modes.OUTCONTAINER)

        saved_args = locals()
        logger.debug('Crawling config files: %s' % (saved_args))
        accessed_since = self.feature_epoch
        try:
            assert os.path.isdir(root_dir)
            if root_dir_alias is None:
                root_dir_alias = root_dir
            exclude_dirs = [os.path.join(root_dir, d) for d in
                            exclude_dirs]
            exclude_regex = r'|'.join([fnmatch.translate(d) for d in
                                       exclude_dirs]) or r'$.'
            known_config_files[:] = [os.path.join(root_dir, f) for f in
                                     known_config_files]
            known_config_files[:] = [f for f in known_config_files
                                     if not re.match(exclude_regex, f)]
            config_file_set = set()
            for fpath in known_config_files:
                if os.path.exists(fpath):
                    lstat = os.lstat(fpath)
                    if (lstat.st_atime > accessed_since or
                            lstat.st_ctime > accessed_since):
                        config_file_set.add(fpath)
        except Exception as e:
            logger.error('Error examining %s' % root_dir, exc_info=True)
            raise CrawlError(e)
        try:
            if discover_config_files:

                # Walk the directory hierarchy starting at 'root_dir' in BFS
                # order looking for config files.

                for (root_dirpath, dirs, files) in os.walk(root_dir):
                    dirs[:] = [os.path.join(root_dirpath, d) for d in
                               dirs]
                    dirs[:] = [d for d in dirs
                               if not re.match(exclude_regex, d)]
                    files = [os.path.join(root_dirpath, f) for f in
                             files]
                    files = [f for f in files
                             if not re.match(exclude_regex, f)]
                    for fpath in files:
                        if os.path.exists(fpath) \
                                and self.is_config_file(fpath):
                            lstat = os.lstat(fpath)
                            if lstat.st_atime > accessed_since \
                                    or lstat.st_ctime > accessed_since:
                                config_file_set.add(fpath)
        except Exception as e:
            logger.error('Error examining %s' % root_dir, exc_info=True)
            raise CrawlError(e)
        try:
            for fpath in config_file_set:
                try:
                    (_, fname) = os.path.split(fpath)
                    frelpath = fpath.replace(root_dir, root_dir_alias,
                                             1)  # root_dir relative path

                    # Copy this config_file into / before reading it, so
                    # we don't change its atime attribute.

                    (th, temppath) = tempfile.mkstemp(prefix='config.',
                                                      dir='/')
                    os.close(th)
                    shutil.copyfile(fpath, temppath)
                    # Read the copy, not the original, so the copy is
                    # what gets its atime updated.
                    with codecs.open(filename=temppath, mode='r',
                                     encoding='utf-8', errors='ignore') as \
                            config_file:

                        # Encode the contents of config_file as utf-8.

                        yield (frelpath, ConfigFeature(fname,
                                                       config_file.read(),
                                                       frelpath))
                    os.remove(temppath)
                except IOError as e:
                    raise CrawlError(e)
                except Exception as e:
                    logger.error('Error crawling config file %s'
                                 % fpath, exc_info=True)
                    raise CrawlError(e)
        except Exception as e:
            logger.error('Error examining %s' % root_dir, exc_info=True)
            raise CrawlError(e)
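Copying the file still reads the original once, so its atime changes during the copy unless the filesystem is mounted noatime. An alternative sketch (not from this codebase) records the timestamps before reading and restores them with os.utime, which sets atime and mtime together:

import os

def read_preserving_times(fpath):
    st = os.lstat(fpath)
    with open(fpath, 'rb') as f:
        data = f.read()
    # Restore the original access and modification times.
    os.utime(fpath, (st.st_atime, st.st_mtime))
    return data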
Example #24
    def _crawl_os(self, mountpoint=None):

        assert(self.crawl_mode is not Modes.OUTCONTAINER)

        logger.debug('Crawling OS')
        if self.crawl_mode == Modes.INVM:
            logger.debug('Using in-VM state information (crawl mode: ' +
                         self.crawl_mode + ')')
            feature_key = platform.system().lower()

            try:
                ips = misc.get_host_ip4_addresses()
            except Exception as e:
                ips = 'unknown'
            try:
                distro = platform.linux_distribution()[0]
            except Exception as e:
                distro = 'unknown'
            try:
                osname = platform.platform()
            except Exception as e:
                osname = 'unknown'

            boot_time = (
                psutil.boot_time() if hasattr(
                    psutil, 'boot_time') else psutil.BOOT_TIME)
            uptime = int(time.time()) - boot_time
            feature_attributes = OSFeature(
                boot_time,
                uptime,
                ips,
                distro,
                osname,
                platform.machine(),
                platform.release(),
                platform.system().lower(),
                platform.version(),
            )
        elif self.crawl_mode == Modes.MOUNTPOINT:
            logger.debug('Using disk image information (crawl mode: ' +
                         self.crawl_mode + ')')
            feature_key = \
                platform_outofband.system(prefix=mountpoint).lower()
            feature_attributes = OSFeature(
                'unsupported',  # boot time unknown for an image
                'unsupported',  # uptime unknown for an image (slot added
                                # to match the nine-field INVM call above)
                '0.0.0.0',      # live IP unknown for an image
                platform_outofband.linux_distribution(
                    prefix=mountpoint)[0],
                platform_outofband.platform(prefix=mountpoint),
                platform_outofband.machine(prefix=mountpoint),
                platform_outofband.release(prefix=mountpoint),
                platform_outofband.system(prefix=mountpoint).lower(),
                platform_outofband.version(prefix=mountpoint),
            )
        elif self.crawl_mode == Modes.OUTVM:

            (domain_name, kernel_version, distro, arch) = self.vm
            from psvmi import system_info
            sys = system_info(domain_name, kernel_version, distro, arch)
            uptime = int(time.time()) - sys.boottime
            feature_attributes = OSFeature(
                sys.boottime,
                uptime,  # previously computed here but never passed
                sys.ipaddr,
                sys.osdistro,
                sys.osname,
                sys.osplatform,
                sys.osrelease,
                sys.ostype,
                sys.osversion,
            )
            feature_key = sys.ostype
        else:
            raise NotImplementedError()
        try:
            yield (feature_key, feature_attributes)
        except Exception as e:
            logger.error('Error crawling OS', exc_info=True)
            raise CrawlError(e)
Example #25
    def _crawl_packages(self, dbpath=None, root_dir='/'):

        # package attributes: ["installed", "name", "size", "version"]

        (installtime, name, version, size) = (None, None, None, None)

        if self.crawl_mode == Modes.INVM:

            logger.debug('Using in-VM state information (crawl mode: ' +
                         self.crawl_mode + ')')
            system_type = platform.system().lower()
            distro = platform.linux_distribution()[0].lower()
            reload_needed = False
        elif self.crawl_mode == Modes.OUTCONTAINER:

            logger.debug('Using outcontainer state information (crawl mode: ' +
                         self.crawl_mode + ')')

            # XXX assuming containers will always run in linux

            system_type = 'linux'

            # The package manager will be discovered after checking for the
            # existence of /var/lib/dpkg or /var/lib/rpm

            distro = ''

            reload_needed = True
        elif self.crawl_mode == Modes.MOUNTPOINT:
            logger.debug('Using disk image information (crawl mode: ' +
                         self.crawl_mode + ')')
            system_type = \
                platform_outofband.system(prefix=root_dir).lower()
            distro = platform_outofband.linux_distribution(prefix=root_dir)[
                0].lower()
            reload_needed = False
        else:
            raise NotImplementedError('Unsupported crawl mode')

        installed_since = self.feature_epoch
        if system_type != 'linux':
            # The package feature is only valid for Linux platforms;
            # a plain return ends the generator cleanly (PEP 479 makes
            # raising StopIteration inside a generator an error).
            return
        logger.debug('Crawling Packages')

        pkg_manager = 'unknown'
        if distro in ['ubuntu', 'debian']:
            pkg_manager = 'dpkg'
        elif distro.startswith('red hat') or distro in ['redhat',
                                                        'fedora', 'centos']:
            pkg_manager = 'rpm'
        elif os.path.exists(os.path.join(root_dir, 'var/lib/dpkg')):
            pkg_manager = 'dpkg'
        elif os.path.exists(os.path.join(root_dir, 'var/lib/rpm')):
            pkg_manager = 'rpm'

        try:
            if pkg_manager == 'dpkg':
                if not dbpath:
                    dbpath = 'var/lib/dpkg'
                for (key, feature) in get_dpkg_packages(
                        root_dir, dbpath, installed_since):
                    yield (key, feature)
            elif pkg_manager == 'rpm':
                if not dbpath:
                    dbpath = 'var/lib/rpm'
                for (key, feature) in get_rpm_packages(
                        root_dir, dbpath, installed_since, reload_needed):
                    yield (key, feature)
            else:
                logger.warning('Unsupported package manager for Linux distro')
        except Exception as e:
            logger.error('Error crawling package %s'
                         % ((name if name else 'Unknown')),
                         exc_info=True)
            raise CrawlError(e)
Example #26
    def _crawl_test_crash(self):
        raise CrawlError("oops")