def run_as_another_namespace(pid, namespaces, function, *args, **kwargs):
    hack_to_pre_load_modules()
    context = ProcessContext(pid, namespaces)
    context.attach()
    queue = multiprocessing.Queue(2 ** 15)
    try:
        child_process = multiprocessing.Process(
            name='crawler-%s' % pid,
            target=function_wrapper,
            args=(queue, function, args),
            kwargs=kwargs)
        child_process.start()
    except OSError:
        queue.close()
        raise CrawlError()

    child_exception = None
    try:
        (result, child_exception) = queue.get(timeout=IN_CONTAINER_TIMEOUT)
    except Queue.Empty:
        child_exception = CrawlTimeoutError()
    except Exception:
        result = None
    if child_exception:
        result = None

    child_process.join(IN_CONTAINER_TIMEOUT)

    # The join timed out and the process might still be alive.
    if child_process.is_alive():
        errmsg = ('Timed out waiting for process %d to exit.' %
                  child_process.pid)
        queue.close()
        os.kill(child_process.pid, 9)
        context.detach()
        logger.error(errmsg)
        raise CrawlTimeoutError(errmsg)

    context.detach()

    if result is None:
        if child_exception:
            raise child_exception
        raise CrawlError('Unknown crawl error.')
    return result

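# A minimal usage sketch for run_as_another_namespace(), not part of the
# crawler API: the pid and namespace list below are illustrative
# assumptions, and os.getloadavg stands in for a real crawl callable.
def _example_run_as_another_namespace():
    namespaces = ['pid', 'mnt', 'net']  # hypothetical subset
    # Run os.getloadavg inside the namespaces of (hypothetical) pid
    # 12345 and return its result to the caller.
    return run_as_another_namespace('12345', namespaces, os.getloadavg)
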
def _crawl_load(self):
    assert(self.crawl_mode is not Modes.OUTCONTAINER)
    logger.debug('Crawling system load')
    feature_key = 'load'
    # os.getloadavg() returns all three averages in one call; if it
    # fails, none of them is available.
    try:
        (shortterm, midterm, longterm) = os.getloadavg()
    except Exception:
        (shortterm, midterm, longterm) = ('unknown', 'unknown', 'unknown')
    feature_attributes = LoadFeature(shortterm, midterm, longterm)
    try:
        yield (feature_key, feature_attributes)
    except Exception as e:
        logger.error('Error crawling load', exc_info=True)
        raise CrawlError(e)

def _crawl_packages(self, dbpath=None, root_dir='/'):
    # package attributes: ["installed", "name", "size", "version"]
    logger.debug('Crawling Packages')

    if self.crawl_mode in (Modes.INVM, Modes.MOUNTPOINT):
        reload_needed = False
    elif self.crawl_mode == Modes.OUTCONTAINER:
        reload_needed = True
    else:
        raise NotImplementedError('Unsupported crawl mode')

    installed_since = self.feature_epoch
    pkg_manager = self._get_package_manager(root_dir)

    try:
        if pkg_manager == 'dpkg':
            dbpath = dbpath or 'var/lib/dpkg'
            for (key, feature) in get_dpkg_packages(
                    root_dir, dbpath, installed_since):
                yield (key, feature)
        elif pkg_manager == 'rpm':
            dbpath = dbpath or 'var/lib/rpm'
            for (key, feature) in get_rpm_packages(
                    root_dir, dbpath, installed_since, reload_needed):
                yield (key, feature)
        else:
            logger.warning('Unsupported package manager for Linux distro')
    except Exception as e:
        logger.error('Error crawling packages', exc_info=True)
        raise CrawlError(e)

def _crawl_metrics(self):
    assert(self.crawl_mode is not Modes.OUTCONTAINER)
    created_since = 0
    logger.debug('Crawling Metrics')
    for p in psutil.process_iter():
        create_time = (p.create_time() if hasattr(
            p.create_time, '__call__') else p.create_time)
        if create_time <= created_since:
            continue
        try:
            name = (p.name() if hasattr(p.name, '__call__') else p.name)
            pid = (p.pid() if hasattr(p.pid, '__call__') else p.pid)
            status = (p.status() if hasattr(p.status, '__call__')
                      else p.status)
            if status == psutil.STATUS_ZOMBIE:
                continue
            username = (p.username() if hasattr(
                p.username, '__call__') else p.username)
            meminfo = (p.get_memory_info() if hasattr(
                p.get_memory_info, '__call__') else p.memory_info)
            ioinfo = (p.get_io_counters() if hasattr(
                p.get_io_counters, '__call__') else p.io_counters)
            cpu_percent = (p.get_cpu_percent(interval=0) if hasattr(
                p.get_cpu_percent, '__call__') else p.cpu_percent)
            memory_percent = (p.get_memory_percent() if hasattr(
                p.get_memory_percent, '__call__') else p.memory_percent)

            feature_key = '{0}/{1}'.format(name, pid)
            yield (feature_key, MetricFeature(
                round(cpu_percent, 2),
                round(memory_percent, 2),
                name,
                pid,
                ioinfo.read_bytes,
                meminfo.rss,
                str(status),
                username,
                meminfo.vms,
                ioinfo.write_bytes,
            ))
        except Exception as e:
            logger.error('Error crawling metric for process %s' % pid,
                         exc_info=True)
            raise CrawlError(e)

def crawl_packages(dbpath=None, root_dir='/', installed_since=0,
                   reload_needed=True):
    # package attributes: ["installed", "name", "size", "version"]
    logger.debug('Crawling Packages')

    pkg_manager = _get_package_manager(root_dir)

    try:
        if pkg_manager == 'dpkg':
            dbpath = dbpath or 'var/lib/dpkg'
            for (key, feature) in get_dpkg_packages(
                    root_dir, dbpath, installed_since):
                yield (key, feature, 'package')
        elif pkg_manager == 'rpm':
            dbpath = dbpath or 'var/lib/rpm'
            for (key, feature) in get_rpm_packages(
                    root_dir, dbpath, installed_since, reload_needed):
                yield (key, feature, 'package')
        else:
            logger.warning('Unsupported package manager for Linux distro')
    except Exception as e:
        logger.error('Error crawling packages', exc_info=True)
        raise CrawlError(e)

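# Hedged usage sketch: crawl_packages() is a generator that yields
# (feature_key, PackageFeature, 'package') tuples, so a caller drains it
# with a for loop. The root_dir value and the logging of each tuple are
# illustrative, not part of the crawler API.
def _example_crawl_packages():
    for (key, feature, feature_type) in crawl_packages(root_dir='/'):
        logger.info('%s %s: %s' % (feature_type, key, feature))
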
def _crawl_files(
    self,
    root_dir='/',
    exclude_dirs=['proc', 'mnt', 'dev', 'tmp'],
    root_dir_alias=None,
):
    root_dir = str(root_dir)
    assert(self.crawl_mode is not Modes.OUTCONTAINER)
    accessed_since = self.feature_epoch
    saved_args = locals()
    logger.debug('crawl_files: %s' % (saved_args))
    if self.crawl_mode in [Modes.INVM, Modes.MOUNTPOINT]:
        try:
            assert os.path.isdir(root_dir)
            if root_dir_alias is None:
                root_dir_alias = root_dir
            exclude_dirs = [os.path.join(root_dir, d)
                            for d in exclude_dirs]
            exclude_regex = r'|'.join([fnmatch.translate(d)
                                       for d in exclude_dirs]) or r'$.'

            # Walk the directory hierarchy starting at 'root_dir' in
            # BFS order.
            feature = self._crawl_file(root_dir, root_dir,
                                       root_dir_alias)
            if feature and (feature.ctime > accessed_since or
                            feature.atime > accessed_since):
                yield (feature.path, feature)
            for (root_dirpath, dirs, files) in os.walk(root_dir):
                dirs[:] = [os.path.join(root_dirpath, d)
                           for d in dirs]
                dirs[:] = [d for d in dirs
                           if not re.match(exclude_regex, d)]
                files = [os.path.join(root_dirpath, f)
                         for f in files]
                files = [f for f in files
                         if not re.match(exclude_regex, f)]
                for fpath in files:
                    feature = self._crawl_file(root_dir, fpath,
                                               root_dir_alias)
                    if feature and (feature.ctime > accessed_since or
                                    feature.atime > accessed_since):
                        yield (feature.path, feature)
                for fpath in dirs:
                    feature = self._crawl_file(root_dir, fpath,
                                               root_dir_alias)
                    if feature and (feature.ctime > accessed_since or
                                    feature.atime > accessed_since):
                        yield (feature.path, feature)
        except Exception as e:
            logger.error('Error crawling root_dir %s' % root_dir,
                         exc_info=True)
            raise CrawlError(e)

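# Side note on the exclusion filter built above: fnmatch.translate()
# turns each shell-style pattern into a regex, and joining them with '|'
# yields one regex that re.match() tests per path ('$.' matches nothing,
# covering the empty-list case). A standalone sketch with illustrative
# paths, relying on the module-level fnmatch/re imports already used:
def _example_exclude_regex():
    exclude_dirs = ['/proc', '/tmp']
    exclude_regex = r'|'.join([fnmatch.translate(d)
                               for d in exclude_dirs]) or r'$.'
    assert re.match(exclude_regex, '/proc')      # excluded
    assert not re.match(exclude_regex, '/etc')   # kept
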
def crawl_dockerinspect(self):
    logger.debug('Crawling docker inspect')
    long_id = self.container.long_id
    try:
        inspect = dockerutils.exec_dockerinspect(long_id)
        yield (long_id, inspect)
    except Exception as e:
        logger.error('Error crawling docker inspect', exc_info=True)
        raise CrawlError(e)

def crawl_dockerhistory(self):
    logger.debug('Crawling docker history')
    long_id = self.container.long_id
    try:
        history = dockerutils.exec_docker_history(long_id)
        image_id = history[0]['Id']
        yield (image_id, {'history': history})
    except Exception as e:
        logger.error('Error crawling docker history', exc_info=True)
        raise CrawlError(e)

def _crawl_interface_counters(self):
    logger.debug('Crawling interface information')

    # Take one snapshot of the per-NIC counters instead of re-querying
    # psutil once per interface.
    for (ifname, interface) in psutil.net_io_counters(
            pernic=True).items():
        # Any counter missing from this psutil version is reported as
        # 'unknown'.
        curr_count = [getattr(interface, attr, 'unknown')
                      for attr in ('bytes_sent', 'bytes_recv',
                                   'packets_sent', 'packets_recv',
                                   'errout', 'errin')]
        try:
            yield (ifname, curr_count)
        except Exception as e:
            logger.error('Error crawling interface information',
                         exc_info=True)
            raise CrawlError(e)

def crawl_cpu(self, per_cpu=False):
    logger.debug('Crawling cpu information')
    if self.crawl_mode == Modes.INVM:
        for key, feature in self._crawl_cpu_invm():
            yield (key, feature)
    elif self.crawl_mode == Modes.OUTCONTAINER:
        try:
            for key, feature in self._crawl_cpu_outcontainer(per_cpu):
                yield (key, feature)
        except Exception as e:
            logger.error('Error crawling cpu information',
                         exc_info=True)
            raise CrawlError(e)
    else:
        raise NotImplementedError('Unsupported crawl mode')

def _crawl_file(
    self,
    root_dir,
    fpath,
    root_dir_alias,
):
    try:
        lstat = os.lstat(fpath)
        fmode = lstat.st_mode
        fperm = self._fileperm(fmode)
        ftype = self._filetype(fpath, fperm)
        flinksto = None
        if ftype == 'link':
            try:
                # This has to be an absolute path, not a root-relative
                # path.
                flinksto = os.readlink(fpath)
            except Exception:
                logger.error('Error reading linksto info for file %s'
                             % fpath, exc_info=True)
        fgroup = lstat.st_gid
        fuser = lstat.st_uid

        # root_dir-relative path
        frelpath = fpath.replace(root_dir, root_dir_alias, 1)
        (_, fname) = os.path.split(frelpath)
        return FileFeature(
            lstat.st_atime,
            lstat.st_ctime,
            fgroup,
            flinksto,
            fmode,
            lstat.st_mtime,
            fname,
            frelpath,
            lstat.st_size,
            ftype,
            fuser,
        )
    except Exception as e:
        logger.error('Error crawling file %s' % fpath, exc_info=True)
        raise CrawlError(e)

def crawl_dockerps(self):
    assert(self.crawl_mode == Modes.INVM)
    logger.debug('Crawling docker ps results')
    try:
        for inspect in dockerutils.exec_dockerps():
            yield (inspect['Id'], DockerPSFeature._make([
                inspect['State']['Running'],
                0,
                inspect['Image'],
                [],
                inspect['Config']['Cmd'],
                inspect['Name'],
                inspect['Id'],
            ]))
    except Exception as e:
        logger.error('Error crawling docker ps', exc_info=True)
        raise CrawlError(e)

def _crawl_disk_partitions(self):
    assert(self.crawl_mode is not Modes.OUTCONTAINER)
    logger.debug('Crawling Disk partitions')
    for partition in psutil.disk_partitions():
        try:
            pdiskusage = psutil.disk_usage(partition.mountpoint)
            yield (partition.mountpoint, DiskFeature(
                partition.device,
                100.0 - pdiskusage.percent,
                partition.fstype,
                partition.mountpoint,
                partition.opts,
                pdiskusage.total,
            ))
        except Exception as e:
            logger.error('Error crawling disk partition %s'
                         % partition.mountpoint, exc_info=True)
            raise CrawlError(e)

def crawl_memory(self):
    # memory attributes: ["used", "buffered", "cached", "free"]
    logger.debug('Crawling memory')
    feature_key = 'memory'
    if self.crawl_mode == Modes.INVM:
        vm = psutil.virtual_memory()
        if (vm.free + vm.used) > 0:
            util_percentage = float(vm.used) / (vm.free + vm.used) * 100.0
        else:
            util_percentage = 'unknown'
        feature_attributes = MemoryFeature(vm.used, vm.buffers, vm.cached,
                                           vm.free, util_percentage)
    elif self.crawl_mode == Modes.OUTVM:
        if psvmi is None:
            raise NotImplementedError()
        else:
            sysmem = psvmi.system_memory_info(self.get_vm_context())
            feature_attributes = MemoryFeature(
                sysmem.memory_used,
                sysmem.memory_buffered,
                sysmem.memory_cached,
                sysmem.memory_free,
                (sysmem.memory_used * 100 /
                 (sysmem.memory_used + sysmem.memory_free)))
    elif self.crawl_mode == Modes.OUTCONTAINER:
        try:
            feature_attributes = self._crawl_memory_outcontainer()
        except Exception as e:
            logger.error('Error crawling memory', exc_info=True)
            raise CrawlError(e)
    else:
        raise NotImplementedError('Unsupported crawl mode')
    yield (feature_key, feature_attributes)

def run_as_another_process(function, _args=(), _kwargs={}):
    try:
        queue = multiprocessing.Queue(2 ** 15)
    except OSError:
        # Try again with a smaller queue.
        queue = multiprocessing.Queue(2 ** 14)

    child_process = multiprocessing.Process(
        target=_function_wrapper,
        args=(queue, function),
        kwargs={'_args': _args,
                '_kwargs': _kwargs})
    child_process.start()

    child_exception, result = None, None
    try:
        (result, child_exception) = queue.get(timeout=IN_PROCESS_TIMEOUT)
    except Queue.Empty:
        child_exception = CrawlTimeoutError()
    except Exception as exc:
        logger.warn(exc)

    child_process.join(IN_PROCESS_TIMEOUT)

    # The join timed out and the process might still be alive.
    if child_process.is_alive():
        errmsg = ('Timed out waiting for process %d to exit.' %
                  child_process.pid)
        queue.close()
        os.kill(child_process.pid, 9)
        logger.error(errmsg)
        raise CrawlTimeoutError(errmsg)

    if result is None:
        if child_exception:
            raise child_exception
        raise CrawlError('Unknown crawl error.')
    return result

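# Hedged usage sketch for run_as_another_process(), not part of the
# crawler API: isolate a call that might hang or crash so only the child
# process dies. The crawled callable is an illustrative stand-in.
def _example_run_as_another_process():
    return run_as_another_process(os.getloadavg)
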
def crawl_dockerps(self):
    assert(self.crawl_mode == Modes.INVM)
    logger.debug('Crawling docker ps results')
    try:
        for inspect in dockerutils.exec_dockerps():
            long_id = inspect['Id']
            state = inspect['State']
            running = state['Running']
            image = inspect['Image']
            names = inspect['Name']
            cmd = inspect['Config']['Cmd']
            yield (long_id, DockerPSFeature._make([
                running,
                0,
                image,
                [],
                cmd,
                names,
                long_id,
            ]))
    except Exception as e:
        logger.error('Error crawling docker ps', exc_info=True)
        raise CrawlError(e)

def crawl_cpu(self, per_cpu=False):
    logger.debug('Crawling cpu information')

    if self.crawl_mode not in [
            Modes.INVM, Modes.OUTCONTAINER, Modes.OUTVM]:
        logger.error('Unsupported crawl mode: ' + self.crawl_mode +
                     '. Returning unknown cpu key and attributes.')
        feature_attributes = CpuFeature(
            'unknown',
            'unknown',
            'unknown',
            'unknown',
            'unknown',
            'unknown',
            'unknown',
            'unknown',
        )

    host_cpu_feature = {}
    if self.crawl_mode in [Modes.INVM, Modes.OUTCONTAINER]:
        for (index, cpu) in \
                enumerate(psutil.cpu_times_percent(percpu=True)):
            # Any field missing from this psutil version is reported
            # as 'unknown'.
            idle = getattr(cpu, 'idle', 'unknown')
            nice = getattr(cpu, 'nice', 'unknown')
            user = getattr(cpu, 'user', 'unknown')
            wait = getattr(cpu, 'iowait', 'unknown')
            system = getattr(cpu, 'system', 'unknown')
            interrupt = getattr(cpu, 'irq', 'unknown')
            steal = getattr(cpu, 'steal', 'unknown')

            used = 100 - int(idle)
            feature_key = '{0}-{1}'.format('cpu', index)
            feature_attributes = CpuFeature(
                idle,
                nice,
                user,
                wait,
                system,
                interrupt,
                steal,
                used,
            )
            host_cpu_feature[index] = feature_attributes
            if self.crawl_mode == Modes.INVM:
                try:
                    yield (feature_key, feature_attributes)
                except Exception as e:
                    logger.error('Error crawling cpu information',
                                 exc_info=True)
                    raise CrawlError(e)

    if self.crawl_mode == Modes.OUTCONTAINER:

        if per_cpu:
            stat_file_name = 'cpuacct.usage_percpu'
        else:
            stat_file_name = 'cpuacct.usage'

        container = self.container

        try:
            (cpu_usage_t1, prev_time) = \
                self._get_prev_container_cpu_times(container.long_id)

            if cpu_usage_t1:
                logger.debug('Using previous cpu times for container %s'
                             % container.long_id)
                interval = time.time() - prev_time

            if not cpu_usage_t1 or interval == 0:
                logger.debug(
                    'There are no previous cpu times for container %s '
                    'so we will be sleeping for 100 milliseconds' %
                    container.long_id)

                with open(container.get_cpu_cgroup_path(stat_file_name),
                          'r') as f:
                    cpu_usage_t1 = f.readline().strip().split(' ')
                interval = 0.1  # sleep for 100ms
                time.sleep(interval)

            with open(container.get_cpu_cgroup_path(stat_file_name),
                      'r') as f:
                cpu_usage_t2 = f.readline().strip().split(' ')

            # Store the cpu times for the next crawl.
            self._save_container_cpu_times(container.long_id,
                                           cpu_usage_t2)
        except Exception as e:
            logger.error('Error crawling cpu information',
                         exc_info=True)
            raise CrawlError(e)

        cpu_user_system = {}
        try:
            path = container.get_cpu_cgroup_path('cpuacct.stat')
            with open(path, 'r') as f:
                for line in f:
                    m = re.search(r"(system|user)\s+(\d+)", line)
                    if m:
                        cpu_user_system[m.group(1)] = \
                            float(m.group(2))
        except Exception as e:
            logger.error('Error crawling cpu information',
                         exc_info=True)
            raise CrawlError(e)

        for (index, cpu_usage_ns) in enumerate(cpu_usage_t1):
            usage_secs = (float(cpu_usage_t2[index]) -
                          float(cpu_usage_ns)) / float(1e9)

            # Interval is never 0 because of step 0 (forcing a sleep).
            usage_percent = usage_secs / interval * 100.0
            if usage_percent > 100.0:
                usage_percent = 100.0
            idle = 100.0 - usage_percent

            # Approximation 1
            user_plus_sys_hz = cpu_user_system['user'] \
                + cpu_user_system['system']
            if user_plus_sys_hz == 0:
                # Fake value to avoid divide by zero.
                user_plus_sys_hz = 0.1
            user = usage_percent * (cpu_user_system['user'] /
                                    user_plus_sys_hz)
            system = usage_percent * (cpu_user_system['system'] /
                                      user_plus_sys_hz)

            # Approximation 2
            nice = host_cpu_feature[index][1]
            wait = host_cpu_feature[index][3]
            interrupt = host_cpu_feature[index][5]
            steal = host_cpu_feature[index][6]

            feature_key = '{0}-{1}'.format('cpu', index)
            feature_attributes = CpuFeature(
                idle,
                nice,
                user,
                wait,
                system,
                interrupt,
                steal,
                usage_percent,
            )
            try:
                yield (feature_key, feature_attributes)
            except Exception as e:
                logger.error('Error crawling cpu information',
                             exc_info=True)
                raise CrawlError(e)

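# Worked example of the cpuacct arithmetic above, with illustrative
# numbers: if cpuacct.usage grows from 1.0e9 ns to 1.5e9 ns over a 1 s
# interval, then usage_secs = 0.5 and usage_percent = 50.0. If
# cpuacct.stat reports user=300 and system=100 jiffies, user time is
# 50.0 * (300 / 400.0) = 37.5 and system time is
# 50.0 * (100 / 400.0) = 12.5.
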
def crawl_memory(self):
    # memory attributes: ["used", "buffered", "cached", "free"]
    logger.debug('Crawling memory')
    feature_key = 'memory'
    if self.crawl_mode == Modes.INVM:
        # Take one snapshot instead of calling psutil.virtual_memory()
        # once per attribute; fields missing on this platform become
        # 'unknown'.
        try:
            vm = psutil.virtual_memory()
        except Exception:
            vm = None
        used = getattr(vm, 'used', 'unknown')
        buffered = getattr(vm, 'buffers', 'unknown')
        cached = getattr(vm, 'cached', 'unknown')
        free = getattr(vm, 'free', 'unknown')
        feature_attributes = MemoryFeature(used, buffered, cached, free)
    elif self.crawl_mode == Modes.OUTVM:
        (domain_name, kernel_version, distro, arch) = self.vm
        from psvmi import system_info
        sys = system_info(domain_name, kernel_version, distro, arch)
        feature_attributes = MemoryFeature(
            sys.memory_used,
            sys.memory_buffered,
            sys.memory_cached,
            sys.memory_free)
    elif self.crawl_mode == Modes.OUTCONTAINER:
        used = buffered = cached = free = 'unknown'
        try:
            with open(self.container.get_memory_cgroup_path(
                    'memory.stat'), 'r') as f:
                for line in f:
                    (key, value) = line.strip().split(' ')
                    if key == 'total_cache':
                        cached = int(value)
                    if key == 'total_active_file':
                        buffered = int(value)

            with open(self.container.get_memory_cgroup_path(
                    'memory.limit_in_bytes'), 'r') as f:
                limit = int(f.readline().strip())

            with open(self.container.get_memory_cgroup_path(
                    'memory.usage_in_bytes'), 'r') as f:
                used = int(f.readline().strip())

            host_free = psutil.virtual_memory().free
            container_total = used + min(host_free, limit - used)
            free = container_total - used
            feature_attributes = MemoryFeature(used, buffered, cached,
                                               free)
        except Exception as e:
            logger.error('Error crawling memory', exc_info=True)
            raise CrawlError(e)
    else:
        logger.error('Unsupported crawl mode: ' + self.crawl_mode +
                     '. Returning unknown memory key and attributes.')
        feature_attributes = MemoryFeature('unknown', 'unknown',
                                           'unknown', 'unknown')
    try:
        yield (feature_key, feature_attributes)
    except Exception as e:
        logger.error('Error crawling memory', exc_info=True)
        raise CrawlError(e)

def _crawl_packages(self, dbpath=None, root_dir='/'):
    assert(self.crawl_mode is not Modes.OUTCONTAINER)

    # package attributes: ["installed", "name", "size", "version"]
    (installtime, name, version, size) = (None, None, None, None)

    if self.crawl_mode == Modes.INVM:
        logger.debug('Using in-VM state information (crawl mode: ' +
                     self.crawl_mode + ')')
        system_type = platform.system().lower()
        distro = platform.linux_distribution()[0].lower()
    elif self.crawl_mode == Modes.MOUNTPOINT:
        logger.debug('Using disk image information (crawl mode: ' +
                     self.crawl_mode + ')')
        system_type = \
            platform_outofband.system(prefix=root_dir).lower()
        distro = platform_outofband.linux_distribution(
            prefix=root_dir)[0].lower()
    else:
        logger.error('Unsupported crawl mode: ' + self.crawl_mode +
                     '. Skipping package crawl.')
        system_type = 'unknown'
        distro = 'unknown'

    installed_since = self.feature_epoch
    if system_type != 'linux' or (system_type == 'linux' and
                                  distro == ''):
        # Distro is blank for FROM scratch images.
        # Package feature is only valid for Linux platforms.
        raise StopIteration()
    logger.debug('Crawling Packages')

    pkg_manager = 'unknown'
    if distro in ['ubuntu', 'debian']:
        pkg_manager = 'dpkg'
    elif distro.startswith('red hat') or distro in ['redhat',
                                                    'fedora', 'centos']:
        pkg_manager = 'rpm'
    elif os.path.exists(os.path.join(root_dir, 'var/lib/dpkg')):
        pkg_manager = 'dpkg'
    elif os.path.exists(os.path.join(root_dir, 'var/lib/rpm')):
        pkg_manager = 'rpm'

    try:
        if pkg_manager == 'dpkg':
            if not dbpath:
                dbpath = 'var/lib/dpkg'
            if os.path.isabs(dbpath):
                logger.warning(
                    'dbpath: ' + dbpath +
                    ' is defined absolute. Ignoring prefix: ' +
                    root_dir + '.')

            # Update for a different root.
            dbpath = os.path.join(root_dir, dbpath)

            if installed_since > 0:
                logger.warning(
                    'dpkg does not provide install-time, defaulting to '
                    'all packages installed since epoch')
            try:
                dpkg = subprocess.Popen(['dpkg-query', '-W',
                                         '--admindir={0}'.format(dbpath),
                                         '-f=${Package}|${Version}'
                                         '|${Installed-Size}\n'],
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE)
                dpkglist = dpkg.stdout.read().strip('\n')
            except OSError as e:
                logger.error(
                    'Failed to launch dpkg query for packages. Check if '
                    'dpkg-query is installed: [Errno: %d] ' % e.errno +
                    e.strerror + ' [Exception: ' + type(e).__name__ +
                    ']')
                dpkglist = None
            if dpkglist:
                for dpkginfo in dpkglist.split('\n'):
                    (name, version, size) = dpkginfo.split(r'|')

                    # dpkg does not provide any installtime field.
                    # feature_key = '{0}/{1}'.format(name, version) -->
                    # changed to below per Suriya's request
                    feature_key = '{0}'.format(name, version)
                    yield (feature_key, PackageFeature(None, name,
                                                       size, version))
        elif pkg_manager == 'rpm':
            if not dbpath:
                dbpath = 'var/lib/rpm'
            if os.path.isabs(dbpath):
                logger.warning(
                    'dbpath: ' + dbpath +
                    ' is defined absolute. Ignoring prefix: ' +
                    root_dir + '.')

            # Update for a different root.
            dbpath = os.path.join(root_dir, dbpath)

            try:
                rpm = subprocess.Popen([
                    'rpm',
                    '--dbpath',
                    dbpath,
                    '-qa',
                    '--queryformat',
                    '%{installtime}|%{name}|%{version}'
                    '-%{release}|%{size}\n',
                ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                rpmlist = rpm.stdout.read().strip('\n')
            except OSError as e:
                logger.error(
                    'Failed to launch rpm query for packages. Check if '
                    'rpm is installed: [Errno: %d] ' % e.errno +
                    e.strerror + ' [Exception: ' + type(e).__name__ +
                    ']')
                rpmlist = None
            if rpmlist:
                for rpminfo in rpmlist.split('\n'):
                    (installtime, name, version, size) = \
                        rpminfo.split(r'|')

                    # 'if int(installtime) <= installed_since:' barfs
                    # for values like 1376416422; consider wrapping the
                    # conversion in try/except ValueError.
                    if installtime <= installed_since:
                        continue

                    # feature_key = '{0}/{1}'.format(name, version) -->
                    # changed to below per Suriya's request
                    feature_key = '{0}'.format(name, version)
                    yield (feature_key, PackageFeature(installtime,
                                                       name, size,
                                                       version))
        else:
            raise CrawlError(
                Exception(
                    'Unsupported package manager for Linux distro %s' %
                    distro))
    except Exception as e:
        logger.error('Error crawling package %s'
                     % ((name if name else 'Unknown')), exc_info=True)
        raise CrawlError(e)

def crawl_memory(self):
    # memory attributes: ["used", "buffered", "cached", "free"]
    logger.debug('Crawling memory')
    feature_key = 'memory'
    if self.crawl_mode == Modes.INVM:
        vm = psutil.virtual_memory()
        if (vm.free + vm.used) > 0:
            util_percentage = float(vm.used) / (vm.free + vm.used) * 100.0
        else:
            util_percentage = 'unknown'
        feature_attributes = MemoryFeature(vm.used, vm.buffers, vm.cached,
                                           vm.free, util_percentage)
    elif self.crawl_mode == Modes.OUTVM:
        (domain_name, kernel_version, distro, arch) = self.vm
        sys = system_info(domain_name, kernel_version, distro, arch)
        feature_attributes = MemoryFeature(
            sys.memory_used,
            sys.memory_buffered,
            sys.memory_cached,
            sys.memory_free,
            (sys.memory_used * 100 /
             (sys.memory_used + sys.memory_free)))
    elif self.crawl_mode == Modes.OUTCONTAINER:
        used = buffered = cached = free = 'unknown'
        try:
            with open(self.container.get_memory_cgroup_path(
                    'memory.stat'), 'r') as f:
                for line in f:
                    (key, value) = line.strip().split(' ')
                    if key == 'total_cache':
                        cached = int(value)
                    if key == 'total_active_file':
                        buffered = int(value)

            with open(self.container.get_memory_cgroup_path(
                    'memory.limit_in_bytes'), 'r') as f:
                limit = int(f.readline().strip())

            with open(self.container.get_memory_cgroup_path(
                    'memory.usage_in_bytes'), 'r') as f:
                used = int(f.readline().strip())

            host_free = psutil.virtual_memory().free
            container_total = used + min(host_free, limit - used)
            free = container_total - used

            if 'unknown' not in [used, free] and (free + used) > 0:
                util_percentage = float(used) / (free + used) * 100.0
            else:
                util_percentage = 'unknown'

            feature_attributes = MemoryFeature(
                used, buffered, cached, free, util_percentage)
        except Exception as e:
            logger.error('Error crawling memory', exc_info=True)
            raise CrawlError(e)
    else:
        raise NotImplementedError('Unsupported crawl mode')
    yield (feature_key, feature_attributes)

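# Worked example of the cgroup memory arithmetic above, with
# illustrative numbers: for used = 200 MB, limit = 512 MB and
# host_free = 1024 MB, container_total = 200 + min(1024, 312) = 512 MB,
# so free = 312 MB and util_percentage = 200.0 / 512 * 100 ~= 39.1.
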
def _crawl_connections(self):
    assert(self.crawl_mode is not Modes.OUTCONTAINER)
    created_since = 0
    logger.debug('Crawling Connections: since={0}'.format(created_since))

    for p in psutil.process_iter():
        pid = (p.pid() if hasattr(p.pid, '__call__') else p.pid)
        status = (p.status() if hasattr(p.status, '__call__')
                  else p.status)
        if status == psutil.STATUS_ZOMBIE:
            continue

        create_time = (p.create_time() if hasattr(
            p.create_time, '__call__') else p.create_time)
        name = (p.name() if hasattr(p.name, '__call__') else p.name)

        if create_time <= created_since:
            continue
        try:
            for c in p.get_connections():
                try:
                    (localipaddr, localport) = c.laddr[:]
                except AttributeError:
                    # Older versions of psutil use local_address
                    # instead of laddr.
                    (localipaddr, localport) = c.local_address[:]
                try:
                    if c.raddr:
                        (remoteipaddr, remoteport) = c.raddr[:]
                    else:
                        (remoteipaddr, remoteport) = (None, None)
                except AttributeError:
                    # Older versions of psutil use remote_address
                    # instead of raddr.
                    if c.remote_address:
                        (remoteipaddr, remoteport) = \
                            c.remote_address[:]
                    else:
                        (remoteipaddr, remoteport) = (None, None)
                feature_key = '{0}/{1}/{2}'.format(pid, localipaddr,
                                                   localport)
                yield (feature_key, ConnectionFeature(
                    localipaddr,
                    localport,
                    name,
                    pid,
                    remoteipaddr,
                    remoteport,
                    str(c.status),
                ))
        except Exception as e:
            logger.error('Error crawling connection for process %s'
                         % pid, exc_info=True)
            raise CrawlError(e)

def _crawl_processes(self):
    created_since = 0
    logger.debug('Crawling Processes: since={0}'.format(created_since))

    for p in psutil.process_iter():
        create_time = (p.create_time() if hasattr(
            p.create_time, '__call__') else p.create_time)
        if create_time > created_since:
            name = (p.name() if hasattr(p.name, '__call__') else p.name)
            cmdline = (p.cmdline() if hasattr(p.cmdline, '__call__')
                       else p.cmdline)
            pid = (p.pid() if hasattr(p.pid, '__call__') else p.pid)
            status = (p.status() if hasattr(p.status, '__call__')
                      else p.status)
            if status == psutil.STATUS_ZOMBIE:
                cwd = 'unknown'  # invalid
            else:
                try:
                    cwd = (p.cwd() if hasattr(p, 'cwd') and
                           hasattr(p.cwd, '__call__') else p.getcwd())
                except Exception:
                    logger.error('Error crawling process %s for cwd'
                                 % pid, exc_info=True)
                    cwd = 'unknown'
            ppid = (p.ppid() if hasattr(p.ppid, '__call__')
                    else p.ppid)
            if (hasattr(p, 'num_threads') and
                    hasattr(p.num_threads, '__call__')):
                num_threads = p.num_threads()
            else:
                num_threads = p.get_num_threads()
            try:
                username = (p.username() if hasattr(p, 'username') and
                            hasattr(p.username, '__call__')
                            else p.username)
            except Exception:
                logger.error('Error crawling process %s for username'
                             % pid, exc_info=True)
                username = 'unknown'

            try:
                openfiles = []
                for f in p.get_open_files():
                    openfiles.append(f.path)
                openfiles.sort()
                feature_key = '{0}/{1}'.format(name, pid)
                yield (feature_key, ProcessFeature(
                    str(' '.join(cmdline)),
                    create_time,
                    cwd,
                    name,
                    openfiles,
                    pid,
                    ppid,
                    num_threads,
                    username,
                ))
            except Exception as e:
                logger.error('Error crawling process %s' % pid,
                             exc_info=True)
                raise CrawlError(e)

def _crawl_config_files(
    self,
    root_dir='/',
    exclude_dirs=['proc', 'mnt', 'dev', 'tmp'],
    root_dir_alias=None,
    known_config_files=[],
    discover_config_files=False,
):
    assert(self.crawl_mode is not Modes.OUTCONTAINER)

    saved_args = locals()
    logger.debug('Crawling config files: %s' % (saved_args))
    accessed_since = self.feature_epoch
    try:
        assert os.path.isdir(root_dir)
        if root_dir_alias is None:
            root_dir_alias = root_dir
        exclude_dirs = [os.path.join(root_dir, d)
                        for d in exclude_dirs]
        exclude_regex = r'|'.join([fnmatch.translate(d)
                                   for d in exclude_dirs]) or r'$.'
        known_config_files[:] = [os.path.join(root_dir, f)
                                 for f in known_config_files]
        known_config_files[:] = [f for f in known_config_files
                                 if not re.match(exclude_regex, f)]
        config_file_set = set()
        for fpath in known_config_files:
            if os.path.exists(fpath):
                lstat = os.lstat(fpath)
                if (lstat.st_atime > accessed_since or
                        lstat.st_ctime > accessed_since):
                    config_file_set.add(fpath)
    except Exception as e:
        logger.error('Error examining %s' % root_dir, exc_info=True)
        raise CrawlError(e)

    try:
        if discover_config_files:
            # Walk the directory hierarchy starting at 'root_dir' in
            # BFS order looking for config files.
            for (root_dirpath, dirs, files) in os.walk(root_dir):
                dirs[:] = [os.path.join(root_dirpath, d)
                           for d in dirs]
                dirs[:] = [d for d in dirs
                           if not re.match(exclude_regex, d)]
                files = [os.path.join(root_dirpath, f)
                         for f in files]
                files = [f for f in files
                         if not re.match(exclude_regex, f)]
                for fpath in files:
                    if os.path.exists(fpath) \
                            and self.is_config_file(fpath):
                        lstat = os.lstat(fpath)
                        if lstat.st_atime > accessed_since \
                                or lstat.st_ctime > accessed_since:
                            config_file_set.add(fpath)
    except Exception as e:
        logger.error('Error examining %s' % root_dir, exc_info=True)
        raise CrawlError(e)

    try:
        for fpath in config_file_set:
            try:
                (_, fname) = os.path.split(fpath)
                frelpath = fpath.replace(root_dir, root_dir_alias,
                                         1)  # root_dir-relative path

                # Copy this config_file into / before reading it, so we
                # don't change its atime attribute, then read the copy.
                (th, temppath) = tempfile.mkstemp(prefix='config.',
                                                  dir='/')
                os.close(th)
                shutil.copyfile(fpath, temppath)
                with codecs.open(filename=temppath, mode='r',
                                 encoding='utf-8', errors='ignore') as \
                        config_file:
                    # Encode the contents of config_file as utf-8.
                    yield (frelpath, ConfigFeature(
                        fname, config_file.read(), frelpath))
                os.remove(temppath)
            except IOError as e:
                raise CrawlError(e)
            except Exception as e:
                logger.error('Error crawling config file %s' % fpath,
                             exc_info=True)
                raise CrawlError(e)
    except Exception as e:
        logger.error('Error examining %s' % root_dir, exc_info=True)
        raise CrawlError(e)

def _crawl_os(self, mountpoint=None):
    assert(self.crawl_mode is not Modes.OUTCONTAINER)

    logger.debug('Crawling OS')
    if self.crawl_mode == Modes.INVM:
        logger.debug('Using in-VM state information (crawl mode: ' +
                     self.crawl_mode + ')')
        feature_key = platform.system().lower()
        try:
            ips = misc.get_host_ip4_addresses()
        except Exception:
            ips = 'unknown'
        try:
            distro = platform.linux_distribution()[0]
        except Exception:
            distro = 'unknown'
        try:
            osname = platform.platform()
        except Exception:
            osname = 'unknown'

        boot_time = (psutil.boot_time() if hasattr(psutil, 'boot_time')
                     else psutil.BOOT_TIME)
        uptime = int(time.time()) - boot_time
        feature_attributes = OSFeature(
            boot_time,
            uptime,
            ips,
            distro,
            osname,
            platform.machine(),
            platform.release(),
            platform.system().lower(),
            platform.version(),
        )
    elif self.crawl_mode == Modes.MOUNTPOINT:
        logger.debug('Using disk image information (crawl mode: ' +
                     self.crawl_mode + ')')
        feature_key = \
            platform_outofband.system(prefix=mountpoint).lower()
        feature_attributes = OSFeature(  # boot time unknown for img
            'unsupported',
            '0.0.0.0',  # live IP unknown for img
            platform_outofband.linux_distribution(
                prefix=mountpoint)[0],
            platform_outofband.platform(prefix=mountpoint),
            platform_outofband.machine(prefix=mountpoint),
            platform_outofband.release(prefix=mountpoint),
            platform_outofband.system(prefix=mountpoint).lower(),
            platform_outofband.version(prefix=mountpoint),
        )
    elif self.crawl_mode == Modes.OUTVM:
        (domain_name, kernel_version, distro, arch) = self.vm
        from psvmi import system_info
        sys = system_info(domain_name, kernel_version, distro, arch)
        uptime = int(time.time()) - sys.boottime
        feature_attributes = OSFeature(
            sys.boottime,
            sys.ipaddr,
            sys.osdistro,
            sys.osname,
            sys.osplatform,
            sys.osrelease,
            sys.ostype,
            sys.osversion,
        )
        feature_key = sys.ostype
    else:
        raise NotImplementedError()
    try:
        yield (feature_key, feature_attributes)
    except Exception as e:
        logger.error('Error crawling OS', exc_info=True)
        raise CrawlError(e)

def _crawl_packages(self, dbpath=None, root_dir='/'):
    # package attributes: ["installed", "name", "size", "version"]
    (installtime, name, version, size) = (None, None, None, None)

    if self.crawl_mode == Modes.INVM:
        logger.debug('Using in-VM state information (crawl mode: ' +
                     self.crawl_mode + ')')
        system_type = platform.system().lower()
        distro = platform.linux_distribution()[0].lower()
        reload_needed = False
    elif self.crawl_mode == Modes.OUTCONTAINER:
        logger.debug('Using outcontainer state information (crawl '
                     'mode: ' + self.crawl_mode + ')')
        # XXX assuming containers will always run in linux
        system_type = 'linux'

        # The package manager will be discovered after checking for the
        # existence of /var/lib/dpkg or /var/lib/rpm.
        distro = ''
        reload_needed = True
    elif self.crawl_mode == Modes.MOUNTPOINT:
        logger.debug('Using disk image information (crawl mode: ' +
                     self.crawl_mode + ')')
        system_type = \
            platform_outofband.system(prefix=root_dir).lower()
        distro = platform_outofband.linux_distribution(
            prefix=root_dir)[0].lower()
        reload_needed = False
    else:
        raise NotImplementedError('Unsupported crawl mode')

    installed_since = self.feature_epoch
    if system_type != 'linux':
        # Package feature is only valid for Linux platforms.
        raise StopIteration()
    logger.debug('Crawling Packages')

    pkg_manager = 'unknown'
    if distro in ['ubuntu', 'debian']:
        pkg_manager = 'dpkg'
    elif distro.startswith('red hat') or distro in ['redhat',
                                                    'fedora', 'centos']:
        pkg_manager = 'rpm'
    elif os.path.exists(os.path.join(root_dir, 'var/lib/dpkg')):
        pkg_manager = 'dpkg'
    elif os.path.exists(os.path.join(root_dir, 'var/lib/rpm')):
        pkg_manager = 'rpm'

    try:
        if pkg_manager == 'dpkg':
            if not dbpath:
                dbpath = 'var/lib/dpkg'
            for (key, feature) in get_dpkg_packages(
                    root_dir, dbpath, installed_since):
                yield (key, feature)
        elif pkg_manager == 'rpm':
            if not dbpath:
                dbpath = 'var/lib/rpm'
            for (key, feature) in get_rpm_packages(
                    root_dir, dbpath, installed_since, reload_needed):
                yield (key, feature)
        else:
            logger.warning('Unsupported package manager for Linux distro')
    except Exception as e:
        logger.error('Error crawling package %s'
                     % ((name if name else 'Unknown')), exc_info=True)
        raise CrawlError(e)

def _crawl_test_crash(self):
    raise CrawlError("oops")