def test_failed_containers_crawler_with_ignore_failure(self, *args):
    crawler = ContainersCrawler(features=['os'])
    frames = list(crawler.crawl())  # ignore_plugin_exception defaults to True
    namespaces = sorted([f.metadata['namespace'] for f in frames])
    assert namespaces == sorted(['aaa', 'errorid', 'ccc'])
    features_count = sorted([f.num_features for f in frames])
    assert features_count == sorted([3, 2, 3])
    system_types = [f.metadata['system_type'] for f in frames]
    assert system_types == ['container', 'container', 'container']
    assert args[0].call_count == 1
    assert args[1].call_count == 1
def test_containers_crawler(self, *args):
    crawler = ContainersCrawler(features=['os'])
    frames = list(crawler.crawl())
    namespaces = sorted([f.metadata['namespace'] for f in frames])
    assert namespaces == sorted(['aaa', 'bbb', 'ccc'])
    features_count = sorted([f.num_features for f in frames])
    assert features_count == sorted([2, 2, 2])
    system_types = sorted([f.metadata['system_type'] for f in frames])
    assert system_types == sorted(['container', 'container', 'container'])
    assert args[0].call_count == 1
    assert args[1].call_count == 1
def testCrawlContainer1(self):
    crawler = ContainersCrawler(
        features=['cpu', 'memory', 'interface', 'package'],
        environment='kubernetes')
    frames = list(crawler.crawl())
    output = str(frames[0])
    print output  # only printed if the test fails
    assert 'interface-lo' in output
    assert 'if_octets_tx=' in output
    assert 'cpu-0' in output
    assert 'cpu_nice=' in output
    assert 'memory' in output
    assert 'memory_buffered=' in output
    assert 'apt' in output
    assert 'pkgarchitecture=' in output
def testCrawlContainer1(self):
    crawler = ContainersCrawler(
        features=['cpu', 'memory', 'interface', 'package'])
    frames = list(crawler.crawl())
    output = str(frames[0])
    print output  # only printed if the test fails
    assert 'interface-lo' in output
    assert 'if_octets_tx=' in output
    assert 'cpu-0' in output
    assert 'cpu_nice=' in output
    assert 'memory' in output
    assert 'memory_buffered=' in output
    assert 'apt' in output
    assert 'pkgarchitecture=' in output
def testCrawlContainerAvoidSetns(self):
    options = {'avoid_setns': True}
    crawler = ContainersCrawler(
        user_list=self.container['Id'],
        features=['cpu', 'memory', 'interface', 'package'],
        options=options)
    frames = list(crawler.crawl())
    output = str(frames[0])
    print output  # only printed if the test fails
    # The interface feature is not supported in avoid_setns mode:
    # assert 'interface-lo' in output
    # assert 'if_octets_tx=' in output
    assert 'cpu-0' in output
    assert 'cpu_nice=' in output
    assert 'memory' in output
    assert 'memory_buffered=' in output
    assert 'apt' in output
    assert 'pkgarchitecture=' in output
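# Note: the options dict above is the programmatic equivalent of the
# --avoidSetns CLI flag handled in main() below, which copies
# args.avoid_setns into options['avoid_setns'] before building the crawler.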
def testCrawlContainerKafka2(self):
    emitters = EmittersManager(urls=['kafka://localhost:9092/test'])
    crawler = ContainersCrawler(
        features=['os', 'process'],
        user_list=self.container['Id'])
    worker = Worker(emitters=emitters, frequency=-1, crawler=crawler)
    worker.iterate()
    kafka = pykafka.KafkaClient(hosts='localhost:9092')
    topic = kafka.topics['test']
    consumer = topic.get_simple_consumer()
    message = consumer.consume()
    assert '"cmd":"/bin/sleep 60"' in message.value
    for i in range(1, 5):
        worker.iterate()
        message = consumer.consume()
        assert '"cmd":"/bin/sleep 60"' in message.value
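# A minimal standalone consumer sketch for frames emitted as in the test
# above; it assumes a Kafka broker at localhost:9092 and a 'test' topic
# (both names mirror the test setup and are not fixed by the crawler).
import pykafka

client = pykafka.KafkaClient(hosts='localhost:9092')
consumer = client.topics['test'].get_simple_consumer(
    consumer_timeout_ms=5000)  # stop after 5s of silence instead of blocking
for message in consumer:
    print message.value  # emitted frame data, one message per consume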
def test_failed_containers_crawler(self, *args):
    crawler = ContainersCrawler(features=['os'])
    with self.assertRaises(OSError):
        frames = list(crawler.crawl(ignore_plugin_exception=False))
    assert args[0].call_count == 1
    assert args[1].call_count == 1
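# Hypothetical decorator wiring for the failure tests above; the patch
# targets are illustrative placeholders, not the project's actual import
# paths. Stacked @mock.patch decorators hand their mocks to the test
# bottom-up, so args[0] belongs to the decorator closest to the def; the
# call_count asserts then verify each patched dependency was hit once.
import mock

@mock.patch('containers_crawler.plugins_manager.get_container_crawl_plugins')
@mock.patch('containers_crawler.get_containers')
def test_sketch(self, *args):
    # args[0] -> get_containers mock, args[1] -> plugins mock (bottom-up)
    pass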
def main():
    euid = os.geteuid()
    if euid != 0:
        print 'Need to run this as root.'
        exit(1)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--options',
        dest='options',
        type=json_parser,
        default={},
        help='JSON dict of crawler options to be passed as arguments '
             'to the crawler plugins.',
    )
    parser.add_argument(
        '--url',
        dest='url',
        type=csv_list,
        default=['stdout://'],
        help='Send the snapshot data to URL. Defaults to the console.',
    )
    parser.add_argument(
        '--namespace',
        dest='namespace',
        type=str,
        nargs='?',
        default=misc.get_host_ipaddr(),
        help='Data source this crawler is associated with. Defaults to '
             'the host IP address.',
    )
    parser.add_argument(
        '--features',
        dest='features',
        type=csv_list,
        default=['os', 'cpu'],
        help='Comma-separated list of feature-types to crawl. Defaults to '
             'os,cpu.',
    )
    parser.add_argument(
        '--frequency',
        dest='frequency',
        type=int,
        default=-1,
        help='Target time period between iterations. Defaults to -1, which '
             'means run only one iteration.',
    )
    parser.add_argument(
        '--compress',
        dest='compress',
        action='store_true',
        default=False,
        help='GZIP-compress the output frame data. Defaults to false.',
    )
    parser.add_argument(
        '--logfile',
        dest='logfile',
        type=str,
        default='crawler.log',
        help='Logfile path. Defaults to crawler.log.',
    )
    parser.add_argument(
        '--crawlmode',
        dest='crawlmode',
        type=str,
        choices=[
            Modes.INVM,
            Modes.OUTVM,
            Modes.MOUNTPOINT,
            Modes.OUTCONTAINER,
            Modes.MESOS,
        ],
        default=Modes.INVM,
        help='The crawler mode: '
             '{INVM,OUTVM,MOUNTPOINT,OUTCONTAINER,MESOS}. '
             'Defaults to INVM.',
    )
    parser.add_argument(
        '--mountpoint',
        dest='mountpoint',
        type=str,
        default='/',
        help='Mountpoint location used as the / for features like '
             'packages, files and config.',
    )
    parser.add_argument(
        '--format',
        dest='format',
        type=str,
        default='csv',
        choices=['csv', 'graphite', 'json', 'logstash'],
        help='Emitted data format.',
    )
    parser.add_argument(
        '--crawlContainers',
        dest='crawlContainers',
        type=str,
        nargs='?',
        default='ALL',
        help='List of containers to crawl as a list of Docker container IDs '
             '(only Docker is supported at the moment). Defaults to all '
             'running containers. Example: --crawlContainers aaa,bbb',
    )
    parser.add_argument(
        '--crawlVMs',
        dest='vm_descs_list',
        nargs='+',
        default='ALL',
        help='List of VMs to crawl. Defaults to \'ALL\' VMs. '
             'Each VM currently needs to be described as '
             '\'vm_name,kernel_version_long,linux_flavour,arch\'. '
             'With automatic kernel version detection in the future, only '
             'VM names (\'ALL\' by default) would need to be passed. '
             'Example: --crawlVMs '
             'vm1,3.13.0-24-generic_3.13.0-24.x86_64,ubuntu,x86_64 '
             'vm2,4.0.3.x86_64,vanilla,x86_64',
    )
    parser.add_argument(
        '--environment',
        dest='environment',
        type=str,
        default='cloudsight',
        help='This specifies some environment-specific behavior, like how '
             'to name a container. The way to add a new behavior is by '
             'implementing a plugin (see plugins/cloudsight_environment.py '
             'as an example). Defaults to "cloudsight".',
    )
    parser.add_argument(
        '--plugins',
        dest='plugin_places',
        type=csv_list,
        default=['plugins'],
        help='Comma-separated list of directories where to find plugins. '
             'Each path can be absolute, or relative to the location of '
             'crawler.py. Defaults to "plugins".',
    )
    parser.add_argument(
        '--numprocesses',
        dest='numprocesses',
        type=int,
        default=1,
        help='Number of processes used for container crawling. Defaults '
             'to 1. NOT SUPPORTED.',
    )
    parser.add_argument(
        '--extraMetadata',
        dest='extraMetadata',
        type=json_parser,
        default={},
        help='JSON with data to annotate all features. It can be used '
             'to append a set of system identifiers to the metadata '
             'feature.',
    )
    parser.add_argument(
        '--avoidSetns',
        dest='avoid_setns',
        action='store_true',
        default=False,
        help='Avoids the use of the setns() syscall to crawl containers. '
             'Some features like process will not work with this option. '
             'Only applies to the OUTCONTAINER mode.',
    )
    args = parser.parse_args()

    misc.setup_logger('crawlutils', args.logfile)
    misc.setup_logger('yapsy', 'yapsy.log')

    options = args.options
    options['avoid_setns'] = args.avoid_setns
    options['mountpoint'] = args.mountpoint

    emitters = EmittersManager(urls=args.url,
                               format=args.format,
                               compress=args.compress,
                               extra_metadata=args.extraMetadata,
                               plugin_places=args.plugin_places)

    if args.crawlmode == 'OUTCONTAINER':
        crawler = ContainersCrawler(
            features=args.features,
            environment=args.environment,
            user_list=args.crawlContainers,
            host_namespace=args.namespace,
            plugin_places=args.plugin_places,
            options=options)
    elif args.crawlmode == 'INVM' or args.crawlmode == 'MOUNTPOINT':
        crawler = HostCrawler(
            features=args.features,
            namespace=args.namespace,
            plugin_places=args.plugin_places,
            options=options)
    elif args.crawlmode == 'OUTVM':
        crawler = VirtualMachinesCrawler(
            features=args.features,
            user_list=args.vm_descs_list,
            host_namespace=args.namespace,
            plugin_places=args.plugin_places,
            options=options)
    else:
        raise NotImplementedError('Invalid crawlmode')

    worker = Worker(emitters=emitters,
                    frequency=args.frequency,
                    crawler=crawler)
    try:
        worker.run()
    except KeyboardInterrupt:
        pass
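# Example invocations (illustrative; the crawler must run as root, and the
# Kafka example assumes a broker listening on localhost:9092):
#
#   sudo python crawler.py --features os,cpu --url stdout://
#   sudo python crawler.py --crawlmode OUTCONTAINER \
#        --features cpu,memory,os --url kafka://localhost:9092/test \
#        --frequency 5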