def processStatistics(self):
    """
    Log statistics of all monitored processes and optionally emit them as events.

    For every process in self.psutil_processes the metrics listed in
    self.process_statistics are read. If a method named "convertMetric_<metric name>"
    is registered in self.methods, it is called to convert the name/value pair.
    Values that support "+=" are summed up across all processes; all other values
    are collected in a list stored under the metric name in the stats event.

    When self.emit_as_event is set, one "statistic" event is sent per process and
    one more for the aggregated metrics.
    """
    now = time.time()
    started_at = self.psutil_processes[0].create_time()
    worker_count = len(self.lumbermill.child_processes) + 1
    stats_event = {
        "stats_type": "process_stats",
        "timestamp": now,
        "worker_count": worker_count,
        "uptime": int(now - started_at),
    }
    self.logger.info(">> Process stats")
    self.logger.info("num workers: %d" % worker_count)
    self.logger.info("started: %s" % datetime.datetime.fromtimestamp(started_at).strftime("%Y-%m-%d %H:%M:%S"))
    aggregated_metrics = defaultdict(int)
    for psutil_process in self.psutil_processes:
        stats_event["pid"] = psutil_process.pid
        for metric_name, metric_value in psutil_process.as_dict(self.process_statistics).items():
            # Call metric specific method if it exists. The lookup has to use the
            # metric name; the former lookup via self.action referenced an attribute
            # that has nothing to do with the metric being converted.
            converter_name = "convertMetric_%s" % metric_name
            if converter_name in self.methods:
                metric_name, metric_value = getattr(self, converter_name)(metric_name, metric_value)
            try:
                aggregated_metrics[metric_name] += metric_value
            except TypeError:
                # Value can not be summed up. Try to store a plain dict copy of it.
                try:
                    metric_value = dict(metric_value.__dict__)
                except Exception:
                    # Best effort only: keep the value as it is.
                    pass
                stats_event.setdefault(metric_name, []).append(metric_value)
            self.logger.info("%s(pid: %s): %s" % (metric_name, psutil_process.pid, metric_value))
        if self.emit_as_event:
            self.sendEvent(DictUtils.getDefaultEventDict(stats_event, caller_class_name="Statistics", event_type="statistic"))
    for agg_metric_name, agg_metric_value in aggregated_metrics.items():
        self.logger.info("%s: %s" % (agg_metric_name, agg_metric_value))
    if self.emit_as_event:
        self.sendEvent(DictUtils.getDefaultEventDict(aggregated_metrics, caller_class_name="Statistics", event_type="statistic"))
def testMd5Hash(self):
    """Run the 'hash' action without an explicit algorithm and compare the resulting event."""
    plain_text = 'Nobody inspects the spammish repetition'
    self.test_object.configure({
        'action': 'hash',
        'source_fields': ['hash_me'],
        'target_fields': ['hash_me_hashed'],
    })
    expected = DictUtils.getDefaultEventDict({
        'lumbermill': {'event_id': 1},
        'hash_me': plain_text,
        'hash_me_hashed': 'bb649c83dd1ea5c9d9dec9a18df0ffe9',
    })
    incoming = DictUtils.getDefaultEventDict({
        'lumbermill': {'event_id': 1},
        'hash_me': plain_text,
    })
    for hashed_event in self.test_object.handleEvent(incoming):
        self.assertEqual(hashed_event, expected)
def testAnonymizeMd5(self):
    """Run the 'anonymize' action with md5: the source field itself must hold the digest."""
    self.test_object.configure({
        'action': 'anonymize',
        'source_fields': ['anon_me'],
        'algorithm': 'md5',
    })
    expected = DictUtils.getDefaultEventDict({
        'lumbermill': {'event_id': 1},
        'anon_me': 'bb649c83dd1ea5c9d9dec9a18df0ffe9',
    })
    incoming = DictUtils.getDefaultEventDict({
        'lumbermill': {'event_id': 1},
        'anon_me': 'Nobody inspects the spammish repetition',
    })
    for anonymized_event in self.test_object.handleEvent(incoming):
        self.assertEqual(anonymized_event, expected)
def testSha1Hash(self):
    """Run the 'hash' action with sha1 and compare the event carrying the digest."""
    plain_text = 'Nobody inspects the spammish repetition'
    module_config = {
        'action': 'hash',
        'algorithm': 'sha1',
        'source_fields': ['hash_me'],
        'target_fields': ['hash_me_hashed'],
    }
    self.test_object.configure(module_config)
    expected = DictUtils.getDefaultEventDict({
        'lumbermill': {'id': 1},
        'hash_me': plain_text,
        'hash_me_hashed': '531b07a0f5b66477a21742d2827176264f4bbfe2',
    })
    incoming = DictUtils.getDefaultEventDict({
        'lumbermill': {'id': 1},
        'hash_me': plain_text,
    })
    for hashed_event in self.test_object.handleEvent(incoming):
        self.assertEqual(hashed_event, expected)
def run(self):
    """
    Send the content of every file in self.files as event(s), then shut lumbermill down.

    With self.line_by_line set, each line of a file becomes its own event. Otherwise
    the whole file content is sent as a single event. Paths that are not regular
    files are skipped with a warning.
    """
    module_name = self.__class__.__name__
    for found_file in self.files:
        if os.path.isfile(found_file):
            with open(found_file, 'r') as data_file:
                # Either iterate the file lazily or wrap its whole content in a 1-tuple.
                chunks = data_file if self.line_by_line else (data_file.read(),)
                for chunk in chunks:
                    payload = {"filename": found_file, "data": chunk}
                    self.sendEvent(DictUtils.getDefaultEventDict(dict=payload, caller_class_name=module_name))
        else:
            self.logger.warning("File %s does not exist. Skipping." % found_file)
    self.lumbermill.shutDown()
def testSha1Hash(self):
    """Hash 'hash_me' with sha1 into 'hash_me_hashed' and compare the whole event."""
    source_text = 'Nobody inspects the spammish repetition'
    sha1_digest = '531b07a0f5b66477a21742d2827176264f4bbfe2'
    self.test_object.configure({
        'action': 'hash',
        'algorithm': 'sha1',
        'source_fields': ['hash_me'],
        'target_fields': ['hash_me_hashed'],
    })
    expected = DictUtils.getDefaultEventDict({
        'lumbermill': {'id': 1},
        'hash_me': source_text,
        'hash_me_hashed': sha1_digest,
    })
    unhashed = DictUtils.getDefaultEventDict({'lumbermill': {'id': 1}, 'hash_me': source_text})
    for result in self.test_object.handleEvent(unhashed):
        self.assertEqual(result, expected)
def extractFieldsFromResultDocument(self, fields, document):
    """
    Return a new KeyDotNotationDict holding only the requested fields of document.

    Fields that are not present in the document are left out of the result.
    """
    source = DictUtils.KeyDotNotationDict(document)
    extracted = DictUtils.KeyDotNotationDict()
    for field in fields:
        if field in source:
            extracted[field] = source[field]
    return extracted
def extractFieldsFromResultDocumentWithMapping(self, field_mapping, document):
    """
    Return a new KeyDotNotationDict with selected fields of document stored under new names.

    field_mapping maps a source field name to the target field name it should be
    stored under. Source fields missing in the document are skipped.
    """
    document = DictUtils.KeyDotNotationDict(document)
    new_document = DictUtils.KeyDotNotationDict()
    # items() instead of iteritems(): works on python 2 and 3 alike.
    for source_field, target_field in field_mapping.items():
        if source_field not in document:
            continue
        new_document[target_field] = document[source_field]
    return new_document
def testNewlineEndEvent(self):
    """
    Two events from the same sender must be merged into one when the second
    one matches the configured end-of-event pattern (a trailing newline).
    """
    self.test_object.configure({'pattern': "\n$", 'pattern_marks': 'EndOfEvent'})
    self.checkConfiguration()
    self.test_object.initAfterFork()
    sender = 'TestMergeEvent_%s' % os.getpid()
    for chunk in ('No newline.', "But now: \n"):
        event = DictUtils.getDefaultEventDict({'data': chunk}, received_from=sender)
        self.test_object.receiveEvent(event)
    # Give the module time to flush the merged event.
    time.sleep(1.5)
    events = list(self.receiver.getEvent())
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(len(events), 1)
    self.assertEqual(events[0]['data'], 'No newline.But now: \n')
def testFacetValuesMustBeUnique(self):
    """
    Send the same url three times for one remote ip and expect exactly one
    facet event that contains the url only once.
    """
    rc = RedisStore.RedisStore(mock.Mock())
    rc.configure({'server': 'localhost'})
    self.test_object.lumbermill.modules = {'RedisStore': {'instances': [rc]}}
    self.test_object.configure({
        'source_field': 'url',
        'group_by': '$(remote_ip)',
        'interval': 4,
        'backend': 'RedisStore',
        'backend_ttl': 10
    })
    self.checkConfiguration()
    self.test_object.initAfterFork()
    # Three identical events, one second apart.
    for _ in range(3):
        self.test_object.receiveEvent(
            DictUtils.getDefaultEventDict({
                'url': 'http://www.google.com',
                'remote_ip': '127.0.0.1',
                'user_agent': 'Eric'
            }))
        time.sleep(1)
    self.test_object.shutDown()
    events = [event for event in self.receiver.getEvent()
              if event['lumbermill']['event_type'] == 'facet']
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(len(events), 1)
    self.assertEqual(len(events[0]['facets']), 1)
    self.assertEqual(events[0]['facets'][0], 'http://www.google.com')
def handleBatchEvents(self):
    """
    Read events from the redis list(s) in batches for as long as self.alive is set.

    Each round queues self.batch_size blpop commands in one pipeline and executes
    them together. Every non-empty result is sent as an event with the first
    element as "received_from" and the second as "data".
    """
    pipeline = self.client.pipeline()
    while self.alive:
        for _ in range(0, self.batch_size):
            pipeline.blpop(self.lists, timeout=self.timeout)
        try:
            events = pipeline.execute()
        except Exception as error:
            # Only catch real errors. A bare except would also swallow
            # KeyboardInterrupt/SystemExit and make the loop hard to stop.
            self.logger.error(
                "Could not read data from redis list(s) %s. Exception: %s, Error: %s."
                % (self.lists, type(error), error))
            continue
        for event in events:
            # If batch_size is bigger than events waiting in redis queue, the remaining entries will be filled with None values.
            # So break out if a None value is found.
            if not event:
                # Queue is exhausted. Sleep a bit and retry.
                time.sleep(.5)
                break
            event = DictUtils.getDefaultEventDict(
                dict={
                    "received_from": '%s' % event[0],
                    "data": event[1]
                },
                caller_class_name=self.__class__.__name__)
            self.sendEvent(event)
def setUp(self):
    """Create the http access log fixture event and store it as self.event."""
    lumbermill_meta = {
        'event_id': '715bd321b1016a442bf046682722c78e',
        'event_type': 'httpd_access_log',
        'received_from': '127.0.0.1',
        'source_module': 'StdIn',
        'list': [10, 20, {'hovercraft': 'eels'}],
    }
    fixture = {
        'bytes_send': 3395,
        'data': '192.168.2.20 - - [28/Jul/2006:10:27:10 -0300] "GET /wiki/Monty_Python/?spanish=inquisition HTTP/1.0" 200 3395\n',
        'datetime': '28/Jul/2006:10:27:10 -0300',
        'lumbermill': lumbermill_meta,
        'http_status': 200,
        'identd': '-',
        'remote_ip': '192.168.2.20',
        'url': 'GET /wiki/Monty_Python/?spanish=inquisition HTTP/1.0',
        'fields': ['nobody', 'expects', 'the'],
        'params': {u'spanish': [u'inquisition']},
        'user': '******',
    }
    self.event = DictUtils.getDefaultEventDict(fixture)
def testAddGeoInfoFromDefaultField(self):
    """Without configured source fields, the ip in 'x_forwarded_for' must yield country code 'US'."""
    module_config = {
        'geoip_dat_path': './test_data/GeoLiteCity.dat',
        'geo_info_fields': ['country_code'],
    }
    self.test_object.configure(module_config)
    self.checkConfiguration()
    incoming = DictUtils.getDefaultEventDict({'x_forwarded_for': '99.124.167.129'})
    for enriched in self.test_object.handleEvent(incoming):
        self.assertEqual(enriched['country_code'], 'US')
def testHttpsQuery(self):
    """Query an https url and expect a non-empty 'gambolputty_http_request' field in each event."""
    result_field = 'gambolputty_http_request'
    self.test_object.configure({'url': 'https://www.google.com'})
    self.checkConfiguration()
    trigger = DictUtils.getDefaultEventDict({'TreeNodeID': '1'})
    for event in self.test_object.handleEvent(trigger):
        has_result = result_field in event and len(event[result_field]) > 0
        self.assertTrue(has_result)
def testUnixSocket(self):
    """
    Send 5000 lines through the unix socket and expect 5000 events, the last
    one carrying exactly the sent line as 'data'.
    """
    socket_path = '/tmp/test.sock'
    # Remove a stale socket file of a previous run.
    try:
        os.remove(socket_path)
    except OSError:
        pass
    self.assertFalse(os.path.exists(socket_path))
    self.test_object.configure({'path_to_socket': socket_path})
    self.checkConfiguration()
    self.test_object.start()
    self.startTornadoEventLoop()
    time.sleep(.1)
    self.assertTrue(os.path.exists(socket_path))
    unix_socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        try:
            unix_socket.connect(socket_path)
        except socket.error:
            # socket.error is the exception class; socket.errno is just the errno
            # module and can never match a raised exception.
            self.fail("Could not connect to unix socket.")
        for _ in range(0, 5000):
            # sendall: a plain send may transmit only part of the line.
            unix_socket.sendall(b"http://en.wikipedia.org/wiki/Monty_Python/?gambol=putty\r\n")
        expected_ret_val = DictUtils.getDefaultEventDict(
            {'data': "http://en.wikipedia.org/wiki/Monty_Python/?gambol=putty\r\n"})
        expected_ret_val.pop('lumbermill')
        time.sleep(.5)
        event = False
        counter = 0
        for event in self.receiver.getEvent():
            counter += 1
        self.assertTrue(event)
        self.assertEqual(counter, 5000)
        event.pop('lumbermill')
        self.assertDictEqual(event, expected_ret_val)
    finally:
        # Close the client only after all events were collected, so the server
        # side never sees an early end of stream.
        unix_socket.close()
def run(self):
    """
    Sniff packets for as long as self.alive is set and send decoded packets as events.

    Each packet is decoded with the 'eth' decoder and every decoded layer is handed
    to the matching parse method, which fills decoded_data. An event is only sent
    when decoded_data holds a non-empty 'data' entry. The decoded data is stored in
    self.target_field if configured, otherwise merged into the event itself.
    """
    while self.alive:
        packet = None
        try:
            _, packet = self.sniffer.next()
        except Exception:
            # Reading a packet is best effort; failures are retried on the next
            # round. KeyboardInterrupt/SystemExit are deliberately not swallowed.
            pass
        if not packet:
            continue
        decoder = self.getPacketDecoder('eth')
        if not decoder:
            continue
        decoded_data = {'protocols': []}
        for decoded_packet in decoder.decodePacket(packet):
            packet_type = str(type(decoded_packet))
            if packet_type == "<class 'impacket.ImpactPacket.Ethernet'>":
                self.parseEtherPacket(decoded_packet, decoded_data)
            elif packet_type == "<class 'impacket.ImpactPacket.IP'>":
                self.parseIPPacketEvent(decoded_packet, decoded_data)
            elif packet_type == "<class 'impacket.ImpactPacket.TCP'>":
                self.parseTCPPacketEvent(decoded_packet, decoded_data)
            elif packet_type == "<class 'impacket.ImpactPacket.Data'>":
                self.parseDataPacketEvent(decoded_packet, decoded_data)
        # decoded_data starts without a 'data' key. Use get() so packets for which
        # no parser stored any data are skipped instead of raising a KeyError.
        if decoded_data.get('data'):
            event = DictUtils.getDefaultEventDict(caller_class_name=self.__class__.__name__)
            if self.target_field:
                event[self.target_field] = decoded_data
            else:
                event.update(decoded_data)
            self.sendEvent(event)
def testZmqPull(self):
    """Push 1000 messages to the module running in 'pull' mode and expect 1000 events."""
    message = 'A comfy chair is not an effective method of torture!'
    ipaddr, port = self.getFreePortoOnLocalhost()
    self.test_object.configure({'address': '%s:%s' % (ipaddr, port), 'pattern': 'pull'})
    self.checkConfiguration()
    self.test_object.start()
    sender = self.getZmqSocket(ipaddr, port, 'push')
    self.assertTrue(sender is not None)
    sent = 0
    while sent < 1000:
        sender.send(message)
        sent += 1
    sender.close()
    expected_ret_val = DictUtils.getDefaultEventDict(
        {'data': 'A comfy chair is not an effective method of torture!'})
    expected_ret_val.pop('lumbermill')
    last_event = False
    time.sleep(.1)
    received = 0
    for last_event in self.receiver.getEvent():
        received += 1
    self.assertTrue(last_event is not False)
    # Only compare the payload, not the generated meta data.
    last_event.pop('lumbermill')
    self.assertDictEqual(last_event, expected_ret_val)
    self.assertEqual(received, 1000)
def testSQSSink(self):
    """
    Send 100 events to the SQS sink and read them back from the test queue.

    Credentials are taken from the AWS_ID and AWS_KEY environment variables.
    """
    self.test_object.configure({
        'aws_access_key_id': os.environ['AWS_ID'],
        'aws_secret_access_key': os.environ['AWS_KEY'],
        'region': 'eu-west-1',
        'queue': self.queue_name,
    })
    self.checkConfiguration()
    self.test_object.initAfterFork()
    message_text = (u"You get 'Gone with the Wind', 'Les Miserables' by Victor Hugo, "
                    u"'The French Lieutenant's Woman' and with every third book you get dung.")
    # Send some messages to the test queue.
    sent = 0
    while sent < 100:
        event = DictUtils.getDefaultEventDict({u'data': message_text})
        self.test_object.receiveEvent(event)
        sent += 1
    self.test_object.shutDown()
    # Give messages some time to arrive.
    time.sleep(2)
    # Get messages from queue, at most 50 requests of up to 10 messages each.
    messages = []
    for _ in range(0, 50):
        response = self.sqs_client.receive_message(QueueUrl=self.sqs_queue.url, MaxNumberOfMessages=10)
        if 'Messages' not in response:
            break
        messages.extend(response['Messages'])
    self.assertEqual(len(messages), 100)
    self.assertEqual(json.loads(messages[0]['Body'])['data'], event['data'])
def testAddDateTimeCustomFormat(self):
    """The '@timestamp' field must follow the configured custom format, e.g. 2013/08/29 10.25.26."""
    # %m (month) in the date part; the former %M would have put the minutes there.
    self.test_object.configure({'format': '%Y/%m/%d %H.%M.%S'})
    # Raw string with escaped dots, so only literal dots separate the time parts.
    timestamp_pattern = r'^\d+/\d+/\d+ \d+\.\d+\.\d+$'
    for event in self.test_object.handleEvent(DictUtils.getDefaultEventDict({})):
        # assertTrue instead of the deprecated assert_ alias.
        self.assertTrue(re.match(timestamp_pattern, event['@timestamp']))
def testUnixSocket(self):
    """
    Send 5000 lines through the unix socket and expect 5000 events.

    The test is currently skipped right after configuring the module. The code
    below the skip is kept so the test can be re-enabled by removing the raise.
    """
    socket_path = '/tmp/test.sock'
    self.test_object.configure({'path_to_socket': socket_path})
    raise unittest.SkipTest('Skipping test because UnixSocket input is currently broken.')
    # Remove a stale socket file of a previous run.
    try:
        os.remove(socket_path)
    except OSError:
        pass
    self.assertFalse(os.path.exists(socket_path))
    self.checkConfiguration()
    self.test_object.start()
    self.startTornadoEventLoop()
    time.sleep(.1)
    self.assertTrue(os.path.exists(socket_path))
    unix_socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        try:
            unix_socket.connect(socket_path)
        except socket.error:
            # socket.error is the exception class; socket.errno is just the errno
            # module and can never match a raised exception.
            self.fail("Could not connect to unix socket.")
        for _ in range(0, 5000):
            # sendall: a plain send may transmit only part of the line.
            unix_socket.sendall(b"http://en.wikipedia.org/wiki/Monty_Python/?gambol=putty\r\n")
        expected_ret_val = DictUtils.getDefaultEventDict(
            {'data': "http://en.wikipedia.org/wiki/Monty_Python/?gambol=putty\r\n"})
        expected_ret_val.pop('lumbermill')
        time.sleep(.5)
        event = False
        counter = 0
        for event in self.receiver.getEvent():
            counter += 1
        self.assertTrue(event)
        self.assertEqual(counter, 5000)
        event.pop('lumbermill')
        self.assertDictEqual(event, expected_ret_val)
    finally:
        # Close the client only after all events were collected.
        unix_socket.close()
def requeueEvents(self):
    """
    Requeue events left over in the persistence backend.

    Every key starting with self.key_prefix is popped from the backend and the
    stored event is handed to the input module named in its
    lumbermill.source_module entry. Events without that entry, or naming a module
    that is not a configured input module, are dropped with a log message.
    """
    # Collect the first instance of each input module by its class name.
    input_modules = {}
    for module_name, module_info in self.lumbermill.modules.items():
        instance = module_info['instances'][0]
        if instance.module_type == "input":
            input_modules[instance.__class__.__name__] = instance
    self.logger.warning("Found unfinished events. Requeing...")
    # Count over the whole run. Initializing the counter inside the loop reset it
    # for every key and left it undefined when no key matched the prefix.
    requeue_counter = 0
    for key in self.persistence_backend.iterKeys():
        if not key.startswith("%s" % self.key_prefix):
            continue
        event = self.persistence_backend.pop(key)
        if not event:
            continue
        if "source_module" not in event.get("lumbermill", {}):
            self.logger.warning(
                "Could not requeue event. Source module info not found in event data."
            )
            continue
        source_module = event["lumbermill"]["source_module"]
        if source_module not in input_modules:
            self.logger.error(
                "Could not requeue event. Module %s not found." %
                (source_module))
            continue
        requeue_counter += 1
        input_modules[source_module].sendEvent(
            DictUtils.KeyDotNotationDict(event))
    self.logger.warning("Done. Requeued %s events." % (requeue_counter))
    self.logger.warning(
        "Note: If more than one gp instance is running, requeued events count may differ from total events."
    )
def __testStorageTTL(self):
    """
    Store a document with a ttl and expect it to be gone shortly afterwards.

    This does not seem to be testable without waiting for at least 60 seconds.
    That appears to be the smallest interval the purger thread runs at, no matter
    what ttl.interval is set to. The documentation
    @http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/modules-indices.html#indices-ttl
    does not mention a lower limit, but testing leads to the assumption that 60s
    is the lowest one.
    """
    index = self.test_index_name
    self.test_object.configure({
        'nodes': [self.es_server],
        'index_name': index,
        'ttl': 100,
        'sniff_on_start': False,
        'store_interval_in_secs': 1,
    })
    self.checkConfiguration()
    self.test_object.initAfterFork()
    # Enable ttl mapping.
    indices = self.es.indices
    indices.close(index=index)
    indices.put_settings(index=index, body='{"ttl": {"interval" : "1s"}}')
    indices.open(index=index)
    indices.put_mapping(index=index, doc_type='Unknown', body='{"_ttl" : { "enabled" : true }}')
    event = DictUtils.getDefaultEventDict({'McTeagle': "But it was with more simple, homespun verses that McTeagle's unique style first flowered."})
    doc_id = event['lumbermill']['event_id']
    self.test_object.receiveEvent(event)
    self.test_object.shutDown()
    # The document has to exist right after storing...
    try:
        result = self.es.get(index=index, doc_type='Unknown', id=doc_id)
    except elasticsearch.NotFoundError:
        self.fail("Document was not found.")
    self.assertEqual(type(result), dict)
    self.assertDictContainsSubset(event, result['_source'])
    time.sleep(2)
    # ...and has to be gone once the ttl expired.
    try:
        result = self.es.get(index=index, doc_type='Unknown', id=doc_id)
    except elasticsearch.NotFoundError:
        pass
    else:
        self.fail("Document was not deleted after ttl.")
def test(self):
    """Send 100 udp datagrams to the module and expect 100 events carrying the sent text."""
    message = "Beethoven, Mozart, Chopin, Liszt, Brahms, Panties...I'm sorry...Schumann, Schubert, Mendelssohn and Bach. Names that will live for ever."
    self.test_object.configure({})
    self.checkConfiguration()
    self.test_object.start()
    # Give server process time to startup.
    time.sleep(.1)
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    s.settimeout(1)
    datagrams_sent = 0
    while datagrams_sent < 100:
        s.sendto(message, ('127.0.0.1', self.test_object.getConfigurationValue('port')))
        datagrams_sent += 1
    s.close()
    expected_ret_val = DictUtils.getDefaultEventDict({'data': message})
    expected_ret_val.pop('lumbermill')
    last_event = False
    time.sleep(2)
    received = 0
    for last_event in self.receiver.getEvent():
        received += 1
    self.assertTrue(last_event != False)
    self.assertEqual(received, 100)
    # Only compare the payload, not the generated meta data.
    last_event.pop('lumbermill')
    self.assertDictEqual(last_event, expected_ret_val)
def run(self):
    """
    Consume kafka messages for as long as self.alive is set and send each as an event.

    The event carries the message topic and value. With self.auto_commit_enable
    set, the message is marked as done on the consumer after it was sent.
    """
    module_name = self.__class__.__name__
    while self.alive:
        for kafka_event in self.consumer:
            payload = {"topic": kafka_event.topic, "data": kafka_event.value}
            self.sendEvent(DictUtils.getDefaultEventDict(dict=payload, caller_class_name=module_name))
            if self.auto_commit_enable:
                self.consumer.task_done(kafka_event)
def testCustomDocId(self):
    """Store an event with 'doc_id' taken from an event field and look it up by that id."""
    verse = "But it was with more simple, homespun verses that McTeagle's unique style first flowered."
    self.test_object.configure({
        'host': self.mongodb_server,
        'doc_id': '$(event_doc_id)',
        'optinonal_connection_params': {'serverSelectionTimeoutMS': 1},
    })
    self.checkConfiguration()
    self.test_object.initAfterFork()
    collection_name = 'lumbermill-%s' % datetime.datetime.utcnow().strftime('%Y.%m.%d')
    database_name = self.test_object.getConfigurationValue('database')
    event = DictUtils.getDefaultEventDict({'McTeagle': verse, 'event_doc_id': 'Ewan'})
    self.test_object.receiveEvent(event)
    self.test_object.shutDown()
    collection = self.mongodb[database_name][collection_name]
    result = collection.find_one({'_id': event['event_doc_id']})
    self.assertEqual(type(result), dict)
    self.assertEqual(result['McTeagle'], verse)
    # Clean up the database used by this test.
    self.mongodb.drop_database(database_name)
def handleFileChange(self, callback_data):
    """
    Send one event per pending line of a changed file.

    Lines are taken from the left of callback_data['lines'] until it is empty.
    Each event carries callback_data['filename'] and the line as 'data'.
    """
    module_name = self.__class__.__name__
    while True:
        try:
            line = callback_data['lines'].popleft()
        except IndexError:
            # No more pending lines.
            return
        payload = {"filename": callback_data['filename'], "data": line}
        self.sendEvent(DictUtils.getDefaultEventDict(dict=payload, caller_class_name=module_name))
def testCustomDatabaseAndCustomCollection(self):
    """Store an event in a custom database and a collection whose name is built from an event field."""
    verse = "But it was with more simple, homespun verses that McTeagle's unique style first flowered."
    self.test_object.configure({
        'host': self.mongodb_server,
        'database': 'my_test_database',
        'collection': 'lumbermill-$(target_collection)',
        'optinonal_connection_params': {'serverSelectionTimeoutMS': 1},
    })
    self.checkConfiguration()
    self.test_object.initAfterFork()
    collection_name = 'lumbermill-mcteagles'
    database_name = self.test_object.getConfigurationValue('database')
    event = DictUtils.getDefaultEventDict({'McTeagle': verse, 'target_collection': 'mcteagles'})
    self.test_object.receiveEvent(event)
    self.test_object.shutDown()
    collection = self.mongodb[database_name][collection_name]
    result = collection.find_one({'_id': event['lumbermill']['event_id']})
    self.assertEqual(type(result), dict)
    self.assertEqual(result['McTeagle'], verse)
    # Clean up the database used by this test.
    self.mongodb.drop_database(database_name)
def testUserAgentTargetField(self):
    """The parsed user agent data must be stored in the configured target field."""
    self.test_object.configure({'source_fields': 'user_agent', 'target_field': 'http_user_agent_data'})
    self.checkConfiguration()
    incoming = DictUtils.getDefaultEventDict({'user_agent': "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"})
    for event in self.test_object.handleEvent(incoming):
        # Separate assertions replace the deprecated assert_ alias and report
        # which part of the expectation failed.
        self.assertIn('http_user_agent_data', event)
        self.assertEqual(event['http_user_agent_data']['device']['family'], "Spider")
def _on_read_line(self, data):
    """
    Send a received line as event and wait for the next line.

    Reading is only re-armed while the stream is still open.
    """
    event = DictUtils.getDefaultEventDict({"data": data}, caller_class_name='UnixSocket', received_from=self.address)
    self.gp_module.sendEvent(event)
    if self.stream.closed():
        return
    self.stream.read_until_regex(b'\r?\n', self._on_read_line)
def __testStorageTTL(self):
    """
    Store a document with a ttl and expect it to be gone shortly afterwards.

    This does not seem to be testable without waiting for at least 60 seconds.
    That appears to be the smallest interval the purger thread runs at, no matter
    what ttl.interval is set to. The documentation
    @http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/modules-indices.html#indices-ttl
    does not mention a lower limit, but testing leads to the assumption that 60s
    is the lowest one.
    """
    index = self.test_index_name
    self.test_object.configure({
        'index_name': index,
        'nodes': [self.es_server],
        'ttl': 100,
        'sniff_on_start': False,
        'store_interval_in_secs': 1,
    })
    self.checkConfiguration()
    self.test_object.initAfterFork()
    # Enable ttl mapping.
    indices = self.es.indices
    indices.close(index=index)
    indices.put_settings(index=index, body='{"ttl": {"interval" : "1s"}}')
    indices.open(index=index)
    indices.put_mapping(index=index, doc_type='Unknown', body='{"_ttl" : { "enabled" : true }}')
    event = DictUtils.getDefaultEventDict({'McTeagle': "But it was with more simple, homespun verses that McTeagle's unique style first flowered."})
    doc_id = event['lumbermill']['event_id']
    self.test_object.receiveEvent(event)
    self.test_object.shutDown()
    # The document has to exist right after storing...
    try:
        result = self.es.get(index=index, id=doc_id)
    except elasticsearch.NotFoundError:
        self.fail("Document was not found.")
    self.assertEqual(type(result), dict)
    self.assertDictContainsSubset(event, result['_source'])
    time.sleep(2)
    # ...and has to be gone once the ttl expired.
    try:
        result = self.es.get(index=index, id=doc_id)
    except elasticsearch.NotFoundError:
        pass
    else:
        self.fail("Document was not deleted after ttl.")
def TestATcpConnection(self):
    """
    Send 1500 newline separated lines over tcp to port 5353 and expect 1500 events.

    The module is configured with a newline as 'simple_separator'; the expected
    event data is the sent line without the trailing newline.
    """
    print("testTcpConnection")
    self.test_object.configure({'port': 5353, 'simple_separator': '\n'})
    self.checkConfiguration()
    self.test_object.initAfterFork()
    self.startTornadoEventLoop()
    # Give server process time to startup.
    time.sleep(.1)
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.settimeout(1)
        s.connect(('localhost', self.test_object.getConfigurationValue('port')))
        for _ in range(0, 1500):
            s.sendall("Beethoven, Mozart, Chopin, Liszt, Brahms, Panties...I'm sorry...Schumann, Schubert, Mendelssohn and Bach. Names that will live for ever.\n")
        s.shutdown(socket.SHUT_RDWR)
        s.close()
        connection_succeeded = True
    except Exception as error:
        # Catch real errors only, so the test run can still be interrupted.
        # print() with a single argument works on python 2 and 3 alike.
        print("Could not connect to %s:%s. Exception: %s, Error: %s" % ('localhost', self.test_object.getConfigurationValue("port"), type(error), error))
        connection_succeeded = False
    self.assertTrue(connection_succeeded)
    expected_ret_val = DictUtils.getDefaultEventDict({'data': "Beethoven, Mozart, Chopin, Liszt, Brahms, Panties...I'm sorry...Schumann, Schubert, Mendelssohn and Bach. Names that will live for ever."})
    expected_ret_val.pop('lumbermill')
    event = False
    time.sleep(2)
    counter = 0
    for event in self.receiver.getEvent():
        counter += 1
    self.assertTrue(event is not False)
    self.assertEqual(counter, 1500)
    event.pop('lumbermill')
    self.assertDictEqual(event, expected_ret_val)
    self.tearDown()
def testSelectedFields(self):
    """
    Store only the 'sheep' field of an event, with document id and type taken
    from event fields, and expect the document to be retrievable afterwards.
    """
    self.test_object.configure({'nodes': [self.es_server],
                                'fields': ['sheep'],
                                'doc_id': '$(id)',
                                'doc_type': '$(type)',
                                'batch_size': 1})
    self.checkConfiguration()
    self.test_object.initAfterFork()
    timestring = datetime.datetime.utcnow().strftime('%Y.%m.%d')
    index_name = 'lumbermill-%s' % timestring
    # Start with a fresh index. Deleting is best effort only.
    try:
        self.es.indices.delete(index=index_name, ignore=[400, 404])
    except Exception:
        pass
    self.es.indices.create(index=index_name)
    event = DictUtils.getDefaultEventDict({'McTeagle': "But it was with more simple, homespun verses that McTeagle's unique style first flowered.",
                                           'sheep': {'flying': 'scotsman',
                                                     'id': '12345',
                                                     'type': 'pirate'}})
    doc_id = event['sheep.id']
    self.test_object.receiveEvent(event)
    self.test_object.shutDown()
    time.sleep(1)
    try:
        self.es.get(index=index_name, doc_type='pirate', id=doc_id)
    except elasticsearch.exceptions.NotFoundError as e:
        # "except ... as" replaces the python 2 only comma syntax.
        self.fail(e)
def onReceive(self, data):
    """
    Send the first part of a received message as event.

    If self.separator is set, the leading topic is split off and only the
    remainder is used as event data.
    """
    data = data[0]
    if self.separator:
        # Split at the first separator only. The payload itself may contain the
        # separator, which made an unbounded split fail on unpacking.
        _topic, data = data.split(self.separator, 1)
    event = DictUtils.getDefaultEventDict({"data": data}, caller_class_name="ZmqTornado")
    self.sendEvent(event)
def testAddGeoInfoFromListField(self):
    """An ip given as single list element must yield country code 'US' in 'geo_info'."""
    module_config = {
        'geo_info_fields': ['country_code'],
        'source_fields': ['x_forwarded_for'],
    }
    self.test_object.configure(module_config)
    self.checkConfiguration()
    incoming = DictUtils.getDefaultEventDict({'x_forwarded_for': ['99.124.167.129']})
    for enriched in self.test_object.handleEvent(incoming):
        self.assertEqual(enriched['geo_info']['country_code'], 'US')
def testTcpConnection(self):
    """
    Send 1500 newline terminated lines over tcp and expect 1500 events whose
    data is the sent line without the trailing newline.
    """
    self.test_object.configure({})
    self.checkConfiguration()
    self.test_object.initAfterFork()
    self.startTornadoEventLoop()
    # Give server process time to startup.
    time.sleep(.1)
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.settimeout(1)
        s.connect(('localhost', self.test_object.getConfigurationValue('port')))
        for _ in range(0, 1500):
            s.sendall("Beethoven, Mozart, Chopin, Liszt, Brahms, Panties...I'm sorry...Schumann, Schubert, Mendelssohn and Bach. Names that will live for ever.\n")
        s.close()
        connection_succeeded = True
    except Exception as error:
        # Catch real errors only, so the test run can still be interrupted.
        # print() with a single argument works on python 2 and 3 alike.
        print("Could not connect to %s:%s. Exception: %s, Error: %s" % ('localhost', self.test_object.getConfigurationValue("port"), type(error), error))
        connection_succeeded = False
    self.assertTrue(connection_succeeded)
    expected_ret_val = DictUtils.getDefaultEventDict({'data': "Beethoven, Mozart, Chopin, Liszt, Brahms, Panties...I'm sorry...Schumann, Schubert, Mendelssohn and Bach. Names that will live for ever."})
    expected_ret_val.pop('lumbermill')
    event = False
    time.sleep(2)
    counter = 0
    for event in self.receiver.getEvent():
        counter += 1
    self.assertTrue(event != False)
    self.assertEqual(counter, 1500)
    event.pop('lumbermill')
    self.assertDictEqual(event, expected_ret_val)
def testNoop(self):
    """The module must hand back the event unchanged, and must hand back at least one event."""
    self.test_object.configure()
    self.checkConfiguration()
    event = DictUtils.getDefaultEventDict({})
    event_received = None
    for event_received in self.test_object.handleEvent(event):
        # assertEqual instead of the deprecated assertEquals alias.
        self.assertEqual(event, event_received)
    self.assertIsNotNone(event_received)
def testTarpit(self):
    """With a delay of 1, an event must show up at the receiver about one second after it was sent."""
    self.test_object.configure({'delay': 1})
    self.checkConfiguration()
    before = time.time()
    self.test_object.handleEvent(DictUtils.getDefaultEventDict({}))
    # Stays None if the receiver yields nothing, so the test fails with a clear
    # message instead of an unbound variable error.
    after = None
    for _ in self.receiver.getEvent():
        after = time.time()
    self.assertIsNotNone(after, "No event was received from the tarpit module.")
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(1, int(after - before))
def testDefaultValues(self):
    """Extract the fields of an http access log line; 'bytes_send' must be the string '3395'."""
    # Raw string: regex escapes like \d are no valid python string escapes. The
    # resulting pattern is identical to the one of the former non-raw literal.
    access_log_pattern = r'(?P<remote_ip>\d+\.\d+\.\d+\.\d+)\s+(?P<identd>\w+|-)\s+(?P<user>\w+|-)\s+\[(?P<datetime>\d+\/\w+\/\d+:\d+:\d+:\d+\s.\d+)\]\s+"(?P<url>.*)"\s+(?P<http_status>\d+)\s+(?P<bytes_send>\d+)'
    self.test_object.configure({'field_extraction_patterns': [{'http_access_log': access_log_pattern}]})
    self.checkConfiguration()
    data = DictUtils.getDefaultEventDict({'data': self.raw_data})
    event = None
    for event in self.test_object.handleEvent(data):
        # assertTrue instead of the deprecated assert_ alias.
        self.assertTrue('bytes_send' in event and event['bytes_send'] == '3395')
    self.assertIsNotNone(event)
def test(self):
    """An event handed to the module must never show up at the receiver."""
    self.test_object.configure({})
    self.checkConfiguration()
    swallowed = DictUtils.getDefaultEventDict({'/dev/null': '"Spam! Spam! Spam! Lovely Spam! Spam! Spam!"'})
    self.test_object.receiveEvent(swallowed)
    received = 0
    for _ in self.receiver.getEvent():
        received += 1
    self.assertFalse(received > 0)
def eventsInQueuesStatistics(self):
    """
    Log the number of events waiting in each module queue, sorted by module name.

    With self.emit_as_event set, a "statistic" event with the queue size is sent
    per queue as well. Nothing happens when there are no module queues.
    """
    if not self.module_queues:
        return
    self.logger.info(">> Queue stats")
    for module_name, queue in sorted(self.module_queues.items()):
        log_values = (module_name, AnsiColors.YELLOW, queue.qsize(), AnsiColors.ENDC)
        self.logger.info("Events in %s queue: %s%s%s" % log_values)
        if not self.emit_as_event:
            continue
        payload = {"queue_count": queue.qsize(),
                   "field_name": "queue_counts",
                   "interval": self.interval}
        self.sendEvent(DictUtils.getDefaultEventDict(payload, caller_class_name="Statistics", event_type="statistic"))
def testHandleEvent(self):
    """The 'uri' field must be replaced by its parsed parts, including the query string."""
    self.test_object.configure({'source_field': 'uri'})
    self.checkConfiguration()
    data = DictUtils.getDefaultEventDict(
        {'uri': 'http://en.wikipedia.org/wiki/Monty_Python/?gambol=putty'})
    for event in self.test_object.handleEvent(data):
        # assertTrue instead of the deprecated assert_ alias.
        self.assertTrue('uri' in event and event['uri']['query'] == 'gambol=putty')
def testAddGeoInfo(self):
    """With two source fields configured and only 'f2' present, 'geoip' must hold country code 'US'."""
    module_config = {
        'source_fields': ['f1', 'f2'],
        'target_field': 'geoip',
        'geo_info_fields': ['country_code'],
    }
    self.test_object.configure(module_config)
    self.checkConfiguration()
    incoming = DictUtils.getDefaultEventDict({'f2': '99.124.167.129'})
    for enriched in self.test_object.handleEvent(incoming):
        self.assertEqual(enriched['geoip']['country_code'], 'US')
def testDynamicQueryTargetField(self):
    """Build the url from event fields and expect a non-empty result in the custom target field."""
    target_field = 'Johann Gambolputty'
    self.test_object.configure({'url': '$(schema)://$(host)', 'target_field': 'Johann Gambolputty'})
    self.checkConfiguration()
    data_dict = DictUtils.getDefaultEventDict({'schema': 'http', 'host': 'www.google.com'})
    for event in self.test_object.handleEvent(data_dict):
        has_result = target_field in event and len(event[target_field]) > 0
        self.assertTrue(has_result)
def sendFacetEventToReceivers(self, facet_data):
    """
    Send a 'facet' event built from facet_data.

    The event holds the facet source field, the facets and their count, plus the
    'other_event_fields' entry of facet_data.
    """
    payload = {
        'facet_field': self.source_field,
        'facet_count': len(facet_data['facets']),
        'facets': facet_data['facets'],
    }
    event = DictUtils.getDefaultEventDict(payload, caller_class_name=self.__class__.__name__, event_type='facet')
    event['other_event_fields'] = facet_data['other_event_fields']
    self.sendEvent(event)
def testDecodeOfNestedSourceField(self):
    """Json stored in a nested source field must be decoded into the event."""
    self.test_object.configure({'source_fields': ['swallow.json_data']})
    self.checkConfiguration()
    nested = {'json_data': '{"South African": "Fast", "unladen": "swallow"}'}
    data = DictUtils.getDefaultEventDict({'swallow': nested})
    decoded = None
    for decoded in self.test_object.handleEvent(data):
        self.assertTrue('South African' in decoded and decoded['South African'] == "Fast")
    # Make sure at least one event was handed back.
    self.assertIsNotNone(decoded)
def testIsTimeStamp(self):
    """With default configuration, '@timestamp' must look like 2013-08-29T10:25:26."""
    self.test_object.configure({})
    self.checkConfiguration()
    # Raw string: \d is a regex escape, not a python string escape.
    timestamp_pattern = r'^\d+-\d+-\d+T\d+:\d+:\d+$'
    for event in self.test_object.handleEvent(DictUtils.getDefaultEventDict({})):
        # assertTrue instead of the deprecated assert_ alias.
        self.assertTrue(re.match(timestamp_pattern, event['@timestamp']))
def testBase64Decode(self):
    """The 'decode' action must turn the base64 payload in 'data' back into plain text."""
    encoded = 'SSBjdXQgZG93biB0cmVlcywgSSBza2lwIGFuZCBqdW1wLCBJIGxpa2UgdG8gcHJlc3Mgd2lsZCBmbG93ZXJzLiBJIHB1dCBvbiB3b21lbidzIGNsb3RoaW5nIGFuZCBoYW5nIGFyb3VuZCBpbiBiYXJzLg=='
    plain = "I cut down trees, I skip and jump, I like to press wild flowers. I put on women's clothing and hang around in bars."
    self.test_object.configure({'action': 'decode'})
    self.checkConfiguration()
    data = DictUtils.getDefaultEventDict({'data': encoded})
    for event in self.test_object.handleEvent(data):
        self.assertTrue(event['data'] == plain)
def run(self):
    """
    Repeatedly send the configured events for as long as self.alive is set.

    Entries of self.events may be strings, sent as the 'data' field of a new
    event, or dicts, used as the event fields. Other entries are skipped.
    If self.sleep is greater than 0, the loop pauses that long after each event.
    If self.max_events_count is not 0, lumbermill is shut down once that many
    events were sent.
    """
    counter = 0
    while self.alive:
        for event_data in self.events:
            if isinstance(event_data, str):
                event = DictUtils.getDefaultEventDict({'data': event_data}, caller_class_name=self.__class__.__name__)
            elif isinstance(event_data, dict):
                event = DictUtils.getDefaultEventDict(event_data, caller_class_name=self.__class__.__name__)
            else:
                # Unsupported entry. Without this branch the previous event would
                # be sent again, or `event` would be undefined on the first entry.
                continue
            self.sendEvent(event)
            if self.sleep > 0:
                time.sleep(self.sleep)
            if self.max_events_count == 0:
                continue
            counter += 1
            if counter == self.max_events_count:
                time.sleep(2)
                self.alive = False
                self.lumbermill.shutDown()
                # Limit reached: do not send the remaining entries of this round.
                break
def testAddGeoInfo(self):
    """With two source fields configured and only 'f2' present, 'geoip' must hold country code 'US'."""
    module_config = {
        'source_fields': ['f1', 'f2'],
        'geoip_dat_path': './test_data/GeoLiteCity.dat',
        'target_field': 'geoip',
        'geo_info_fields': ['country_code'],
    }
    self.test_object.configure(module_config)
    self.checkConfiguration()
    incoming = DictUtils.getDefaultEventDict({'f2': '99.124.167.129'})
    for enriched in self.test_object.handleEvent(incoming):
        self.assertEqual(enriched['geoip']['country_code'], 'US')
def testTarpit(self):
    """Verify that the module delays event handling by the configured time.

    With 'delay' set to 1, the whole seconds elapsed between calling
    ``handleEvent`` and receiving an event must equal 1. The loop variable is
    initialised to ``None`` so the test fails if no event is yielded.
    """
    self.test_object.configure({'delay': 1})
    self.checkConfiguration()
    before = time.time()
    event = None
    for event in self.test_object.handleEvent(DictUtils.getDefaultEventDict({})):
        after = time.time()
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(1, int(after - before))
    self.assertIsNotNone(event)
def testGetMetaData(self):
    """Request a URL with 'get_metadata' enabled and inspect the result.

    Each yielded event must contain a non-empty 'http_request_result'. After
    the loop, the last event must exist and its result must carry a
    non-empty 'headers' entry.
    """
    configuration = {'url': 'http://www.google.com', 'get_metadata': True}
    self.test_object.configure(configuration)
    self.checkConfiguration()
    event = None
    for event in self.test_object.handleEvent(DictUtils.getDefaultEventDict({})):
        has_result = 'http_request_result' in event and len(event['http_request_result']) > 0
        self.assertTrue(has_result)
    self.assertIsNotNone(event)
    headers = event['http_request_result']['headers']
    self.assertTrue(len(headers) > 0)
def receiveRateStatistics(self):
    """Log the number of events received since the last call.

    Reads the 'events_received' counter from MultiProcessStatisticCollector,
    resets it, and logs the count together with the per-second rate derived
    from ``self.interval``. If ``self.emit_as_event`` is set, the same figures
    are additionally sent as a "statistic" event.
    """
    self.logger.info(">> Receive rate stats")
    # A falsy counter reading is reported as 0.
    events_received = MultiProcessStatisticCollector().getCounter('events_received') or 0
    MultiProcessStatisticCollector().resetCounter('events_received')
    configured_interval = self.getConfigurationValue('interval')
    events_per_second = events_received / self.interval
    log_values = (configured_interval, AnsiColors.YELLOW, events_received, events_per_second, AnsiColors.ENDC)
    self.logger.info("Received events in %ss: %s%s (%s/eps)%s" % log_values)
    if not self.emit_as_event:
        return
    statistic = {
        "total_count": events_received,
        "count_per_sec": events_per_second,
        "field_name": "all_events",
        "interval": self.interval,
    }
    self.sendEvent(DictUtils.getDefaultEventDict(statistic, caller_class_name="Statistics", event_type="statistic"))
def testDateTimeParser(self):
    """Reformat the 'date' field from '%d/%b/%Y' to '%d-%b-%Y'.

    The input '13/Sep/2017' must come back as "13-Sep-2017" in every
    yielded event.
    """
    self.test_object.configure({
        'source_field': 'date',
        'source_date_pattern': '%d/%b/%Y',
        'target_date_pattern': '%d-%b-%Y',
    })
    self.checkConfiguration()
    source_event = DictUtils.getDefaultEventDict({'date': '13/Sep/2017'})
    for event in self.test_object.handleEvent(source_event):
        reformatted = event['date']
        self.assertTrue(reformatted == "13-Sep-2017")
def handleEvent(self, event):
    """Turn an incoming 'message' item into a default event dict.

    ``event`` is indexed positionally: element 0 is compared against
    'message', element 1 is stringified into 'received_from' and element 2
    becomes 'data'. Items whose first element is not 'message' yield nothing.
    """
    if event[0] == 'message':
        fields = {"received_from": '%s' % event[1], "data": event[2]}
        # 'dict' is the keyword name expected by getDefaultEventDict.
        yield DictUtils.getDefaultEventDict(dict=fields, caller_class_name=self.__class__.__name__)
def testQueryTargetField(self):
    """Store the result of a URL query in a custom target field.

    With 'target_field' set to 'Johann Gambolputty', every yielded event
    must contain that field with a non-empty value.
    """
    target_field = 'Johann Gambolputty'
    self.test_object.configure({'url': 'http://www.google.com', 'target_field': 'Johann Gambolputty'})
    self.checkConfiguration()
    source_event = DictUtils.getDefaultEventDict({'TreeNodeID': '1'})
    for event in self.test_object.handleEvent(source_event):
        field_is_filled = target_field in event and len(event[target_field]) > 0
        self.assertTrue(field_is_filled)
def testDecodeLineMode(self):
    """Decode a msgpack payload from the 'data' field in 'line' mode.

    A dict with one large 'spam' value is packed with ``msgpack.packb`` and
    handed to the module. Every yielded event must expose the original
    'spam' value, and at least one event has to be yielded.
    """
    self.test_object.configure({'mode': 'line'})
    self.checkConfiguration()
    data = {'spam': 'spam' * 16384}
    msg_packed_data = msgpack.packb(data)
    # Named packed_event rather than 'dict' to keep the builtin usable.
    packed_event = DictUtils.getDefaultEventDict({'data': msg_packed_data})
    event = None
    for event in self.test_object.handleEvent(packed_event):
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(event['spam'], data['spam'])
    self.assertIsNotNone(event)
def printIntervalStatistics(self):
    """Send one "statistic" event per tracked field and reset its counters.

    Despite the name, nothing is printed: results are passed to
    ``self.sendEvent``. Counters come from
    ``self.stats_collector.getAllCounters()``. Only counters keyed by a
    ``(field_name, field_value)`` tuple whose field name is listed in
    ``self.fields`` are used; each of those is reset once it has been read.
    Walking the counters in sorted order, consecutive entries of the same
    field are folded into one event holding the per-value counts
    ('field_counts') and their sum ('total_count').
    """
    def send_field_statistics(name, counts, total):
        # Build and send the statistic event for one field.
        self.sendEvent(
            DictUtils.getDefaultEventDict(
                {
                    "total_count": total,
                    "field_name": name,
                    "field_counts": counts,
                    "interval": self.interval
                },
                caller_class_name="Statistics",
                event_type="statistic"))

    # Drop non-tuple keys before sorting: they are ignored anyway, and mixing
    # them with tuple keys cannot be ordered on Python 3.
    tuple_counters = [
        item for item in self.stats_collector.getAllCounters().items()
        if isinstance(item[0], tuple)
    ]
    last_field_name = None
    field_counts = {}
    total_count = 0
    for field_name_value, field_count in sorted(tuple_counters):
        field_name, field_value = field_name_value
        if field_name not in self.fields:
            continue
        self.stats_collector.resetCounter(field_name_value)
        if not last_field_name:
            last_field_name = field_name
        if field_name != last_field_name:
            # A new field starts: flush what was gathered for the previous one.
            send_field_statistics(last_field_name, field_counts, total_count)
            last_field_name = field_name
            field_counts = {}
            total_count = 0
        field_counts[field_value] = field_count
        total_count += field_count
    # Send remaining. Use last_field_name, not the loop variable: the last
    # counter visited may belong to a field that is not tracked.
    if last_field_name:
        send_field_statistics(last_field_name, field_counts, total_count)
def testDefaultMappedSyslogPrivalFields(self):
    """Map a syslog prival to severity and facility names.

    With 'map_values' enabled, the prival '13' must produce
    'syslog_severity' == "Notice" and 'syslog_facility' == "user-level".
    ``event`` starts out as ``False`` so the last assertion can tell whether
    the loop variable was ever rebound.
    """
    self.test_object.configure({'source_field': 'syslog_prival', 'map_values': True})
    self.checkConfiguration()
    data = DictUtils.getDefaultEventDict({'syslog_prival': '13', 'data': 'This is an ex parrot!'})
    event = False
    for event in self.test_object.handleEvent(data):
        severity_ok = 'syslog_severity' in event and event['syslog_severity'] == "Notice"
        self.assertTrue(severity_ok)
        facility_ok = 'syslog_facility' in event and event['syslog_facility'] == "user-level"
        self.assertTrue(facility_ok)
    self.assertTrue(event != False)
def testUserAgentSingleSourceField(self):
    """Parse a user agent when 'source_fields' is a single field name.

    'source_fields' is given as a plain string instead of a list. Every
    yielded event must contain 'user_agent_info' whose user agent family is
    "Firefox", and at least one event has to be yielded.
    """
    self.test_object.configure({'source_fields': 'user_agent'})
    self.checkConfiguration()
    # Separate names for the input and the yielded events avoid rebinding
    # the input inside the loop.
    source_event = DictUtils.getDefaultEventDict({
        'user_agent': "Mozilla/5.0 (Windows NT 6.0; rv:33.0) Gecko/20100101 Firefox/33.0"
    })
    parsed_event = None
    for parsed_event in self.test_object.handleEvent(source_event):
        # assertTrue replaces the deprecated assert_ alias.
        self.assertTrue('user_agent_info' in parsed_event and parsed_event['user_agent_info']['user_agent']['family'] == "Firefox")
    # Without this check the test would pass silently on an empty stream.
    self.assertIsNotNone(parsed_event)
def testBase64Decode(self):
    """Check base64 decoding of the 'data' field.

    The module is configured with action 'decode'; each yielded event must
    carry the decoded sentence in 'data'.
    """
    self.test_object.configure({'action': 'decode'})
    self.checkConfiguration()
    events = self.test_object.handleEvent(DictUtils.getDefaultEventDict({
        'data': 'SSBjdXQgZG93biB0cmVlcywgSSBza2lwIGFuZCBqdW1wLCBJIGxpa2UgdG8gcHJlc3Mgd2lsZCBmbG93ZXJzLiBJIHB1dCBvbiB3b21lbidzIGNsb3RoaW5nIGFuZCBoYW5nIGFyb3VuZCBpbiBiYXJzLg=='
    }))
    for event in events:
        matches_plain_text = event['data'] == "I cut down trees, I skip and jump, I like to press wild flowers. I put on women's clothing and hang around in bars."
        self.assertTrue(matches_plain_text)