예제 #1
0
class RedisListSink(BaseThreadedModule):
    """
    Send events to a redis lists.

    list: Name of redis list to send data to.
    server: Redis server to connect to.
    port: Port redis server is listening on.
    db: Redis db.
    password: Redis password.
    format: Which event fields to send on, e.g. '$(@timestamp) - $(url) - $(country_code)'. If not set the whole event dict is send.
    store_interval_in_secs: Send data to redis in x seconds intervals.
    batch_size: Send data to redis if event count is above, even if store_interval_in_secs is not reached.
    backlog_size: Maximum count of events waiting for transmission. Events above count will be dropped.

    Configuration template:

    - RedisListSink:
       list:                            # <type: String; is: required>
       server:                          # <default: 'localhost'; type: string; is: optional>
       port:                            # <default: 6379; type: integer; is: optional>
       db:                              # <default: 0; type: integer; is: optional>
       password:                        # <default: None; type: None||string; is: optional>
       format:                          # <default: None; type: None||string; is: optional>
       store_interval_in_secs:          # <default: 5; type: integer; is: optional>
       batch_size:                      # <default: 500; type: integer; is: optional>
       backlog_size:                    # <default: 500; type: integer; is: optional>
    """

    module_type = "output"
    """Set module type"""
    can_run_forked = True

    def configure(self, configuration):
        """
        Read the sink settings, create the redis client and check that the
        server answers a ping. A failing ping is logged and triggers
        lumbermill.shutDown().
        """
        # Call parent configure method
        BaseThreadedModule.configure(self, configuration)
        self.format = self.getConfigurationValue('format')
        self.list = self.getConfigurationValue('list')
        self.client = redis.StrictRedis(
            host=self.getConfigurationValue('server'),
            port=self.getConfigurationValue('port'),
            password=self.getConfigurationValue('password'),
            db=self.getConfigurationValue('db'))
        try:
            self.client.ping()
        except Exception:
            # Exception instead of a bare except, so SystemExit and
            # KeyboardInterrupt are not reported as connection errors.
            etype, evalue = sys.exc_info()[:2]
            self.logger.error(
                "Could not connect to redis store at %s. Exception: %s, Error: %s."
                % (self.getConfigurationValue('server'), etype, evalue))
            self.lumbermill.shutDown()

    def getStartMessage(self):
        """Return a one line summary of list name, server, port and backlog size."""
        return "[%s] on %s:%s. Max buffer size: %d" % (
            self.list, self.getConfigurationValue('server'),
            self.getConfigurationValue('port'),
            self.getConfigurationValue('backlog_size'))

    def initAfterFork(self):
        """Create the event buffer; storeData is handed to it as the callback."""
        BaseThreadedModule.initAfterFork(self)
        self.buffer = Buffer(
            self.getConfigurationValue('batch_size'),
            self.storeData,
            self.getConfigurationValue('store_interval_in_secs'),
            maxsize=self.getConfigurationValue('backlog_size'))

    def storeData(self, buffered_data):
        """
        Push all buffered items onto the configured redis list with one RPUSH.

        Returns True on success (or when there is nothing to push) and False
        when the push raised; the error is logged.
        """
        if not buffered_data:
            # RPUSH needs at least one value; nothing to send is not an error.
            return True
        try:
            self.client.rpush(self.list, *buffered_data)
        except Exception:
            exc_type, exc_value = sys.exc_info()[:2]
            self.logger.error(
                "Could not add event to redis list %s. Exception: %s, Error: %s."
                % (self.list, exc_type, exc_value))
            return False
        return True

    def handleEvent(self, event):
        """
        Queue the event for sending. If a format is configured the formatted
        value is queued, otherwise the event itself. Yields None.
        """
        if self.format:
            publish_data = mapDynamicValue(self.format, event)
        else:
            publish_data = event
        self.buffer.append(publish_data)
        yield None

    def shutDown(self):
        """Flush pending events (best effort), then run the parent shutDown."""
        # The buffer only exists once initAfterFork() has run.
        pending = getattr(self, 'buffer', None)
        if pending is not None:
            try:
                pending.flush()
            except Exception:
                # Best effort: a failing flush must not block the shutdown,
                # but it should be visible in the log.
                etype, evalue = sys.exc_info()[:2]
                self.logger.warning(
                    "Could not flush buffer on shutdown. Exception: %s, Error: %s."
                    % (etype, evalue))
        BaseThreadedModule.shutDown(self)
예제 #2
0
class ZabbixSink(BaseThreadedModule):
    """
    Send events to zabbix.

    hostname: Hostname for which the metrics should be stored.
    fields: Event fields to send.
    field_prefix: Prefix to prepend to field names. For e.g. cpu_count field with default lumbermill_ prefix, the Zabbix key is lumbermill_cpu_count.
    timestamp_field: Field to provide timestamp. If not provided, current timestamp is used.
    agent_conf: Path to zabbix_agent configuration file. If set to True defaults to /etc/zabbix/zabbix_agentd.conf.
    server: Address of zabbix server. If port differs from default it can be set by appending it, e.g. 127.0.0.1:10052.
    store_interval_in_secs: sending data to es in x seconds intervals.
    batch_size: sending data to es if event count is above, even if store_interval_in_secs is not reached.
    backlog_size: maximum count of events waiting for transmission. Events above count will be dropped.

    Configuration template:

    - ZabbixSink:
       hostname:                        # <type: string; is: required>
       fields:                          # <type: list; is: required>
       field_prefix:                    # <default: "lumbermill_"; type: string; is: optional>
       timestamp_field:                 # <default: "timestamp"; type: string; is: optional>
       agent_conf:                      # <default: True; type: boolean||string; is: optional>
       server:                          # <default: False; type: boolean||string; is: required if agent_conf is False else optional>
       store_interval_in_secs:          # <default: 10; type: integer; is: optional>
       batch_size:                      # <default: 500; type: integer; is: optional>
       backlog_size:                    # <default: 500; type: integer; is: optional>
    """

    module_type = "output"
    """Set module type"""

    def configure(self, configuration):
        """
        Read the sink settings and build the ZabbixSender, either from an
        agent configuration file or from an explicit "host[:port]" server
        value. Invalid settings are logged and trigger lumbermill.shutDown().
        """
        BaseThreadedModule.configure(self, configuration)
        self.hostname = self.getConfigurationValue("hostname")
        self.fields = self.getConfigurationValue("fields")
        self.field_prefix = self.getConfigurationValue("field_prefix")
        self.timestamp_field = self.getConfigurationValue("timestamp_field")
        self.batch_size = self.getConfigurationValue('batch_size')
        self.backlog_size = self.getConfigurationValue('backlog_size')
        self.agent_conf = self.getConfigurationValue("agent_conf")
        if self.agent_conf:
            if self.agent_conf is True:
                self.agent_conf = "/etc/zabbix/zabbix_agentd.conf"
            if not os.path.isfile(self.agent_conf):
                self.logger.error("%s does not point to an existing file." % self.agent_conf)
                self.lumbermill.shutDown()
                # Do not try to build a sender from a missing config file.
                return
            self.zabbix_sender = ZabbixSender(use_config=self.agent_conf)
        else:
            server = self.getConfigurationValue("server")
            if not server:
                # server defaults to False; without agent_conf it is required.
                self.logger.error("Neither agent_conf nor server is configured.")
                self.lumbermill.shutDown()
                return
            port = 10051
            if ":" in server:
                server, port = server.rsplit(":", 1)
                try:
                    port = int(port)
                except ValueError:
                    self.logger.error("%s is not a valid zabbix server port." % port)
                    self.lumbermill.shutDown()
                    return
            # NOTE(review): py-zabbix names this keyword `zabbix_port`; confirm
            # `port` matches the ZabbixSender implementation in use.
            self.zabbix_sender = ZabbixSender(zabbix_server=server, port=port)
        # initAfterFork() builds a fresh Buffer again.
        self.buffer = Buffer(self.getConfigurationValue('batch_size'), self.storeData,
                             self.getConfigurationValue('store_interval_in_secs'),
                             maxsize=self.getConfigurationValue('backlog_size'))

    def getStartMessage(self):
        """Return a one line summary of the sender target and backlog size."""
        if self.agent_conf:
            return "Config: %s. Max buffer size: %d" % (self.agent_conf, self.getConfigurationValue('backlog_size'))
        else:
            return "Server: %s. Max buffer size: %d" % (self.getConfigurationValue("server"), self.getConfigurationValue('backlog_size'))

    def initAfterFork(self):
        """Create the event buffer; storeData is handed to it as the callback."""
        BaseThreadedModule.initAfterFork(self)
        self.buffer = Buffer(self.getConfigurationValue('batch_size'), self.storeData,
                             self.getConfigurationValue('store_interval_in_secs'),
                             maxsize=self.getConfigurationValue('backlog_size'))

    def handleEvent(self, event):
        """Queue the event for sending. Yields None."""
        self.buffer.append(event)
        yield None

    def storeData(self, events):
        """
        Build one ZabbixMetric per configured field found in each event and
        send them in a single packet.

        Returns True once the packet was sent (or when there was nothing to
        send), matching the status convention of the other sinks. Partial
        failures reported by the server are logged as a warning.
        """
        packet = []
        for event in events:
            # Always bind timestamp, also when no timestamp_field is
            # configured or the event lacks it.
            timestamp = None
            if self.timestamp_field:
                try:
                    timestamp = event[self.timestamp_field]
                except KeyError:
                    timestamp = None
            hostname = mapDynamicValue(self.hostname, mapping_dict=event, use_strftime=True)
            for field_name in self.fields:
                try:
                    value = event[field_name]
                except KeyError:
                    # Fields missing from the event are skipped silently.
                    continue
                packet.append(ZabbixMetric(hostname, "%s%s" % (self.field_prefix, field_name), value, timestamp))
        if not packet:
            return True
        response = self.zabbix_sender.send(packet)
        if response.failed != 0:
            self.logger.warning("%d of %d metrics were not processed correctly." % (response.total-response.processed, response.total))
        return True

    def shutDown(self):
        """Flush pending events (best effort), then run the parent shutDown."""
        try:
            self.buffer.flush()
        except Exception as exc:
            # Best effort: a failing flush must not prevent the parent
            # shutDown from running.
            self.logger.warning("Could not flush buffer on shutdown. Exception: %s, Error: %s." % (type(exc), exc))
        BaseThreadedModule.shutDown(self)
예제 #3
0
class RedisListSink(BaseThreadedModule):
    """
    Send events to a redis lists.

    list: Name of redis list to send data to.
    server: Redis server to connect to.
    port: Port redis server is listening on.
    db: Redis db.
    password: Redis password.
    format: Which event fields to send on, e.g. '$(@timestamp) - $(url) - $(country_code)'. If not set the whole event dict is send.
    store_interval_in_secs: Send data to redis in x seconds intervals.
    batch_size: Send data to redis if event count is above, even if store_interval_in_secs is not reached.
    backlog_size: Maximum count of events waiting for transmission. Events above count will be dropped.

    Configuration template:

    - RedisListSink:
       list:                            # <type: String; is: required>
       server:                          # <default: 'localhost'; type: string; is: optional>
       port:                            # <default: 6379; type: integer; is: optional>
       db:                              # <default: 0; type: integer; is: optional>
       password:                        # <default: None; type: None||string; is: optional>
       format:                          # <default: None; type: None||string; is: optional>
       store_interval_in_secs:          # <default: 5; type: integer; is: optional>
       batch_size:                      # <default: 500; type: integer; is: optional>
       backlog_size:                    # <default: 500; type: integer; is: optional>
    """

    module_type = "output"
    """Set module type"""
    can_run_forked = True

    def configure(self, configuration):
        """
        Read the sink settings, create the redis client and check that the
        server answers a ping. A failing ping is logged and triggers
        lumbermill.shutDown().
        """
        # Call parent configure method
        BaseThreadedModule.configure(self, configuration)
        self.format = self.getConfigurationValue('format')
        self.list = self.getConfigurationValue('list')
        self.client = redis.StrictRedis(host=self.getConfigurationValue('server'),
                                        port=self.getConfigurationValue('port'),
                                        password=self.getConfigurationValue('password'),
                                        db=self.getConfigurationValue('db'))
        try:
            self.client.ping()
        except Exception:
            # Exception instead of a bare except, so SystemExit and
            # KeyboardInterrupt are not reported as connection errors.
            etype, evalue = sys.exc_info()[:2]
            self.logger.error("Could not connect to redis store at %s. Exception: %s, Error: %s." % (self.getConfigurationValue('server'), etype, evalue))
            self.lumbermill.shutDown()

    def getStartMessage(self):
        """Return a one line summary of server, port, list name and backlog size."""
        return "publishing to %s:%s -> %s. Max buffer size: %d" % (self.getConfigurationValue('server'),
                                                                   self.getConfigurationValue('port'),
                                                                   self.list,
                                                                   self.getConfigurationValue('backlog_size'))

    def initAfterFork(self):
        """Create the event buffer; storeData is handed to it as the callback."""
        BaseThreadedModule.initAfterFork(self)
        self.buffer = Buffer(self.getConfigurationValue('batch_size'),
                             self.storeData,
                             self.getConfigurationValue('store_interval_in_secs'),
                             maxsize=self.getConfigurationValue('backlog_size'))

    def storeData(self, buffered_data):
        """
        Push all buffered items onto the configured redis list with one RPUSH.

        Returns True on success (or when there is nothing to push) and False
        when the push raised; the error is logged.
        """
        if not buffered_data:
            # RPUSH needs at least one value; nothing to send is not an error.
            return True
        try:
            self.client.rpush(self.list, *buffered_data)
        except Exception:
            exc_type, exc_value = sys.exc_info()[:2]
            self.logger.error("Could not add event to redis list %s. Exception: %s, Error: %s." % (self.list, exc_type, exc_value))
            return False
        return True

    def handleEvent(self, event):
        """
        Queue the event for sending. If a format is configured the formatted
        value is queued, otherwise the event itself. Yields None.
        """
        if self.format:
            publish_data = mapDynamicValue(self.format, event)
        else:
            publish_data = event
        self.buffer.append(publish_data)
        yield None

    def shutDown(self):
        """Flush pending events (best effort), then run the parent shutDown."""
        # The buffer only exists once initAfterFork() has run.
        pending = getattr(self, 'buffer', None)
        if pending is not None:
            try:
                pending.flush()
            except Exception:
                # Best effort: a failing flush must not block the shutdown,
                # but it should be visible in the log.
                etype, evalue = sys.exc_info()[:2]
                self.logger.warning("Could not flush buffer on shutdown. Exception: %s, Error: %s." % (etype, evalue))
        BaseThreadedModule.shutDown(self)
예제 #4
0
class ZmqSink(BaseThreadedModule):
    """
    Sends events to zeromq.

    server: Server to connect to. Pattern: hostname:port.
    pattern: Either push or pub.
    mode: Whether to run a server or client. If running as server, pool size is restricted to a single process.
    topic: The channels topic.
    hwm: Highwatermark for sending socket.
    format: Which event fields to send on, e.g. '$(@timestamp) - $(url) - $(country_code)'. If not set the whole event dict is send msgpacked.
    store_interval_in_secs: Send data to redis in x seconds intervals.
    batch_size: Send data to redis if event count is above, even if store_interval_in_secs is not reached.
    backlog_size: Maximum count of events waiting for transmission. Events above count will be dropped.

    Configuration template:

    - ZmqSink:
       server:                          # <default: 'localhost:5570'; type: string; is: optional>
       pattern:                         # <default: 'push'; type: string; values: ['push', 'pub']; is: optional>
       mode:                            # <default: 'connect'; type: string; values: ['connect', 'bind']; is: optional>
       topic:                           # <default: None; type: None||string; is: optional>
       hwm:                             # <default: None; type: None||integer; is: optional>
       format:                          # <default: None; type: None||string; is: optional>
       store_interval_in_secs:          # <default: 5; type: integer; is: optional>
       batch_size:                      # <default: 500; type: integer; is: optional>
       backlog_size:                    # <default: 500; type: integer; is: optional>
    """

    # This class is a sink; the other sinks in this file all declare "output".
    module_type = "output"
    """Set module type"""
    can_run_forked = True

    def configure(self, configuration):
        """
        Read topic, format, mode and server settings. In "bind" mode forking
        is disabled for this instance.
        """
        # Call parent configure method
        BaseThreadedModule.configure(self, configuration)
        # Kept for getStartMessage(), which reports the target address.
        self.server = self.getConfigurationValue('server')
        self.topic = self.getConfigurationValue('topic')
        self.format = self.getConfigurationValue('format')
        self.mode = self.getConfigurationValue('mode')
        if self.mode == "bind":
            self.can_run_forked = False

    def initZmqContext(self):
        """
        Create the zmq context and a PUSH or PUB socket, apply the optional
        high water mark and connect or bind to the configured server.
        A connect/bind failure is logged and triggers lumbermill.shutDown().
        """
        self.zmq_context = zmq.Context()
        if self.getConfigurationValue('pattern') == 'push':
            self.client = self.zmq_context.socket(zmq.PUSH)
        else:
            self.client = self.zmq_context.socket(zmq.PUB)
        hwm = self.getConfigurationValue('hwm')
        if hwm:
            try:
                self.client.setsockopt(zmq.SNDHWM, hwm)
            except Exception:
                # Fall back to the generic HWM option when SNDHWM is not
                # usable (presumably older zmq versions; verify).
                self.client.setsockopt(zmq.HWM, hwm)
        server_name, server_port = self.getConfigurationValue('server').split(":")
        try:
            server_addr = socket.gethostbyname(server_name)
        except socket.gaierror:
            # Name could not be resolved; pass it on unchanged.
            server_addr = server_name
        endpoint = 'tcp://%s:%s' % (server_addr, server_port)
        try:
            if self.getConfigurationValue('mode') == 'connect':
                self.client.connect(endpoint)
            else:
                self.client.bind(endpoint)
        except Exception:
            etype, evalue = sys.exc_info()[:2]
            self.logger.error("Could not connect to zeromq at %s. Exception: %s, Error: %s." % (self.getConfigurationValue('server'), etype, evalue))
            self.lumbermill.shutDown()

    def getStartMessage(self):
        """Return a one line summary of the server address and backlog size."""
        return "%s. Max buffer size: %d" % (self.server, self.getConfigurationValue('backlog_size'))

    def initAfterFork(self):
        """Create the zmq socket and the event buffer; storeData is handed to the buffer as the callback."""
        BaseThreadedModule.initAfterFork(self)
        self.initZmqContext()
        self.buffer = Buffer(self.getConfigurationValue('batch_size'),
                             self.storeData,
                             self.getConfigurationValue('store_interval_in_secs'),
                             maxsize=self.getConfigurationValue('backlog_size'))

    def storeData(self, buffered_data):
        """
        Send every buffered item over the zmq socket.

        Returns True when all items were sent, False otherwise. A terminated
        context and the two known shutdown-time errors are not logged.
        """
        try:
            for data in buffered_data:
                self.client.send("%s" % data)
            return True
        except zmq.error.ContextTerminated:
            return False
        except Exception:
            exc_type, exc_value = sys.exc_info()[:2]
            # Compare the message text; the exception object itself never
            # equals a string.
            if str(exc_value) in ['Interrupted system call', 'Socket operation on non-socket']:
                return False
            self.logger.error("Could not add events to zmq. Exception: %s, Error: %s." % (exc_type, exc_value))
            return False

    def handleEvent(self, event):
        """
        Queue the event for sending: formatted if a format is configured,
        msgpacked otherwise, and prefixed with the topic if one is set.
        Yields None.
        """
        if self.format:
            publish_data = mapDynamicValue(self.format, event)
        else:
            publish_data = msgpack.packb(event)
        if self.topic:
            publish_data = "%s %s" % (self.topic, publish_data)
        self.buffer.append(publish_data)
        yield None

    def shutDown(self):
        """Flush pending events (best effort), close socket and context, then run the parent shutDown."""
        try:
            self.buffer.flush()
        except Exception:
            # Best effort: the buffer may not exist yet or the socket may
            # already be gone; shutdown must continue either way.
            pass
        try:
            self.client.close()
            self.zmq_context.term()
        except AttributeError:
            # Socket/context were never created.
            pass
        # Call parent shutDown method.
        BaseThreadedModule.shutDown(self)
예제 #5
0
class ElasticSearchSink(BaseThreadedModule):
    """
    Store the data dictionary in an elasticsearch index.

    The elasticsearch module takes care of discovering all nodes of the elasticsearch cluster.
    Requests will the be loadbalanced via round robin.

    action:     Either index or update. If update be sure to provide the correct doc_id.
    format:     Which event fields to send on, e.g. '$(@timestamp) - $(url) - $(country_code)'.
                If not set the whole event dict is send.
    nodes:      Configures the elasticsearch nodes.
    read_timeout: Set number of seconds to wait until requests to elasticsearch will time out.
    connection_type:    One of: 'thrift', 'http'.
    http_auth:  'user:password'.
    use_ssl:    One of: True, False.
    index_name: Sets the index name. Timepatterns like %Y.%m.%d and dynamic values like $(bar) are allowed here.
    doc_id:     Sets the es document id for the committed event data.
    routing:    Sets a routing value (@see: http://www.elasticsearch.org/blog/customizing-your-document-routing/)
                Timepatterns like %Y.%m.%d are allowed here.
    ttl:        When set, documents will be automatically deleted after ttl expired.
                Can either set time in milliseconds or elasticsearch date format, e.g.: 1d, 15m etc.
                This feature needs to be enabled for the index.
                @See: http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/mapping-ttl-field.html
    sniff_on_start: The client can be configured to inspect the cluster state to get a list of nodes upon startup.
                    Might cause problems on hosts with multiple interfaces. If connections fail, try to deactivate this.
    sniff_on_connection_fail: The client can be configured to inspect the cluster state to get a list of nodes upon failure.
                              Might cause problems on hosts with multiple interfaces. If connections fail, try to deactivate this.
    consistency:    One of: 'one', 'quorum', 'all'.
    store_interval_in_secs:     Send data to es in x seconds intervals.
    batch_size: Sending data to es if event count is above, even if store_interval_in_secs is not reached.
    backlog_size:   Maximum count of events waiting for transmission. If backlog size is exceeded no new events will be processed.

    Configuration template:

    - ElasticSearchSink:
       action:                          # <default: 'index'; type: string; is: optional; values: ['index', 'update']>
       format:                          # <default: None; type: None||string; is: optional>
       nodes:                           # <type: string||list; is: required>
       read_timeout:                    # <default: 10; type: integer; is: optional>
       connection_type:                 # <default: 'urllib3'; type: string; values: ['urllib3', 'requests']; is: optional>
       http_auth:                       # <default: None; type: None||string; is: optional>
       use_ssl:                         # <default: False; type: boolean; is: optional>
       index_name:                      # <default: 'lumbermill-%Y.%m.%d'; type: string; is: optional>
       doc_id:                          # <default: '$(lumbermill.event_id)'; type: string; is: optional>
       routing:                         # <default: None; type: None||string; is: optional>
       ttl:                             # <default: None; type: None||integer||string; is: optional>
       sniff_on_start:                  # <default: False; type: boolean; is: optional>
       sniff_on_connection_fail:        # <default: False; type: boolean; is: optional>
       consistency:                     # <default: 'quorum'; type: string; values: ['one', 'quorum', 'all']; is: optional>
       store_interval_in_secs:          # <default: 5; type: integer; is: optional>
       batch_size:                      # <default: 500; type: integer; is: optional>
       backlog_size:                    # <default: 500; type: integer; is: optional>
    """

    module_type = "output"
    """Set module type"""
    def configure(self, configuration):
        """
        Read the sink settings, adjust the log level of the elasticsearch
        related libraries and select the connection class.
        """
        # Call parent configure method.
        BaseThreadedModule.configure(self, configuration)
        # At the default 'info' level the client libraries are limited to
        # warnings; any other configured level is passed on to them.
        if self.getConfigurationValue('log_level') == 'info':
            library_log_level = logging.WARN
        else:
            library_log_level = self.logger.level
        for module_name in ['elasticsearch', 'urllib3', 'requests']:
            logging.getLogger(module_name).setLevel(library_log_level)
        self.action = self.getConfigurationValue('action')
        self.format = self.getConfigurationValue('format')
        self.consistency = self.getConfigurationValue("consistency")
        self.ttl = self.getConfigurationValue("ttl")
        self.index_name = self.getConfigurationValue("index_name")
        self.routing_pattern = self.getConfigurationValue("routing")
        self.doc_id_pattern = self.getConfigurationValue("doc_id")
        self.es_nodes = self.getConfigurationValue("nodes")
        self.read_timeout = self.getConfigurationValue("read_timeout")
        if not isinstance(self.es_nodes, list):
            self.es_nodes = [self.es_nodes]
        connection_type = self.getConfigurationValue("connection_type")
        if connection_type == 'urllib3':
            self.connection_class = elasticsearch.connection.Urllib3HttpConnection
        elif connection_type == 'requests':
            self.connection_class = elasticsearch.connection.RequestsHttpConnection

    def getStartMessage(self):
        """Return a one line summary of the index name and backlog size."""
        return "Idx: %s. Max buffer size: %d" % (
            self.index_name, self.getConfigurationValue('backlog_size'))

    def initAfterFork(self):
        """Create the elasticsearch client and the event buffer after the fork."""
        BaseThreadedModule.initAfterFork(self)
        # Init es client after fork as mentioned in https://elasticsearch-py.readthedocs.org/en/master/
        self.es = self.connect()
        if not self.es:
            self.lumbermill.shutDown()
            return
        # As the buffer uses a threaded timed function to flush its buffer and thread will not survive a fork, init buffer here.
        self.buffer = Buffer(
            self.getConfigurationValue('batch_size'),
            self.storeData,
            self.getConfigurationValue('store_interval_in_secs'),
            maxsize=self.getConfigurationValue('backlog_size'))

    def connect(self):
        """
        Create the elasticsearch client, retrying up to five times with a
        growing pause (4, 5, ... seconds) between attempts.

        Returns the client, or False after the last failed attempt, in which
        case lumbermill.shutDown() has been called.
        """
        es = False
        tries = 0
        while tries < 5 and not es:
            try:
                # Connect to es node and round-robin between them.
                self.logger.debug("Connecting to %s." % self.es_nodes)
                es = elasticsearch.Elasticsearch(
                    self.es_nodes,
                    connection_class=self.connection_class,
                    timeout=self.read_timeout,
                    sniff_on_start=self.getConfigurationValue(
                        'sniff_on_start'),
                    sniff_on_connection_fail=self.getConfigurationValue(
                        'sniff_on_connection_fail'),
                    sniff_timeout=5,
                    maxsize=20,
                    use_ssl=self.getConfigurationValue('use_ssl'),
                    http_auth=self.getConfigurationValue('http_auth'))
            except Exception:
                etype, evalue = sys.exc_info()[:2]
                wait_secs = 4 + tries
                self.logger.warning(
                    "Connection to %s failed. Exception: %s, Error: %s." %
                    (self.es_nodes, etype, evalue))
                self.logger.warning(
                    "Waiting %s seconds before retring to connect." %
                    wait_secs)
                time.sleep(wait_secs)
                tries += 1
        if not es:
            self.logger.error("Connection to %s failed. Shutting down." %
                              self.es_nodes)
            self.lumbermill.shutDown()
        else:
            self.logger.debug("Connection to %s successful." % self.es_nodes)
        return es

    def handleEvent(self, event):
        """
        Queue the event for sending. If a format is configured the formatted
        value is queued, otherwise the event itself. Yields None.
        """
        if self.format:
            publish_data = self.getConfigurationValue('format', event)
        else:
            publish_data = event
        self.buffer.append(publish_data)
        yield None

    def dataToElasticSearchJson(self, events):
        """
        Format data for elasticsearch bulk update.

        Each event becomes an action header line plus a document line.
        Events without a resolvable doc_id, or that cannot be json encoded,
        are logged and left out. Returns the concatenated bulk body string.
        """
        json_data = []
        for event in events:
            index_name = mapDynamicValueInString(self.index_name,
                                                 event,
                                                 use_strftime=True).lower()
            lumbermill_meta = event['lumbermill'] if 'lumbermill' in event else {}
            if 'event_type' in lumbermill_meta:
                event_type = lumbermill_meta['event_type']
            else:
                event_type = 'Unknown'
            doc_id = mapDynamicValue(self.doc_id_pattern, event)
            if not doc_id:
                self.logger.error(
                    "Could not find doc_id %s for event %s." %
                    (self.getConfigurationValue("doc_id"), event))
                continue
            action_meta = {
                '_index': index_name,
                '_type': event_type,
                '_id': doc_id
            }
            # The meta data lives under the configured action key, which is
            # 'update' as well as 'index'.
            if self.routing_pattern:
                action_meta['_routing'] = mapDynamicValue(
                    self.routing_pattern, use_strftime=True)
            if self.ttl:
                action_meta['_ttl'] = self.ttl
            header = {self.action: action_meta}
            if self.action == 'update':
                event = {'doc': event}
            try:
                json_data.append("\n".join(
                    (json.dumps(header), json.dumps(event), "\n")))
            except UnicodeDecodeError:
                etype, evalue = sys.exc_info()[:2]
                self.logger.error(
                    "Could not json encode %s. Exception: %s, Error: %s." %
                    (event, etype, evalue))
        return "".join(json_data)

    def storeData(self, events):
        """
        Send the events to elasticsearch with one bulk request.

        Returns True when the request succeeded (or when there was nothing
        to send) and False otherwise. Connection problems trigger a
        reconnect via connect().
        """
        json_data = self.dataToElasticSearchJson(events)
        if not json_data:
            # Nothing left to send, e.g. every event lacked a doc_id.
            return True
        try:
            self.es.bulk(body=json_data, consistency=self.consistency)
            return True
        except elasticsearch.exceptions.ConnectionError:
            self.logger.warning(
                "Lost connection to %s. Trying to reconnect." %
                (self.es_nodes, ))
            try:
                self.es = self.connect()
            except Exception:
                time.sleep(.5)
        except Exception:
            etype, evalue = sys.exc_info()[:2]
            self.logger.error(
                "Server communication error. Exception: %s, Error: %s." %
                (etype, evalue))
            self.logger.debug("Payload: %s" % json_data)
            # Match on the message text; membership tests on the exception
            # object itself are not reliable.
            error_text = str(evalue)
            if "Broken pipe" in error_text or "Connection reset by peer" in error_text:
                self.es = self.connect()
        return False

    def shutDown(self):
        """Flush pending events (best effort), then run the parent shutDown."""
        try:
            self.buffer.flush()
        except Exception:
            # Best effort: the buffer does not exist when the connection
            # could not be established; shutdown must continue either way.
            pass
        BaseThreadedModule.shutDown(self)
예제 #6
0
class SQSSink(BaseThreadedModule):
    """
    Send messages to amazon sqs service.

    aws_access_key_id: Your AWS id.
    aws_secret_access_key: Your AWS password.
    region: The region in which to find your sqs service.
    queue: Queue name.
    format: Which event fields to send on, e.g. '$(@timestamp) - $(url) - $(country_code)'.
            If not set the whole event is json encoded and sent as MessageBody.
    store_interval_in_secs: Send data to sqs in x seconds intervals.
    batch_size: Number of messages to collect before starting to send messages to sqs. This refers to the internal
                receive buffer of this plugin. When the receive buffer is maxed out, this plugin will always send
                the maximum of 10 messages in one send_message_batch call.
    backlog_size: Maximum count of events waiting for transmission. Events above count will be dropped.

    values: ['us-east-1', 'us-west-1', 'us-west-2', 'eu-central-1', 'eu-west-1', 'ap-southeast-1', 'ap-southeast-2', 'ap-northeast-1', 'sa-east-1', 'us-gov-west-1', 'cn-north-1']

    Configuration template:

    - SQSSink:
       aws_access_key_id:               # <type: string; is: required>
       aws_secret_access_key:           # <type: string; is: required>
       region:                          # <type: string; is: required>
       queue:                           # <type: string; is: required>
       format:                          # <default: None; type: None||string; is: optional>
       store_interval_in_secs:          # <default: 5; type: integer; is: optional>
       batch_size:                      # <default: 500; type: integer; is: optional>
       backlog_size:                    # <default: 500; type: integer; is: optional>
       receivers:
        - NextModule
    """

    module_type = "output"
    """Set module type"""
    def configure(self, configuration):
        """Read batch size and output format and silence the boto loggers."""
        # Call parent configure method
        BaseThreadedModule.configure(self, configuration)
        # Set boto log level.
        logging.getLogger('boto3').setLevel(logging.CRITICAL)
        logging.getLogger('botocore').setLevel(logging.CRITICAL)
        self.batch_size = self.getConfigurationValue('batch_size')
        self.format = self.getConfigurationValue('format')

    def getStartMessage(self):
        """Return the start message: queue name, region and configured backlog size."""
        return "Queue: %s [%s]. Max buffer size: %d" % (
            self.getConfigurationValue('queue'),
            self.getConfigurationValue('region'),
            self.getConfigurationValue('backlog_size'))

    def initAfterFork(self):
        """
        Create the event buffer and the sqs queue handle.

        If the sqs resource or the queue can not be obtained, the error is logged and
        lumbermill is shut down.
        """
        BaseThreadedModule.initAfterFork(self)
        self.buffer = Buffer(
            self.getConfigurationValue('batch_size'),
            self.storeData,
            self.getConfigurationValue('store_interval_in_secs'),
            maxsize=self.getConfigurationValue('backlog_size'))
        try:
            self.sqs_resource = boto3.resource(
                'sqs',
                region_name=self.getConfigurationValue('region'),
                api_version=None,
                use_ssl=True,
                verify=None,
                endpoint_url=None,
                aws_access_key_id=self.getConfigurationValue(
                    'aws_access_key_id'),
                aws_secret_access_key=self.getConfigurationValue(
                    'aws_secret_access_key'),
                aws_session_token=None,
                config=None)
            self.sqs_queue = self.sqs_resource.get_queue_by_name(
                QueueName=self.getConfigurationValue('queue'))
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error(
                "Could not connect to sqs service. Exception: %s, Error: %s." %
                (etype, evalue))
            self.lumbermill.shutDown()

    def handleEvent(self, event):
        """Queue the event in the buffer; nothing is passed on to other modules."""
        self.buffer.append(event)
        yield None

    def storeData(self, buffered_data):
        """
        Send the buffered events to the sqs queue in batches.

        buffered_data: Events handed over by the buffer.

        Each event becomes one batch entry. Its Id is the lumbermill event id or, if the
        event has none, a random id. Its MessageBody is either the mapped format string
        or the json encoded event. Events that can not be json encoded are logged and
        skipped.
        """
        # The docstring above states 10 as the maximum number of messages per batch call.
        max_batch_entries = 10
        batch_messages = []
        for event in buffered_data:
            try:
                message_id = event['lumbermill']['event_id']
            except KeyError:
                message_id = "%032x%s" % (random.getrandbits(128), os.getpid())
            if self.format:
                message_body = mapDynamicValue(self.format, event)
            else:
                try:
                    message_body = json.dumps(event)
                except Exception:
                    etype, evalue, etb = sys.exc_info()
                    self.logger.warning(
                        "Error while encoding event data: %s to json. Exception: %s, Error: %s."
                        % (event, etype, evalue))
                    # An unencoded event is no valid message body, so skip it.
                    continue
            batch_messages.append({'Id': message_id, 'MessageBody': message_body})
            # Send as soon as a batch is full.
            if len(batch_messages) == max_batch_entries:
                self.sqs_queue.send_messages(Entries=batch_messages)
                batch_messages = []
        # Send the remaining, partially filled batch.
        if batch_messages:
            self.sqs_queue.send_messages(Entries=batch_messages)

    def shutDown(self):
        """Flush events that are still buffered, then always run the parent shutdown."""
        try:
            self.buffer.flush()
        finally:
            BaseThreadedModule.shutDown(self)
예제 #7
0
class MongoDbSink(BaseThreadedModule):
    """
    Store incoming events in a mongodb.

    host: Mongodb server.
    database: Mongodb database name.
    collection: Mongodb collection name. Timepatterns like %Y.%m.%d and dynamic values like $(bar) are allowed here.
    optinonal_connection_params: Other optional parameters as documented in https://api.mongodb.org/python/current/api/pymongo/mongo_client.html
    format:     Which event fields to send on, e.g. '$(@timestamp) - $(url) - $(country_code)'.
                If not set the whole event dict is sent.
    doc_id:     Sets the document id for the committed event data.
    store_interval_in_secs:     Send data to mongodb in x seconds intervals.
    batch_size: Sending data to mongodb if event count is above, even if store_interval_in_secs is not reached.
    backlog_size:   Maximum count of events waiting for transmission. If backlog size is exceeded no new events will be processed.

    Configuration template:

    - MongoDbSink:
       host:                            # <default: 'localhost:27017'; type: string; is: optional>
       database:                        # <default: 'lumbermill'; type: string; is: optional>
       collection:                      # <default: 'lumbermill-%Y.%m.%d'; type: string; is: optional>
       optinonal_connection_params:     # <default: {'serverSelectionTimeoutMS': 5}; type: dictionary; is: optional>
       format:                          # <default: None; type: None||string; is: optional>
       doc_id:                          # <default: '$(lumbermill.event_id)'; type: string; is: optional>
       store_interval_in_secs:          # <default: 5; type: integer; is: optional>
       batch_size:                      # <default: 500; type: integer; is: optional>
       backlog_size:                    # <default: 5000; type: integer; is: optional>
    """

    module_type = "output"
    """Set module type"""
    def configure(self, configuration):
        """Read format, collection, database and document id pattern from the configuration."""
        # Call parent configure method.
        BaseThreadedModule.configure(self, configuration)
        self.format = self.getConfigurationValue('format')
        self.collection = self.getConfigurationValue('collection')
        self.database = self.getConfigurationValue('database')
        self.doc_id_pattern = self.getConfigurationValue("doc_id")

    def getStartMessage(self):
        """Return the start message: database name and configured backlog size."""
        return "DB: %s. Max buffer size: %d" % (self.getConfigurationValue(
            'database'), self.getConfigurationValue('backlog_size'))

    def initAfterFork(self):
        """Connect to mongodb and create the event buffer; shut down if connecting failed."""
        BaseThreadedModule.initAfterFork(self)
        # Init monogdb client after fork.
        self.mongodb = self.connect()
        if self.mongodb is None:
            self.lumbermill.shutDown()
            return
        # As the buffer uses a threaded timed function to flush its buffer and thread will not survive a fork, init buffer here.
        self.buffer = Buffer(
            self.getConfigurationValue('batch_size'),
            self.storeData,
            self.getConfigurationValue('store_interval_in_secs'),
            maxsize=self.getConfigurationValue('backlog_size'))

    def connect(self):
        """
        Create a mongodb client and verify that the server answers.

        Returns the client, or None if the client could not be created or the
        server info request failed. On failure lumbermill is shut down.
        """
        mongodb_client = None
        try:
            mongodb_client = pymongo.MongoClient(
                self.getConfigurationValue('host'),
                **self.getConfigurationValue('optinonal_connection_params'))
            # Requesting the server info is what tells us whether the server is reachable.
            self.logger.debug(str(mongodb_client.server_info()))
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.warning(
                "Connection to %s failed. Exception: %s, Error: %s." %
                (self.getConfigurationValue('host'), etype, evalue))
            # Do not hand out a client that failed the check.
            mongodb_client = None
        if mongodb_client is None:
            self.logger.error("Connection to %s failed. Shutting down." %
                              self.getConfigurationValue('host'))
            self.lumbermill.shutDown()
        else:
            self.logger.debug("Connection to %s successful." %
                              self.getConfigurationValue('host'))
        return mongodb_client

    def handleEvent(self, event):
        """Buffer the event, or its formatted representation if a format is configured."""
        if self.format:
            # NOTE(review): storeData assigns event['_id'], which looks like it expects a
            # dict - confirm that a configured format yields one.
            publish_data = self.getConfigurationValue('format', event)
        else:
            publish_data = event
        self.buffer.append(publish_data)
        yield None

    def storeData(self, events):
        """
        Insert the events into mongodb, using one ordered bulk operation per collection.

        events: Events handed over by the buffer. Events without a resolvable
                document id are logged and skipped.
        """
        mongo_db = self.mongodb[self.database]
        bulk_objects = {}
        for event in events:
            collection_name = mapDynamicValueInString(
                self.collection, event, use_strftime=True).lower()
            doc_id = mapDynamicValue(self.doc_id_pattern, event)
            if not doc_id:
                self.logger.error("Could not find doc_id %s for event %s." %
                                  (self.doc_id_pattern, event))
                continue
            event['_id'] = doc_id
            if collection_name not in bulk_objects:
                bulk_objects[collection_name] = mongo_db[
                    collection_name].initialize_ordered_bulk_op()
            try:
                bulk_objects[collection_name].insert(event)
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.error(
                    "Server communication error. Exception: %s, Error: %s." %
                    (etype, evalue))
                self.logger.debug("Payload: %s" % event)
                # Compare against the error text; the exception object itself is not a string.
                error_text = str(evalue)
                if "Broken pipe" in error_text or "Connection reset by peer" in error_text:
                    # Keep the current client if reconnecting fails.
                    new_client = self.connect()
                    if new_client is not None:
                        self.mongodb = new_client
        # items() works on python 2 and 3, iteritems() only on python 2.
        for collection_name, bulk_object in bulk_objects.items():
            try:
                result = bulk_object.execute()
                self.logger.debug(str(result))
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.error(
                    "Server communication error. Exception: %s, Error: %s." %
                    (etype, evalue))

    def shutDown(self):
        """Flush buffered events (best effort, failures are logged), then run the parent shutdown."""
        try:
            self.buffer.flush()
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.warning(
                "Could not flush buffer on shutdown. Exception: %s, Error: %s." %
                (etype, evalue))
        BaseThreadedModule.shutDown(self)
예제 #8
0
class MongoDbSink(BaseThreadedModule):
    """
    Store incoming events in a mongodb.

    host: Mongodb server.
    database: Mongodb database name.
    collection: Mongodb collection name. Timepatterns like %Y.%m.%d and dynamic values like $(bar) are allowed here.
    optinonal_connection_params: Other optional parameters as documented in https://api.mongodb.org/python/current/api/pymongo/mongo_client.html
    format:     Which event fields to send on, e.g. '$(@timestamp) - $(url) - $(country_code)'.
                If not set the whole event dict is sent.
    doc_id:     Sets the document id for the committed event data.
    store_interval_in_secs:     Send data to mongodb in x seconds intervals.
    batch_size: Sending data to mongodb if event count is above, even if store_interval_in_secs is not reached.
    backlog_size:   Maximum count of events waiting for transmission. If backlog size is exceeded no new events will be processed.

    Configuration template:

    - MongoDbSink:
       host:                            # <default: 'localhost:27017'; type: string; is: optional>
       database:                        # <default: 'lumbermill'; type: string; is: optional>
       collection:                      # <default: 'lumbermill-%Y.%m.%d'; type: string; is: optional>
       optinonal_connection_params:     # <default: {'serverSelectionTimeoutMS': 5}; type: dictionary; is: optional>
       format:                          # <default: None; type: None||string; is: optional>
       doc_id:                          # <default: '$(lumbermill.event_id)'; type: string; is: optional>
       store_interval_in_secs:          # <default: 5; type: integer; is: optional>
       batch_size:                      # <default: 500; type: integer; is: optional>
       backlog_size:                    # <default: 5000; type: integer; is: optional>
    """

    module_type = "output"
    """Set module type"""

    def configure(self, configuration):
        """Read format, collection, database and document id pattern from the configuration."""
        # Call parent configure method.
        BaseThreadedModule.configure(self, configuration)
        self.format = self.getConfigurationValue('format')
        self.collection = self.getConfigurationValue('collection')
        self.database = self.getConfigurationValue('database')
        self.doc_id_pattern = self.getConfigurationValue("doc_id")

    def getStartMessage(self):
        """Return the start message: database name and configured backlog size."""
        return "DB: %s. Max buffer size: %d" % (self.getConfigurationValue('database'), self.getConfigurationValue('backlog_size'))

    def initAfterFork(self):
        """Connect to mongodb and create the event buffer; shut down if connecting failed."""
        BaseThreadedModule.initAfterFork(self)
        # Init monogdb client after fork.
        self.mongodb = self.connect()
        if self.mongodb is None:
            self.lumbermill.shutDown()
            return
        # As the buffer uses a threaded timed function to flush its buffer and thread will not survive a fork, init buffer here.
        self.buffer = Buffer(self.getConfigurationValue('batch_size'), self.storeData, self.getConfigurationValue('store_interval_in_secs'), maxsize=self.getConfigurationValue('backlog_size'))

    def connect(self):
        """
        Create a mongodb client and verify that the server answers.

        Returns the client, or None if the client could not be created or the
        server info request failed. On failure lumbermill is shut down.
        """
        host = self.getConfigurationValue('host')
        mongodb_client = None
        try:
            mongodb_client = pymongo.MongoClient(host, **self.getConfigurationValue('optinonal_connection_params'))
            # Requesting the server info is what tells us whether the server is reachable.
            self.logger.debug(str(mongodb_client.server_info()))
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.warning("Connection to %s failed. Exception: %s, Error: %s." % (host, etype, evalue))
            # Do not hand out a client that failed the check.
            mongodb_client = None
        if mongodb_client is None:
            self.logger.error("Connection to %s failed. Shutting down." % host)
            self.lumbermill.shutDown()
        else:
            self.logger.debug("Connection to %s successful." % host)
        return mongodb_client

    def handleEvent(self, event):
        """Buffer the event, or its formatted representation if a format is configured."""
        if self.format:
            # NOTE(review): storeData assigns event['_id'], which looks like it expects a
            # dict - confirm that a configured format yields one.
            publish_data = self.getConfigurationValue('format', event)
        else:
            publish_data = event
        self.buffer.append(publish_data)
        yield None

    def storeData(self, events):
        """
        Insert the events into mongodb, using one ordered bulk operation per collection.

        events: Events handed over by the buffer. Events without a resolvable
                document id are logged and skipped.
        """
        mongo_db = self.mongodb[self.database]
        bulk_objects = {}
        for event in events:
            collection_name = mapDynamicValueInString(self.collection, event, use_strftime=True).lower()
            doc_id = mapDynamicValue(self.doc_id_pattern, event)
            if not doc_id:
                self.logger.error("Could not find doc_id %s for event %s." % (self.doc_id_pattern, event))
                continue
            event['_id'] = doc_id
            if collection_name not in bulk_objects:
                bulk_objects[collection_name] = mongo_db[collection_name].initialize_ordered_bulk_op()
            try:
                bulk_objects[collection_name].insert(event)
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.error("Server communication error. Exception: %s, Error: %s." % (etype, evalue))
                self.logger.debug("Payload: %s" % event)
                # Compare against the error text; the exception object itself is not a string.
                error_text = str(evalue)
                if "Broken pipe" in error_text or "Connection reset by peer" in error_text:
                    # Keep the current client if reconnecting fails.
                    new_client = self.connect()
                    if new_client is not None:
                        self.mongodb = new_client
        # items() works on python 2 and 3, iteritems() only on python 2.
        for collection_name, bulk_object in bulk_objects.items():
            try:
                result = bulk_object.execute()
                self.logger.debug(str(result))
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.error("Server communication error. Exception: %s, Error: %s." % (etype, evalue))

    def shutDown(self):
        """Flush buffered events (best effort, failures are logged), then run the parent shutdown."""
        try:
            self.buffer.flush()
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.warning("Could not flush buffer on shutdown. Exception: %s, Error: %s." % (etype, evalue))
        BaseThreadedModule.shutDown(self)
예제 #9
0
class FileSink(BaseThreadedModule):
    """
    Store all received events in a file.

    file_name: absolute path to file. String may contain python's strftime directives and event fields, e.g. %Y-%m-%d.
    format: Which event fields to use in the logline, e.g. '$(@timestamp) - $(url) - $(country_code)'
    store_interval_in_secs: writing data to file in x seconds intervals.
    batch_size: writing data to file if event count is above, even if store_interval_in_secs is not reached.
    backlog_size: maximum count of events waiting for transmission. Events above count will be dropped.
    compress: Compress output as gzip or snappy file. For this to be effective, the chunk size should not be too small.

    Configuration template:

    - FileSink:
       file_name:                       # <type: string; is: required>
       format:                          # <default: '$(data)'; type: string; is: optional>
       store_interval_in_secs:          # <default: 10; type: integer; is: optional>
       batch_size:                      # <default: 500; type: integer; is: optional>
       backlog_size:                    # <default: 500; type: integer; is: optional>
       compress:                        # <default: None; type: None||string; values: [None,'gzip','snappy']; is: optional>
    """

    module_type = "output"
    """Set module type"""
    can_run_forked = False

    def configure(self, configuration):
        """
        Read the configuration, load the selected compression module, create the
        event buffer and start the periodic cleanup of stale file handles.
        """
        # Call parent configure method
        BaseThreadedModule.configure(self, configuration)
        self.batch_size = self.getConfigurationValue('batch_size')
        self.backlog_size = self.getConfigurationValue('backlog_size')
        self.file_name = self.getConfigurationValue('file_name')
        self.format = self.getConfigurationValue('format')
        self.compress = self.getConfigurationValue('compress')
        # Maps path -> {'handle': open file, 'lru': time of last use}.
        self.file_handles = {}
        if self.compress == 'gzip':
            try:
                # Import module into namespace of object. Otherwise it will not be accessible when process was forked.
                self.gzip_module = __import__('gzip')
            except ImportError:
                self.logger.error(
                    'Gzip compression selected but gzip module could not be loaded.'
                )
                self.lumbermill.shutDown()
        if self.compress == 'snappy':
            try:
                self.snappy_module = __import__('snappy')
            except ImportError:
                self.logger.error(
                    'Snappy compression selected but snappy module could not be loaded.'
                )
                self.lumbermill.shutDown()
        self.buffer = Buffer(
            self.batch_size,
            self.storeData,
            self.getConfigurationValue('store_interval_in_secs'),
            maxsize=self.backlog_size)
        TimedFunctionManager.startTimedFunction(self.closeStaleFileHandles)

    def getStartMessage(self):
        """Return the start message: file name pattern and configured backlog size."""
        return "File: %s. Max buffer size: %d" % (
            self.file_name, self.getConfigurationValue('backlog_size'))

    @setInterval(60)
    def closeStaleFileHandles(self):
        """
        Close and delete file handles that are unused since 5 minutes.
        """
        # Iterate over a snapshot, entries are removed from the dict inside the loop.
        for path, file_handle_data in list(self.file_handles.items()):
            last_used_time_ago = time.time() - file_handle_data['lru']
            if last_used_time_ago < 300:
                continue
            self.logger.info('Closing stale file handle for %s.' % (path))
            file_handle_data['handle'].close()
            self.file_handles.pop(path, None)

    def closeAllFileHandles(self):
        """Close and forget every cached file handle."""
        # Iterate over a snapshot, entries are removed from the dict inside the loop.
        for path, file_handle_data in list(self.file_handles.items()):
            self.logger.info('Closing file handle for %s.' % path)
            file_handle_data['handle'].close()
            self.file_handles.pop(path, None)

    def ensurePathExists(self, path):
        """
        Create the directory part of path if it does not exist yet.

        path: Path of the file that is about to be written.

        Raises OSError if the directory can not be created.
        """
        dirpath = os.path.dirname(path)
        # A bare file name has no directory part, nothing to create then.
        if not dirpath or os.path.isdir(dirpath):
            return
        try:
            os.makedirs(dirpath)
        except OSError:
            # Only an error if the directory is still missing; it may have been created meanwhile.
            if not os.path.isdir(dirpath):
                raise

    def handleEvent(self, event):
        """Queue the event in the buffer; nothing is passed on to other modules."""
        self.buffer.append(event)
        yield None

    def getOrCreateFileHandle(self, path, mode):
        """
        Return the cached file handle for path, opening the file with mode if needed.

        Returns None (after logging the error) if the file could not be opened.
        """
        file_handle = None
        try:
            file_handle = self.file_handles[path]['handle']
            self.file_handles[path]['lru'] = time.time()
        except KeyError:
            try:
                file_handle = open(path, mode)
                self.file_handles[path] = {
                    'handle': file_handle,
                    'lru': time.time()
                }
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.error(
                    'Could no open %s for writing. Exception: %s, Error: %s.' %
                    (path, etype, evalue))
        return file_handle

    def storeData(self, events):
        """
        Write the events to their target files.

        events: Events handed over by the buffer. File name and log line are
                resolved per event, lines for the same file are written together.

        Returns True if the data for every target file was written, False if any
        file could not be created, opened or written.
        """
        write_data = collections.defaultdict(list)
        for event in events:
            path = mapDynamicValue(self.file_name,
                                   mapping_dict=event,
                                   use_strftime=True)
            line = mapDynamicValue(self.format, mapping_dict=event)
            write_data["%s" % path].append(line + "\n")
        all_written = True
        for path, chunks in write_data.items():
            lines = "".join(chunks)
            try:
                self.ensurePathExists(path)
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.error(
                    'Could no create path %s. Events could not be written. Exception: %s, Error: %s.'
                    % (path, etype, evalue))
                all_written = False
                # The remaining files may still be writable.
                continue
            mode = "a+"
            if self.compress == 'gzip':
                path += ".gz"
                mode += "b"
                lines = self.compressGzip(lines)
            elif self.compress == 'snappy':
                path += ".snappy"
                lines = self.compressSnappy(lines)
                mode += "b"
            fh = self.getOrCreateFileHandle(path, mode)
            if fh is None:
                # The open error was already logged by getOrCreateFileHandle.
                all_written = False
                continue
            try:
                fh.write(lines)
                fh.flush()
            except Exception:
                etype, evalue, etb = sys.exc_info()
                self.logger.error(
                    'Could no write event data to %s. Exception: %s, Error: %s.'
                    % (path, etype, evalue))
                all_written = False
        return all_written

    def shutDown(self):
        """Flush buffered events, close all file handles and run the parent shutdown."""
        self.buffer.flush()
        self.closeAllFileHandles()
        BaseThreadedModule.shutDown(self)

    def compressGzip(self, data):
        """
        Return data compressed as one gzip member.

        data: Text or bytes. Text is encoded as utf-8 first.
        """
        # Imported here because the top of this file is not touched by this change.
        import io
        if not isinstance(data, bytes):
            data = data.encode('utf-8')
        # Gzip output is binary, so it is collected in a bytes buffer.
        compressed = io.BytesIO()
        compressor = self.gzip_module.GzipFile(mode='wb', fileobj=compressed)
        try:
            compressor.write(data)
        finally:
            compressor.close()
        return compressed.getvalue()

    def compressSnappy(self, data):
        """Return data compressed with the snappy module loaded in configure."""
        return self.snappy_module.compress(data)
예제 #10
0
class ElasticSearchSink(BaseThreadedModule):
    """
    Store the data dictionary in an elasticsearch index.

    The elasticsearch module takes care of discovering all nodes of the elasticsearch cluster.
    Requests will then be load balanced via round robin.

    action:     Either index or update. If update be sure to provide the correct doc_id.
    fields:     Which event fields to send on, e.g. [timestamp, url, country_code].
                If not set the whole event dict is send.
    nodes:      Configures the elasticsearch nodes.
    read_timeout: Set number of seconds to wait until requests to elasticsearch will time out.
    connection_type:    One of: 'urllib3', 'requests'.
    http_auth:  'user:password'.
    use_ssl:    One of: True, False.
    index_name: Sets the index name. Timepatterns like %Y.%m.%d and dynamic values like $(bar) are allowed here.
    doc_id:     Sets the es document id for the committed event data.
    routing:    Sets a routing value (@see: http://www.elasticsearch.org/blog/customizing-your-document-routing/)
                Timepatterns like %Y.%m.%d are allowed here.
    ttl:        When set, documents will be automatically deleted after ttl expired.
                Can either set time in milliseconds or elasticsearch date format, e.g.: 1d, 15m etc.
                This feature needs to be enabled for the index.
                @See: http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/mapping-ttl-field.html
    sniff_on_start: The client can be configured to inspect the cluster state to get a list of nodes upon startup.
                    Might cause problems on hosts with multiple interfaces. If connections fail, try to deactivate this.
    sniff_on_connection_fail: The client can be configured to inspect the cluster state to get a list of nodes upon failure.
                              Might cause problems on hosts with multiple interfaces. If connections fail, try to deactivate this.
    store_interval_in_secs:     Send data to es in x seconds intervals.
    batch_size: Sending data to es if event count is above, even if store_interval_in_secs is not reached.
    backlog_size:   Maximum count of events waiting for transmission. If backlog size is exceeded no new events will be processed.

    Configuration template:

    - ElasticSearchSink:
       action:                          # <default: 'index'; type: string; is: optional; values: ['index', 'update']>
       fields:                          # <default: None; type: None||list; is: optional>
       nodes:                           # <type: string||list; is: required>
       read_timeout:                    # <default: 10; type: integer; is: optional>
       connection_type:                 # <default: 'urllib3'; type: string; values: ['urllib3', 'requests']; is: optional>
       http_auth:                       # <default: None; type: None||string; is: optional>
       use_ssl:                         # <default: False; type: boolean; is: optional>
       index_name:                      # <default: 'lumbermill-%Y.%m.%d'; type: string; is: optional>
       doc_id:                          # <default: '$(lumbermill.event_id)'; type: string; is: optional>
       doc_type:                        # <default: '$(lumbermill.event_type)'; type: string; is: optional>
       routing:                         # <default: None; type: None||string; is: optional>
       ttl:                             # <default: None; type: None||integer||string; is: optional>
       sniff_on_start:                  # <default: False; type: boolean; is: optional>
       sniff_on_connection_fail:        # <default: False; type: boolean; is: optional>
       store_interval_in_secs:          # <default: 5; type: integer; is: optional>
       batch_size:                      # <default: 500; type: integer; is: optional>
       backlog_size:                    # <default: 500; type: integer; is: optional>
    """

    module_type = "output"
    """Set module type"""

    def configure(self, configuration):
        """
        Read the module configuration.

        Sets the log level of the client libraries, stores the index, id, type and
        routing patterns, normalizes the node setting to a list and selects the
        connection class matching the configured connection type.
        """
        # Call parent configure method.
        BaseThreadedModule.configure(self, configuration)
        for library_name in ('elasticsearch', 'urllib3', 'requests'):
            library_logger = logging.getLogger(library_name)
            if self.getConfigurationValue('log_level') == 'info':
                library_logger.setLevel(logging.WARN)
            else:
                # Follow the module log level if configured to other than default.
                library_logger.setLevel(self.logger.level)
        # Plain settings.
        self.action = self.getConfigurationValue('action')
        self.fields = self.getConfigurationValue('fields')
        self.ttl = self.getConfigurationValue("ttl")
        # Patterns that are resolved per event.
        self.index_name = self.getConfigurationValue("index_name")
        self.routing_pattern = self.getConfigurationValue("routing")
        self.doc_id_pattern = self.getConfigurationValue("doc_id")
        self.doc_type_pattern = self.getConfigurationValue("doc_type")
        self.doc_type_is_dynamic = self.isDynamicConfigurationValue("doc_type")
        # A single node may be configured without wrapping it in a list.
        configured_nodes = self.getConfigurationValue("nodes")
        self.read_timeout = self.getConfigurationValue("read_timeout")
        if isinstance(configured_nodes, list):
            self.es_nodes = configured_nodes
        else:
            self.es_nodes = [configured_nodes]
        connection_type = self.getConfigurationValue("connection_type")
        if connection_type == 'urllib3':
            self.connection_class = elasticsearch.connection.Urllib3HttpConnection
        elif connection_type == 'requests':
            self.connection_class = elasticsearch.connection.RequestsHttpConnection

    def getStartMessage(self):
        return "Idx: %s. Max buffer size: %d" % (self.index_name, self.getConfigurationValue('backlog_size'))

    def initAfterFork(self):
        """Connect to elasticsearch and create the event buffer; shut down if connecting failed."""
        BaseThreadedModule.initAfterFork(self)
        # Init es client after fork as mentioned in https://elasticsearch-py.readthedocs.org/en/master/
        self.es = self.connect()
        if self.es:
            # As the buffer uses a threaded timed function to flush its buffer and thread will not survive a fork, init buffer here.
            flush_size = self.getConfigurationValue('batch_size')
            flush_interval = self.getConfigurationValue('store_interval_in_secs')
            backlog_size = self.getConfigurationValue('backlog_size')
            self.buffer = Buffer(flush_size, self.storeData, flush_interval, maxsize=backlog_size)
        else:
            self.lumbermill.shutDown()

    def connect(self):
        """
        Create the elasticsearch client, retrying up to 5 times.

        After a failed attempt the wait time grows by one second per attempt,
        starting at 4 seconds. There is no wait after the last attempt.

        Returns the client, or False if all attempts failed. In that case
        lumbermill is shut down.
        """
        es = False
        max_tries = 5
        for tries in range(max_tries):
            try:
                # Connect to es node and round-robin between them.
                self.logger.debug("Connecting to %s." % self.es_nodes)
                es = elasticsearch.Elasticsearch(self.es_nodes,
                                                 connection_class=self.connection_class,
                                                 timeout=self.read_timeout,
                                                 sniff_on_start=self.getConfigurationValue('sniff_on_start'),
                                                 sniff_on_connection_fail=self.getConfigurationValue('sniff_on_connection_fail'),
                                                 sniff_timeout=5,
                                                 maxsize=20,
                                                 use_ssl=self.getConfigurationValue('use_ssl'),
                                                 http_auth=self.getConfigurationValue('http_auth'))
                break
            except Exception:
                # Exception instead of a bare except, so KeyboardInterrupt and SystemExit still end the retries.
                etype, evalue, etb = sys.exc_info()
                self.logger.warning("Connection to %s failed. Exception: %s, Error: %s." % (self.es_nodes, etype, evalue))
                # Waiting only makes sense if another attempt follows.
                if tries + 1 < max_tries:
                    self.logger.warning("Waiting %s seconds before retring to connect." % ((4 + tries)))
                    time.sleep(4 + tries)
        if not es:
            self.logger.error("Connection to %s failed. Shutting down." % self.es_nodes)
            self.lumbermill.shutDown()
        else:
            self.logger.debug("Connection to %s successful." % self.es_nodes)
        return es

    def handleEvent(self, event):
        if self.fields:
            publish_data = {}
            for field in self.fields:
                try:
                    publish_data.update(event[field])
                except KeyError:
                    continue
        else:
            publish_data = event
        self.buffer.append(publish_data)
        yield None

    def dataToElasticSearchJson(self, events):
        """
        Format data for elasticsearch bulk update.

        events: Events to serialize. Events without a resolvable document id and
                events that can not be json encoded are logged and skipped.

        Returns a string with one action line and one document line per event.
        """
        json_data = []
        for event in events:
            index_name = mapDynamicValueInString(self.index_name, event, use_strftime=True).lower()
            doc_type = mapDynamicValueInString(self.doc_type_pattern, event)
            doc_id = mapDynamicValueInString(self.doc_id_pattern, event)
            if not doc_id:
                self.logger.error("Could not find doc_id %s for event %s." % (self.getConfigurationValue("doc_id"), event))
                continue
            action_meta = {'_index': index_name,
                           '_type': doc_type,
                           '_id': doc_id}
            if self.routing_pattern:
                # NOTE(review): the event is not passed here, so presumably only time patterns
                # are resolved in the routing value - confirm this is intended.
                action_meta['_routing'] = mapDynamicValue(self.routing_pattern, use_strftime=True)
            if self.ttl:
                action_meta['_ttl'] = self.ttl
            # Routing and ttl belong to the configured action, which is not always 'index'.
            header = {self.action: action_meta}
            if self.action == 'update':
                event = {'doc': event}
            try:
                json_data.append("\n".join((json.dumps(header), json.dumps(event), "\n")))
            except (TypeError, ValueError):
                # ValueError includes UnicodeDecodeError, TypeError covers values json can not serialize.
                etype, evalue, etb = sys.exc_info()
                self.logger.error("Could not json encode %s. Exception: %s, Error: %s." % (event, etype, evalue))
        return "".join(json_data)

    def storeData(self, events):
        """
        Send events to elasticsearch using a single bulk request.

        events: Events to store; they are serialized via dataToElasticSearchJson.
        Returns True if the bulk request succeeded. On failure the problem is logged, a
        reconnect is attempted where it looks like a connection problem, and None is returned.
        """
        json_data = self.dataToElasticSearchJson(events)
        try:
            self.es.bulk(body=json_data)
            return True
        except elasticsearch.exceptions.ConnectionError:
            try:
                # Only one placeholder in the message. The former two-element argument tuple
                # raised a TypeError here, which was swallowed below, so connect() never ran.
                self.logger.warning("Lost connection to %s. Trying to reconnect." % (self.es_nodes,))
                self.es = self.connect()
            except Exception:
                # Best effort: back off briefly and leave the next attempt to the caller.
                time.sleep(.5)
        except Exception:
            etype, evalue, etb = sys.exc_info()
            self.logger.error("Server communication error. Exception: %s, Error: %s." % (etype, evalue))
            self.logger.debug("Payload: %s" % json_data)
            # Match against the error text; a substring test on the exception object itself
            # does not inspect its message.
            error_text = str(evalue)
            if "Broken pipe" in error_text or "Connection reset by peer" in error_text:
                self.es = self.connect()

    def shutDown(self):
        """
        Flush buffered events, then run the base module shutdown.

        Flushing is best effort: a failure is logged but never prevents the base class
        shutdown from running.
        """
        try:
            self.buffer.flush()
        except Exception:
            # Report instead of silently discarding the error, so events lost during
            # shutdown leave a trace in the log.
            etype, evalue, etb = sys.exc_info()
            self.logger.warning("Could not flush buffer on shutdown. Exception: %s, Error: %s." % (etype, evalue))
        finally:
            BaseThreadedModule.shutDown(self)