Example #1
0
 def _build_event(self, events):
     if not events:
         return None
     if not isinstance(events, list):
         events = [events]
     evts = []
     for event in events:
         assert event.raw_data, "the raw data of events is empty"
         if event.is_unbroken:
             evt = unbroken_evt_fmt.format(
                 event.host or "",
                 event.source or "",
                 event.sourcetype or "",
                 event.time or "",
                 event.index or "",
                 scu.escape_cdata(event.raw_data),
                 "<done/>" if event.is_done else "",
             )
         else:
             evt = evt_fmt.format(
                 event.host or "",
                 event.source or "",
                 event.sourcetype or "",
                 event.time or "",
                 event.index or "",
                 scu.escape_cdata(event.raw_data),
             )
         evts.append(evt)
     return evts
    def _write_events(self, events, ckpts):
        """Write Kafka messages to the data loader, then store checkpoints.

        :param events: mutable list of messages, each read through its
            ``topic``, ``partition`` and ``value`` attributes. The list is
            emptied in place once the messages were handed to the loader.
        :param ckpts: mapping of checkpoint key to checkpoint state.

        Blocks until every checkpoint has been stored: a failed update is
        logged and retried after a 2 second pause.
        """
        loader = self._task_config[c.data_loader]
        brokers = self._task_config[c.kafka_brokers]

        evt_fmt = ("<event><host>{0}</host><source>{1}</source>"
                   "<sourcetype>kafka:topicEvent</sourcetype>"
                   "<index>{2}</index><data>{3}</data></event>")

        evts = (evt_fmt.format(brokers,
                               "kafka:{}:{}".format(msg.topic, msg.partition),
                               self._idx_tbl[msg.topic],
                               scutil.escape_cdata(msg.value))
                for msg in events)
        loader.write_events("<stream>{}</stream>".format("".join(evts)))
        del events[:]

        # A ``continue`` inside the ``for`` loop only moves on to the next
        # key, so retrying has to be driven explicitly: keep the keys that
        # still need storing and loop until none are left.
        pending = dict(ckpts)
        while pending:
            # Iterate over a snapshot because successful keys are removed.
            for key, ckpt in list(pending.items()):
                try:
                    self._store.update_state(key, ckpt)
                except Exception:
                    logger.error("Failed to update ckpt for key=%s reason=%s",
                                 key, traceback.format_exc())
                    time.sleep(2)
                else:
                    del pending[key]
Example #3
0
    def _do_index_data(self):
        if self._api is None:
            return

        evt_fmt = ("<stream><event>"
                   "<time>{time}</time>"
                   "<source>{source}</source>"
                   "<sourcetype>{sourcetype}</sourcetype>"
                   "<index>{index}</index>"
                   "<data>{data}</data>"
                   "</event></stream>")

        task = self._task_config
        results = self._api(task)

        events = []
        size_total = 0
        for result in results:
            event = evt_fmt.format(source=task[tac.source],
                                   sourcetype=task[tac.sourcetype],
                                   index=task[tac.index],
                                   data=scutil.escape_cdata(result),
                                   time=time.time())
            size_total += len(event)
            events.append(event)
        logger.info("Send data for indexing.",
                    action="index",
                    size=size_total,
                    records=len(events))

        task["writer"].write_events("".join(events))
Example #4
0
 def _encode_to_utf8(self, decoder, chunk):
     """Decode ``chunk`` and pass the text through ``scutil.escape_cdata``.

     Returns the escaped text. If decoding or escaping raises, the failure
     is logged together with the configured character set and ``None`` is
     returned instead.
     """
     try:
         return scutil.escape_cdata(decoder.decode(chunk))
     except Exception:
         self._logger.exception("Failed to decode data.",
                                encoding=self._config[asc.character_set])
     return None
Example #5
0
 def _do_format(self, evt, evt_fmt, index, host, source, sourcetype, time):
     """Fill ``evt_fmt`` with the event metadata and the escaped event body.

     ``evt`` is passed through ``scutil.escape_cdata`` and substituted for
     the ``data`` placeholder; the other arguments fill the placeholders of
     the same name. Returns the formatted string.
     """
     fields = {
         "index": index,
         "host": host,
         "source": source,
         "sourcetype": sourcetype,
         "time": time,
         "data": scutil.escape_cdata(evt),
     }
     return evt_fmt.format(**fields)
 def _encode_to_utf8(self, decoder, chunk):
     """Decode ``chunk`` and pass the text through ``scutil.escape_cdata``.

     Returns the escaped text, or ``None`` when decoding or escaping
     raises. A failure is logged with the configured character set and the
     formatted traceback.
     """
     try:
         decoded = decoder.decode(chunk)
         escaped = scutil.escape_cdata(decoded)
     except Exception:
         self._logger.error(
             "Failed to decode data by using encoding=%s, error=%s",
             self._config[asc.character_set], traceback.format_exc())
         return None
     return escaped
Example #7
0
    def _do_index(self, source):
        logger = self._logger
        all_data = [data for data in self._reader]
        size = sum((len(data) for data in all_data), 0)
        if not all_data:
            self.set_eof()
            return

        try:
            all_data = json.loads("".join(all_data))
        except ValueError:
            logger.error("Invalid key of CloudTrail file.")
            self.set_eof()
            return

        records = all_data.get("Records", [])
        blacklist = self._config[asc.ct_blacklist]
        if blacklist:
            blacklist = re.compile(blacklist)
        else:
            blacklist = None

        loader_service = self._loader_service

        events = []
        for record in records:
            if loader_service.stopped():
                break

            if blacklist is not None and blacklist.search(record["eventName"]):
                continue

            data = self.event_fmt.format(
                source=source,
                sourcetype=self._config[tac.sourcetype],
                index=self._config[tac.index],
                data=scutil.escape_cdata(json.dumps(record)))
            events.append(data)

        if events:
            logger.info("Indexed cloudtrail records.",
                        action="index",
                        num_reocords=len(records),
                        size=size)
            loader_service.write_events("".join(events))

        if not loader_service.stopped():
            self._key_store.increase_offset(len(all_data))
            self.set_eof()
 def _index_events(self, results, stream_name):
     """Format the results of one log stream and write them in one call.

     :param results: iterable of mappings read through their ``"message"``
         and ``"timestamp"`` keys. The timestamp is divided by 1000.0
         before formatting (presumably milliseconds to seconds -- confirm
         against the producer of ``results``).
     :param stream_name: last component of the ``region:log_group:stream``
         source string.
     """
     evt_fmt = self._evt_fmt
     task = self._task_config

     # The source is the same for every result, so build it once instead of
     # once per loop iteration.
     source = "{region}:{log_group}:{stream}".format(
         region=task[tac.region],
         log_group=task[aclc.log_group_name],
         stream=stream_name)

     events = [
         evt_fmt.format(
             source=source, sourcetype=task[tac.sourcetype],
             index=task[tac.index],
             data=scutil.escape_cdata(result["message"]),
             time=result["timestamp"] / 1000.0)
         for result in results
     ]
     task["writer"].write_events("".join(events))
Example #9
0
 def _do_format(self, evt, evt_fmt, index, host, source, sourcetype, time):
     """Return ``evt_fmt`` filled with the metadata and the escaped event.

     The event body goes through ``scutil.escape_cdata`` and fills the
     ``data`` placeholder; every other argument fills the placeholder that
     shares its name.
     """
     return evt_fmt.format(
         index=index,
         host=host,
         source=source,
         sourcetype=sourcetype,
         time=time,
         data=scutil.escape_cdata(evt),
     )