def _build_event(self, events):
    if not events:
        return None

    if not isinstance(events, list):
        events = [events]

    evts = []
    for event in events:
        assert event.raw_data, "the raw data of the event is empty"
        if event.is_unbroken:
            evt = unbroken_evt_fmt.format(
                event.host or "",
                event.source or "",
                event.sourcetype or "",
                event.time or "",
                event.index or "",
                scu.escape_cdata(event.raw_data),
                "<done/>" if event.is_done else "",
            )
        else:
            evt = evt_fmt.format(
                event.host or "",
                event.source or "",
                event.sourcetype or "",
                event.time or "",
                event.index or "",
                scu.escape_cdata(event.raw_data),
            )
        evts.append(evt)
    return evts
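# --- A minimal sketch (not from the original module): the unbroken_evt_fmt
# and evt_fmt templates referenced above are defined elsewhere. These
# hypothetical versions are inferred from the positional arguments; the
# unbroken="1" attribute and <done/> marker follow the Splunk modular-input
# XML convention that is_unbroken/is_done suggest.
unbroken_evt_fmt = (
    "<event unbroken=\"1\">"
    "<host>{0}</host><source>{1}</source><sourcetype>{2}</sourcetype>"
    "<time>{3}</time><index>{4}</index><data>{5}</data>{6}"
    "</event>")

evt_fmt = (
    "<event>"
    "<host>{0}</host><source>{1}</source><sourcetype>{2}</sourcetype>"
    "<time>{3}</time><index>{4}</index><data>{5}</data>"
    "</event>")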
def _write_events(self, events, ckpts):
    loader = self._task_config[c.data_loader]
    brokers = self._task_config[c.kafka_brokers]
    evt_fmt = ("<event><host>{0}</host><source>{1}</source>"
               "<sourcetype>kafka:topicEvent</sourcetype>"
               "<index>{2}</index><data>{3}</data></event>")
    evts = (evt_fmt.format(
        brokers,
        "kafka:{}:{}".format(msg.topic, msg.partition),
        self._idx_tbl[msg.topic],
        scutil.escape_cdata(msg.value)) for msg in events)
    loader.write_events("<stream>{}</stream>".format("".join(evts)))
    del events[:]

    # Persist the checkpoints, retrying each key until it is stored.
    for key, ckpt in ckpts.iteritems():
        while 1:
            try:
                self._store.update_state(key, ckpt)
                break
            except Exception:
                logger.error("Failed to update ckpt for key=%s reason=%s",
                             key, traceback.format_exc())
                time.sleep(2)
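# --- A minimal sketch (not from the original module): unbounded retry can
# wedge the pipeline if the state store stays down, so a bounded variant may
# be preferable. Assumes the same update_state(key, ckpt) interface; the
# helper name and defaults are hypothetical.
import logging
import time
import traceback

logger = logging.getLogger(__name__)

def update_ckpt_with_retry(store, key, ckpt, max_attempts=5, backoff=2):
    """Persist one checkpoint, backing off between failed attempts."""
    for attempt in range(1, max_attempts + 1):
        try:
            store.update_state(key, ckpt)
            return True
        except Exception:
            logger.error("Failed to update ckpt for key=%s attempt=%s reason=%s",
                         key, attempt, traceback.format_exc())
            time.sleep(backoff)
    return False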
def _do_index_data(self):
    if self._api is None:
        return

    evt_fmt = ("<stream><event>"
               "<time>{time}</time>"
               "<source>{source}</source>"
               "<sourcetype>{sourcetype}</sourcetype>"
               "<index>{index}</index>"
               "<data>{data}</data>"
               "</event></stream>")
    task = self._task_config
    results = self._api(task)
    events = []
    size_total = 0
    for result in results:
        event = evt_fmt.format(
            source=task[tac.source],
            sourcetype=task[tac.sourcetype],
            index=task[tac.index],
            data=scutil.escape_cdata(result),
            time=time.time())
        size_total += len(event)
        events.append(event)

    logger.info("Send data for indexing.", action="index",
                size=size_total, records=len(events))
    task["writer"].write_events("".join(events))
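# --- A minimal sketch (not from the original module): every writer in these
# snippets only needs a write_events(data) method, so a trivial test double
# can stand in for task["writer"]. The class name is hypothetical.
class CollectingWriter(object):
    """Records whatever the indexer-facing code emits."""

    def __init__(self):
        self.payloads = []

    def write_events(self, data):
        self.payloads.append(data)

# Usage: inject in place of task["writer"], run _do_index_data(), then
# inspect writer.payloads to verify the generated <stream> XML.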
def _encode_to_utf8(self, decoder, chunk):
    try:
        data = decoder.decode(chunk)
        return scutil.escape_cdata(data)
    except Exception:
        self._logger.exception("Failed to decode data.",
                               encoding=self._config[asc.character_set])
        return None
def _do_format(self, evt, evt_fmt, index, host, source, sourcetype, time):
    evt = scutil.escape_cdata(evt)
    res = evt_fmt.format(index=index, host=host, source=source,
                         sourcetype=sourcetype, time=time, data=evt)
    return res
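# --- A plausible sketch (not from the original module) of the
# scutil.escape_cdata helper that every snippet funnels payloads through.
# Assumes its job is to emit a CDATA-safe <data> payload; the "]]>"
# splitting trick is the standard XML idiom, not necessarily what scutil
# actually does.
def escape_cdata(data):
    """Wrap data in a CDATA section, splitting any literal "]]>" so it
    cannot terminate the section early."""
    return "<![CDATA[{}]]>".format(data.replace("]]>", "]]]]><![CDATA[>"))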
def _encode_to_utf8(self, decoder, chunk):
    try:
        data = decoder.decode(chunk)
        return scutil.escape_cdata(data)
    except Exception:
        self._logger.error(
            "Failed to decode data with encoding=%s, error=%s",
            self._config[asc.character_set], traceback.format_exc())
        return None
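# --- A minimal sketch (not from the original module): the decoder these
# helpers receive should be incremental so multi-byte sequences split across
# chunk boundaries survive. The standard library provides one, assuming the
# configured character set is a codec name Python recognizes.
import codecs

def make_decoder(character_set):
    """Build an incremental decoder; errors="replace" keeps a single bad
    byte from aborting the whole stream."""
    return codecs.getincrementaldecoder(character_set)(errors="replace")

decoder = make_decoder("utf-8")
print(decoder.decode(b"\xe4\xb8"))  # "" -- incomplete sequence is buffered
print(decoder.decode(b"\xad"))      # "中" -- completed on the next chunk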
def _do_index(self, source):
    logger = self._logger
    all_data = [data for data in self._reader]
    size = sum((len(data) for data in all_data), 0)
    if not all_data:
        self.set_eof()
        return

    try:
        doc = json.loads("".join(all_data))
    except ValueError:
        logger.error("Invalid JSON in CloudTrail file.")
        self.set_eof()
        return

    records = doc.get("Records", [])
    blacklist = self._config[asc.ct_blacklist]
    if blacklist:
        blacklist = re.compile(blacklist)
    else:
        blacklist = None

    loader_service = self._loader_service
    events = []
    for record in records:
        if loader_service.stopped():
            break

        if blacklist is not None and blacklist.search(record["eventName"]):
            continue

        data = self.event_fmt.format(
            source=source,
            sourcetype=self._config[tac.sourcetype],
            index=self._config[tac.index],
            data=scutil.escape_cdata(json.dumps(record)))
        events.append(data)

    if events:
        logger.info("Indexed cloudtrail records.", action="index",
                    num_records=len(records), size=size)
        loader_service.write_events("".join(events))

    if not loader_service.stopped():
        # Parse into "doc" above so all_data stays bound to the raw chunk
        # list and its length is still meaningful here.
        self._key_store.increase_offset(len(all_data))

    self.set_eof()
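# --- A plausible sketch (not from the original class): the event_fmt
# template used by _do_index is defined elsewhere. This hypothetical version
# is inferred from the keyword arguments (source, sourcetype, index, data)
# and mirrors the per-event <stream> layout used in _do_index_data above.
event_fmt = ("<stream><event>"
             "<source>{source}</source>"
             "<sourcetype>{sourcetype}</sourcetype>"
             "<index>{index}</index>"
             "<data>{data}</data>"
             "</event></stream>")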
def _index_events(self, results, stream_name):
    evt_fmt = self._evt_fmt
    task = self._task_config
    region = task[tac.region]
    log_group_name = task[aclc.log_group_name]
    source = "{region}:{log_group}:{stream}".format(
        region=region, log_group=log_group_name, stream=stream_name)

    events = []
    for result in results:
        event = evt_fmt.format(
            source=source,
            sourcetype=task[tac.sourcetype],
            index=task[tac.index],
            data=scutil.escape_cdata(result["message"]),
            # CloudWatch Logs timestamps are epoch milliseconds.
            time=result["timestamp"] / 1000.0)
        events.append(event)
    task["writer"].write_events("".join(events))