def process(self, log_groups, check_point_tracker):
    logs = PullLogResponse.loggroups_to_flattern_list(log_groups)
    logger.info("Get data from shard {0}, log count: {1}".format(
        self.shard_id, len(logs)))

    match_count = 0
    sample_error_log = ""
    for log in logs:
        m = None
        for k, c in self.kw_check.items():
            if k in log:
                m = c.search(log[k])
                if m:
                    logger.debug(
                        'Keyword detected for shard "{0}" with keyword: "{1}" in field "{2}", log: {3}'
                        .format(self.shard_id, m.group(0), k, log))
                    break  # stop at the first hit so a later non-matching field cannot reset m

        if m:
            match_count += 1
            sample_error_log = log

    if match_count:
        logger.info(
            "Keyword detected for shard {0}, count: {1}, example: {2}".format(
                self.shard_id, match_count, sample_error_log))
    else:
        logger.debug("No keyword detected for shard {0}".format(self.shard_id))

    self.save_checkpoint(check_point_tracker)
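
# For reference, a minimal sketch of the kw_check mapping consumed above:
# field names mapped to pre-compiled regex patterns. The field names and
# patterns below are illustrative assumptions, not part of the consumer.
import re

kw_check = {
    "level": re.compile(r"ERROR|FATAL"),
    "message": re.compile(r"timeout|connection refused"),
}

log = {"level": "ERROR", "message": "upstream healthy"}

# same per-field matching that process() above performs
matched = any(field in log and pattern.search(log[field])
              for field, pattern in kw_check.items())
print(matched)  # True: "ERROR" matches the "level" pattern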
def process(self, log_groups, check_point_tracker):
    logs = PullLogResponse.loggroups_to_flattern_list(log_groups, time_as_str=True, decode_bytes=True)
    logger.info("Get data from shard {0}, log count: {1}".format(self.shard_id, len(logs)))

    for log in logs:
        # Put your sync code here to send to remote.
        # Each log is a dict; all strings are unicode. Example:
        # Python2: {u"__time__": u"12312312", u"__topic__": u"topic", u"field1": u"value1", u"field2": u"value2"}
        # Python3: {"__time__": "12312312", "__topic__": "topic", "field1": "value1", "field2": "value2"}
        event = {}
        event.update(self.default_fields)
        event['time'] = log[u'__time__']
        event['fields'] = {}
        del log['__time__']
        event['fields'].update(log)

        data = json.dumps(event, sort_keys=True)

        try:
            req = self.r.post(self.url, data=data, timeout=self.timeout)
            req.raise_for_status()
        except Exception as err:
            logger.debug("Failed to connect to remote Splunk server ({0}). Exception: {1}".format(self.url, err))

            # TODO: add some error handling here or retry etc.
            raise err

    logger.info("Completed sending data to remote")
    self.save_checkpoint(check_point_tracker)
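
# One way to fill in the retry TODO above: a minimal exponential-backoff
# sketch around the HTTP post. The helper name and retry parameters are
# assumptions, not part of the SDK.
import time
import requests

def post_with_retry(session, url, data, timeout, retries=3, backoff=1.0):
    for attempt in range(retries):
        try:
            resp = session.post(url, data=data, timeout=timeout)
            resp.raise_for_status()
            return resp
        except requests.RequestException:
            if attempt == retries - 1:
                raise  # retries exhausted: surface the error to the caller
            time.sleep(backoff * (2 ** attempt))  # 1s, 2s, 4s, ...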
def copy_data(shard_id, log_groups):
    log_count = PullLogResponse.get_log_count_from_group(log_groups)
    logger.info("Get data from shard {0}, log count: {1}".format(
        shard_id, log_count))

    for log_group in log_groups.LogGroups:
        # update the topic so copied data is distinguishable from the source
        log_group.Topic += "_copied"
        put_method(log_group=log_group)
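
# put_method is not shown here; a plausible implementation simply writes the
# mutated LogGroup back through LogClient.put_log_raw. The endpoint,
# credentials, and project/logstore names below are placeholders.
from aliyun.log import LogClient

client = LogClient("cn-hangzhou.log.aliyuncs.com", "<access_id>", "<access_key>")

def put_method(log_group):
    client.put_log_raw("target-project", "target-logstore", log_group)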
def process(self, log_groups, check_point_tracker): logger.info("TransformDataConsumer::process: get log groups") logs = PullLogResponse.loggroups_to_flattern_list(log_groups) c, r, p, f = _transform_events_to_logstore(self.runner, logs, self.to_client, self.to_project, self.to_logstore) self.count += c self.removed += r self.processed += p self.failed += f # save check point self.save_checkpoint(check_point_tracker)
def process(self, log_groups, check_point_tracker):
    logs = PullLogResponse.loggroups_to_flattern_list(log_groups, time_as_str=True, decode_bytes=True)
    logger.info("Get data from shard {0}, log count: {1}".format(
        self.shard_id, len(logs)))

    try:
        with SyslogClient(self.host, self.port, proto=self.protocol, timeout=self.timeout,
                          cert_path=self.cert_path, client_cert=self.client_cert,
                          client_key=self.client_key) as client:
            for log in logs:
                # Put your sync code here to send to remote.
                # Each log is a dict; all strings are unicode. Example:
                # Python2: {u"__time__": u"12312312", u"__topic__": u"topic", u"field1": u"value1", u"field2": u"value2"}
                # Python3: {"__time__": "12312312", "__topic__": "topic", "field1": "value1", "field2": "value2"}
                timestamp = datetime.fromtimestamp(int(log[u'__time__']))
                del log['__time__']

                # serialize the remaining fields as sep-delimited key=value pairs,
                # skipping the separator before the first pair
                io = six.StringIO()
                first = True
                for k, v in six.iteritems(log):
                    if not first:
                        io.write(self.sep)
                    first = False
                    io.write("{0}={1}".format(k, v))
                data = io.getvalue()

                client.log(data,
                           facility=self.option.get("facility", None),
                           severity=self.option.get("severity", None),
                           timestamp=timestamp,
                           program=self.option.get("tag", None),
                           hostname=self.option.get("hostname", None))
    except Exception as err:
        logger.debug(
            "Failed to connect to remote syslog server ({0}). Exception: {1}"
            .format(self.option, err))
        # TODO: add some error handling here or retry etc.
        raise err

    logger.info("Completed sending data to remote")
    self.save_checkpoint(check_point_tracker)
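
# Hypothetical shape of the option dict read by the process() above; the
# keys match the self.option.get(...) calls, the values are illustrative
# assumptions.
option = {
    "facility": 1,        # syslog facility code
    "severity": 6,        # informational
    "tag": "sls-sync",    # fills the syslog program field
    "hostname": None,     # passed through to client.log() unchanged
}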
def pull_logs(self, project_name, logstore_name, shard_id, cursor, count=1000):
    """ batch pull log data from log service
    Unsuccessful operation will cause a LogException.

    :type project_name: string
    :param project_name: the Project name

    :type logstore_name: string
    :param logstore_name: the logstore name

    :type shard_id: int
    :param shard_id: the shard id

    :type cursor: string
    :param cursor: the cursor from which to start pulling data

    :type count: int
    :param count: the maximum number of log groups to pull, default 1000

    :return: PullLogResponse

    :raise: LogException
    """
    headers = {}
    headers['Accept-Encoding'] = ''
    headers['Accept'] = 'application/x-protobuf'

    params = {}
    resource = "/logstores/" + logstore_name + "/shards/" + str(shard_id)
    params['type'] = 'log'
    params['cursor'] = cursor
    params['count'] = str(count)

    (resp, header) = self._send("GET", project_name, None, resource, params, headers, "binary")
    return PullLogResponse(resp, header)
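
# A sketch of driving pull_logs in a loop: fetch the shard's oldest cursor,
# then page forward until the cursor stops advancing. Project/logstore names
# and the handle_groups callback are placeholders.
cursor = client.get_cursor("my-project", "my-logstore", 0, "begin").get_cursor()
while True:
    res = client.pull_logs("my-project", "my-logstore", 0, cursor, count=1000)
    handle_groups(res.get_loggroup_list())   # placeholder handler
    next_cursor = res.get_next_cursor()
    if next_cursor == cursor:                # cursor unchanged: caught up
        break
    cursor = next_cursor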
def process(self, log_groups, check_point_tracker): logger.info("TransformDataConsumer::process: get log groups") logs = PullLogResponse.loggroups_to_flattern_list(log_groups) _transform_events_to_logstore(self.runner, logs, self.to_client, self.to_project, self.to_logstore) # save check point current_time = time.time() if current_time - self.last_check_time > 3: try: self.last_check_time = current_time check_point_tracker.save_check_point(True) except Exception: import traceback traceback.print_exc() else: try: check_point_tracker.save_check_point(False) except Exception: import traceback traceback.print_exc() return None