def tva_delta_server_request_event_creator():
    """
    Build the event creator for "TVA Delta Request Starting" log lines.

    The creator is assembled from two string fields (``task`` and
    ``sequence_number``), a regexp parser and a substring matcher on the
    task text.

    :return: configured EventCreator
    """
    fields = Metadata([
        StringField("task"),
        StringField("sequence_number"),
    ])
    line_parser = RegexpParser(
        r"^(?P<task>TVA Delta Request Starting).+sequence number: (?P<sequence_number>\d+)\s*",
        return_empty_list=True)
    line_matcher = SubstringMatcher("TVA Delta Request Starting")
    return EventCreator(fields, line_parser, matcher=line_matcher)
def received_delta_server_notification_event_creator():
    """
    Build the event creator for "Received Delta Server Notification" lines.

    The creator is assembled from two string fields (``task`` and
    ``sequence_number``), a regexp parser and a substring matcher on the
    task text.

    :return: configured EventCreator
    """
    fields = Metadata([
        StringField("task"),
        StringField("sequence_number"),
    ])
    line_parser = RegexpParser(
        r"^(?P<task>Received Delta Server Notification) Sequence Number: (?P<sequence_number>\d+).*",
        return_empty_list=True)
    line_matcher = SubstringMatcher("Received Delta Server Notification")
    return EventCreator(fields, line_parser, matcher=line_matcher)
def tva_delta_server_response_event_creator():
    """
    Build the event creator for "TVA Delta Server respond" log lines.

    Declares the string fields ``task`` and ``status`` plus the integer
    field ``duration`` (the regexp captures it from the ``in '<n>' ms``
    part of the line).

    :return: configured EventCreator
    """
    fields = Metadata([
        StringField("task"),
        StringField("status"),
        IntField("duration"),
    ])
    line_parser = RegexpParser(
        r"^(?P<task>TVA Delta Server respond) with status code '(?P<status>\S+)' in '(?P<duration>\d+)' ms\s*",
        return_empty_list=True)
    line_matcher = SubstringMatcher("TVA Delta Server respond")
    return EventCreator(fields, line_parser, matcher=line_matcher)
def model_state_event_creator():
    """
    Build the event creator for "[Model] Model state after committing
    transaction" log lines.

    Declares three string fields (``task``, ``sequence_number``, ``number``)
    followed by eight integer counter fields, in the same order as the
    capture groups of the regexp.

    :return: configured EventCreator
    """
    counter_names = (
        "entities",
        "links",
        "channels",
        "events",
        "programs",
        "groups",
        "on_demand_programs",
        "broadcast_events",
    )
    fields = [
        StringField("task"),
        StringField("sequence_number"),
        StringField("number"),
    ]
    fields.extend(IntField(counter) for counter in counter_names)

    # NOTE(review): the last two groups are named OnDemandPrograms /
    # BroadcastEvents while the fields are on_demand_programs /
    # broadcast_events. Presumably harmless if values are assigned by group
    # position rather than by group name -- confirm against RegexpParser.
    pattern = (
        r"^\[Model\] (?P<task>Model state after committing transaction) "
        r"\[Sequence number: (?P<sequence_number>\d+).*Number: (?P<number>\d+)\] "
        r"Entities: (?P<entities>\d+) - Links: (?P<links>\d+) - Channels: (?P<channels>\d+)"
        r" - Events: (?P<events>\d+) - Programs: (?P<programs>\d+) - Groups: (?P<groups>\d+)"
        r" - OnDemandPrograms: (?P<OnDemandPrograms>\d+) - BroadcastEvents: (?P<BroadcastEvents>\d+)\s*"
    )
    line_parser = RegexpParser(pattern, return_empty_list=True)
    line_matcher = SubstringMatcher("Model state after committing transaction")
    return EventCreator(Metadata(fields), line_parser, matcher=line_matcher)
def create_event_creators(configuration):
    """
    Build the tree of parsers for all THINK ANALYTICS log types.

    The returned MatchField is keyed on the "source" field and maps every
    known source name to a SourceConfiguration made of an event creator and
    the output topic obtained from ``Utils.get_output_topic``.

    :param configuration: YML config
    :return: Tree of event_creators
    """
    timezone_name = configuration.property("timezone.name")
    timezones_priority = configuration.property("timezone.priority", "idc")

    # ---- mutators applied on top of already parsed rows -------------------
    duration_inputs = [
        "started_script", "finished_script", "finished_time", "@timestamp"
    ]
    duration_event_creator = MutateEventCreator(
        None, [FieldsMapping(duration_inputs, "duration", duration_update)])

    httpaccess_timestamp_metadata = Metadata([
        ConfigurableTimestampField(
            "timestamp", "%d/%b/%Y:%H:%M:%S", timezone_name,
            timezones_priority, "@timestamp", include_timezone=True)
    ])
    # The aggregation joins date and time with a space and drops the first
    # and last character of the result (presumably the brackets around the
    # access-log timestamp -- confirm against real input).
    httpaccess_timestamp_mapping = FieldsMapping(
        ["date", "time"], "timestamp",
        agg_func=lambda x, y: (x + " " + y)[1:-1],
        remove_intermediate_fields=True)
    concat_httpaccess_timestamp_event_creator = MutateEventCreator(
        httpaccess_timestamp_metadata, [httpaccess_timestamp_mapping])

    central_timestamp_metadata = Metadata([
        ConfigurableTimestampField(
            "timestamp", "%a %d/%m/%y %H:%M:%S", timezone_name,
            timezones_priority, "@timestamp")
    ])
    central_timestamp_mapping = FieldsMapping(
        ["date", "time"], "timestamp", remove_intermediate_fields=True)
    concat_central_timestamp_event_creator = MutateEventCreator(
        central_timestamp_metadata, [central_timestamp_mapping])

    traxis_profile_id_mapping = FieldsMapping(
        ["subscriberId"], "traxis-profile-id", lambda x: x, True)
    traxis_profile_id_event_creator = MutateEventCreator(
        fields_mappings=[traxis_profile_id_mapping])

    # Keeps whatever follows the last 'crid' in the value, re-prefixed
    # with "crid".
    content_item_id_mapping = FieldsMapping(
        ["contentItemId"], "crid",
        lambda x: "crid{}".format(x.split('crid')[-1]), False)
    content_item_id_event_creator = MutateEventCreator(
        fields_mappings=[content_item_id_mapping])

    int_request_id_mapping = FieldsMapping(
        ["intRequestId"], "request-id", lambda x: x, True)
    int_request_id_event_creator = MutateEventCreator(
        fields_mappings=[int_request_id_mapping])

    # ---- localhost_access_log ---------------------------------------------
    access_log_columns = (
        "date", "time", "ip", "thread", "http_method", "url",
        "http_version", "response_code", "response_time",
    )
    access_log_creator = CompositeEventCreator().add_source_parser(
        EventCreator(
            Metadata([StringField(column) for column in access_log_columns]),
            SplitterParser(delimiter=" ", is_trim=True)))
    access_log_creator = access_log_creator.add_intermediate_result_parser(
        concat_httpaccess_timestamp_event_creator)
    access_log_creator = access_log_creator.add_intermediate_result_parser(
        EventWithUrlCreator(delete_source_field=True,
                            keys_to_underscore=False))
    access_log_creator = access_log_creator.add_intermediate_result_parser(
        traxis_profile_id_event_creator)
    access_log_creator = access_log_creator.add_intermediate_result_parser(
        content_item_id_event_creator)
    access_log_creator = access_log_creator.add_intermediate_result_parser(
        int_request_id_event_creator)
    access_log_source = SourceConfiguration(
        access_log_creator,
        Utils.get_output_topic(configuration, "httpaccess"))

    # ---- RE_SystemOut.log ---------------------------------------------------
    re_system_out_metadata = Metadata(
        [ConfigurableTimestampField(
            "@timestamp", "%d/%m/%y %H:%M:%S.%f", timezone_name,
            timezones_priority, dayfirst=True, use_smart_parsing=True)]
        + [StringField(name) for name in ("level", "script", "message")])
    re_system_out_pattern = (
        r"^\[(?P<timestamp>\d{2}\/\d{2}\/\d{2} \d{2}:\d{2}:\d{2}\.\d{3}\s\D+?)\] "
        r"(?P<level>\w+?)\s+?-\s+?(?P<script>\S+?)\s+?:\s+?(?P<message>.*)"
    )
    re_system_out_source = SourceConfiguration(
        EventCreator(re_system_out_metadata,
                     RegexpParser(re_system_out_pattern)),
        Utils.get_output_topic(configuration, "resystemout"))

    # ---- REMON_SystemOut.log ------------------------------------------------
    remon_system_out_metadata = Metadata(
        [ConfigurableTimestampField(
            "@timestamp", None, timezone_name, timezones_priority,
            dayfirst=True, use_smart_parsing=True)]
        + [StringField(name)
           for name in ("level", "script", "type", "message")])
    remon_system_out_pattern = (
        r"^\[(?P<timestamp>\d{2}\/\d{2}\/\d{2} \d{2}:\d{2}:\d{2}.\d{3}\s\D+?)\] "
        r"(?P<level>\w+?)\s+?-\s+?(?P<script>\S+?)\s+?:\s+?\[(?P<type>\S+?)\]\s+?-\s+?(?P<message>.*)"
    )
    remon_system_out_source = SourceConfiguration(
        EventCreator(remon_system_out_metadata,
                     RegexpParser(remon_system_out_pattern)),
        Utils.get_output_topic(configuration, "remonsystemout"))

    # ---- Central.log --------------------------------------------------------
    central_columns = (
        "date", "time", "level", "message", "thread",
        "c0", "c1", "c2", "role",
    )
    central_creator = CompositeEventCreator().add_source_parser(
        EventCreator(
            Metadata([StringField(column) for column in central_columns]),
            CsvParser(",", '"')))
    central_creator = central_creator.add_intermediate_result_parser(
        concat_central_timestamp_event_creator)
    central_source = SourceConfiguration(
        central_creator,
        Utils.get_output_topic(configuration, "central"))

    # ---- thinkenterprise.log ------------------------------------------------
    think_enterprise_metadata = Metadata(
        [ConfigurableTimestampField(
            "@timestamp", "%Y-%m-%d %H:%M:%S,%f",
            timezone_name, timezones_priority)]
        + [StringField(name) for name in ("level", "message")])
    think_enterprise_pattern = (
        r"^(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})"
        r":\s+?(?P<level>\w+?)\s+?-\s+?(?P<message>.*)"
    )
    think_enterprise_source = SourceConfiguration(
        EventCreator(think_enterprise_metadata,
                     RegexpParser(think_enterprise_pattern)),
        Utils.get_output_topic(configuration, "thinkenterprise"))

    # ---- gcollector.log -----------------------------------------------------
    gcollector_metadata = Metadata(
        [ConfigurableTimestampField(
            "@timestamp", "%Y-%m-%dT%H:%M:%S.%f", timezone_name,
            timezones_priority, include_timezone=True)]
        + [StringField(name) for name in ("process_uptime", "message")])
    gcollector_pattern = (
        r"^(?P<timestamp>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}.\d{4})"
        r":\s+?(?P<process_uptime>\d+?\.\d{3}):\s+?(?P<message>.*)"
    )
    gcollector_source = SourceConfiguration(
        EventCreator(gcollector_metadata, RegexpParser(gcollector_pattern)),
        Utils.get_output_topic(configuration, "gcollector"))

    # ---- server.log ---------------------------------------------------------
    server_metadata = Metadata(
        [ConfigurableTimestampField(
            "@timestamp", "%Y-%m-%d %H:%M:%S,%f",
            timezone_name, timezones_priority)]
        + [StringField(name)
           for name in ("level", "class_name", "thread", "message")])
    server_pattern = (
        r"^(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\s+?"
        r"(?P<level>\w+?)\s+?\[(?P<class_name>.+?)\]\s+?\((?P<thread>.+?)\)\s+?(?P<message>.*)"
    )
    server_source = SourceConfiguration(
        EventCreator(server_metadata, RegexpParser(server_pattern)),
        Utils.get_output_topic(configuration, "server"))

    # ---- RE_Ingest.log ------------------------------------------------------
    re_ingest_metadata = Metadata([
        StringField("started_script"),
        ConfigurableTimestampField(
            "timestamp", None, timezone_name, timezones_priority,
            "@timestamp", use_smart_parsing=True),
        StringField("message"),
        StringField("finished_script"),
        ConfigurableTimestampField(
            "finished_time", None, timezone_name, timezones_priority,
            use_smart_parsing=True),
    ])
    re_ingest_pattern = (
        r"Started\s+?(?P<started_script>.*?\.sh)\s+?"
        r"(?P<timestamp>\w+?\s+?\w+?\s+?\d{1,2}\s+?\d{2}:\d{2}:\d{2}\s+?\w+?\s+?\d{4})"
        r"(?P<message>(?:.|\s)*)Finished\s+?(?P<finished_script>.*?\.sh)\s+?"
        r"(?P<finished_time>\w+?\s+?\w+?\s+?\d{1,2}\s+?\d{2}:\d{2}:\d{2}\s+?\w+?\s+?\d{4}).*"
    )
    re_ingest_creator = CompositeEventCreator().add_source_parser(
        DictEventCreator(re_ingest_metadata,
                         RegexpMatchesParser(re_ingest_pattern)))
    re_ingest_creator = re_ingest_creator.add_intermediate_result_parser(
        duration_event_creator)
    re_ingest_source = SourceConfiguration(
        re_ingest_creator,
        Utils.get_output_topic(configuration, "reingest"))

    return MatchField("source", {
        "localhost_access_log": access_log_source,
        "RE_SystemOut.log": re_system_out_source,
        "REMON_SystemOut.log": remon_system_out_source,
        "Central.log": central_source,
        "thinkenterprise.log": think_enterprise_source,
        "gcollector.log": gcollector_source,
        "server.log": server_source,
        "RE_Ingest.log": re_ingest_source,
    })
def create_event_creators(configuration):
    """
    Build the tree of parsers for the traxis cassandra log topics.

    The returned MatchField is keyed on the "topic" field. Each topic maps
    to a SourceConfiguration whose MultipleEventCreator holds several
    regexp-based creators; they differ in the pattern and in whether the
    level or the timestamp group comes first.

    :param configuration: YML config
    :return: Tree of event_creators
    """
    timezone_name = configuration.property("timezone.name")
    timezones_priority = configuration.property("timezone.priority", "dic")

    def new_timestamp_field():
        # A fresh field object per creator, exactly as in the inline form.
        return ConfigurableTimestampField(
            "@timestamp", "%Y-%m-%d %H:%M:%S,%f",
            timezone_name, timezones_priority)

    def level_first(pattern):
        # Capture groups are ordered: level, timestamp, message.
        return EventCreator(
            Metadata([
                StringField("level"),
                new_timestamp_field(),
                StringField("message"),
            ]),
            RegexpParser(pattern))

    def timestamp_first(pattern):
        # Capture groups are ordered: timestamp, level, message.
        return EventCreator(
            Metadata([
                new_timestamp_field(),
                StringField("level"),
                StringField("message"),
            ]),
            RegexpParser(pattern))

    general_creators = MultipleEventCreator([
        level_first(
            r"^.*?\:\s+(\S+)\s\[[^\]]*\]\s+(\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}.\d+)\s+(\S[\s\S]+)"
        ),
        timestamp_first(
            r"^.*?\:\s(\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}.\d+)\s(\S+)\s+\[[^\]]*\]\s+(\S[\s\S]+)"
        ),
    ])
    general_source = SourceConfiguration(
        general_creators, Utils.get_output_topic(configuration, "general"))

    error_creators = MultipleEventCreator([
        level_first(
            r"^.*\:\s.*\:\s(\S+)\s\[[^\]]*\]\s+(\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}.\d+)\s+(\S[\s\S]+)"
        ),
        level_first(
            r"^.*\:\s(\S+)\s\[[^\]]*\]\s+(\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}.\d+)\s+(\S[\s\S]+)"
        ),
        timestamp_first(
            r"^.*\:\s(\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}.\d+)\s(\S+)\s\[[^\]]*\]\s+(\S[\s\S]+)"
        ),
    ])
    error_source = SourceConfiguration(
        error_creators, Utils.get_output_topic(configuration, "error"))

    return MatchField("topic", {
        "traxis_cassandra_log_gen": general_source,
        "traxis_cassandra_log_err": error_source,
    })
def test_simple_pattern_without_full_match(self):
    """
    With ``match=False`` the pattern ``(\\w+)`` applied to "a|b|c" must
    yield the three separate words.
    """
    # Raw string: "\w" in a normal literal is an invalid escape sequence.
    parser = RegexpParser(r"(\w+)", match=False)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(["a", "b", "c"], parser.parse("a|b|c"))
def test_simple_pattern_not_matched_with_full_match(self):
    """
    With the default matching mode, parsing a line the pattern does not
    match must raise ParsingException.
    """
    parser = RegexpParser("(d)")
    with self.assertRaises(ParsingException):
        parser.parse("a|b|c")
def test_simple_pattern_not_matched_without_full_match(self):
    """
    With ``match=False`` a pattern that occurs nowhere in the line must
    produce an empty list instead of raising.
    """
    parser = RegexpParser("(d)", match=False)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual([], parser.parse("a|b|c"))