def test_regex_from_group_spans(self):
    """Check the regex that regex_from_group_spans builds from nested spans.

    The sample line is 'Error on comp21'.  Two spans are supplied: one over
    characters 9-15 ('comp21') and one nested inside it over characters
    13-15 ('21').  The result must equal the expected regex literal below,
    in which the inner pattern sits as a group inside the outer one.
    """
    line = r'Error on comp21'
    expected_regex = r'Error on (comp(\d\d))'

    # Outer span first, then the span nested inside it.
    nested_spans = SpanList([
        Span(9, 15, pattern=r'comp(\d\d)'),
        Span(13, 15, pattern=r'\d\d'),
    ])

    built_regex = regex_from_group_spans(nested_spans, line)
    assert expected_regex == built_regex
def test_find_date_spans_by_force(self):
    """Check which substrings _find_date_spans_by_force marks as dates.

    The sample line contains a dash-separated date followed by a
    bracketed day/month/year plus time-with-offset stamp.  Exactly three
    spans are expected; once ordered with SpanList.sort_by_start_and_end
    they must cover the three substrings listed in ``expected_dates``.
    """
    line = r'2015-12-03 or [10/Oct/1999:21:15:05 +0500] "GET /index.html HTTP/1.0" 200 1043'
    expected_dates = ('2015-12-03', '10/Oct/1999', '21:15:05 +0500')

    found = _find_date_spans_by_force(line)
    assert len(found) == 3

    # Sort so the extracted substrings can be compared positionally.
    ordered = SpanList(found).sort_by_start_and_end()
    extracted = [line[span.start:span.end] for span in ordered]

    for position, expected in enumerate(expected_dates):
        assert extracted[position] == expected
def test_find_spans_by_regex(self):
    """Check that find_spans_by_regex returns one span per regex match.

    find_spans_by_regex is given a mapping from compiled pattern to its
    source string, and a sample line containing one dash-separated date
    and two ``compNN`` tokens.  Exactly three spans are expected; once
    ordered with SpanList.sort_by_start_and_end they must cover the date
    and then the two tokens in order of appearance.
    """
    patterns = [r"\d+-\d+-\d\d", r"comp\d\d"]
    # Mapping of compiled pattern -> original pattern text, built with a
    # dict comprehension rather than dict() over a generator of tuples.
    regexes = {re.compile(pattern): pattern for pattern in patterns}
    text = r"2015-12-03 Data migration from comp36 to comp21 failed"

    spans = find_spans_by_regex(regexes, text)
    assert len(spans) == 3

    # Sort so the matched substrings can be compared positionally.
    spans = SpanList(spans).sort_by_start_and_end()
    groups = [text[s.start:s.end] for s in spans]
    assert groups[0] == '2015-12-03'
    assert groups[1] == 'comp36'
    assert groups[2] == 'comp21'