def test_word_with_or_without_leading_num(self) -> None:
    """Run AND(KleeneStar(NumParser), KleeneStar(AlphaParser)) over the word inputs.

    NOTE(review): the comparison uses zip(), which stops at the shorter
    sequence, so inputs beyond the five expectations are not checked.
    """
    brown_hit = ParseResult(0, '')
    brown_hit.add(6, '0brown')
    fox_hit = ParseResult(0, '')
    fox_hit.add(4, '4fox')
    expected = [ParseResult(), ParseResult(), ParseResult(), brown_hit, fox_hit]

    digits = combinator.KleeneStar(NumParser())
    letters = combinator.KleeneStar(AlphaParser())
    leading_num_parser = combinator.AND(digits, letters)
    print(leading_num_parser)

    actual = [leading_num_parser(word) for word in self.inp_strings_word]

    for pos, res in enumerate(actual):
        print(pos, res)

    for pos, (want, got) in enumerate(zip(expected, actual)):
        print("[%d / %d] : comparing %s -> %s" %
              (pos, len(actual), str(want), str(got)))
        assert want == got
def test_ab_concat_offset(self) -> None:
    """Check CharParser and AND results when an explicit start ``idx`` is given."""
    a_parser = CharParser('a')
    b_parser = CharParser('b')
    ab_combo = combinator.AND(a_parser, b_parser)

    # Single-character parsers applied at their own offsets in 'ab'.
    assert ParseResult(1, 'a') == a_parser('ab', idx=0)
    assert ParseResult(2, 'b') == b_parser('ab', idx=1)

    # 'a' alone is not enough for the concatenation.
    assert ParseResult() == ab_combo('a', idx=0)
    # From offset 0 the full 'ab' is matched ...
    assert ParseResult(2, 'ab') == ab_combo('ab', idx=0)
    # ... but from offset 1 there is no leading 'a'.
    assert ParseResult() == ab_combo('ab', idx=1)
def __call__(self, inp: str, parse_inp: ParseResult = None, idx: int = 0) -> ParseResult:
    """Apply ``self.A`` repeatedly and return the whole span it consumed.

    Matching starts at ``parse_inp.last_idx()`` when ``parse_inp`` is given,
    otherwise at ``idx`` (previously ``idx`` was silently ignored).
    ``self.A`` is re-applied at the end of the previous match until it
    returns an empty result or fails to advance.

    Args:
        inp: The string being parsed.
        parse_inp: Optional earlier result; its last index is the start.
        idx: Start offset used when ``parse_inp`` is None.

    Returns:
        A new ParseResult with a single entry: the final index and the text
        ``inp[start:final]``. If nothing matched, that entry is
        ``(start, '')``.

    NOTE: ``accept_partial`` is not consulted here; a match that stops
    before the end of ``inp`` is still returned.
    """
    start_idx = parse_inp.last_idx() if parse_inp is not None else idx
    end_idx = start_idx

    # Keep consuming tokens for as long as the wrapped parser advances.
    while True:
        new_result = self.A(inp, idx=end_idx)
        if new_result.empty() or new_result.last_idx() <= end_idx:
            break
        end_idx = new_result.last_idx()

    parser_out = ParseResult()
    parser_out.add(end_idx, inp[start_idx:end_idx])
    return parser_out
def test_parse_strings(self) -> None:
    """Run StringParser('ayy') over ``self.inp_strings`` and compare results."""
    hit = ("ayy",)
    miss = ()
    # One marker per input: a hit expects ParseResult(3, "ayy"), a miss
    # expects an empty ParseResult.
    pattern = [miss, miss, hit, miss, hit, miss, miss, hit, hit]
    exp_outputs_2 = [ParseResult(3, *m) if m else ParseResult() for m in pattern]

    s_parser = StringParser('ayy')
    parser_outputs = [s_parser(text, idx=0) for text in self.inp_strings]

    # display
    for pos, produced in enumerate(parser_outputs):
        print('s_parser(%s) produced : %s' % (self.inp_strings[pos], str(produced)))

    assert len(parser_outputs) == len(exp_outputs_2)
    for want, got in zip(exp_outputs_2, parser_outputs):
        assert want == got
def test_kleene_dot_num(self) -> None:
    """Run KleeneDot(NumParser) over ``self.inp_strings_num`` and compare results."""
    ks = combinator.KleeneDot(NumParser())

    # (last index, matched text) per input; the input is named on the right.
    expected_pairs = [
        (0, ''),       # []
        (1, '1'),      # '1'
        (1, '2'),      # '2'
        (2, '12'),     # '12'
        (3, '122'),    # '122'
        (3, '122'),    # '122a'
        (0, ''),       # 'a112'
        (1, '1'),      # '1a2'
    ]
    exp_outputs = [ParseResult(end, text) for end, text in expected_pairs]

    # Parse the strings
    results = [ks(text) for text in self.inp_strings_num]

    print('%s results for each string :' % str(ks))
    for pos, res in enumerate(results):
        print(pos, res, repr(ks), self.inp_strings_num[pos])

    assert len(results) == len(exp_outputs)
    for pos, (want, got) in enumerate(zip(exp_outputs, results)):
        print("[%d / %d] : comparing %s -> %s" %
              (pos, len(results), str(want), str(got)))
        assert want == got
def __call__(self, inp: str, parse_inp: ParseResult = None, idx: int = 0) -> ParseResult:
    """Match a run of whitespace characters in ``inp``.

    Scanning starts at ``parse_inp.last_idx()`` when ``parse_inp`` is given,
    otherwise at ``idx``.

    Fixes over the previous version:
      * ``idx`` is honoured instead of being overwritten with 0.
      * Input with nothing left to scan no longer raises on an unbound
        loop variable.
      * The first non-whitespace character is no longer included in the
        match.

    Args:
        inp: The string being parsed.
        parse_inp: Optional earlier result; its last index is the start.
        idx: Start offset used when ``parse_inp`` is None.

    Returns:
        A ParseResult with one entry (end index, matched whitespace), or an
        empty ParseResult when there is no whitespace at the start offset.
    """
    start = parse_inp.last_idx() if parse_inp is not None else idx

    # Advance over the whitespace run; ``end`` stops on the first
    # non-space character or at the end of the input.
    end = start
    while end < len(inp) and inp[end].isspace():
        end += 1

    parse_result = ParseResult()
    if end > start:
        parse_result.add(end, inp[start:end])
    return parse_result
def test_kleene_star_char(self) -> None:
    """Run KleeneStar(CharParser('a')) over ``self.inp_strings_alpha``.

    Also re-runs the last input with ``accept_partial`` switched on.
    """
    def seeded(*entry):
        # Every expectation starts from ParseResult(0, ''); a match adds
        # one more (index, text) entry on top of it.
        res = ParseResult(0, '')
        if entry:
            res.add(*entry)
        return res

    ks = combinator.KleeneStar(CharParser('a'))

    # expected outputs
    exp_outputs = [
        seeded(),             # ''
        seeded(1, 'a'),       # 'a'
        seeded(2, 'aa'),      # 'aa'
        seeded(3, 'aaa'),     # 'aaa'
        seeded(4, 'aaaa'),    # 'aaaa'
        seeded(3, 'aaa'),     # 'aaabcdefg'
    ]

    # Parse the strings
    results = [ks(text) for text in self.inp_strings_alpha]

    print('%s results for each string :' % str(ks))
    for pos, res in enumerate(results):
        print(pos, res, repr(ks), self.inp_strings_alpha[pos])

    assert len(results) == len(exp_outputs)
    for pos, (want, got) in enumerate(zip(exp_outputs, results)):
        print("[%d / %d] : comparing %s -> %s" %
              (pos, len(results), str(want), str(got)))
        assert want == got

    # With partial matching turned on, 'aaabcdefg' should still yield 'aaa'.
    exp_partial_match = seeded(3, 'aaa')
    ks.accept_partial = True
    partial_match_result = ks(self.inp_strings_alpha[5])
    assert exp_partial_match == partial_match_result
def test_left_combinator(self) -> None:
    """Left(CharParser("<"), Identifier()) on the test element yields only "<"."""
    left_combo = Left(CharParser("<"), Identifier())
    parse_result = left_combo(self.test_element)
    assert parse_result == ParseResult(1, "<")
def test_right_combinator(self) -> None:
    """Right(CharParser("<"), Identifier()) on the test element yields the identifier part.

    The expected text carries a trailing "/" ("some-element/"); this test
    pins that output.
    """
    right_combo = Right(CharParser("<"), Identifier())
    parse_result = right_combo(self.test_element)
    assert parse_result == ParseResult(14, "some-element/")
def __call__(self, inp: str, parse_inp: ParseResult = None, idx: int = 0) -> ParseResult:
    """Repeat ``self.A`` from the start offset and report the consumed text.

    The start offset is ``parse_inp.last_idx()`` when ``parse_inp`` is
    supplied and ``idx`` otherwise (``idx`` used to be ignored). The wrapped
    parser is called again at the end of each successful match, and the
    loop stops as soon as it yields an empty result or does not move
    forward.

    Args:
        inp: The string being parsed.
        parse_inp: Optional earlier result whose last index is the start.
        idx: Start offset used when ``parse_inp`` is None.

    Returns:
        A fresh ParseResult containing one entry: the index reached and
        ``inp[start:reached]`` (an empty string if nothing matched).
    """
    if parse_inp is not None:
        start_idx = parse_inp.last_idx()
    else:
        start_idx = idx

    reached = start_idx
    while True:
        attempt = self.A(inp, idx=reached)
        # Stop on failure or on a match that makes no progress; the latter
        # would otherwise loop forever.
        if attempt.empty() or attempt.last_idx() <= reached:
            break
        reached = attempt.last_idx()

    parser_out = ParseResult()
    parser_out.add(reached, inp[start_idx:reached])
    return parser_out
def test_one_or_more(self) -> None:
    """Run OneOrMore() over ``self.other_comb_inputs`` and compare results."""
    one_or_more = OneOrMore()

    expected_results = [ParseResult()]
    expected_results.extend(
        ParseResult(len(text), text)
        for text in ("at least one char", "0 or more", "1 or more")
    )

    parse_results = [one_or_more(s) for s in self.other_comb_inputs]

    for pos, res in enumerate(parse_results):
        print(pos, res)

    assert len(parse_results) == len(expected_results)
    for got, want in zip(parse_results, expected_results):
        assert got == want
def test_identifier(self) -> None:
    """Run Identifier() over ``self.iden_input`` and compare results."""
    iden_parser = Identifier()

    valid_long = ParseResult(26, "this-is-a-valid-identifier")
    valid_short = ParseResult(6, "valid2")
    rejected = ParseResult()
    expected_results = [valid_long, valid_short, rejected]

    parse_results = [iden_parser(candidate) for candidate in self.iden_input]

    # Display
    for pos, res in enumerate(parse_results):
        print(pos, res)

    assert len(parse_results) == len(expected_results)
    for got, want in zip(parse_results, expected_results):
        assert got == want
def test_tag_open_combinator(self) -> None:
    """AND(CharParser("<"), Identifier()) on the test element.

    NOTE: the expected text includes the trailing "/" because that is what
    the combinator currently outputs; the original author flagged this as
    unsatisfying.
    """
    tag_open_combinator = AND(CharParser("<"), Identifier())
    parse_result = tag_open_combinator(self.test_element)
    assert parse_result == ParseResult(14, "<some-element/")
def __call__(self, inp: str, parse_inp: ParseResult = None, idx: int = 0) -> ParseResult:
    """Match an identifier: a letter followed by letters, digits or '-'.

    Scanning starts at ``parse_inp.last_idx()`` when ``parse_inp`` is given,
    otherwise at ``idx``.

    Fixes over the previous version:
      * ``idx`` is honoured instead of being overwritten with 0.
      * Input with nothing left to scan returns an empty result instead of
        raising on an unbound loop variable.
      * A one-letter identifier that fills the rest of the input is
        accepted instead of being reported as a failure.

    NOTE(review): when scanning stops on a character that cannot be part of
    an identifier, that character is still included in the returned text
    and index (e.g. "some-element/"). Tests in this file expect exactly
    that output, so the behaviour is kept as is.

    Args:
        inp: The string being parsed.
        parse_inp: Optional earlier result; its last index is the start.
        idx: Start offset used when ``parse_inp`` is None.

    Returns:
        A ParseResult with one entry (end index, matched text), or an empty
        ParseResult when no identifier starts at the start offset.
    """
    start = parse_inp.last_idx() if parse_inp is not None else idx
    parse_result = ParseResult()

    offset = -1          # stays -1 when there is nothing to scan
    stopped_at = None    # offset of the character that ended the scan
    for offset, ch in enumerate(inp[start:]):
        if ch.isalpha():
            continue
        # Digits and '-' are only legal after the first character.
        if offset > 0 and (ch.isalnum() or ch == '-'):
            continue
        stopped_at = offset
        break

    # Nothing to scan, or the very first character cannot start an
    # identifier.
    if offset < 0 or stopped_at == 0:
        return parse_result

    end = start + offset + 1
    parse_result.add(end, inp[start:end])
    return parse_result
def test_concatenation(self) -> None:
    """Run AND(CharParser('a'), CharParser('b')) over ``self.inp_strings``."""
    # this combinator can recognise 'ab'
    ab_combo = combinator.AND(CharParser('a'), CharParser('b'))

    def ab_hit():
        return ParseResult(2, 'ab')

    # expected outputs
    exp_outputs = [
        ParseResult(),    # ''
        ParseResult(),    # 'a'
        ab_hit(),         # 'ab'
        ParseResult(),    # 'aa'
        ParseResult(),    # 'aaa'
        ab_hit(),         # 'aba'
    ]

    results = [ab_combo(text) for text in self.inp_strings]

    print('%s results for each string :' % str(ab_combo))
    for pos, res in enumerate(results):
        print(pos, res)

    assert len(results) == len(exp_outputs)
    for pos, (want, got) in enumerate(zip(exp_outputs, results)):
        print("[%d / %d] : comparing %s -> %s" %
              (pos, len(results), str(want), str(got)))
        assert want == got
def test_kleene_dot_char(self) -> None:
    """Run KleeneDot(CharParser('a')) over ``self.inp_strings_alpha``."""
    ks = combinator.KleeneDot(CharParser('a'))

    # expected (last index, matched text) per input
    expected_pairs = [
        (0, ''),        # ''
        (1, 'a'),       # 'a'
        (2, 'aa'),      # 'aa'
        (3, 'aaa'),     # 'aaa'
        (4, 'aaaa'),    # 'aaaa'
        (3, 'aaa'),     # 'aaabcdefg'
    ]
    exp_outputs = [ParseResult(end, text) for end, text in expected_pairs]

    # Parse the strings
    results = [ks(text) for text in self.inp_strings_alpha]

    print('%s results for each string :' % str(ks))
    for pos, res in enumerate(results):
        print(pos, res, repr(ks), self.inp_strings_alpha[pos])

    assert len(results) == len(exp_outputs)
    for pos, (want, got) in enumerate(zip(exp_outputs, results)):
        print("[%d / %d] : comparing %s -> %s" %
              (pos, len(results), str(want), str(got)))
        assert want == got
def test_word_with_trailing_num(self) -> None:
    """Run AND(KleeneDot(AlphaParser), NumParser) over the word inputs.

    Only the last expectation is a match: 'jumped2'.
    """
    exp_outputs = [ParseResult() for _ in range(5)]
    exp_outputs.append(ParseResult(7, 'jumped2'))

    letters = AlphaParser()
    digit = NumParser()
    word = combinator.KleeneDot(letters)
    trailing_num_parser = combinator.AND(word, digit)

    parser_output = [trailing_num_parser(text) for text in self.inp_strings_word]

    for pos, res in enumerate(parser_output):
        print(pos, res)

    for pos, (want, got) in enumerate(zip(exp_outputs, parser_output)):
        print("[%d / %d] : comparing %s -> %s" %
              (pos, len(parser_output), str(want), str(got)))
        assert want == got
def test_zero_or_more(self) -> None:
    """Run ZeroOrMore(CharParser('a')) over ``self.inp_strings_alpha``.

    Fixes over the previous version:
      * The header print referenced an undefined name ``ks`` (NameError);
        it now uses ``comb``.
      * The inputs were taken from ``self.inp_strings_num`` although the
        six expectations are written for the alpha inputs ('' .. 'aaabcdefg').

    NOTE(review): the first expectation is ``ParseResult(1, '')`` whereas
    the other empty-input tests in this file use ``ParseResult(0, '')``;
    left unchanged -- confirm against ZeroOrMore's intended output.
    """
    comb = combinator.ZeroOrMore(CharParser('a'))
    inputs = self.inp_strings_alpha

    # expected outputs
    exp_outputs = [
        ParseResult(1, ''),       # ''
        ParseResult(1, 'a'),      # 'a'
        ParseResult(2, 'aa'),     # 'aa'
        ParseResult(3, 'aaa'),    # 'aaa'
        ParseResult(4, 'aaaa'),   # 'aaaa'
        ParseResult(3, 'aaa'),    # 'aaabcdefg'
    ]

    # Parse the strings
    results = [comb(inp) for inp in inputs]

    print('%s results for each string :' % str(comb))
    for n, r in enumerate(results):
        print(n, r, repr(comb), inputs[n])

    assert len(results) == len(exp_outputs)
    for n, (exp, out) in enumerate(zip(exp_outputs, results)):
        print("[%d / %d] : comparing %s -> %s" %
              (n, len(results), str(exp), str(out)))
        assert exp == out
def __call__(self, inp: str, parse_inp: ParseResult = None, idx: int = 0) -> ParseResult:
    """Apply ``self.E`` once, then ``self.A`` as many times as it advances.

    ``self.E`` is called with ``parse_inp`` and ``idx``; its result is fed
    to the first ``self.A`` call. If that call yields no entries, the
    ``self.E`` result is returned; if it yields exactly one entry, that
    result is returned as is. Otherwise ``self.A`` is repeated from the last
    index reached until it stops advancing or returns an empty result.

    NOTE(review): the final slice starts at ``start_idx - 1``, which looks
    like it assumes the first match consumed exactly one character --
    confirm before using this with multi-character parsers.
    NOTE: ``accept_partial`` is not consulted here.
    """
    # Handle the 'zeroth' time
    seed = self.E(inp, parse_inp=parse_inp, idx=idx)
    current = self.A(inp, parse_inp=seed)

    entry_count = len(current)
    if entry_count == 0:
        return seed
    if entry_count == 1:
        return current

    # Repeat without an upper bound for as long as progress is made.
    start_idx = current.last_idx()
    while True:
        candidate = self.A(inp, idx=current.last_idx())
        stalled = candidate.last_idx() <= current.last_idx()
        if stalled or candidate.empty():
            break
        current = candidate

    combined = ParseResult()
    combined.extend(seed)
    final_idx = current.last_idx()
    combined.add(final_idx, inp[start_idx - 1:final_idx])
    return combined
def __call__(self, inp: str, parse_inp: ParseResult = None, idx: int = 0) -> ParseResult:
    """Sequence ``self.A`` then ``self.B``; both must advance for a match.

    ``self.A`` runs first (given ``parse_inp`` and ``idx``). If it returns
    an empty result or does not move past the start offset, an empty
    ParseResult is returned. ``self.B`` is then run on ``self.A``'s result
    and must end strictly after it, otherwise the result is again empty.

    Returns:
        A new ParseResult extended with ``self.A``'s entries and then
        updated with ``self.B``'s last index and last string.
    """
    if parse_inp is None:
        origin = idx
    else:
        origin = parse_inp.last_idx()

    first = self.A(inp, parse_inp, idx=idx)
    if first.empty() or first.last_idx() == origin:
        return ParseResult()

    second = self.B(inp, first)
    if second.last_idx() <= first.last_idx():
        return ParseResult()

    combined = ParseResult()
    combined.extend(first)
    combined.update(second.last_idx(), second.last_str())
    return combined
def test_alternation(self) -> None:
    """OR(StringParser('boo'), StringParser('boondoggle')) on ``self.inp_string_1`` yields 'boo'."""
    expected = ParseResult()
    expected.add(3, 'boo')

    alt_combo = combinator.OR(StringParser('boo'), StringParser('boondoggle'))
    alt_result = alt_combo(self.inp_string_1)

    print(alt_combo)
    print(alt_result)
    assert alt_result == expected
def __call__(self, inp: str, parse_inp: ParseResult = None, idx: int = 0) -> ParseResult:
    """Apply ``self.A`` one or more times and return the consumed span.

    The first application of ``self.A`` must produce at least one entry;
    if it does not, its result is returned unchanged. ``self.A`` is then
    repeated from the last index reached until it stops advancing or
    returns an empty result.

    Fixes over the previous version:
      * ``idx`` is forwarded to the first ``self.A`` call instead of being
        dropped.
      * The returned text starts at the match's start offset rather than at
        ``first_match_end - 1``, which was only right when the first match
        was exactly one character long.

    Args:
        inp: The string being parsed.
        parse_inp: Optional earlier result; its last index is the start.
        idx: Start offset used when ``parse_inp`` is None.

    Returns:
        A new ParseResult with one entry: the final index and the text from
        the start offset up to it; or the first ``self.A`` result when that
        has no entries.
    """
    # Assumes self.A starts matching at parse_inp.last_idx() when parse_inp
    # is given and at idx otherwise, as the other parsers in this file do.
    match_start = parse_inp.last_idx() if parse_inp is not None else idx

    result = self.A(inp, parse_inp=parse_inp, idx=idx)
    if len(result) == 0:
        return result

    # Repeat without an upper bound for as long as progress is made.
    while True:
        new_result = self.A(inp, idx=result.last_idx())
        if new_result.last_idx() <= result.last_idx() or new_result.empty():
            break
        result = new_result

    end_idx = result.last_idx()
    parser_out = ParseResult()
    parser_out.add(end_idx, inp[match_start:end_idx])
    return parser_out
def test_quoted_string(self) -> None:
    """Run QuotedString() over ``self.test_strings`` and compare results.

    The first expectation includes the surrounding double quotes in the
    matched text.
    """
    quoted_string = QuotedString()

    full_match = ParseResult(31, '"this is a valid quoted string"')
    # NOTE: with the AND combinator this one is an empty result.
    and_miss = ParseResult()
    # NOTE(review): the text here is an empty list, not an empty string.
    list_text = ParseResult(0, [])
    final_miss = ParseResult()
    expected_results = [full_match, and_miss, list_text, final_miss]

    parse_results = [quoted_string(s) for s in self.test_strings]

    # Display
    for pos, res in enumerate(parse_results):
        print(pos, res)

    assert len(parse_results) == len(expected_results)
    for got, want in zip(parse_results, expected_results):
        assert got == want
def test_kleene_star_num(self) -> None:
    """Run KleeneStar(NumParser) over ``self.inp_strings_num`` and compare results."""
    def seeded(*entry):
        # Each expectation begins as ParseResult(0, ''); a match appends
        # one (index, text) entry.
        res = ParseResult(0, '')
        if entry:
            res.add(*entry)
        return res

    ks = combinator.KleeneStar(NumParser())

    exp_outputs = [
        seeded(),            # ''
        seeded(1, '1'),      # '1'
        seeded(1, '2'),      # '2'
        seeded(2, '12'),     # '12'
        seeded(3, '122'),    # '122'
        seeded(3, '122'),    # '122a'
        seeded(),            # 'a112'
        seeded(1, '1'),      # '1a2' -- once we see 'a' we stop
    ]

    # Parse the strings
    results = [ks(text) for text in self.inp_strings_num]

    print('%s results for each string :' % str(ks))
    for pos, res in enumerate(results):
        print(pos, res, repr(ks), self.inp_strings_num[pos])

    assert len(results) == len(exp_outputs)
    for pos, (want, got) in enumerate(zip(exp_outputs, results)):
        print("[%d / %d] : comparing %s -> %s" %
              (pos, len(results), str(want), str(got)))
        assert want == got
def test_inp_string_single(self) -> None:
    """Run AlphaParser over ``self.inp_strings_single`` and compare results."""
    alpha_parser = AlphaParser()

    # Matched text per input; None marks an expected empty ParseResult.
    matched_text = [
        None,   # ''
        'a',    # 'a'
        'b',    # 'b'
        None,   # '1'
        None,   # '2'
        None,   # '{'
    ]
    exp_outputs = [
        ParseResult() if text is None else ParseResult(1, text)
        for text in matched_text
    ]

    parser_outputs = [alpha_parser(text) for text in self.inp_strings_single]

    assert len(parser_outputs) == len(exp_outputs)
    for want, got in zip(exp_outputs, parser_outputs):
        assert want == got
def test_parse_char(self) -> None:
    """Run CharParser('a') over ``self.inp_strings_1`` and compare results.

    Inputs 1 to 4 are expected to match their own first character; the
    first and last inputs are expected to produce an empty result.
    """
    exp_outputs_1 = [ParseResult()]
    exp_outputs_1.extend(
        ParseResult(1, self.inp_strings_1[k][0]) for k in range(1, 5)
    )
    exp_outputs_1.append(ParseResult())

    cparser = CharParser('a')
    parser_outputs = [cparser(text, idx=0) for text in self.inp_strings_1]

    # display
    for pos, produced in enumerate(parser_outputs):
        print('Input %d [%s] produced : %s' %
              (pos, self.inp_strings_1[pos], str(produced)))

    assert len(parser_outputs) == len(exp_outputs_1)
    for want, got in zip(exp_outputs_1, parser_outputs):
        assert want == got
def test_word_one_or_more_chars(self) -> None:
    """Run KleeneDot(AlphaParser) over ``self.inp_strings_word`` and compare results."""
    expected_pairs = [
        (0, ''),
        (3, 'the'),
        (5, 'quick'),
        (0, ''),
        (0, ''),
        (6, 'jumped'),
    ]
    exp_outputs = [ParseResult(end, text) for end, text in expected_pairs]

    word_parser = combinator.KleeneDot(AlphaParser())
    parser_output = [word_parser(text) for text in self.inp_strings_word]

    for pos, res in enumerate(parser_output):
        print(pos, res)

    for pos, (want, got) in enumerate(zip(exp_outputs, parser_output)):
        print("[%d / %d] : comparing %s -> %s" %
              (pos, len(parser_output), str(want), str(got)))
        assert want == got