def get_interpretation(self, words):
    rulename = self.__class__.__name__
    print("Trying to match with regex: {}".format(rulename))
    match = refo.match(self.regex + Literal(_EOL), words + [_EOL])
    if not match:
        print("No match")
        return None, None
    try:
        match = Match(match, words)
        result = self.interpret(match)
    except BadSemantic as error:
        print(str(error))
        return None, None
    except AttributeError as error:
        print(str(error))
        return None, None
    try:
        expression, userdata = result
    except TypeError:
        expression, userdata = result, None
    expression.rule_used = rulename
    return expression, userdata
def get_interpretation(self, words):
    rulename = self.__class__.__name__
    logger.debug("Trying to match with regex: {}".format(rulename))
    match = refo.match(self.regex + Literal(_EOL), words + [_EOL])
    if not match:
        logger.debug("No match")
        return None, None
    try:
        match = Match(match, words)
        result = self.interpret(match)
    except BadSemantic as error:
        logger.debug(str(error))
        return None, None
    try:
        expression, userdata = result
    except TypeError:
        expression, userdata = result, None
    # expression.rule_used = rulename
    for x in expression:
        x.rule_used = rulename
    return expression, userdata
def run_tests(rule_regexes, evidences, answers):
    predictions = []
    real_labels = []
    evidences_with_labels = []
    colorama_init()
    formatter = TerminalEvidenceFormatter()
    for name, regex, answer in rule_regexes:
        title = "Matches for rule '{}' (value: {})".format(name, answer)
        print("\n{}\n{}".format(title, "-" * len(title)))
        anything_matched = False
        for evidence in evidences:
            tokens_to_match = generate_tokens_to_match(evidence)
            match = refo.match(regex, tokens_to_match)
            if match:
                anything_matched = True
                print(" * {}".format(formatter.colored_text(evidence)))
            if evidence in answers and answers[evidence] is not None:
                evidences_with_labels.append(evidence)
                real_labels.append(answers[evidence])
                if match:
                    predictions.append(answer)
                else:
                    predictions.append(False)
        if not anything_matched:
            print(" nothing matched")
        print()
    if real_labels:
        results = result_dict_from_predictions(
            evidences_with_labels, real_labels, predictions)
        results.pop("end_time")
        keys = [
            "true_positives", "true_negatives",
            "false_positives", "false_negatives",
            "precision", "recall", "accuracy", "f1",
        ]
        title = "Metrics"
        print("{}\n{}".format(title, "-" * len(title)))
        for key in keys:
            print("{:>15}: {:.2f}".format(key, results[key]))
def test_match2(self):
    # This regular expression is known to kill the python re module
    # because it exploits the fact that the implementation has exponential
    # worst case complexity.
    # Instead, this implementation has polynomial worst case complexity,
    # and therefore this test should finish in a reasonable time.
    N = 100
    a = refo.Literal("a")
    string = "a" * N
    regex = refo.Question(a) * N + a * N
    m = refo.match(regex, string)
    self.assertNotEqual(m, None)
def match(self, evidence):
    subject_kind = evidence.left_entity_occurrence.entity.kind.name
    object_kind = evidence.right_entity_occurrence.entity.kind.name
    Subject = refo.Plus(ConditionPredicate(is_subj=True, kinds__has=subject_kind))
    Object = refo.Plus(ConditionPredicate(is_obj=True, kinds__has=object_kind))
    tokens_to_match = list(self.generate_tokens_to_match(evidence)) + [_EOL]
    for rule in self.rules:
        regex = rule(Subject, Object) + refo.Literal(_EOL)
        match = refo.match(regex, tokens_to_match)
        if match:
            return rule.answer
def test_match_path(self):
    seq = [
        [1, 2],     # x and y
        [1],        # x
        [1, 2, 3],  # x, y and z
        [1, 2],     # x and y
        [2, 3],     # y and z
        [0, 4, 5],
        [],
    ]
    regex = refo.Star(self.y) + refo.Plus(self.x + self.z)
    m = refo.match(regex, seq, keep_path=True)
    self.assertIsInstance(m, Match)
    path = m.get_path()
    self.assertEqual([4, 1, 9, 1, 9], path)
def test_match_path(self):
    seq = [
        [1, 2],     # x and y
        [1],        # x
        [1, 2, 3],  # x, y and z
        [1, 2],     # x and y
        [2, 3],     # y and z
        [0, 4, 5],
        [],
    ]
    regex = refo.Star(self.y) + refo.Plus(self.x + self.z)
    m = refo.match(regex, seq, keep_path=True)
    self.assertIsInstance(m, Match)
    path = m.get_path()
    self.assertEqual([4, 1, 9, 1, 9], path)
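Reading the expected value off the test data: get_path() returns the value each predicate's function produced for the element it consumed (here, squares from a path_function-style predicate, as in the standalone example further down), so the assertion [4, 1, 9, 1, 9] decomposes as sketched below. The trace is an inference from the test itself, not from library documentation.

# Star(self.y)      consumes [1, 2]       -> 2 * 2 == 4
# Plus(self.x + self.z), first repetition:
#     self.x        consumes [1]          -> 1 * 1 == 1
#     self.z        consumes [1, 2, 3]    -> 3 * 3 == 9
# Plus(self.x + self.z), second repetition:
#     self.x        consumes [1, 2]       -> 1 * 1 == 1
#     self.z        consumes [2, 3]       -> 3 * 3 == 9
# hence path == [4, 1, 9, 1, 9]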
def match(self, evidence):
    subject_kind = evidence.left_entity_occurrence.entity.kind.name
    object_kind = evidence.right_entity_occurrence.entity.kind.name
    Subject = refo.Plus(
        ConditionPredicate(is_subj=True, kinds__has=subject_kind))
    Object = refo.Plus(
        ConditionPredicate(is_obj=True, kinds__has=object_kind))
    tokens_to_match = list(
        self.generate_tokens_to_match(evidence)) + [_EOL]
    for rule in self.rules:
        regex = rule(Subject, Object) + refo.Literal(_EOL)
        match = refo.match(regex, tokens_to_match)
        if match:
            return rule.answer
def process_corpus(input_filename, output_filename):
    # Pickle data is read and written in binary mode.
    with open(input_filename, 'rb') as input_f:
        original_corpus = pickle.load(input_f)
    for instance in original_corpus:
        words = instance['question']
        rules = []
        for regex in freebase_app.partial_rules:
            match = refo.match(regex + refo.Literal(_EOL), words + [_EOL])
            if match:
                rules.append(repr(regex))
        instance['question'] = (instance['question'], rules)
    with open(output_filename, 'wb') as output_f:
        pickle.dump(original_corpus, output_f)
def get_semantics(self, words):
    rulename = self.__class__.__name__
    logger.debug("Trying to match with regex: {}".format(rulename))
    match = refo.match(self.regex + Literal(_EOL), words + [_EOL])
    if not match:
        logger.debug("No match")
        return None, None
    try:
        match = Match(match, words)
        result = self.semantics(match)
    except BadSemantic as error:
        logger.debug(str(error))
        return None, None
    try:
        expression, userdata = result
    except TypeError:
        expression, userdata = result, None
    expression.rule_used = rulename
    return expression, userdata
def match(self, evidence):
    tokens_to_match = generate_tokens_to_match(evidence)
    for regex, answer in self.rule_regexes:
        match = refo.match(regex, tokens_to_match)
        if match:
            return answer
import refo


def path_function(x):
    def f(xs):
        if x in xs:
            return x * x
        return None
    return f


x = refo.Predicate(path_function(1))
y = refo.Predicate(path_function(2))
z = refo.Predicate(path_function(3))

seq = [
    [1, 2],     # x and y
    [1],        # x
    [1, 2, 3],  # x, y and z
    [3],        # z
    [0, 4, 5],
    [],
]

regex = refo.Star(y) + refo.Plus(x + z)
m = refo.match(regex, seq, keep_path=True)
print(m.get_path())
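Judging from the unit test above, the path holds the value returned by path_function for every element the match consumed, so this script would be expected to print [4, 1, 9]; the trace below is an inference from the predicates defined in this snippet, not documented output.

# Star(y)      consumes [1, 2]       -> 2 * 2 == 4
# Plus(x + z): x consumes [1]        -> 1 * 1 == 1
#              z consumes [1, 2, 3]  -> 3 * 3 == 9
#              x cannot match [3], so the repetition (and the match) stops here
# expected: [4, 1, 9]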
def test_match1(self):
    regex = self.b + self.b + self.a + self.a + self.b
    strregex = re.compile("bbaab")
    m = refo.match(regex, self.seq)
    strm = strregex.match(self.string)
    self._eq_span_n_stuff(m, strm)
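This test relies on fixture attributes (self.a, self.b, self.seq, self.string) and a helper _eq_span_n_stuff defined elsewhere in the test class. A minimal hypothetical fixture that would make the refo/re comparison meaningful could look like the sketch below; the attribute values and the helper body are assumptions, not the library's actual test code.

import re
import unittest
import refo


class MatchFixtureSketch(unittest.TestCase):
    def setUp(self):
        # Hypothetical data: character predicates so the refo pattern and the
        # string regex "bbaab" describe the same language over the same input.
        self.string = "bbaabb"
        self.seq = list(self.string)
        self.a = refo.Predicate(lambda ch: ch == "a")
        self.b = refo.Predicate(lambda ch: ch == "b")

    def _eq_span_n_stuff(self, m, strm):
        # Both engines matched, and they report the same span (this assumes
        # refo's Match exposes span(), mirroring re's match objects).
        self.assertIsNotNone(m)
        self.assertIsNotNone(strm)
        self.assertEqual(m.span(), strm.span())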
from refo import Literal, Question, match
import re
import time

# This regular expression is known to kill the python re module
# because it exploits the fact that the implementation has exponential
# worst case complexity.
# Instead, this implementation has polynomial worst case complexity,
# and therefore this test should finish in a reasonable time.
# You might want to try with N = 20, 30, 40, 100 to see what happens

N = 30
a = Literal("a")
string = "a" * N
regex = Question(a) * N + a * N

start = time.time()
m = match(regex, string)
end = time.time()
print "Refo finished in {0:.2} seconds".format(end - start)

regex = "(:?a?){{{0}}}a{{{0}}}".format(N)
print "Trying", regex
regex = re.compile(regex)
start = time.time()
regex.match(string)
end = time.time()
print "Python re finished in {0:.2} seconds".format(end - start)
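To make the comparison concrete, here is what the two patterns look like for a small N. This is a worked illustration of the comment above, with the re pattern string taken from the format call exactly as written (the "(:?...)" spelling is presumably meant as a non-capturing "(?:...)" group, but either way the optional/mandatory split is what forces the backtracking blow-up).

# With N = 3:
#   refo pattern:  Question(a) * 3 + a * 3   ~   a? a? a? a a a
#   re pattern:    "(:?a?){{{0}}}a{{{0}}}".format(3)  ->  "(:?a?){3}a{3}"
# Matched against "aaa", a backtracking engine may have to try exponentially
# many ways of dividing the a's between the optional and the mandatory parts,
# whereas refo's polynomial algorithm does not.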
import refo


def path_function(x):
    def f(xs):
        if x in xs:
            return x * x
        return None
    return f


x = refo.Predicate(path_function(1))
y = refo.Predicate(path_function(2))
z = refo.Predicate(path_function(3))

seq = [
    [1, 2],     # x and y
    [1],        # x
    [1, 2, 3],  # x, y and z
    [3],        # z
    [0, 4, 5],
    [],
]

regex = refo.Star(y) + refo.Plus(x + z)
m = refo.match(regex, seq, keep_path=True)
print m.get_path()
#!/usr/bin/python

from refo import Literal, Question, match
import re
import time

# This regular expression is known to kill the python re module
# because it exploits the fact that the implementation has exponential
# worst case complexity.
# Instead, this implementation has polynomial worst case complexity,
# and therefore this test should finish in a reasonable time.
# You might want to try with N = 20, 30, 40, 100 to see what happens

N = 25
a = Literal("a")
string = "a" * N
regex = Question(a) * N + a * N

start = time.time()
m = match(regex, string)
end = time.time()
print "Refo finished in {0:.2} seconds".format(end - start)

regex = "(:?a?){{{0}}}a{{{0}}}".format(N)
regex = re.compile(regex)
start = time.time()
regex.match(string)
end = time.time()
print "Python re finished in {0:.2} seconds".format(end - start)
def inner(evidence):
    regex = compile_rule(rule_feature, relation)
    tokens_to_match = generate_tokens_to_match(evidence)
    return int(bool(refo.match(regex, tokens_to_match)))
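Since the compiled rule does not depend on the evidence argument, the compile_rule call can be hoisted out of the per-evidence path. Below is a sketch of that variant, assuming compile_rule and generate_tokens_to_match behave exactly as in the snippet above and that compile_rule has no per-evidence side effects; make_rule_matcher is a hypothetical wrapper name, not part of the surrounding project.

def make_rule_matcher(rule_feature, relation):
    # Hypothetical refactor: compile the rule once, reuse it for every evidence.
    regex = compile_rule(rule_feature, relation)

    def inner(evidence):
        tokens_to_match = generate_tokens_to_match(evidence)
        return int(bool(refo.match(regex, tokens_to_match)))

    return inner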