def test_redict_speed(num_items, start=0, dictobj=None):
    """
    Time the compilation of, and lookups in, a ReDict instance.

    ``num_items`` new patterns (numbered from ``start``) are added to the
    dictionary, the dictionary is compiled, and a batch of random lookups is
    timed.

    :param int num_items: number of new patterns to add before timing
    :param int start: number used in the first pattern that is added
    :param dictobj: existing dictionary to extend; a new ReDict is created \
        when this is None
    :return: tuple of the form ``(compile_time, average_get_time)``, both \
        measured in seconds with ``time.time``
    """
    d = ReDict() if dictobj is None else dictobj

    for i in range(start, start + num_items):
        d["(f)(o)o? %d|b((a)(r)*) %d" % (i, i)] = i

    compile_start = time.time()
    d.compile()
    compile_time = time.time() - compile_start

    if num_items == 0:
        num_get_tests = 100
    else:
        # Floor division keeps the count an int on Python 3 (range() rejects
        # floats); the lower bound of 1 avoids dividing by zero below when
        # fewer than 10 items were added.
        num_get_tests = max(1, num_items // 10)

    get_time = 0.0
    for _ in range(num_get_tests):
        index = random.randrange(0, len(d))
        text = "barrr %d" % index

        # Only the lookup itself is timed, not the construction of the text
        get_start = time.time()
        _ = d[text]
        get_time += time.time() - get_start

    return compile_time, get_time / float(num_get_tests)
def __init__(self, lists=None):
    """
    Create an empty context, optionally populated from ``lists``.

    :param lists: optional data passed on to ``_build_from_lists`` when it \
        is truthy
    """
    # Pattern/response stores
    self.entry = ReDict()
    self.responses = ReDict()

    # Chain bookkeeping: no chain is active until one has been matched
    self.chain = None
    self.chain_index = 0
    self.chains = []

    # Contexts nested inside this one
    self.contexts = []

    if lists:
        self._build_from_lists(lists)
def test_clear(self):
    """Check that clear() removes every item from a populated ReDict."""
    patterns = {"q+": 4, "r*": 5, "s?": 6}

    redict = ReDict()
    for pattern in patterns:
        redict[pattern] = patterns[pattern]

    # Sanity check: the dictionary is populated before it is cleared
    self.assertEqual(redict["qqq"], 4)
    self.assertEqual(len(patterns), len(redict))

    redict.clear()

    self.assertEqual(0, len(redict))
    with self.assertRaises(KeyError):
        redict["qqq"]
def test_load_from_dict(self):
    """Check that a dumped ReDict can be loaded back and still matches."""
    expected = {"x+": 1, "y?": 2, "z*": 3}

    source = ReDict()
    for pattern, value in expected.items():
        source[pattern] = value

    restored = ReDict().load_from_dict(source.dump_to_dict())

    # Each pattern is paired with a text that it should match
    for pattern, text in (("x+", "xxxx"), ("y?", "y"), ("z*", "zz")):
        self.assertEqual(expected[pattern], restored[text])
def test_pop(self):
    """Check that pop() returns the matching value and removes the item."""
    redict = ReDict()
    for value, pattern in enumerate(("a+", "b+"), 1):
        redict[pattern] = value

    self.assertEqual(2, len(redict))
    for text, value in (("aaa", 1), ("bbb", 2)):
        self.assertEqual(redict[text], value)

    popped = redict.pop("b")
    self.assertEqual(popped, 2)

    # Only the "a+" item should be left
    self.assertEqual(1, len(redict))
    self.assertEqual(redict["aaa"], 1)
    with self.assertRaises(KeyError):
        redict["bbb"]
def test_compile(self):
    """Check that compile() fills ``compiled`` with compiled regex objects."""
    # Type object of a compiled regular expression
    pattern_type = type(re.compile("a+"))

    redict = ReDict()
    for value, pattern in enumerate("abc", 1):
        redict[pattern] = value

    # Nothing is compiled until compile() has been called
    self.assertFalse(redict.compiled)

    redict.compile()

    self.assertTrue(len(redict.compiled) > 0)
    for compiled in redict.compiled:
        self.assertTrue(isinstance(compiled, pattern_type))
def add_chained_phrases(self, *pattern_response_pairs):
    """
    Register a chain of pattern/response pairs.

    The pairs form one chain, in the order in which they are passed. Each
    pair is stored in its own ReDict, and the resulting list is appended to
    ``self.chains``.

    :param pattern_response_pairs: one or more pattern/response pairs, \
        where a pattern/response pair is a tuple of the form \
        ``(regexs, value)``, where ``regexs`` is a regular expression or \
        list of regular expressions and ``value`` is an arbitrary object
    :return: this instance, so that calls can be chained
    """
    def build_link(pair):
        # One single-entry dictionary per step of the chain
        pattern, response = _check_pattern_response_pair(pair)
        link = ReDict()
        link[pattern] = response
        return link

    self.chains.append([build_link(pair) for pair in pattern_response_pairs])
    return self
def test_groups(self):
    """Check that groups() exposes the subgroups of the last match."""
    words = ("hello", "world", "!")
    expected = 8

    redict = ReDict()
    redict["(.*) (.*) (.*)"] = expected

    self.assertEqual(expected, redict["%s %s %s" % words])

    # Each captured group should be the word at the same position
    captured = redict.groups()
    for position, word in enumerate(words):
        self.assertEqual(captured[position], word)
def test_copy(self):
    """Check that copy() yields a ReDict with the same items and matches."""
    original = ReDict()
    for pattern, value in {"xyz+": 4, "ab*c": 5, "def?": 6}.items():
        original[pattern] = value

    duplicate = original.copy()
    self.assertEqual(len(original), len(duplicate))

    # Every pattern and value of the original must be present in the copy
    for pattern, value in original.iteritems():
        self.assertTrue(pattern in duplicate.keys())
        self.assertTrue(value in duplicate.values())

    # Lookups must give the same result on both objects
    for text in ("xyz", "abbbc", "def"):
        self.assertEqual(duplicate[text], original[text])
def fill_redict(self, dictobj=None, numitems=1000):
    """
    Fill a dictionary object with generated pattern/value test items.

    :param dictobj: object to fill through item assignment; a new ReDict is \
        created only when this is None, so an empty container passed by the \
        caller is filled in place rather than replaced
    :param int numitems: number of pattern/value items to generate
    :return: tuple of the form ``(testitems, dictobj)``, where \
        ``testitems`` is a plain dict of the generated items and \
        ``dictobj`` is the object that was filled
    """
    # An identity check is required here: an empty container is falsy, and
    # "not dictobj" would discard the object the caller asked us to fill
    if dictobj is None:
        dictobj = ReDict()

    testitems = {"((foo+|bar*) )?%d" % i: i for i in range(numitems)}
    for key, val in testitems.items():
        dictobj[key] = val

    return testitems, dictobj
def test_groups_per_regex(self):
    """
    Check lookups when the item count is three times
    ``groups_per_regex``.
    """
    redict = ReDict()
    total = redict.groups_per_regex * 3

    for number in range(total):
        redict["((f)(o)(o)*|(b)(a)(r)+) %d" % number] = number

    for number in range(total):
        self.assertEqual(number, redict["foo %d" % number])
def test_delete_items(self):
    """
    Check that deleted items disappear from a ReDict while all other items
    stay intact.
    """
    num_iterations = 50
    d = ReDict()
    added = {}
    deleted = {}

    for i in range(num_iterations):
        expr = "(bar?|foo*) %d" % i
        added[expr] = i
        d[expr] = i

    # Randomly delete some items
    delete_count = random.randrange(20, 30)
    for _ in range(delete_count):
        key = random.choice(list(added.keys()))
        deleted[key] = added.pop(key)
        del d[key]

    # Everything still stored must be an item that was never deleted
    for key, value in d:
        if key in added:
            self.assertTrue(key in d.keys())
            self.assertEqual(value, added[key])
        elif key in deleted:
            self.fail("deleted key %r is still present" % (key,))
        else:
            raise RuntimeError("Malformed test data")

    # No surviving item may have gone missing either
    self.assertEqual(len(added), len(d))

    # Verify deleted items are missing. This has to walk 'deleted' itself:
    # iterating over 'd' can never reach a key that was really removed.
    for key in deleted:
        self.assertFalse(key in d.keys())
        self.assertRaises(KeyError, d.__getitem__, key)
def test_value_can_be_arbitrary_object(self):
    """Check that stored values are returned as the very same objects."""
    samples = [
        ("str", "test string"),
        ("bool", False),
        ("class", self.__class__),
        ("func", self.setUpClass),
    ]

    redict = ReDict()
    for pattern, value in samples:
        redict[pattern] = value

    # Identity, not just equality: values must not be copied or converted
    for pattern, value in samples:
        self.assertIs(redict[pattern], value)
def test_update(self):
    """Check that update() merges the items of another ReDict."""
    base_items = {"xyz+": 4, "ab*c": 5, "def?": 6}
    extra_items = {"q+": 1, "r*": 2, "s?": 3}

    target = ReDict()
    other = ReDict()
    for pattern, value in base_items.items():
        target[pattern] = value
    for pattern, value in extra_items.items():
        other[pattern] = value

    target.update(other)

    self.assertEqual(len(target), len(base_items) + len(extra_items))

    # Both the original and the merged-in items must be present afterwards
    for items in (base_items, extra_items):
        for pattern, value in items.items():
            self.assertTrue(pattern in target.keys())
            self.assertTrue(value in target.values())
def test_all_items_accessible(self):
    """
    Check that every item of a ReDict holding many patterns can be reached
    through several different texts that match its pattern.
    """
    num_iterations = 50
    d = ReDict()

    for i in range(num_iterations):
        expr = "(foo*|bar+) %d" % i
        d[expr] = i

    # Texts covering both alternatives of the pattern with varying repeats
    templates = ("fo %d", "foo %d", "foooo %d", "bar %d", "barr %d",
                 "barrrrr %d")

    for i in range(num_iterations):
        for template in templates:
            # assertEqual, not the deprecated assertEquals alias, which was
            # removed in Python 3.12
            self.assertEqual(i, d[template % i])
text = "barrr %d" % index get_start = time.time() value = d[text] get_time += time.time() - get_start return compile_time, get_time / float(num_get_tests) step = 1000 max_value = 25000 iterations = max_value / step compile_times = [] get_times = [] d = ReDict() for i in range(iterations): compile_time, get_time = test_redict_speed(step, step * i, d) compile_times.append(compile_time) get_times.append(get_time) # worst caseno, chunking, 7.15 secs to compile with 25000 groups # better, chunking 600, 4.5 secs with 25000 groups # best (!), chunking 75, builtin 're' lib, 1.65 secs with 25000 groups test_values = range(0, max_value, step) plot(test_values, [compile_times, get_times], xlabel="Number of items in ReDict instance", ylabel="Time in seconds", legend=[ 'Time to compile ReDict instance', 'Time to fetch item from compiled ReDict'
class Context(object):
    """
    Class representing a "discussion" context, allowing for a Responder that
    responds with contextual awareness
    """

    def __init__(self, lists=None):
        """
        Create an empty context, optionally populated from ``lists``.

        :param lists: optional data passed on to ``_build_from_lists`` when \
            it is truthy
        """
        self.entry = ReDict()
        self.responses = ReDict()
        self.chains = []
        self.chain = None
        self.chain_index = 0
        self.contexts = []

        if lists:
            # NOTE(review): _build_from_lists is not defined in the part of
            # this class that is visible here -- confirm it exists
            self._build_from_lists(lists)

    def compile(self):
        """
        Compile all regular expressions contained in this context so they are
        ready for immediate matching

        :return: this instance, so that calls can be chained
        """
        if self.entry:
            self.entry.compile()

        if self.responses:
            self.responses.compile()

        if self.chains:
            for chain in self.chains:
                for responsedict in chain:
                    responsedict.compile()

        return self

    def add_chained_phrases(self, *pattern_response_pairs):
        """
        Add multiple chained pattern/response pairs. A chain defines a
        sequence of pattern/response pairs that are expected in the order
        that they occur in the passed arguments to this method. Whenever a
        Responder is inside a context and input matching the first
        pattern/response pair in a chain is seen, the Responder will
        continually expect the next pattern in the current chain until
        another chain or another context is entered. When the last pattern in
        the chain is reached, Responders will continue expecting this pattern
        until another chain or context is entered.

        :param pattern_response_pairs: one or more pattern/response pairs, \
            where a pattern/response pair is a tuple of the form \
            ``(regexs, value)``, where ``regexs`` is a regular expression or \
            list of regular expressions and ``value`` is an arbitrary object
        :return: this instance, so that calls can be chained
        """
        chain = []
        for pair in pattern_response_pairs:
            pattern, response = _check_pattern_response_pair(pair)
            # Each step of the chain gets its own single-entry dictionary
            responsedict = ReDict()
            responsedict[pattern] = response
            chain.append(responsedict)

        self.chains.append(chain)
        return self

    def add_entry_phrase(self, patterns, response):
        """
        Add a pattern/response pair to be used as an entry point for this
        context. If input matching one of the patterns passed here is seen,
        Responders will return the corresponding response object and enter
        the context.

        :param patterns: regular expression or list of regular expressions. \
            If the input passed to ``get_response`` matches one of these \
            patterns, then the object passed here as ``response`` will be \
            returned.
        :param object response: object to return from ``get_response`` if \
            the passed input matches one of the regular expressions passed \
            here as ``patterns``.
        :return: this instance, so that calls can be chained
        """
        pattern, response = _check_pattern_response_pair((patterns, response))
        self.entry[pattern] = response
        return self

    def add_entry_phrases(self, *pattern_response_pairs):
        """
        Add one or more pattern/response pairs to be used as entry points for
        this context. If input matching one of the patterns passed here is
        seen, Responders will return the corresponding response object and
        enter the context.

        :param pattern_response_pairs: one or more pattern/response pairs, \
            where a pattern/response pair is a tuple of the form \
            ``(regexs, value)``, where ``regexs`` is a regular expression or \
            list of regular expressions and ``value`` is an arbitrary object
        :return: this instance, so that calls can be chained
        """
        for pair in pattern_response_pairs:
            self.add_entry_phrase(*pair)

        return self

    def add_response(self, patterns, response):
        """
        Add a pattern/response pair that will only be recognized when a
        Responder is in this context

        :param patterns: regular expression or list of regular \
            expressions. If the input passed to ``get_response`` matches one \
            of these patterns, then the object passed here as ``response`` \
            will be returned.
        :param object response: object to return from ``get_response`` if \
            the passed input matches one of the regular expressions passed \
            here as ``patterns``.
        :return: this instance, so that calls can be chained
        """
        pattern, response = _check_pattern_response_pair((patterns, response))
        self.responses[pattern] = response
        return self

    def add_responses(self, *pattern_response_pairs):
        """
        Add one or more pattern/response pairs that will only be recognized
        when a Responder is in this context

        :param pattern_response_pairs: one or more pattern/response pairs, \
            where a pattern/response pair is a tuple of the form \
            ``(regexs, value)``, where ``regexs`` is a regular expression or \
            list of regular expressions and ``value`` is an arbitrary object
        :return: this instance, so that calls can be chained
        """
        for pair in pattern_response_pairs:
            self.add_response(*pair)

        return self

    def add_context(self, context):
        """
        Add context that can only be entered when already in this context

        :param chatbot_utils.responder.Context context: context instance to \
            add
        :raises ValueError: if ``context`` is not a Context instance
        :return: this instance, so that calls can be chained
        """
        if not isinstance(context, Context):
            raise ValueError("add_context argument must be a Context instance")

        self.contexts.append(context)
        return self

    def add_contexts(self, *contexts):
        """
        Add one or more context instances to this context

        :param chatbot_utils.responder.Context contexts: context instances \
            to add
        :return: this instance, so that calls can be chained
        """
        for context in contexts:
            self.add_context(context)

        return self

    def _search_chains(self, text):
        """
        Look for a chain whose first pattern/response pair matches ``text``.

        :param str text: input text to check
        :return: tuple of the form ``(chain, response, groups)`` for the \
            first chain that matches, otherwise ``(None, NoResponse, None)``
        """
        for chain in self.chains:
            # Chains created without any pairs are empty and cannot match
            if len(chain) > 0:
                resp, groups = _check_get_response(chain[0], text)
                if resp != NoResponse:
                    return chain, resp, groups

        return None, NoResponse, None

    def _get_chained_response(self, text):
        """
        Get a response for ``text`` from the chains of this context.

        When no chain is active, the first pair of every chain is tried and
        a match activates that chain. When a chain is active, the currently
        expected pair is tried first, and then the pair before it.

        :param str text: input text to check
        :return: tuple of the form ``(response, groups)``; ``response`` is \
            ``NoResponse`` when nothing matched
        """
        if not self.chain:
            chain, response, groups = self._search_chains(text)
            if chain:
                self.chain = chain
                # The first pair has just matched, so the second pair is
                # expected next. A chain with a single pair has no second
                # pair: keep expecting its only pair rather than indexing
                # past the end of the chain on the next input.
                self.chain_index = min(1, len(chain) - 1)
                return response, groups

            return NoResponse, None

        responsedict = self.chain[self.chain_index]
        resp, groups = _check_get_response(responsedict, text)

        if resp != NoResponse:
            # Advance to the next pair, but stay on the last pair once it
            # has been reached
            if self.chain_index < (len(self.chain) - 1):
                self.chain_index += 1
        elif self.chain_index > 0:
            # The expected pair did not match; fall back to the pair that
            # precedes it in the chain
            responsedict = self.chain[self.chain_index - 1]
            resp, groups = _check_get_response(responsedict, text)

        return resp, groups

    def get_response(self, text):
        """
        Find a response object associated with a pattern in this context that
        matches 'text', and return it (if any). Chains are tried first, then
        the responses of this context, then its entry phrases.

        :param str text: input text to check for matching patterns against
        :return: tuple of the form ``(response, groups)``. ``response`` is \
            the response object associated with the matching regular \
            expression, if any, otherwise ``NoResponse``. ``groups`` is the \
            second item returned by the lookup that produced ``response``.
        """
        resp, groups = self._get_chained_response(text)
        if resp != NoResponse:
            return resp, groups

        resp, groups = _check_get_response(self.responses, text)
        if resp == NoResponse:
            resp, groups = _check_get_response(self.entry, text)

        # If we got a response from anything other than a chain, make
        # sure we exit any current chain by setting self.chain = None
        if resp != NoResponse:
            self.chain = None

        return resp, groups
def __init__(self):
    """
    Create a responder that has no registered responses, no contexts and
    ``NoResponse`` as its default response.
    """
    # Context the responder is currently in (None: not in any context)
    self.context = None
    # Contexts that can be entered from outside of any context
    self.contexts = []
    # Returned by get_response when nothing matches
    self.default_response = NoResponse
    # Pattern/response pairs that are recognized regardless of context
    self.responses = ReDict()
class Responder(object):
    """
    High-level responder object: pattern/response pairs and contexts are
    registered on it, and input text can be passed to it to retrieve the
    matching response object
    """

    def __init__(self):
        """
        Create a responder that has no registered responses, no contexts and
        ``NoResponse`` as its default response.
        """
        # Pattern/response pairs that are recognized regardless of context
        self.responses = ReDict()
        # Returned by get_response when nothing matches
        self.default_response = NoResponse
        # Context the responder is currently in (None: not in any context)
        self.context = None
        # Contexts that can be entered from outside of any context
        self.contexts = []

    def compile(self):
        """
        Compile the regular expressions of this responder and of all of its
        contexts, so they are ready for matching immediately

        :return: this instance, so that calls can be chained
        """
        if self.responses:
            self.responses.compile()

        for registered in self.contexts:
            registered.compile()

        return self

    def add_default_response(self, response):
        """
        Set the response to return when no other matching response is found

        :param response: object to return as default response
        :return: this instance, so that calls can be chained
        """
        self.default_response = response
        return self

    def add_response(self, patterns, response):
        """
        Register a pattern/response pair that is recognized regardless of
        context

        :param patterns: regular expressions passed, together with \
            ``response``, through ``_check_pattern_response_pair``. If the \
            input passed to ``get_response`` matches one of them, the \
            object passed here as ``response`` is returned.
        :param object response: object to return from ``get_response`` for \
            input that matches ``patterns``
        :return: this instance, so that calls can be chained
        """
        checked = _check_pattern_response_pair((patterns, response))
        pattern, value = checked
        self.responses[pattern] = value
        return self

    def add_responses(self, *pattern_response_pairs):
        """
        Register one or more pattern/response pairs that are recognized
        regardless of context

        :param pattern_response_pairs: one or more pattern/response pairs, \
            where a pattern/response pair is a tuple of the form \
            ``(regexs, value)``, where ``regexs`` is a regular expression or \
            list of regular expressions and ``value`` is an arbitrary object
        :return: this instance, so that calls can be chained
        """
        for patterns, response in pattern_response_pairs:
            self.add_response(patterns, response)

        return self

    def add_context(self, context):
        """
        Register a context instance on this responder

        :param chatbot_utils.responder.Context context: context instance to \
            add
        :raises ValueError: if ``context`` is not a Context instance
        :return: this instance, so that calls can be chained
        """
        if isinstance(context, Context):
            self.contexts.append(context)
            return self

        raise ValueError("add_context argument must be a Context instance")

    def add_contexts(self, *contexts):
        """
        Register one or more context instances on this responder

        :param chatbot_utils.responder.Context contexts: context instances \
            to add
        :return: this instance, so that calls can be chained
        """
        for candidate in contexts:
            self.add_context(candidate)

        return self

    def _contextless_response(self, text):
        """
        Get a response for ``text`` without using the current context.

        The contextless responses are tried first; a match there leaves the
        current context. Otherwise entry into one of the registered contexts
        is attempted, and the default response is the last resort.

        :param str text: input text to check for matching patterns against
        :return: tuple of the form ``(response, groups)``
        """
        reply, matched = _check_get_response(self.responses, text)
        if reply != NoResponse:
            # A contextless match while inside a context leaves that context
            if self.context:
                self.context = None
            return reply, matched

        # No contextless responses available, attempt context entry
        entered, reply, matched = _attempt_context_entry(self.contexts, text)
        if entered:
            self.context = entered
            return reply, matched

        return self.default_response, None

    def get_response(self, text):
        """
        Find the response object associated with a pattern that matches
        'text', and return it. The current context (if any) and its
        subcontexts are tried first, then the contextless responses, then
        entry into one of the registered contexts.

        :param str text: input text to check for matching patterns against
        :return: tuple of the form ``(response, groups)``. ``response`` is \
            the response object associated with the matching regular \
            expression, if any, otherwise the default response. ``groups`` \
            is the second item returned by the lookup that produced \
            ``response``, or None when the default response is returned.
        """
        reply, matched = NoResponse, None

        # If currently in a context, try to get a response from the context
        if self.context:
            reply, matched = self.context.get_response(text)
            if reply == NoResponse:
                # Try entering subcontexts contained in current context
                entered, reply, matched = _attempt_context_entry(
                    self.context.contexts, text)
                if entered:
                    self.context = entered

        # If no contextual response is available, fall back to everything
        # that does not depend on the current context
        if reply == NoResponse:
            reply, matched = self._contextless_response(text)

        return reply, matched