def assert_out_contains(actual, expected, encode=True, sequential=False):
    """
    Assert that the output ``actual`` contains every substring in ``expected``.

    Utility assertion function.
    Helps keep the test code cleaner.

    :param actual: the output to search in.
    :param expected: a string or iterable of strings representing 1+ substrings that the output
        should contain. ``None`` means that nothing is expected.
    :param encode: if True, ``actual`` and every expected substring are converted with ``.encode()``
        before they are compared. ``None`` and values that are already ``bytes`` are left untouched.
    :param sequential: if True, the substrings have to occur in the given order: each one is searched
        for starting at the position where the previous one was found. If False, every substring
        may occur anywhere in ``actual``.
    :raises AssertionError: if an expected substring is not found.
    """
    # pytest convention: keep this helper's frame out of failure tracebacks.
    __tracebackhide__ = True

    def to_bytes(value):
        # Both sides of the comparison must have the same type, so the very same
        # conversion is applied to ``actual`` and to each expected substring.
        if not encode or value is None or isinstance(value, bytes):
            return value
        return value.encode()

    if expected is None:
        expected = []
    elif is_string(expected):
        expected = [expected]

    actual = to_bytes(actual)

    # TODO: set max_actual dynamically based on the verbosity setting
    # max_actual = 80
    max_actual = 800

    if sequential:
        pos = 0
        for i, s in enumerate(expected):
            s = to_bytes(s)
            try:
                # move pos to the beginning of the current match
                pos = actual.index(s, pos)
            except ValueError:
                msg = "Expected #{index} not found in Actual[{start}:{end}]." + \
                      "\nExpected #{index}: ({value})." + \
                      "\nActual: ...{actual}..."
                raise AssertionError(msg.format(index=repr(i),
                                                value=repr(s),
                                                start=repr(pos),
                                                end=repr(len(actual)),
                                                actual=repr(actual[pos:pos + max_actual])))
            # Note: if we disallow nested substrings here, it would make sense to do so
            # when sequential==False, too.
            # # move pos to the end of the current match (so that nested substrings are ignored)
            # pos += len(s)
    else:
        for s in expected:
            # Encode the substring as well; otherwise a str would be tested
            # against the already encoded (bytes) ``actual``.
            s = to_bytes(s)
            if s not in actual:
                msg = "Expected not found in Actual." + \
                      "\nExpected: ({value})." + \
                      "\nActual: ...{actual}..."
                raise AssertionError(msg.format(value=repr(s),
                                                actual=repr(actual[:max_actual])))
def find_distances(item1, item2, items, regex=False, regex_flags=None, verbose=False):
    """
    Calculate distance stats for 2 items (or 2 sets of items) within a given list of items.
    E.g. the distances of 2 words (or 2 word sets) within a given list of words.

    The positions of the matches are collected with find_all() and the stats are
    produced by get_index_distance_stats().

    Adapted from: http://stackoverflow.com/a/33389155

    :param item1: the value (or pattern) to match/find.
        If it is not a string, it will be treated as an iterable of values/patterns to match.
    :param item2: the value (or pattern) to match/find.
        If it is not a string, it will be treated as an iterable of values/patterns to match.
    :param items: an iterable of items to match against.
    :param regex: If True, item will be treated as a regex pattern.
    :param regex_flags: Optional flags for re.search().
    :param verbose: If True, 'matches1' and 'matches2' entries are added to the result. Each one
        maps a matched value taken from ``items`` to the set of indexes at which it was found.
    :return: the result of get_index_distance_stats() for the two sets of indexes
        (extended with the match details when ``verbose`` is True).

    >>> words = get_words(lorem_ipsum())
    >>> find_distances(['lorem'], ['ipsum'], words)
    {'max': 893, 'mean': 402.56, 'min': 83}

    >>> words = get_words(lorem_ipsum())
    >>> find_distances(['lorem', 'dolor'], ['consectetur', 'adipiscing'], words)
    {'max': 889, 'mean': 467.0740740740741, 'min': 3}

    >>> words = get_words(lorem_ipsum())
    >>> w1 = ['^Pellentesque$']
    >>> w2 = ['^Vivamus']
    >>> find_distances(w1, w2, words, regex=True, regex_flags=re.IGNORECASE)
    {'max': 910, 'mean': 287.1212121212121, 'min': 21}
    """

    def collect_positions(targets):
        # Union of the indexes matched by any of the targets; the matched values are dropped.
        return {
            position
            for target in targets
            for (position, _matched) in find_all(target, items, regex=regex, regex_flags=regex_flags)
        }

    def group_by_value(positions):
        # Map every matched value to the set of indexes at which it occurs.
        grouped = {}
        for position in positions:
            grouped.setdefault(items[position], set()).add(position)
        return grouped

    # A lone string is handled like a one-element collection of values/patterns.
    if is_string(item1):
        item1 = [item1]
    if is_string(item2):
        item2 = [item2]

    positions1 = collect_positions(item1)
    positions2 = collect_positions(item2)

    stats = get_index_distance_stats(positions1, positions2)
    if verbose:
        stats.update({
            'matches1': group_by_value(positions1),
            'matches2': group_by_value(positions2),
        })
    return stats