def remove_repeated_long_strings(l: str, minlen: int = 1000) -> str:
    """Remove duplicated long, word-aligned substrings from `l`.

    The longest repeated substring is looked up with the Ukkonen
    suffix-tree module. While that substring is longer than `minlen`, it is
    trimmed so that it starts and ends on a space, every occurrence is
    replaced by a single space, and one copy is appended to the end of the
    text. The search is then repeated on the shortened text.

    Note: `minlen` is compared against the untrimmed repeat, so the piece
    that is actually removed may be slightly shorter than `minlen`.

    The function stops early and returns the text as it stands when the
    repeat cannot be removed safely:

    * the repeat contains no space, or trims down to a single space, so
      there is no word-aligned chunk to remove;
    * the trimmed repeat occurs only once without overlap, i.e. the two
      occurrences overlap each other.

    Args:
        l: Text to de-duplicate.
        minlen: Only repeats strictly longer than this are removed.

    Returns:
        The de-duplicated text with leading and trailing whitespace
        stripped.
    """
    text = ' ' + l + ' '

    # Two distinct occurrences of a substring longer than `minlen` need at
    # least minlen + 2 characters, so shorter inputs can skip the suffix
    # tree (and its import) entirely.
    if len(text) < minlen + 2:
        return text.strip()

    import ukkonen

    repeat = ukkonen.getLongestRepeatedSubstring(text + '$')
    while len(repeat) > minlen:
        # Trim the repeat to word boundaries: first space .. last space.
        first_space = repeat.find(' ')
        if first_space == -1:
            # No word boundary inside the repeat: nothing can be removed.
            return text.strip()
        repeat = repeat[first_space:repeat.rfind(' ') + 1]

        # A lone space would leave the text unchanged and loop forever;
        # fewer than two non-overlapping hits means the occurrences overlap.
        if len(repeat) < 2 or text.count(repeat) < 2:
            return text.strip()

        # Drop every occurrence, then keep a single copy at the end.
        text = text.replace(repeat, ' ') + repeat
        # Collapse the double spaces created by the removal and the append.
        text = text.replace('  ', ' ')
        repeat = ukkonen.getLongestRepeatedSubstring(text + '$')
    return text.strip()
def check(s):
    """Assert that both longest-repeated-substring implementations agree on `s`.

    Compares the result of `get_longest_repeated_substring_brute(s)` with
    `ukkonen.getLongestRepeatedSubstring` called on `s` with a trailing '$'
    (presumably an end-of-string sentinel -- confirm against the ukkonen
    module). Raises AssertionError when the two results differ.
    """
    expected = get_longest_repeated_substring_brute(s)
    actual = ukkonen.getLongestRepeatedSubstring(s + '$')
    assert expected == actual