def normalize012(delta1, theta1):
    """Return the normalized directive bi-sequence giving the same GPS word
    as (delta1, theta1).

    Returns [notchanged, delta, theta], where notchanged is True when the
    input pair was already normalized (i.e. the output equals the input).
    """
    # Normalization of the letters order.
    [delta, theta, substitution] = changeLettersOrder(delta1, theta1)
    # Normalization of the prefix.
    [delta, theta] = initialNormalization(delta, theta)
    # The main algorithm: repeatedly apply normalization rules until no
    # rule is applicable.  (The redundant "applicableRule = []" dead
    # assignment of the original was removed; isNormalized sets it.)
    biseq = gpc.makeBiseq(delta, theta)
    applicableRule = isNormalized(biseq)
    # We do this loop until there is no normalization rule to apply.
    while applicableRule:
        biseq = applyRule(biseq, applicableRule)
        applicableRule = isNormalized(biseq)
    [delta, theta] = gpc.parseBiseq(biseq)
    gpc.verboseprint(
        1, "at the end of isNormalized:" + str(gpc.parseBiseq(biseq)))
    # Undo the initial letter-order substitution.
    delta, theta = changeLettersOrderBack(delta, theta, substitution)
    notchanged = (delta1 == delta) and (theta1 == theta)
    return [notchanged, delta, theta]
def isNormalized(biseq):
    """Look for a bad prefix or a bad factor inside the preprocessed biseq.

    If one is found, return [position, correction, replaced_length];
    otherwise return an empty list (via findNextBadFactor).
    """
    gpc.verboseprint(
        1, "at the beginning of isNormalized:" + str(gpc.parseBiseq(biseq)))
    # Looking for bad prefixes.  (The unused "matches = []" local of the
    # original was removed.)
    for prefixRule in bad_prefixes:
        match = re.match(prefixRule[0], biseq)
        if match:
            gpc.verboseprint(1, "prefix rule: " + str(prefixRule))
            index = match.end() - 2
            # Bad prefix to repair.  The third number is the length of the
            # sequence we replace, so that we know where to continue in the
            # original bi-sequence.
            return [index, prefixRule[1], 2]
    # If there is no bad prefix, we look for bad factors.  We can do it in
    # this way because if we have a bad prefix, everything is normalized
    # up to its end and therefore there cannot be either bad factors or
    # another bad prefix before it.
    badfactor = findNextBadFactor(biseq)
    return badfactor  # bad factor to repair
def make012Word(delta, theta, steps, seed=""):
    """Makes a GPS over {0,1,2} from sequences delta and theta.

    Starting from `seed`, appends delta[k] and applies the closure operator
    named by theta[k] ("R" = palindromic closure, "0"/"1"/"2" = E_i-closure)
    for each of the first `steps` positions; returns the resulting word.
    """
    word = seed
    for idx in range(steps):
        word += delta[idx]
        operator = theta[idx]
        if operator == "R":
            word = gpc.makePalClosure(word)
        elif operator in ("0", "1", "2"):
            word = makeEipalClosure(word, operator)
        else:
            # Unknown closure symbol: report and stop building the word.
            print("wrong symbol")
            break
        gpc.verboseprint(1, "w{0} = {1}".format(idx + 1, word))
    return word
def makeEipalClosure(seq, i):
    """Makes E_i-th palindromic closure of a string.

    If seq is already an E_i-palindrome it is returned unchanged; otherwise
    the E_i-image of the reversed prefix preceding the longest E_i-palindromic
    proper suffix is appended.
    """
    ei = Ei(i)
    if isEipal(seq, i) == True:
        return seq
    # Find the start of the longest proper suffix that is an E_i-palindrome.
    cut = 1
    while isEipal(seq[cut:], i) != True:
        cut += 1
    gpc.verboseprint(
        2, " {0} longest palindromic suffix : {1}".format(seq, seq[cut:]))
    # Append the E_i-image of the reversed prefix seq[:cut].
    reversed_prefix = seq[cut - 1::-1]
    return seq + "".join(ei[int(letter)] for letter in reversed_prefix)
def is012NormalizedNaive(delta, theta, steps):
    """Checks if delta and theta are normalized and if not, returns the
    beginning of the normalized sequence.

    Returns [ok, newdelta, newtheta] where ok is True when the first
    `steps` letters of (delta, theta) already equal the recomputed
    normalized pair (newdelta, newtheta).
    """
    w = ""
    l = 1
    prefixes = []
    # Build the GPS word prefix by prefix from the directive bi-sequence,
    # applying the closure operator named by each theta letter.
    for step in range(0, steps):
        w = w + delta[step]
        if theta[step] == "R":
            w = gpc.makePalClosure(w)
        elif theta[step] in ["0", "1", "2"]:
            w = makeEipalClosure(w, theta[step])
        else:
            print("wrong symbol")
            break
        prefixes.append(w)
    gpc.verboseprint(1, "Prefixes from (delta, theta): " + str(prefixes))
    gpc.verboseprint(1, "Obtained word: " + w)
    # Recover the normalized directive bi-sequence by scanning every prefix
    # of w and recording those that are (pseudo)palindromic.
    # NOTE(review): testPalindromicity presumably returns [is_pal, kind]
    # where kind is the theta letter ("R"/"0"/"1"/"2") — confirm at its
    # definition.
    newdelta = delta[0]
    newtheta = ""
    while l <= len(w):
        prefix = w[:l]
        res = testPalindromicity(prefix)
        if res[0] == True:
            gpc.verboseprint(1, prefix)
            # The letter following a palindromic prefix is the next
            # directive letter (unless we are already at the end of w).
            if l < len(w):
                newdelta = newdelta + w[l]
            newtheta = newtheta + res[1]
        l = l + 1
    if newdelta == delta[:steps] and newtheta == theta[:steps]:
        return [True, newdelta, newtheta]
    else:
        return [False, newdelta, newtheta]
def findNextBadFactor(biseq):
    """ Searching for the next (most left) non-prefix rule to apply.

    Returns [index, correction, replaced_length] for the leftmost match
    over all four rule families, or [] when no rule applies.
    """
    matches = []
    # Each rule family is matched anchored on a two-character
    # (letter, operator) pair boundary via the ([012R][012R])* prefix.
    # After every hit, biseq is truncated just past the hit so that later
    # scans cannot match further to the right than the earliest hit found.
    for rule in rules1:
        match = re.match('([012R][012R])*(' + rule + ')', biseq)
        if match:
            gpc.verboseprint(
                1, "rule1: " + str(gpc.parseBiseq(rule)) + " in biseq " +
                str(gpc.parseBiseq(biseq)))
            index = match.end() - 2  # The position that would be corrected
            # Here follows the correction:
            matches.append([
                index,
                rule[4] + "R" + rule[2] +
                # Ei(rule[3])[int(rule[2])] +
                rule[3],
                2
            ])
            biseq = biseq[:index + 3]  # There is no sense searching further
    for rule in rules2:
        match = re.match('([012R][012R])*(' + rule + ')', biseq)
        if match:
            gpc.verboseprint(
                1, "rule2: " + str(gpc.parseBiseq(rule)) + " in biseq " +
                str(gpc.parseBiseq(biseq)))
            index = match.end() - 2
            matches.append([index, rule[4] + rule[1] + rule[2] + "R", 2])
            biseq = biseq[:index + 3]
    for rule in rules3:
        match = re.match('([012R][012R])*(' + rule + ')', biseq)
        if match:
            gpc.verboseprint(
                1, "rule3: " + str(gpc.parseBiseq(rule)) + " in biseq " +
                str(gpc.parseBiseq(biseq)))
            index = match.end() - 2
            # Correction built through the involutions E_i on the letters.
            matches.append([
                index,
                rule[4] + Ei(rule[1])[int(rule[3])] +
                Ei(rule[1])[int(Ei(rule[3])[int(rule[2])])] + rule[1],
                2
            ])
            biseq = biseq[:index + 3]
    for rule in rules4:
        match = re.match('([012R][012R])*(' + rule + ')', biseq)
        if match:
            gpc.verboseprint(
                1, "rule4: " + str(gpc.parseBiseq(rule)) + " in biseq " +
                str(gpc.parseBiseq(biseq)))
            index = match.end() - 2
            matches.append([
                index,
                rule[6] + rule[1] + rule[2] + rule[3] + rule[4] + rule[5],
                2
            ])  # shouldn't this be 4??? probably not
            biseq = biseq[:index + 3]
    gpc.verboseprint(1, "all non-prefix matches: " + str(matches))
    # Final "leading" prefix: pick the match with the smallest index.
    final = []
    if matches:
        final = matches[0]
        for rule in matches[1:]:
            if rule[0] < final[0]:
                final = rule
    gpc.verboseprint(1, "Final change:" + str(final))
    return final