コード例 #1
0
def GSTPrechelt(s1, s2, minimalMatchingLength):
    """Find common token runs of s1 and s2 via hashed fixed-size windows.

    Both strings are split on whitespace into token lists. The windows of
    ``minimalMatchingLength`` consecutive tokens of s1 are indexed through
    computeHashList. Each window of s2 is hashed with createKRHashValue and
    looked up in that index; every candidate position is then verified token
    by token (so a hash collision cannot produce a false match) and extended
    as far as the tokens keep agreeing.

    Args:
        s1: First text; tokenized with ``str.split()``.
        s2: Second text; tokenized with ``str.split()``.
        minimalMatchingLength: Window size in tokens; shorter common runs
            are discarded.

    Returns:
        The result of ``reduceMatches`` applied to the list of
        ``(pos1, pos2, length)`` tuples, where pos1/pos2 are token indices
        into s1/s2 and length is the number of matching tokens.
        NOTE(review): reduceMatches is defined elsewhere; presumably it
        removes overlapping matches -- confirm.
    """
    s1List = s1.split()
    s2List = s2.split()
    matches = set()

    hashList = computeHashList(s1List, minimalMatchingLength)

    # "+ 1" so that the window ending on the final token of s2 is examined;
    # the last valid window starts at len(s2List) - minimalMatchingLength.
    for i in range(len(s2List) - minimalMatchingLength + 1):
        h = createKRHashValue(" ".join(s2List[i:i + minimalMatchingLength]))
        # positions in s1 whose window has the same hash value
        positions = hashList.get(h)
        if not positions:
            continue
        for pos in positions:
            # verify the candidate and extend it while the tokens are equal
            j = 0
            while (pos + j < len(s1List) and i + j < len(s2List)
                   and s1List[pos + j] == s2List[i + j]):
                j += 1
            if j >= minimalMatchingLength:
                # the set silently drops duplicate (pos1, pos2, length) tuples
                matches.add((pos, i, j))

    return reduceMatches(list(matches))
コード例 #2
0
def computeHashList(s, minimalMatchingLength):
    """Index every window of ``minimalMatchingLength`` tokens of s by hash.

    Args:
        s: Sequence of string tokens (each window is joined with single
            spaces before hashing).
        minimalMatchingLength: Number of consecutive tokens per window.

    Returns:
        A GSTHashtable to which, for every window start index i, the pair
        (createKRHashValue(window), i) has been added. If s has fewer than
        ``minimalMatchingLength`` tokens, nothing is added.
    """
    hashList = GSTHashtable()

    # "+ 1": the last full window starts at len(s) - minimalMatchingLength,
    # so that index has to be included in the range.
    for i in range(len(s) - minimalMatchingLength + 1):
        window = " ".join(s[i:i + minimalMatchingLength])
        hashList.add(createKRHashValue(window), i)

    return hashList