예제 #1
0
파일: match.py 프로젝트: iamjabour/smurf
def findmatch(node,start, maxDist, minMatch):
#    for i in xrange(0, len(node.childNodes)):
        refid = start
        ref = node.childNodes[refid]
        matchId = False
        matchCount = 0

        j = refid
        while True:
            j += matchId - refid if matchId else 1

            if j >= len(node.childNodes):
                break

            c = node.childNodes[j]

            if __debug:
                print refid, ref.tags
                print j, c.tags #, c.text.replace('\n', '|').replace(' ', '')

            d = float(simpleTreeMatching(ref,c))/max(len(ref.tags), len(c.tags),1)

            if 1-d <= maxDist:
                matchCount += 1
                matchId = j if not matchId else matchId

        if matchCount >= minMatch:
            return matchId, matchCount

        return matchId, matchCount
예제 #2
0
파일: match.py 프로젝트: iamjabour/smurf
def match2(node, maxDist=0, height=3, tags=False, printtag=False):
    if __debug:
        print "Debug mode: eri.utils.match.match2()"

    s1 = None
    s2 = None
    result = [False] * len(node.childNodes)
    match = False
    _comp = 0

    for x in xrange(0,len(node.childNodes)):
        c = node.childNodes[x]

        #primary test to not match low height
        if x == 0 or c.height < height:
            s1 = c
            match = False
            _comp += 1
            continue

        s2 = c

        if printtag or __debug:
            print 'str:', s1.tag, s2.tag

        d = float(simpleTreeMatching(s1,s2))/max(len(s1.tags), len(s2.tags),1)

        if __debug:
            print 'distance:', 1-d

        #match test
        if 1-d <= maxDist :
            s1 = c
            result[x] = _comp
            result[x-1] = _comp if not match else result[x-1]
            match = True
        else:
            s1 = c
            match = False
            _comp += 1
    #for
    if __debug:
        print "return: eri.utils.match.match2()"

    return result
예제 #3
0
파일: match.py 프로젝트: iamjabour/smurf
def check(node, maxDist, r, first, last):
    if __debug:
        print "Check", maxDist, r, first, last

    maxmatch = False
    i = first+r
    while i+r <= last:
#        print i, i+r, last
        for k in xrange(0, r):
#            print k , first+k, i+k
            a = node.childNodes[first+k]
            b = node.childNodes[i+k]
            d = float(simpleTreeMatching(a,b))/max(len(a.tags), len(b.tags),1)
#            print a.tags,'\n', b.tags

            if 1-d <= maxDist:
                maxmatch = i+k
#                print maxmatch
            else:
#                print 'not match'
                return maxmatch

        i += r
    return maxmatch