Example #1
0
def processDomainsList(domains, table, threshold=70):
    """Print, for each domain, the first stored signature its page resembles.

    The fourth field of every row (index 3) is rebuilt as a multiset of
    ``(str(pair[0]), pair[1])`` tuples, serialized with ``serializeElements``
    and hashed with ``spamsum.spamsum``.  The elements returned by
    ``processWebpage(domain)`` are hashed the same way and compared with the
    rows in table order; the first row whose ``spamsum.match`` score reaches
    ``threshold`` is reported and ends the search for that domain.  If no row
    qualifies, a "No defacement found" line is printed instead.

    Args:
        domains: iterable of domain strings (stripped only for display).
        table: iterable of indexable rows.  The report prints row[0] as the
            notifier, row[2] as the signature ID and row[1] as the detection
            date; row[3] must be an iterable of indexable pairs.
        threshold: minimum match score that counts as a hit.  Defaults to
            70, the value that used to be hard-coded.

    Returns:
        None.  Results are written to standard output.
    """
    # Hash every stored signature exactly once.  The digest does not depend
    # on the domain being checked, so recomputing it inside the domain loop
    # was pure overhead.  Building a real list also keeps the signatures
    # re-iterable for every domain regardless of what kind of iterable
    # `table` is.
    signatures = [
        (row,
         spamsum.spamsum(serializeElements(
             multiset([(str(pair[0]), pair[1]) for pair in row[3]]))))
        for row in table
    ]

    for domain in domains:

        #TODO: map from domain to webpage URL. Is it needed?
        pageDigest = spamsum.spamsum(
            serializeElements(multiset(processWebpage(domain))))

        for row, rowDigest in signatures:
            similarity = spamsum.match(pageDigest, rowDigest)

            if similarity >= threshold:  #TODO: Comparison Strategy!!
                print("Defacement found at %s -> Notifier: %s, Signature ID: %s, Detected on: %s (%s%%)" % \
                                            (domain.strip(), row[0], row[2], row[1], similarity))
                break
        else:
            # The loop ran to completion without a break: nothing matched.
            print("No defacement found (%s)" % (domain.strip(), ))
def alldist(filex, filey):
    """Compute several distance measures between the contents of two files.

    Args:
        filex: path of the first file.
        filey: path of the second file.

    Returns:
        A 7-tuple ``(res, dres, jaro, jarowink, ham, kl, spsum)``:
          * res      -- ``jellyfish.levenshtein_distance`` divided by 100
          * dres     -- ``jellyfish.damerau_levenshtein_distance`` divided by 100
          * jaro     -- 1 - ``jellyfish.jaro_distance``
          * jarowink -- 1 - ``jellyfish.jaro_winkler``
          * ham      -- ``jellyfish.hamming_distance`` divided by 100
          * kl       -- ``kldiv(tokenize(x), tokenize(y))``
          * spsum    -- (100 - ``spamsum.match``) divided by 100

    Side effects:
        Prints the scaled Hamming distance to standard output.
    """
    # Context managers guarantee both handles are closed, even if a read fails.
    with open(filex, 'r') as fx:
        xread = fx.read()
    with open(filey, 'r') as fy:
        yread = fy.read()

    lvd = jellyfish.levenshtein_distance(xread, yread)
    dlvd = jellyfish.damerau_levenshtein_distance(xread, yread)

    # NOTE(review): the raw file contents (not spamsum digests) are passed to
    # spamsum.match here -- confirm this is intended.
    # The match score is turned into a distance: (100 - score) / 100.
    spsum = (100 - spamsum.match(xread, yread)) / 100.00

    res = lvd / 100.00
    dres = dlvd / 100.00

    # jellyfish returns similarities for Jaro / Jaro-Winkler; invert them so
    # that every value in the result is a distance.
    ## Added jaro-winkler distance by fahim 20111011
    jaro = 1.0 - jellyfish.jaro_distance(xread, yread)
    jarowink = 1.0 - jellyfish.jaro_winkler(xread, yread)

    ham = jellyfish.hamming_distance(xread, yread) / 100.00
    # Single-expression print: same output as the former two-item print
    # statement, and valid on both Python 2 and Python 3.
    print("%s %s" % ("Hamming Distance = ", ham))

    kl = kldiv(tokenize(xread), tokenize(yread))

    return res, dres, jaro, jarowink, ham, kl, spsum
Example #3
0
 def test_match(self):
     """Check spamsum.match scores for every fixture pairing.

     A string matched against itself must score 100, s1 and s2 must score
     72 in either argument order, and s3 must score 0 against both s1
     and s2.
     """
     digest = spamsum.spamsum
     # (left input, right input, expected match score)
     expectations = (
         (self.s1, self.s1, 100),
         (self.s1, self.s2, 72),
         (self.s2, self.s1, 72),
         (self.s1, self.s3, 0),
         (self.s2, self.s3, 0),
     )
     for left, right, score in expectations:
         self.assertEqual(spamsum.match(digest(left), digest(right)), score)
Example #4
0
def alldist(filex, filey):
    """Return a spamsum-based distance between the contents of two files.

    Both files are read in full, hashed with ``spamsum.spamsum`` and the two
    digests are compared with ``spamsum.match``.

    Args:
        filex: path of the first file.
        filey: path of the second file.

    Returns:
        float: ``(100 - match score) / 100``.
    """
    # Context managers guarantee both handles are closed, even if a read fails.
    with open(filex, 'r') as fx:
        xhash = spamsum.spamsum(fx.read())
    with open(filey, 'r') as fy:
        yhash = spamsum.spamsum(fy.read())

    # Turn the match score into a distance.
    return (100 - spamsum.match(xhash, yhash)) / 100.00
Example #5
0
def alldist(filex, filey):
    """Return a spamsum-based distance between the contents of two files.

    Args:
        filex: path of the first file.
        filey: path of the second file.

    Returns:
        float: ``(100 - spamsum.match(x, y)) / 100`` where x and y are the
        full contents of the two files.
    """
    # Context managers guarantee both handles are closed, even if a read fails.
    with open(filex, 'r') as fx:
        xread = fx.read()
    with open(filey, 'r') as fy:
        yread = fy.read()

    # NOTE(review): this variant passes the raw file contents to
    # spamsum.match; the spamsum.spamsum() hashing step was previously
    # commented out here -- confirm that matching unhashed text is intended.
    return (100 - spamsum.match(xread, yread)) / 100.00
Example #6
0
def alldist(filex, filey):
    """Return a spamsum distance between two files using reverse-appended text.

    Each file's contents are concatenated with their own reversal, the
    result is hashed with ``spamsum.spamsum`` and the two digests are
    compared with ``spamsum.match``.

    Args:
        filex: path of the first file.
        filey: path of the second file.

    Returns:
        float: ``(100 - match score) / 100``.
    """
    # Context managers guarantee both handles are closed, even if a read fails.
    with open(filex, "r") as fx:
        xread = fx.read()
    with open(filey, "r") as fy:
        yread = fy.read()

    ## Take Reverse and append to original read ##
    xhash = spamsum.spamsum(xread + xread[::-1])
    yhash = spamsum.spamsum(yread + yread[::-1])

    # Turn the match score into a distance.
    return (100 - spamsum.match(xhash, yhash)) / 100.00
Example #7
0
def similarityIndex(elementsWebpage, sdefaces):
    """Score how closely a page's elements match a set of signature elements.

    Builds a table with one row per entry of ``sdefaces`` and one column per
    entry of ``elementsWebpage``; each cell is the ``spamsum.match`` score of
    the two entries' spamsum digests.  The table (or, when there are more
    signature entries than page entries, ten random row subsets of it) is
    handed to ``calculus``.

    Args:
        elementsWebpage: sized iterable of values accepted by
            ``spamsum.spamsum``.
        sdefaces: sized iterable of values accepted by ``spamsum.spamsum``.

    Returns:
        float: ``mSum / len(sdefaces)``, or 0.0 when ``sdefaces`` is empty
        (previously a ZeroDivisionError).
    """
    # Nothing to compare against: avoid dividing by zero below.
    if not sdefaces:
        return 0.0

    # Hash each input once instead of once per (signature, element) pair.
    pageHashes = [spamsum.spamsum(element) for element in elementsWebpage]
    matchesTable = []
    for signature in sdefaces:
        signatureHash = spamsum.spamsum(signature)
        matchesTable.append(
            [spamsum.match(signatureHash, pageHash) for pageHash in pageHashes])

    # NOTE(review): maxSim is a plain local in this function (there is no
    # `global` statement) and the return value of calculus() is discarded,
    # so calculus() cannot change maxSim and mSum stays 0 as written.
    # Confirm how calculus() is meant to report its result.
    mSum = 0

    if len(sdefaces) > len(elementsWebpage):

        # More signature rows than page elements: try ten random subsets of
        # rows, each as large as the number of page elements.
        for _ in range(10):

            s = random.sample(range(len(sdefaces)), len(elementsWebpage))

            matchesTableP = [matchesTable[x] for x in s]

            maxSim = 0
            calculus(matchesTableP, [], 0)

            if maxSim > mSum:
                mSum = maxSim
    else:
        maxSim = 0
        calculus(matchesTable, [], 0)
        mSum = maxSim

    return mSum * 1.0 / len(sdefaces)