def processDomainsList(domains, table, threshold=70):
    """Report, for every domain, whether its page matches a known defacement.

    Each row of ``table`` is read as ``(notifier, detected_on, signature_id,
    elements, ...)``; ``elements`` is an iterable whose items carry a type at
    index 0 and an element at index 1, as retrieved from the database.

    For every domain the page elements returned by ``processWebpage`` are
    turned into a multiset, serialized and spamsum-hashed, then compared with
    the hash of each stored signature.  The first signature whose match score
    reaches ``threshold`` is reported; if none does, a "no defacement"
    line is printed instead.

    :param domains: iterable of domain strings (stripped only for display).
    :param table: iterable of signature rows as described above.
    :param threshold: minimum ``spamsum.match`` score that counts as a hit.
    """
    # Hash every stored signature exactly once.  The signature hash does not
    # depend on the domain, so computing it inside the per-domain loop only
    # repeated the same work for every domain.
    signatures = []
    for row in table:
        stored = multiset([(str(item[0]), item[1]) for item in row[3]])
        signatures.append((row, spamsum.spamsum(serializeElements(stored))))

    for domain in domains:
        # TODO: map from domain to webpage URL. Is it needed?
        page_elements = multiset(processWebpage(domain))
        page_hash = spamsum.spamsum(serializeElements(page_elements))

        for row, signature_hash in signatures:
            similarity = spamsum.match(page_hash, signature_hash)
            if similarity >= threshold:  # TODO: Comparison Strategy!!
                message = "Defacement found at %s -> Notifier: %s, Signature ID: %s, Detected on: %s (%s%%)" % \
                    (domain.strip(), row[0], row[2], row[1], similarity)
                print(message)
                break
        else:
            # The loop ran to completion without a break: nothing matched.
            print("No defacement found (%s)" % (domain.strip(), ))
def test_spamsum(self):
    """spamsum() must produce the reference digest for each fixture string."""
    expected_digests = (
        (self.s1,
         '3:kEvyc/sFIKwYclQY4MKLFE4Igu0uLzIKygn:kE6Ai3KQ/MKOgDKZn'),
        (self.s2,
         '3:kEvyc/sFIKwpErXLsCTApY4MKLFE4Igu0uLzWKIAYjtn:kE6Ai3jjTU/MKOgdK9Yjt'),
        (self.s3,
         '3:uZ3B:uZx'),
    )
    # Checked in fixture order; the first mismatch fails the test.
    for sample, digest in expected_digests:
        self.assertEqual(spamsum.spamsum(sample), digest)
def alldist(filex, filey):
    """Return a spamsum-based distance between the contents of two files.

    Both files are read in full and hashed with ``spamsum.spamsum``; the
    ``spamsum.match`` score of the two digests is turned into a distance as
    ``(100 - score) / 100.0``.  Assuming ``match`` yields a 0-100 score, the
    result lies between 0.0 (identical digests) and 1.0 (no similarity).

    :param filex: path of the first file.
    :param filey: path of the second file.
    :returns: the distance as a float.
    :raises IOError: if either file cannot be opened or read.
    """
    # Context managers close the handles deterministically; the original
    # left both files open until garbage collection.
    with open(filex, 'r') as xfile:
        xread = xfile.read()
    with open(filey, 'r') as yfile:
        yread = yfile.read()

    similarity = spamsum.match(spamsum.spamsum(xread), spamsum.spamsum(yread))
    # Dividing by a float literal already yields a float.
    return (100 - similarity) / 100.00
def _write_stream_file(fname, data):
    """Write ``data`` to ``fname``; report and return False on an I/O error."""
    try:
        with open(fname, "w") as out:
            out.write(data)
    except (IOError, OSError):
        print("unable to log to %s" % (logdir,))
        return False
    return True


def logTcp(tcp):
    """Dump both directions of a TCP stream, plus their spamsum digests.

    Four files are written below ``logdir``, each named from the capture
    file's base name, the current Unix time and the connection endpoints:

    * ``...-CtoS.tcp``       raw client-to-server bytes
    * ``...-CtoS-tcp.fuzz``  spamsum digest of those bytes
    * ``...-StoC.tcp``       raw server-to-client bytes
    * ``...-StoC-tcp.fuzz``  spamsum digest of those bytes

    Logging is best effort: the first file that cannot be written is reported
    on stdout and the remaining files are skipped.

    :param tcp: stream object exposing ``addr`` as ``((src, sport),
        (dst, dport))`` and ``server`` / ``client`` halves, each with
        ``data`` and ``count``.
    """
    tfilename = nids.param('filename')
    print(tfilename)

    # Base name of the capture file, e.g. "/x/foo.pcap" -> "foo".
    # NOTE(review): the "." in the pattern is unescaped, so it matches any
    # character before "pcap"; kept as-is so output file names do not change.
    matches = re.findall(r'\/\w*.pcap', tfilename)
    filename = str(matches).strip('[]\'/').split(".")[0]

    srcip, srcport = tcp.addr[0][0], tcp.addr[0][1]
    dstip, dstport = tcp.addr[1][0], tcp.addr[1][1]

    # client to server
    toserver = tcp.server.data[:tcp.server.count]
    fname = "%s/%s-%s-%s-%s-%s-%s-CtoS.tcp" % (
        logdir, filename, int(time.time()), srcip, srcport, dstip, dstport)
    if not _write_stream_file(fname, toserver):
        return
    print("Client to Server: " + fname)

    fname = "%s/%s-%s-%s-%s-%s-%s-CtoS-tcp.fuzz" % (
        logdir, filename, int(time.time()), srcip, srcport, dstip, dstport)
    if not _write_stream_file(fname, spamsum.spamsum(toserver)):
        return
    print("Client to Server Hashed :" + fname)

    # server to client
    toclient = tcp.client.data[:tcp.client.count]
    fname = "%s/%s-%s-%s-%s-%s-%s-StoC.tcp" % (
        logdir, filename, int(time.time()), dstip, dstport, srcip, srcport)
    # The original opened this file without any error handling, unlike the
    # other three; it now follows the same best-effort policy.
    if not _write_stream_file(fname, toclient):
        return
    print("Server to Client: " + fname)

    fname = "%s/%s-%s-%s-%s-%s-%s-StoC-tcp.fuzz" % (
        logdir, filename, int(time.time()), dstip, dstport, srcip, srcport)
    if not _write_stream_file(fname, spamsum.spamsum(toclient)):
        return
    print("Sever to Client Hashed: " + fname)
def _write_packet_file(fname, data):
    """Write ``data`` to ``fname``; report and return False on an I/O error."""
    try:
        with open(fname, "w") as out:
            out.write(data)
    except (IOError, OSError):
        print("unable to log to %s" % (logdir,))
        return False
    return True


def logPkt(addr, payload, proto=17):
    """Log a single non-TCP packet and the spamsum digest of its payload.

    The payload is written below ``logdir`` to a file named from the capture
    file's base name, the current Unix time and the packet addresses; the
    digest goes to the same name with ``.fuzz`` appended.

    For UDP (``proto == 17``) ``addr`` is indexed as ``((src, sport),
    (dst, dport))`` and the file ends in ``.udp``.  For any other protocol
    ``addr[0]`` and ``addr[1]`` are passed through ``long2ip`` and the
    extension is the protocol's name, or its number when it is not in the
    table.

    Logging is best effort: a file that cannot be written is reported on
    stdout and the function returns.

    :param addr: packet addresses, shaped as described above.
    :param payload: packet payload to store and hash.
    :param proto: IP protocol number (defaults to 17, UDP).
    """
    tfilename = nids.param('filename')
    print(tfilename)

    # Base name of the capture file, e.g. "/x/foo.pcap" -> "foo".
    # NOTE(review): the "." in the pattern is unescaped, so it matches any
    # character before "pcap"; kept as-is so output file names do not change.
    matches = re.findall(r'\/\w*.pcap', tfilename)
    filename = str(matches).strip('[]\'/').split(".")[0]

    # log a single packet, for UDP and other IP (non-TCP)
    # ICMP and IGMP used to be keyed by the strings 'proto1' / 'proto2',
    # which an integer protocol number can never match; they are keyed by
    # their protocol numbers 1 and 2 like every other entry.
    ip_p = {1: 'icmp', 2: 'igmp', 6: 'tcp', 17: 'udp', 41: 'ipv6',
            47: 'gre', 50: 'esp', 51: 'ah', 58: 'icmp6', 94: 'ipip',
            115: 'l2tp', 255: 'raw'}

    if proto == 17:
        fname = "%s/%s-%s-%s-%s-%s-%s.udp" % (
            logdir, filename, int(time.time()),
            addr[0][0], addr[0][1], addr[1][0], addr[1][1])
    else:
        proto = ip_p.get(proto, proto)
        print(proto)
        fname = "%s/%s-%s-%s-%s.%s" % (
            logdir, filename, int(time.time()),
            long2ip(addr[0]), long2ip(addr[1]), proto)

    # The original opened the payload file without error handling; both
    # files now share the same best-effort policy.
    if not _write_packet_file(fname, payload):
        return
    print(fname)

    fname = fname + ".fuzz"
    if not _write_packet_file(fname, spamsum.spamsum(payload)):
        return
    print(fname)
def test_match(self):
    """match() must return the reference score for each pair of fixtures."""
    cases = (
        (self.s1, self.s1, 100),
        (self.s1, self.s2, 72),
        (self.s2, self.s1, 72),
        (self.s1, self.s3, 0),
        (self.s2, self.s3, 0),
    )
    # Both digests are recomputed for every pair, in fixture order.
    for left, right, score in cases:
        self.assertEqual(
            spamsum.match(spamsum.spamsum(left), spamsum.spamsum(right)),
            score)
def alldist(filex, filey):
    """Return a spamsum-based distance between two files, reverse-augmented.

    Each file's content is concatenated with its own reverse before being
    hashed with ``spamsum.spamsum``.  The ``spamsum.match`` score of the two
    digests is turned into a distance as ``(100 - score) / 100.0``.  Assuming
    ``match`` yields a 0-100 score, the result lies between 0.0 (identical
    digests) and 1.0 (no similarity).

    :param filex: path of the first file.
    :param filey: path of the second file.
    :returns: the distance as a float.
    :raises IOError: if either file cannot be opened or read.
    """
    # Context managers close the handles deterministically; the original
    # left both files open until garbage collection.
    with open(filex, "r") as xfile:
        xread = xfile.read()
    with open(filey, "r") as yfile:
        yread = yfile.read()

    # Take the reverse and append it to the original content.
    xhash = spamsum.spamsum(xread + xread[::-1])
    yhash = spamsum.spamsum(yread + yread[::-1])

    similarity = spamsum.match(xhash, yhash)
    # Dividing by a float literal already yields a float.
    return (100 - similarity) / 100.00
def calculateFuzzy(elements):
    """Attach a spamsum digest to every element of a page, in place.

    ``elements`` maps a category name to a list of items.  Each list is
    replaced by a list of ``(content, digest, source)`` triples:

    * ``'images'`` / ``'backgroundImages'``: every URL is downloaded (once
      per distinct URL across both categories); the triple is
      ``(image bytes, digest of the bytes, url)``.  URLs that cannot be
      downloaded or are malformed are reported on stdout and dropped.
    * ``'alerts'`` / ``'texts'``: ``(text, digest of the UTF-8 encoded
      text, None)``.
    * any other category (music): the part of the URL before the first
      ``?`` is hashed, giving ``(stripped url, digest, original url)``.

    :param elements: dict of category name -> list of items; modified in
        place.
    :returns: the same ``elements`` dict.
    :raises ValueError: re-raised from ``urllib2.urlopen`` unless it reports
        an unknown URL type.
    """
    print("**********************************************************************************************\n")
    print(elements)

    pics = {}  # url -> downloaded bytes, or None when the download failed
    # Iterate over a snapshot because the values are replaced as we go.
    for key, value in list(elements.items()):
        if key in ('images', 'backgroundImages'):
            for url in value:
                if url in pics:
                    continue
                try:
                    pics[url] = urllib2.urlopen(url).read()
                except (urllib2.HTTPError, urllib2.URLError):
                    print("Not able to download image: %s\n" % (url, ))
                    pics[url] = None
                except ValueError as e:
                    if 'unknown url type' not in str(e):
                        # Bare raise keeps the original traceback intact.
                        raise
                    print("Incorrectly formatted URL.\n")
                    pics[url] = None
            # Keep only the images that were actually fetched.
            elements[key] = [
                (pics[url], spamsum.spamsum(pics[url]), url)
                for url in value
                if pics[url] is not None
            ]
        elif key in ('alerts', 'texts'):
            elements[key] = [
                (text, spamsum.spamsum(text.encode('utf-8')), None)
                for text in value
            ]
        else:  # music
            hashed = []
            for url in value:
                # Hash the URL without its query string.
                base = url.split(u'?')[0]
                hashed.append((base, spamsum.spamsum(base), url))
            elements[key] = hashed
    return elements
def similarityIndex(elementsWebpage, sdefaces):
    """Return a similarity index between page elements and a signature.

    A table of ``spamsum.match`` scores is built with one row per signature
    element and one column per page element, then handed to ``calculus``.
    When the signature has more elements than the page, ten random subsets
    of signature rows (each as large as the page) are tried instead of the
    full table.  The result is the best sum found divided by
    ``len(sdefaces)``.

    NOTE(review): ``maxSim`` is a local name in this function, so whatever
    ``calculus`` computes cannot reach it; as written the result is always
    0.0.  This presumably needs a ``global maxSim`` declaration or should use
    a value returned by ``calculus`` -- confirm against ``calculus`` before
    changing it.  Behaviour is deliberately left as it was.

    :param elementsWebpage: sequence of elements extracted from the page.
    :param sdefaces: sequence of elements of the stored signature.
    :returns: the similarity index as a float; 0.0 for an empty signature
        (this used to raise ZeroDivisionError).
    """
    if not sdefaces:
        return 0.0

    # Hash each page element once and each signature element once; the
    # original recomputed both digests for every cell of the table.
    page_hashes = [spamsum.spamsum(element) for element in elementsWebpage]
    matchesTable = []
    for signature_element in sdefaces:
        signature_hash = spamsum.spamsum(signature_element)
        matchesTable.append(
            [spamsum.match(signature_hash, page_hash)
             for page_hash in page_hashes])

    mSum = 0
    if len(sdefaces) > len(elementsWebpage):
        # Too many signature rows for an exhaustive search: sample subsets.
        for _ in range(0, 10):
            s = random.sample(range(0, len(sdefaces)), len(elementsWebpage))
            matchesTableP = [matchesTable[index] for index in s]
            maxSim = 0
            calculus(matchesTableP, [], 0)
            if maxSim > mSum:
                mSum = maxSim
    else:
        maxSim = 0
        calculus(matchesTable, [], 0)
        mSum = maxSim
    return mSum * 1.0 / len(sdefaces)
sport= pkt[Ether][IP][TCP].sport dstport= pkt[Ether][IP][TCP].dport flags= pkt.sprintf("%TCP.flags%") except IndexError: sport="none" dstport="none" flags="none" ##Raw Payload try: rawpay = pkt[Raw].load except IndexError: hashres="none" if len(rawpay) > 0: hashres = spamsum.spamsum(rawpay) ##Header Hash header = srcip+","+str(sport)+","+dstip+","+str(dstport)+","+str(prot)+","+str(id)+","+flags hhash = spamsum.spamsum( hhash ) # File name if TCP or UDP if prot==6 or prot==17 : output=n+"-"+srcip+"-"+str(prot)+"."+str(dstport) else: output=n+"-"+srcip+"."+str(prot) # Print Output print output,"\t", srcip, sport," > ", dstip, dstport, prot, id, flags, hashres file = open(output, "w") cw = csv.writer(file)
# Build one fuzzy-hash profile file per labelled sample in the dataset
# directory: each sample is identified by its MD5, looked up in the label
# table, and its spamsum digest is stored as ./profiles/<label>-<md5>.fuzz.
listing = os.listdir(datasetdir)
selection = []
count = 1
print("List length=  %s" % (len(listing),))

# Parse the tab-separated label table once instead of re-opening and
# re-reading it for every sample; skipped when there is nothing to process,
# so an empty dataset still never touches the labels file.
label_rows = []
if listing:
    with open("/home/fimz/Dev/tillmann/dataset/labels.txt", "r") as labels_file:
        label_rows = list(csv.reader(labels_file, delimiter='\t'))

for infile in listing:
    print("Current file is: " + datasetdir + infile)
    myfile = datasetdir + infile
    # Close the sample as soon as it has been read.
    with open(myfile, 'r') as sample_file:
        f = sample_file.read()
    mhash = hashlib.md5(f).hexdigest()
    fhash = spamsum.spamsum(f)
    print("Iteration:  %s" % (count,))
    print("md5 Hash is: " + mhash)
    print("Fuzzy Hash is: " + fhash)
    for rows in label_rows:
        # A row matches when any of its columns equals the sample's MD5;
        # the label is taken from the second column.
        if mhash in rows:
            print(rows)
            print("Label =  %s" % (rows[1],))
            label = rows[1]
            logfile = label + "-" + mhash + ".fuzz"
            # Closing the profile file guarantees the digest is flushed.
            with open("./profiles/" + logfile, 'w') as fp:
                fp.write(fhash)
            print("Written to file successfully")
    count = count + 1
sport= pkt[Ether][IP][TCP].sport dstport= pkt[Ether][IP][TCP].dport flags= pkt.sprintf("%TCP.flags%") except IndexError: sport="0" dstport="0" flags="0" ##Raw Payload try: rawpay = pkt[Raw].load except IndexError: hashres="none" if len(rawpay) > 0: hashres = spamsum.spamsum(rawpay) # File name if TCP or UDP, currently on they are supported if prot==6 or prot==17 : output=str(n)+"-"+srcip+"-"+str(prot)+"."+str(dstport) else: output=str(n)+"-"+srcip+"."+str(prot) print "test" # Print Output print output,"\t", srcip, sport," > ", dstip, dstport, prot, id, flags, hashres file = open(output, "w") cw = csv.writer(file) cw.writerow([srcip,sport,dstip,dstport,prot,id,flags,hashres]) n=str(int(n)+1) file.close()
selection = [] count = 1 print "List length= ", len(listing) for infile in listing: print "Current file is: " + datasetdir + infile # print "File Submitted Successfully" #temphash = infile.split(".") myfile = datasetdir + infile # fhash = temphash[0] f = open(myfile,'r').read() # res = re.findall(r'"scan_id": "(\w.*)",',f) # if res: orig = f final = f + f[::-1] mhash = hashlib.md5(orig).hexdigest() fhash = spamsum.spamsum(final) #f.close() print "Iteration: ", count print "md5 Hash is: " + mhash print "Fuzzy Hash is: " + fhash # f = open("/home/Dev/tillmann/dataset/labels.txt",'r') #res = subprocess.Popen('grep "ff731f3c6a580ef165bdcf7aa08f1fff" labels.txt | gawk \'{print $2}\'', shell=True, stdout=subprocess.PIPE) reader = csv.reader(open("/home/fimz/Dev/datasets/tillmann/dataset/labels.txt", "r"), delimiter='\t') for rows in reader: if mhash in rows: print rows print "Label = ", rows[1] label = rows[1] logfile = label + "-" + mhash + ".rev" fp = open("./profiles-rev/" + logfile, 'w') fp.write( fhash )