Example #1
0
    def test_compare(self):
        """Check that two known signatures compare to 22 as text and as bytes."""
        # Text signatures.
        text_score = ssdeep.compare(
            "3:AXGBicFlgVNhBGcL6wCrFQEv:AXGHsNhxLsr2C",
            "3:AXGBicFlIHBGcL6wCrFQEv:AXGH6xLsr2C"
        )
        # The very same signatures, passed as byte strings.
        bytes_score = ssdeep.compare(
            b"3:AXGBicFlgVNhBGcL6wCrFQEv:AXGHsNhxLsr2C",
            b"3:AXGBicFlIHBGcL6wCrFQEv:AXGH6xLsr2C"
        )

        assert text_score == 22
        assert bytes_score == 22
Example #2
0
 def _calc_score(self, lt_new):
     """Score ``lt_new`` against every entry of ``self._lttable``.

     Args:
         lt_new: Candidate entry; it is compared via ``str(lt_new)`` and
             its ``ltid`` attribute is used as the hash-cache key.

     Returns:
         list: ``(ltid, score)`` tuples, one per entry of ``self._lttable``.

     Raises:
         ImportError: If the ``ssdeep`` package is not installed.
     """
     try:
         import ssdeep
     except ImportError:
         raise ImportError(
                 "ltgroup algorithm <ssdeep> needs python package ssdeep")

     new_hash = ssdeep.hash(str(lt_new))

     if not self._mem_hash:
         # No hash cache: delegate each comparison to hash_score on the
         # string forms of both entries.
         return [(known.ltid, hash_score(str(lt_new), str(known)))
                 for known in self._lttable]

     cache = self._d_hash
     if len(cache) == 0:
         # First use: fill the cache with the hash of every known entry.
         for known in self._lttable:
             cache[known.ltid] = ssdeep.hash(str(known))

     scores = []
     # NOTE(review): this branch reports the enumerate() position as the
     # ltid, whereas the uncached branch reports known.ltid -- presumably
     # both agree because the table is ordered by ltid; confirm.
     for position, known in enumerate(self._lttable):
         scores.append((position, ssdeep.compare(new_hash, cache[known.ltid])))
     cache[lt_new.ltid] = new_hash
     return scores
Example #3
0
def fuzz_search_fast(id, p, fuzz):
    """Score stored fuzzy hashes whose block size is close to that of ``fuzz``.

    Every document of the metadata collection is loaded, and the value stored
    under field ``p`` is compared with ``fuzz`` when its block size (the
    integer before the first ``:``) lies within ``lap`` of the block size of
    ``fuzz``. Positive scores are collected in a dict keyed by ``file_id``.

    :param id: Not used by the visible part of this function.
    :param p: Name of the document field that holds the fuzzy hash.
    :param fuzz: Fuzzy hash to compare against, in ``block:...`` form.

    NOTE(review): the visible body ends without returning the collected
    scores; the remainder of the function appears to be missing from this
    excerpt, so no return statement has been invented here.
    """
    block = int(fuzz.split(':')[0])
    lap = 500
    client = MongoClient(env["metadata"]["host"], env["metadata"]["port"])
    db = client[env["db_metadata_name"]]
    coll_meta = db[env["db_metadata_collection"]]

    # Materialise the cursor before comparing, as the original code did.
    documents = list(coll_meta.find({}, {"file_id": 1, p: 1}))

    dic = {}
    for a in documents:
        try:
            f_comp = a[p]
            block_comp = int(f_comp.split(':')[0])
            if block - lap <= block_comp <= block + lap:
                res = ssdeep.compare(f_comp, fuzz)
                if res > 0:
                    dic[a["file_id"]] = res
        except Exception as e:
            # Best effort: a document without the field or with a malformed
            # hash is reported and skipped. ``except ... as`` and a
            # single-argument print() behave the same on Python 2 and 3.
            print(str(e))
            continue
Example #4
0
 def testCompareHash(self):
     """Two known, similar signatures must compare to a score of 22."""
     first_signature = "3:AXGBicFlgVNhBGcL6wCrFQEv:AXGHsNhxLsr2C"
     second_signature = "3:AXGBicFlIHBGcL6wCrFQEv:AXGH6xLsr2C"
     score = ssdeep.compare(first_signature, second_signature)
     self.assertEqual(score, 22)
Example #5
0
 def _check_duplicated(self, hash_md5, hash_ssdeep, doc_size):
     """Raise if the document is already stored, exactly or approximately.

     Args:
         hash_md5: Key passed to ``self._database.get_doc`` for the exact check.
         hash_ssdeep: ssdeep hash compared with each size-similar document.
         doc_size: Size of the document; defines the size window searched.

     Raises:
         error.DocumentDuplicatedExact: ``get_doc(hash_md5)`` returned a
             truthy value.
         error.DocumentDuplicatedSimilar: A document in the size window
             scored at least ``self.SSDEEP_THRESHOLD``.
     """
     database = self._database
     if database.get_doc(hash_md5):
         raise error.DocumentDuplicatedExact()

     # Size window: half the document size either way, but never narrower
     # than 102400, and never reaching below zero.
     tolerance = max(0.5 * doc_size, 102400)
     smallest = max(0, doc_size - tolerance)
     largest = doc_size + tolerance

     for candidate in database.get_similar_docs(smallest, largest):
         similarity = ssdeep.compare(hash_ssdeep, candidate.hash_ssdeep)
         if similarity >= self.SSDEEP_THRESHOLD:
             raise error.DocumentDuplicatedSimilar()
Example #6
0
def vectorize_with_sparse_features(sparse_feature_set, feature_count, c2_data):
    """Build a 1 x ``feature_count`` sparse row of normalised ssdeep scores.

    :param sparse_feature_set: Iterable of ``(index, (offset, code,
        ssdeep_hash))`` pairs; ``index`` is the column to fill.
    :param feature_count: Number of columns of the returned matrix.
    :param c2_data: Mapping from offset to a dict with the keys ``"code"``
        and ``"content_ssdeep"``.
    :returns: ``lil_matrix`` of shape ``(1, feature_count)``. A column is set
        to ``ssdeep score / 100`` when the offset is present in ``c2_data``
        with the same code; all other columns stay zero.
    """
    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy; the builtin yields the same float64 dtype.
    vector = lil_matrix((1, feature_count), dtype=float)
    for index, (offset, code, ssdeep_hash) in sparse_feature_set:
        if offset not in c2_data:
            continue
        if c2_data[offset]["code"] == code:
            d = ssdeep.compare(c2_data[offset]["content_ssdeep"], ssdeep_hash)
            vector[0, index] = float(d) / 100.0

    return vector
Example #7
0
def vectorize(feature_set, c2_data):
    """Build a dense vector of normalised ssdeep scores, one slot per feature.

    :param feature_set: Sequence of ``(offset, code, ssdeep_hash)`` tuples.
    :param c2_data: Mapping from offset to a dict with the keys ``"code"``
        and ``"content_ssdeep"``.
    :returns: 1-D float array of length ``len(feature_set)``. Slot ``i`` holds
        ``ssdeep score / 100`` when feature ``i``'s offset is present in
        ``c2_data`` with the same code, and ``0.0`` otherwise.
    """
    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy; the builtin yields the same float64 dtype.
    vector = np.zeros((len(feature_set),), dtype=float)
    for index, (offset, code, ssdeep_hash) in enumerate(feature_set):
        if offset not in c2_data:
            continue
        if c2_data[offset]["code"] == code:
            d = ssdeep.compare(c2_data[offset]["content_ssdeep"], ssdeep_hash)
            vector[index] = float(d) / 100.0

    return vector
Example #8
0
def process_sigs(cursor, sig_list, bin_list):
    """ Process the function signatures

    Go through every function and compare it to functions in every binary
    Get the highest score per function and add it to a score_list
    cursor - the database cursor
    sig_list - the list of function signatures for analysis
    bin_list - the list of binaries in the current database to compare to

    Returns a list with one entry per binary in bin_list; each entry is a
    list holding, for every signature in sig_list, the highest comparison
    score found among that binary's stored function hashes (0 if none)."""

    # Total number of comparisons, used only for progress reporting.
    maxval = 0
    for row in bin_list:
        cursor.execute("SELECT count(hash) FROM functions WHERE binaryid=?", (row[0],))
        maxval += int(cursor.fetchone()[0])
    maxval = maxval*len(sig_list)

    if progressbar:
        widgets = [" ", progressbar.Bar(marker="#"), " ", progressbar.Percentage(), " ", progressbar.ETA()]
        pbar = progressbar.ProgressBar(widgets=widgets,
                           maxval=maxval).start()
    else:
        pbar = None

    score_list = []
    i = 0
    for row in bin_list:
        # The stored hashes depend only on the binary, not on the signature,
        # so they are fetched once per binary instead of once per signature.
        cursor.execute("SELECT hash FROM functions WHERE binaryid=?",
                       (row[0], ))
        stored_hashes = [h[0] for h in cursor.fetchall()]

        function_score_list = []
        for sig in sig_list:
            highest_score = 0

            for stored in stored_hashes:
                strength = ssdeep.compare(sig, stored)

                if strength > highest_score:
                    highest_score = strength

                i += 1
                if pbar:
                    pbar.update(i)
                elif i % 10000 == 0 or i == maxval:
                    print("%d / %d Done" % (i, maxval))

            function_score_list.append(highest_score)

        score_list.append(function_score_list)
    if pbar:
        pbar.finish()
    return score_list
Example #9
0
def main(mailFields, key, msgMailRequest):
    """Decides if a spam is new or old.
    Takes following parameters:
    a. mailFields - parsed spam fields,
    b. key - spam file name,
    c. msgMailRequest - original spam that is to be relayed.

    Passes spam to shivaaddnewrecord module if spam is new or list is empty.
    Else, passes spam to shivaprocessold module.

    A spam counts as old when a stored record of similar length (within 10%)
    either has the same 's_id' or has an ssdeep ratio at or above the
    threshold (75 when the mail has a text part, 85 otherwise).
    """
    logging.info("[+]Inside shivadecide module.")
    records = server.QueueReceiver.records

    # Nothing stored yet: the spam is new by definition.
    if not records:
        shivaaddnewrecord.main(mailFields, key, msgMailRequest)
        return

    threshold = 75 if mailFields['text'] else 85

    oriLen = int(mailFields['len'])
    minLen, maxLen = int(oriLen * 0.90), int(oriLen * 1.10)

    for record in records:
        # Only records of comparable length are worth comparing.
        if not (minLen <= record['len'] <= maxLen):
            continue

        # Identifiers are compared by value; 'is' only tests object identity
        # and is unreliable for strings.
        if mailFields['s_id'] == record['s_id']:
            matched = True
        else:
            ratio = ssdeep.compare(mailFields['ssdeep'], record['ssdeep'])
            matched = ratio >= threshold

        if matched:
            shivaprocessold.main(mailFields, record['s_id'], key, msgMailRequest)
            # Stop at the first match so the spam is handled exactly once
            # and is not additionally registered as new below.
            break
    else:
        # The loop ran to completion without a match: the spam is new.
        shivaaddnewrecord.main(mailFields, key, msgMailRequest)
Example #10
0
    def execute(self, input_data):
        ''' Rank the other PE samples by ssdeep similarity to this sample.

            Returns a dict with this sample's 'md5' and a 'sim_list' of
            {'md5', 'sim'} entries, highest similarity first, keeping only
            entries whose similarity is greater than zero.
        '''
        my_ssdeep = input_data['meta_deep']['ssdeep']
        my_md5 = input_data['meta_deep']['md5']

        # Pull md5/ssdeep for every PE sample known to the backend.
        candidates = self.c.batch_work_request(
            'meta_deep', {'type_tag':'pe','subkeys':['md5','ssdeep']})

        # Score every sample except this one.
        scored = [
            {'md5': other['md5'], 'sim': ssd.compare(my_ssdeep, other['ssdeep'])}
            for other in candidates
            if other['md5'] != my_md5
        ]

        # Best matches first; non-matching samples (score 0) are dropped.
        ranked = sorted(scored, key=itemgetter('sim'), reverse=True)
        matches = [entry for entry in ranked if entry['sim'] > 0]
        return {'md5': my_md5, 'sim_list': matches}
Example #11
0
def creategraph(fuzzyhashes, threshold=50):
    """Build an undirected similarity graph from a mapping of fuzzy hashes.

    :param fuzzyhashes: Mapping from a node key to its ssdeep hash.
    :param threshold: Minimum score for two keys to be linked (default 50).
    :returns: ``nx.Graph`` with an edge, weighted by the score, between every
        pair of keys whose hashes score at least ``threshold``. Keys without
        any such partner do not appear in the graph.
    """
    from itertools import combinations

    G = nx.Graph()

    # combinations() visits every unordered pair exactly once, in the same
    # order as the former nested loops, and works on Python 2 and 3 alike
    # (dict.iterkeys() exists only on Python 2).
    for k, l in combinations(list(fuzzyhashes), 2):
        sim = ssdeep.compare(fuzzyhashes[k], fuzzyhashes[l])
        # if similarity is >= threshold, add it to the graph
        if sim >= threshold:
            G.add_edge(k, l, weight=sim)

    return G
Example #12
0
def comparetrees(dir1, dir2, diffs):
    """
    Compare all subdirectories and files in two directory trees
    Same files have a matching score of 100
    Symlinks have a matching score of 100
    Different files have a matching score calculated using ssdeep (0 to 100)

    One score per compared name is appended to the ``diffs`` list and a
    line per name is printed; nothing is returned.
    """
    names1 = os.listdir(dir1)
    names2 = os.listdir(dir2)
    comparedirs(dir1, dir2, diffs, names1, names2)
    common = intersect(names1, names2)
    missed = common[:]

    # compare contents of files in common
    for name in common:
        path1 = os.path.join(dir1, name)
        path2 = os.path.join(dir2, name)
        if not (os.path.isfile(path1) and os.path.isfile(path2)):
            continue
        missed.remove(name)
        # Path as reported: everything after the first '/'-separated component.
        shown = '/'.join(path1.split('/')[1:])
        # 'with' guarantees both handles are closed, which the previous
        # version never did.
        with open(path1, 'rb') as file1, open(path2, 'rb') as file2:
            while True:
                bytes1 = file1.read(blocksize)
                bytes2 = file2.read(blocksize)
                if (not bytes1) and (not bytes2):   # same file
                    # Single-string print() gives identical output on
                    # Python 2 and Python 3.
                    print('  100 matches ' + shown)
                    diffs.append(100)
                    break
                if bytes1 != bytes2:    # different content
                    score = ssdeep.compare(ssdeep.hash_from_file(path1),
                                           ssdeep.hash_from_file(path2))
                    print(str(score).rjust(5) + ' differs ' + shown)
                    diffs.append(score)
                    break

    # recur to compare directories in common
    for name in common:
        path1 = os.path.join(dir1, name)
        path2 = os.path.join(dir2, name)
        if os.path.isdir(path1) and os.path.isdir(path2):
            missed.remove(name)
            comparetrees(path1, path2, diffs)

    # same name but not both files or dirs (symlinks)
    for name in missed:
        diffs.append(100)
        print('    - ignored '+name+' (symlink)')
def main():
    """Turn per-app ssdeep hashes into similarity bucket vectors and save them.

    Reads ``benign_apk/benign_apk_ssdeep.csv`` and
    ``malicious_apk/malicious_apk_ssdeep.csv`` relative to the current
    directory, compares every app's hash with 1000 randomly chosen hashes of
    the same category, and encodes the mean score as a one-hot vector over
    ``similarity_buckets``. The result is written to
    ``app_hash_vectors.json`` and a short summary is printed.
    """
    all_hashes = {'malicious': [], 'benign': []}
    app_malicious_map = {} # mapping from android app names to [1,0] (malware) or [0,1] (goodware)
    similarity_buckets = ['similarity_limit_0', 'similarity_limit_0.2', 'similarity_limit_0.4', 'similarity_limit_0.6', 'similarity_limit_0.8', 'similarity_limit_1.0']
    # Upper bound of each bucket, parsed once from the bucket name.
    bucket_limits = [float(limit.split('_')[-1]) for limit in similarity_buckets]

    for i, directory in enumerate(['benign_apk', 'malicious_apk']):
        category = 'malicious' if i else 'benign'
        # Open the CSV through its path instead of os.chdir(), so the
        # working directory is never left changed when reading fails.
        csv_path = os.path.join(directory, directory.split('_')[0] + '_apk_ssdeep.csv')
        with open(csv_path) as hashes:
            for j, line in enumerate(hashes):
                if j == 0:
                    continue  # first line is skipped (presumably the CSV header)
                fields = line.split(',')
                b64hash = fields[0]
                # Last path component of the last column, minus its final
                # two characters.
                app_name = fields[-1].split('/')[-1][:-2]
                app_malicious_map[app_name] = [1,0] if i else [0,1]
                all_hashes[category].append((app_name, b64hash))

    all_apps = {} # mapping from each app to its bucket vector and classification
    num_zero = {}
    num_each = {}
    for category in all_hashes:
        num_zero[category] = 0
        num_each[category] = 0
        for app_name, this_hash in all_hashes[category]:
            similarity_scores = [
                ssdeep.compare(this_hash, random.choice(all_hashes[category])[1])
                for _ in range(1000)
            ]
            score = numpy.mean(similarity_scores)
            num_each[category] += 1
            if score == 0:
                num_zero[category] += 1

            # One-hot encode: bucket k is set when last_limit < score <= limit_k.
            bit_vector = []
            last_limit = -0.01
            for float_limit in bucket_limits:
                bit_vector.append(1 if last_limit < score <= float_limit else 0)
                last_limit = float_limit
            if not any(bit_vector): # score > 1
                bit_vector[-1] = 1
            all_apps[app_name] = {'vector': bit_vector, 'malicious': app_malicious_map[app_name]}

    with open('app_hash_vectors.json', 'w') as outfile:
        json.dump({'features': similarity_buckets, 'apps': all_apps}, outfile)
    # The benign total is num_each['benign']; the previous version printed
    # num_zero['benign'] twice.
    print('{} of {} malicious apps and {} of {} benign apps had zero similarity found'.format(num_zero['malicious'], num_each['malicious'], num_zero['benign'], num_each['benign']))
    print('Wrote data on ' + str(len(all_apps)) + ' apps to a file.')
Example #14
0
def compare_ssdeep(payload1, payload2):
    """
    Compare binary payloads with ssdeep to determine how similar they are

    :param bytes payload1: Binary content to compare
    :param bytes payload2: Binary content to compare

    :returns: Match score from 0 (no match) to 100, or None when the
        comparison itself fails
    :rtype: int or None

    """

    payload1_hash = get_ssdeep(payload1)
    payload2_hash = get_ssdeep(payload2)

    try:
        match = ssdeep.compare(payload1_hash, payload2_hash)
    except Exception:
        # Still best effort, but no longer swallows KeyboardInterrupt or
        # SystemExit the way a bare ``except:`` does.
        match = None

    return match
def fuzz_search_fast(id, p, fuzz):
    """Score stored fuzzy hashes whose block size is close to that of ``fuzz``.

    Every document of the metadata collection is loaded, and the value stored
    under field ``p`` is compared with ``fuzz`` when its block size (the
    integer before the first ``:``) lies within ``lap`` of the block size of
    ``fuzz``. Positive scores are collected in a dict keyed by ``file_id``.

    :param id: Only used in the error log message.
    :param p: Name of the document field that holds the fuzzy hash.
    :param fuzz: Fuzzy hash to compare against, in ``block:...`` form.

    NOTE(review): the visible body ends without returning the collected
    scores; the remainder of the function appears to be missing from this
    excerpt, so no return statement has been invented here.
    """
    block = int(fuzz.split(':')[0])
    lap = 500
    coll_meta = db[envget("db_metadata_collection")]

    # Materialise the cursor before comparing, as the original code did.
    documents = list(coll_meta.find({}, {"file_id": 1, p: 1}))

    dic = {}
    for a in documents:
        try:
            f_comp = a[p]
            block_comp = int(f_comp.split(':')[0])
            if block - lap <= block_comp <= block + lap:
                res = ssdeep.compare(f_comp, fuzz)
                if res > 0:
                    dic[a["file_id"]] = res
        except Exception:
            # Best effort: log the failure (logging.exception records the
            # traceback itself) and move on to the next document. The old
            # ``except Exception, e`` form is a syntax error on Python 3.
            logging.exception(
                "fuzz_search_fast(id=" + str(id) + ",p=" + str(p) + ",fuzz=" + str(fuzz))
            continue
Example #16
0
def searchFuzzy(fuzz, limit, thresh):
    """Return stored files whose fuzzy hash scores at least ``thresh`` against ``fuzz``.

    :param fuzz: Fuzzy hash to compare against.
    :param limit: Maximum number of documents fetched from the collection.
    :param thresh: Minimum score for a file to be included.
    :returns: dict mapping ``file_id`` to its score. Documents whose
        comparison raises ``InternalError`` are printed and skipped.
    """
    client = MongoClient(envget('metadata.host'), envget('metadata.port'))
    db = client[envget('db_metadata_name')]
    # Resolve the collection name through the configuration, as the sibling
    # fuzz_search_fast does; the previous code indexed the database with the
    # literal string "db_metadata_collection", i.e. the name of the setting
    # rather than its value.
    coll_meta = db[envget('db_metadata_collection')]

    # Materialise the cursor before comparing, as the original code did.
    documents = list(
        coll_meta.find({}, {"file_id": 1, "fuzzy_hash": 1}).limit(limit))

    ret = {}
    for a in documents:
        res = -1  # printed as-is when the comparison below fails
        try:
            res = ssdeep.compare(a["fuzzy_hash"], fuzz)
        except InternalError:
            print(str(res) + "------" +
                  str(a["fuzzy_hash"]) + "-----" + str(a["file_id"]))
            continue
        if res >= thresh:
            ret[a["file_id"]] = res

    return ret
Example #17
0
    def test_compare(self):
        """Invalid arguments to ssdeep.compare must raise the expected errors."""
        failing_calls = (
            # A missing (None) signature on either side is a TypeError.
            (TypeError, "3:AXGBicFlgVNhBGcL6wCrFQEv:AXGHsNhxLsr2C", None),
            (TypeError, None, "3:AXGBicFlIHBGcL6wCrFQEv:AXGH6xLsr2C"),
            # An empty signature is rejected with ssdeep.InternalError.
            (ssdeep.InternalError, "3:AXGBicFlgVNhBGcL6wCrFQEv:AXGHsNhxLsr2C", ""),
        )
        for expected_error, left, right in failing_calls:
            with pytest.raises(expected_error):
                ssdeep.compare(left, right)
Example #18
0
    def run(self):
        """Worker loop: enrich queued domain dicts until the queue is empty.

        Each job taken from ``self.jobs`` is a dict with a ``'domain-name'``
        key. Depending on the enabled options, DNS records, an MX "spy"
        flag, a GeoIP country, service banners and an ssdeep score of the
        fetched page are added to that dict. The loop ends when the queue is
        empty or ``self.kill_received`` is set.
        """
        if self.option_extdns:
            if self.nameservers:
                resolv = Resolver(configure=False)
                resolv.nameservers = self.nameservers
            else:
                resolv = Resolver()
                resolv.search = []

            resolv.lifetime = REQUEST_TIMEOUT_DNS * REQUEST_RETRIES_DNS
            resolv.timeout = REQUEST_TIMEOUT_DNS
            EDNS_PAYLOAD = 1232
            resolv.use_edns(edns=True, ednsflags=0, payload=EDNS_PAYLOAD)

            # Use resolve() when the resolver object offers it, otherwise
            # fall back to query().
            if hasattr(resolv, 'resolve'):
                resolve = resolv.resolve
            else:
                resolve = resolv.query

        if self.option_geoip:
            geo = geoip()

        while not self.kill_received:
            try:
                domain = self.jobs.get(block=False)
            except queue.Empty:
                self.kill_received = True
                return

            # Reset the per-domain lookup flags for BOTH resolution paths.
            # They used to be initialised only in the extdns branch, so the
            # getaddrinfo fallback could hit unbound names (dns_mx, or dns_a
            # after a failed lookup) or silently reuse the flags of the
            # previously processed domain.
            nxdomain = False
            dns_ns = False
            dns_a = False
            dns_aaaa = False
            dns_mx = False

            if self.option_extdns:
                try:
                    domain['dns-ns'] = self.__answer_to_list(
                        resolve(domain['domain-name'],
                                rdtype=dns.rdatatype.NS))
                    dns_ns = True
                except NXDOMAIN:
                    nxdomain = True
                except NoNameservers:
                    domain['dns-ns'] = ['!ServFail']
                except DNSException as e:
                    self.__debug(e)

                # A and AAAA are queried unless the name does not exist.
                if nxdomain is False:
                    try:
                        domain['dns-a'] = self.__answer_to_list(
                            resolve(domain['domain-name'],
                                    rdtype=dns.rdatatype.A))
                        dns_a = True
                    except NoNameservers:
                        domain['dns-a'] = ['!ServFail']
                    except DNSException as e:
                        self.__debug(e)

                    try:
                        domain['dns-aaaa'] = self.__answer_to_list(
                            resolve(domain['domain-name'],
                                    rdtype=dns.rdatatype.AAAA))
                        dns_aaaa = True
                    except NoNameservers:
                        domain['dns-aaaa'] = ['!ServFail']
                    except DNSException as e:
                        self.__debug(e)

                # MX is only queried when the NS lookup succeeded.
                if nxdomain is False and dns_ns is True:
                    try:
                        domain['dns-mx'] = self.__answer_to_list(
                            resolve(domain['domain-name'],
                                    rdtype=dns.rdatatype.MX))
                        dns_mx = True
                    except NoNameservers:
                        domain['dns-mx'] = ['!ServFail']
                    except DNSException as e:
                        self.__debug(e)
            else:
                # Fallback: resolve through the system resolver.
                try:
                    ip = socket.getaddrinfo(domain['domain-name'], 80)
                except socket.gaierror as e:
                    # NOTE(review): -3 is presumably EAI_AGAIN (temporary
                    # resolution failure) on Linux -- confirm.
                    if e.errno == -3:
                        domain['dns-a'] = ['!ServFail']
                except Exception as e:
                    self.__debug(e)
                else:
                    domain['dns-a'] = list()
                    domain['dns-aaaa'] = list()
                    # Addresses containing '.' go to the A list, those
                    # containing ':' to the AAAA list.
                    for j in ip:
                        if '.' in j[4][0]:
                            domain['dns-a'].append(j[4][0])
                        if ':' in j[4][0]:
                            domain['dns-aaaa'].append(j[4][0])
                    domain['dns-a'] = sorted(domain['dns-a'])
                    domain['dns-aaaa'] = sorted(domain['dns-aaaa'])
                    dns_a = True
                    dns_aaaa = True

            if self.option_mxcheck:
                if dns_mx is True:
                    if domain['domain-name'] != self.domain_init:
                        if self.__mxcheck(domain['dns-mx'][0],
                                          self.domain_init,
                                          domain['domain-name']):
                            domain['mx-spy'] = True

            if self.option_geoip:
                if dns_a is True:
                    try:
                        country = geo.country_by_addr(domain['dns-a'][0])
                    except Exception as e:
                        self.__debug(e)
                        pass
                    else:
                        if country:
                            domain['geoip-country'] = country.split(',')[0]

            if self.option_banners:
                if dns_a is True:
                    banner = self.__banner_http(domain['dns-a'][0],
                                                domain['domain-name'])
                    if banner:
                        domain['banner-http'] = banner
                if dns_mx is True:
                    banner = self.__banner_smtp(domain['dns-mx'][0])
                    if banner:
                        domain['banner-smtp'] = banner

            if self.option_ssdeep:
                if dns_a is True or dns_aaaa is True:
                    try:
                        req = requests.get(
                            self.uri_scheme + '://' + domain['domain-name'] +
                            self.uri_path + self.uri_query,
                            timeout=REQUEST_TIMEOUT_HTTP,
                            headers={'User-Agent': self.useragent},
                            verify=False)
                    except Exception as e:
                        self.__debug(e)
                        pass
                    else:
                        # Only score 2xx responses whose final URL (query
                        # string removed) differs from
                        # self.ssdeep_effective_url.
                        if req.status_code // 100 == 2 and req.url.split(
                                '?')[0] != self.ssdeep_effective_url:
                            # Hash the body with all whitespace removed and
                            # lower-cased.
                            ssdeep_curr = ssdeep.hash(''.join(
                                req.text.split()).lower())
                            domain['ssdeep-score'] = ssdeep.compare(
                                self.ssdeep_init, ssdeep_curr)

            self.jobs.task_done()
Example #19
0
	def run(self):
		"""Worker loop: enrich domain dicts taken from ``self.jobs``.

		Each job is a dict with a ``'domain'`` key. Depending on which
		optional modules and command-line options are active, DNS records,
		WHOIS dates, a GeoIP country, service banners and an ssdeep score
		are added to that dict. Every lookup is best effort: failures are
		swallowed and the corresponding key is simply left unset.
		"""
		while not self.kill_received:
			# Blocking get: waits until a job is available.
			domain = self.jobs.get()
			if module_dnspython:
				resolv = dns.resolver.Resolver()
				resolv.lifetime = REQUEST_TIMEOUT_DNS
				resolv.timeout = REQUEST_TIMEOUT_DNS

				try:
					ns = resolv.query(domain['domain'], 'NS')
					# First record in sorted order, without its last
					# character, lower-cased.
					domain['ns'] = str(sorted(ns)[0])[:-1].lower()
				except Exception:
					pass

				# The remaining record types are only queried when the NS
				# lookup produced a value.
				if 'ns' in domain:
					try:
						ns = resolv.query(domain['domain'], 'A')
						domain['a'] = str(sorted(ns)[0])
					except Exception:
						pass
	
					try:
						ns = resolv.query(domain['domain'], 'AAAA')
						domain['aaaa'] = str(sorted(ns)[0])
					except Exception:
						pass

					try:
						ns = resolv.query(domain['domain'], 'MX')
						mx = str(sorted(ns)[0].exchange)[:-1].lower()
						if mx: domain['mx'] = mx
					except Exception:
						pass
			else:
				# Fallback without dnspython: use the system resolver.
				try:
					ip = socket.getaddrinfo(domain['domain'], 80)
				except Exception:
					pass
				else:
					# First address containing '.' becomes 'a', first one
					# containing ':' becomes 'aaaa'.
					for j in ip:
						if '.' in j[4][0]:
							domain['a'] = j[4][0]
							break
					for j in ip:
						if ':' in j[4][0]:
							domain['aaaa'] = j[4][0]
							break

			if module_whois and args.whois:
				if 'ns' in domain and 'a' in domain:
					try:
						whoisdb = whois.query(domain['domain'])
						# Spaces in the stringified dates are replaced by 'T'.
						domain['created'] = str(whoisdb.creation_date).replace(' ', 'T')
						domain['updated'] = str(whoisdb.last_updated).replace(' ', 'T')
					except Exception:
						pass

			if module_geoip and geoip_db and args.geoip:
				if 'a' in domain:
					gi = GeoIP.new(GeoIP.GEOIP_MEMORY_CACHE)
					try:
						country = gi.country_name_by_addr(domain['a'])
					except Exception:
						pass
					else:
						if country:
							# Keep only the part before the first comma.
							domain['country'] = country.split(',')[0]

			if args.banners:
				if 'a' in domain:
					banner = self.__banner_http(domain['a'], domain['domain'])
					if banner:
						domain['banner-http'] = banner
				if 'mx' in domain:
					banner = self.__banner_smtp(domain['mx'])
					if banner:
						domain['banner-smtp'] = banner

			if args.ssdeep and module_requests and module_ssdeep and self.orig_domain_ssdeep:
				if 'a' in domain:
					try:
						req = requests.get('http://' + domain['domain'], timeout=REQUEST_TIMEOUT_HTTP)
						fuzz_domain_ssdeep = ssdeep.hash(req.text)
					except Exception:
						pass
					else:
						# Score of the fetched page against the original
						# domain's hash.
						domain['ssdeep'] = ssdeep.compare(self.orig_domain_ssdeep, fuzz_domain_ssdeep)

			# Mark the job as processed on the queue.
			self.jobs.task_done()
Example #20
0
    def run(self):
        """Worker loop: enrich domain dicts taken from ``self.jobs``.

        Each job is a dict with a ``'domain'`` key. Depending on the enabled
        options, DNS records, an MX "spy" flag, WHOIS dates, a GeoIP
        country, service banners and an ssdeep score are added to that dict.
        Every lookup is best effort: failures are swallowed and the
        corresponding key is simply left unset.
        """
        while not self.kill_received:
            # Blocking get: waits until a job is available.
            domain = self.jobs.get()

            if self.option_extdns:
                resolv = dns.resolver.Resolver()
                resolv.lifetime = REQUEST_TIMEOUT_DNS
                resolv.timeout = REQUEST_TIMEOUT_DNS

                try:
                    ans = resolv.query(domain['domain'], 'SOA')
                    # First space-separated field of the first SOA record in
                    # sorted order, without its last character, lower-cased.
                    domain['ns'] = str(
                        sorted(ans)[0]).split(' ')[0][:-1].lower()
                except Exception:
                    pass

                # The remaining record types are only queried when the SOA
                # lookup produced a value.
                if 'ns' in domain:
                    try:
                        ans = resolv.query(domain['domain'], 'A')
                        domain['a'] = str(sorted(ans)[0])
                    except Exception:
                        pass

                    try:
                        ans = resolv.query(domain['domain'], 'AAAA')
                        domain['aaaa'] = str(sorted(ans)[0])
                    except Exception:
                        pass

                    try:
                        ans = resolv.query(domain['domain'], 'MX')
                        mx = str(sorted(ans)[0].exchange)[:-1].lower()
                        if mx: domain['mx'] = mx
                    except Exception:
                        pass
            else:
                # Fallback: resolve through the system resolver.
                try:
                    ip = socket.getaddrinfo(domain['domain'], 80)
                except Exception:
                    pass
                else:
                    # First address containing '.' becomes 'a', first one
                    # containing ':' becomes 'aaaa'.
                    for j in ip:
                        if '.' in j[4][0]:
                            domain['a'] = j[4][0]
                            break
                    for j in ip:
                        if ':' in j[4][0]:
                            domain['aaaa'] = j[4][0]
                            break

            if self.option_mxcheck:
                if 'mx' in domain:
                    # Compare by value: 'is not' tests object identity, so
                    # an equal string held in a different object was
                    # treated as a different domain.
                    if domain['domain'] != self.domain_orig:
                        if self.__mxcheck(domain['mx'], self.domain_orig,
                                          domain['domain']):
                            domain['mx-spy'] = True

            if self.option_whois:
                if 'ns' in domain and 'a' in domain:
                    try:
                        whoisdb = whois.query(domain['domain'])
                        # Spaces in the stringified dates are replaced by 'T'.
                        domain['created'] = str(whoisdb.creation_date).replace(
                            ' ', 'T')
                        domain['updated'] = str(whoisdb.last_updated).replace(
                            ' ', 'T')
                    except Exception:
                        pass

            if self.option_geoip:
                if 'a' in domain:
                    gi = GeoIP.open(
                        FILE_GEOIP,
                        GeoIP.GEOIP_INDEX_CACHE | GeoIP.GEOIP_CHECK_CACHE)
                    try:
                        country = gi.country_name_by_addr(domain['a'])
                    except Exception:
                        pass
                    else:
                        if country:
                            # Keep only the part before the first comma.
                            domain['country'] = country.split(',')[0]

            if self.option_banners:
                if 'a' in domain:
                    banner = self.__banner_http(domain['a'], domain['domain'])
                    if banner:
                        domain['banner-http'] = banner
                if 'mx' in domain:
                    banner = self.__banner_smtp(domain['mx'])
                    if banner:
                        domain['banner-smtp'] = banner

            if self.option_ssdeep:
                if 'a' in domain:
                    try:
                        req = requests.get(self.uri_scheme + '://' +
                                           domain['domain'] + self.uri_path +
                                           self.uri_query,
                                           timeout=REQUEST_TIMEOUT_HTTP)
                        ssdeep_fuzz = ssdeep.hash(req.text)
                    except Exception:
                        pass
                    else:
                        # Score of the fetched page against the original
                        # domain's hash.
                        domain['ssdeep'] = ssdeep.compare(
                            self.ssdeep_orig, ssdeep_fuzz)

            # Mark the job as processed on the queue.
            self.jobs.task_done()
Example #21
0
File: gp.py Project: FUMVar/FUMVar
def difference(fbytes1, fbytes2):
    """Return ``100 - score``, where score is the ssdeep comparison of both inputs."""
    first_digest = ssdeep.hash(fbytes1)
    second_digest = ssdeep.hash(fbytes2)
    similarity = ssdeep.compare(first_digest, second_digest)
    return 100 - similarity
Example #22
0
def cmd():
    """Print the ssdeep comparison score of the two files named on the command line."""
    # First command-line argument: first file.
    first_path = sys.argv[1]
    first_hash = ssdeep.hash_from_file(first_path)
    # Second command-line argument: second file.
    second_path = sys.argv[2]
    second_hash = ssdeep.hash_from_file(second_path)

    # Compare the two fuzzy hashes and report the score.
    score = ssdeep.compare(first_hash, second_hash)
    template = "Between {} and {}, there is {:.2f} similiarity"
    print(template.format(first_path, second_path, score))
Example #23
0
def compare_files(file_path1, file_path2):
    """Compare two files by the hashes of their import tables.

    Each path is passed to ``get_import_table_hash`` and the two results are
    handed to ``compare``, whose return value is returned unchanged.
    """
    return compare(
        get_import_table_hash(file_path1),
        get_import_table_hash(file_path2),
    )
Example #24
0
    def handle(self):
        """Cluster the ssdeep hashes listed in ``self.file_lists``.

        Reads every non-empty line from the listed files, records each
        entry's hash, finds candidate neighbours through ``self.insert2db``,
        keeps the pairs whose ``ssdeep.compare`` score exceeds
        ``self.similar_score`` in ``self.matches`` / ``self.scores``, and
        finally builds ``self.groups`` from those matches.  The work is
        bracketed by ``self.cluster_start()`` and ``self.cluster_finish()``.
        """
        self.cluster_start()
        # Prepare the list of unique, non-empty input lines.
        ssdeep_sets = set()
        for path in self.file_lists:
            with open(path, 'r') as f:
                for line in f:
                    line = line.strip()
                    if len(line) == 0:
                        continue
                    ssdeep_sets.add(line)
        ssdeep_lists = list(ssdeep_sets)

        # print '> ssdeep cluster'
        for path in ssdeep_lists:
            # A line is either "<hash>,<path>" (path optionally wrapped in
            # double quotes) or a bare hash, which is then its own key.
            if ',' in path:
                shash, path = path.split(',', 1)
                path = path.strip('"')
            else:
                shash = path
            self.hashes[path] = shash
            # NOTE(review): sha256 is fed a text string here; that only works
            # where str is a byte string (Python 2) — confirm target version.
            self.sha256s[path] = hashlib.sha256(path).hexdigest()

            block_size, chunk, double_chunk = self.process_ssdeep_hash(
                self.hashes[path])

            # Union of the entries returned for the block-size chunk and for
            # the double-block-size chunk.
            similar_to = self.insert2db(block_size, chunk,
                                        path) | self.insert2db(
                                            block_size * 2, double_chunk, path)

            h = self.hashes[path]
            self.matches[path] = set()
            for other in similar_to:
                score = ssdeep.compare(h, self.hashes[other])
                if score > self.similar_score:
                    # Record the match and its score in both directions.
                    # NOTE(review): assumes self.matches[other] already
                    # exists, i.e. insert2db only returns earlier entries —
                    # confirm.
                    self.matches[path].add(other)
                    self.matches[other].add(path)
                    if path not in self.scores:
                        self.scores[path] = {}
                    if other not in self.scores[path]:
                        self.scores[path][other] = score

                    if other not in self.scores:
                        self.scores[other] = {}
                    if path not in self.scores[other]:
                        self.scores[other][path] = score

        # ssdeep groups
        for path in self.matches.keys():
            in_a_group = False
            for g in xrange(len(self.groups)):
                if path in self.groups[g]:
                    in_a_group = True
                    continue
                # Join a group only when every current member matches path.
                should_add = True
                for h in self.groups[g]:
                    if h not in self.matches[path]:
                        should_add = False
                if should_add:
                    self.groups[g].append(path)
                    in_a_group = True
            # Entries that fit no existing group start a new one.
            if not in_a_group:
                self.groups.append([path])

        for g in xrange(len(self.groups)):
            self.groups[g].sort()

        self.cluster_finish()
Example #25
0
    def run(self):
        """Take domain dicts from ``self.jobs`` and enrich them in place.

        Loops until ``self.kill_received`` is set.  For each domain it fills
        in, when available: ``ns``/``a``/``aaaa``/``mx`` (dnspython when
        ``module_dnspython`` is truthy, otherwise ``socket.getaddrinfo``),
        whois ``created``/``updated``, GeoIP ``country``, HTTP/SMTP banners
        and an ``ssdeep`` score against ``self.orig_domain_ssdeep``.  Every
        lookup is best-effort: exceptions are swallowed and the key is left
        unset.  ``self.jobs.task_done()`` is called once per domain.
        """
        while not self.kill_received:
            domain = self.jobs.get()
            if module_dnspython:
                resolv = dns.resolver.Resolver()
                resolv.lifetime = REQUEST_TIMEOUT_DNS
                resolv.timeout = REQUEST_TIMEOUT_DNS

                try:
                    ns = resolv.query(domain['domain'], 'NS')
                    # First record in sorted order, last character dropped
                    # (presumably the trailing root dot — confirm).
                    domain['ns'] = str(sorted(ns)[0])[:-1].lower()
                except Exception:
                    pass

                # The remaining record types are only queried when an NS
                # record was found.
                if 'ns' in domain:
                    try:
                        ns = resolv.query(domain['domain'], 'A')
                        domain['a'] = str(sorted(ns)[0])
                    except Exception:
                        pass

                    try:
                        ns = resolv.query(domain['domain'], 'AAAA')
                        domain['aaaa'] = str(sorted(ns)[0])
                    except Exception:
                        pass

                    try:
                        ns = resolv.query(domain['domain'], 'MX')
                        mx = str(sorted(ns)[0].exchange)[:-1].lower()
                        if mx: domain['mx'] = mx
                    except Exception:
                        pass
            else:
                # Fallback without dnspython: system resolver only.
                try:
                    ip = socket.getaddrinfo(domain['domain'], 80)
                except Exception:
                    pass
                else:
                    # First address containing '.' is stored as 'a', first
                    # one containing ':' as 'aaaa'.
                    for j in ip:
                        if '.' in j[4][0]:
                            domain['a'] = j[4][0]
                            break
                    for j in ip:
                        if ':' in j[4][0]:
                            domain['aaaa'] = j[4][0]
                            break

            if module_whois and args.whois:
                if 'ns' in domain and 'a' in domain:
                    try:
                        whoisdb = whois.query(domain['domain'])
                        # Replace the space in the stringified dates with 'T'.
                        domain['created'] = str(whoisdb.creation_date).replace(
                            ' ', 'T')
                        domain['updated'] = str(whoisdb.last_updated).replace(
                            ' ', 'T')
                    except Exception:
                        pass

            if module_geoip and geoip_db and args.geoip:
                if 'a' in domain:
                    gi = GeoIP.new(GeoIP.GEOIP_MEMORY_CACHE)
                    try:
                        country = gi.country_name_by_addr(domain['a'])
                    except Exception:
                        pass
                    else:
                        if country:
                            # Keep only the text before the first comma.
                            domain['country'] = country.split(',')[0]

            if args.banners:
                if 'a' in domain:
                    banner = self.__banner_http(domain['a'], domain['domain'])
                    if banner:
                        domain['banner-http'] = banner
                if 'mx' in domain:
                    banner = self.__banner_smtp(domain['mx'])
                    if banner:
                        domain['banner-smtp'] = banner

            if args.ssdeep and module_requests and module_ssdeep and self.orig_domain_ssdeep:
                if 'a' in domain:
                    try:
                        req = requests.get('http://' + domain['domain'],
                                           timeout=REQUEST_TIMEOUT_HTTP)
                        fuzz_domain_ssdeep = ssdeep.hash(req.text)
                    except Exception:
                        pass
                    else:
                        # Score of the fetched page against the original
                        # domain's hash.
                        domain['ssdeep'] = ssdeep.compare(
                            self.orig_domain_ssdeep, fuzz_domain_ssdeep)

            self.jobs.task_done()
Example #26
0
# Directory whose screenshots are fingerprinted below.
screenshotPath = '/root/Desktop/vnchash/arena/'

# Shrink every screenshot to 10x10, reduce it to a 20-colour adaptive
# palette, then print its difference hash next to the file name.
for file in os.listdir(screenshotPath):
    screenshot = Image.open(screenshotPath + file)
    screenshot = screenshot.resize((10, 10))
    screenshot = screenshot.convert('P', palette=Image.ADAPTIVE, colors=20)

    hash = imagehash.dhash(screenshot)

    # Single-argument print call: same output on Python 2, valid on Python 3.
    print(str(hash) + '            ' + file)

def hashFileCreator():
    """Write a reduced copy of a fixed screenshot and print its ssdeep hash.

    The source image is resized to 100x100, converted to a 10-colour
    adaptive palette, saved as ``compressed.gif`` and that file is hashed
    with ``ssdeep.hash_from_file``.
    """
    compressed_path = '/root/Desktop/vnchash/ubuntu/compressed.gif'

    screenshot = Image.open('/root/Desktop/vnchash/ubuntu/ubuntu200.17.220.25%3A02.jpg')
    screenshot = screenshot.resize((100, 100))
    screenshot = screenshot.convert('P', palette=Image.ADAPTIVE, colors=10)
    screenshot.save(compressed_path)

    # Local renamed so it no longer shadows the builtin ``hash``.
    fuzzy_hash = ssdeep.hash_from_file(compressed_path)
    # Single-argument print call: same output on Python 2, valid on Python 3.
    print(fuzzy_hash)

def compareHashes():
    """Print the ssdeep hashes of two fixed files and their comparison."""
    hashone = ssdeep.hash_from_file('/root/Desktop/vnchash/win7/win7hash.jpg')
    hashtwo = ssdeep.hash_from_file('/root/Desktop/vnchash/win7/win7hash.jpg-temp.jpg')

    # Single-argument print calls: same output on Python 2, valid on Python 3.
    print(hashone)
    print(hashtwo)

    print(ssdeep.compare(hashone, hashtwo))


compareHashes()
 def compare_fuzzy_hash(self, file):
     """Compare the two most recent entries of ``file.file_fuzzy_hash``.

     Returns the ``ssdeep.compare`` result for the second-to-last and last
     entries, or ``None`` when fewer than two entries are recorded.
     """
     history = file.file_fuzzy_hash
     # Nothing to compare until at least two hashes exist.
     if len(history) <= 1:
         return None
     previous, latest = history[-2], history[-1]
     return ssdeep.compare(previous, latest)
Example #28
0
#! /usr/bin/env python3
# Take two ssdeep hashes from the command line and compare them.
# Usage: ./ssdeep_compare.py HASH1 HASH2
import ssdeep
import sys
# Print the ssdeep.compare result for the two hash arguments.
print(ssdeep.compare(sys.argv[1], sys.argv[2]))
Example #29
0
File: amit.py Project: abpolym/amit
def compare_ssdeep(hash1, hash2):
    """Return the ``ssdeep.compare`` result for two fuzzy hashes."""
    result = ssdeep.compare(hash1, hash2)
    return result
Example #30
0
    else:
        return (dir1, dir2)


if __name__ == '__main__':
    # Compare either two directory trees or two single files by ssdeep score.
    dir1, dir2 = getargs()
    diffs = []
    totalscore = 0

    # command line arguments are both dirs
    # (logical ``and`` rather than bitwise ``&``: short-circuits and states
    # the intent)
    if os.path.isdir(dir1) and os.path.isdir(dir2):
        # Single-argument print calls keep the exact Python 2 output while
        # also being valid on Python 3.
        print('\nSCORE RESULT  PATH')
        comparetrees(dir1, dir2, diffs)
        if not diffs:
            print('No diffs found\n')
        else:
            totalscore = sum(diffs)
            print('\nTotal files compared: ' + str(len(diffs)))
            # Floor division keeps the integer average the Python 2 ``/``
            # produced for integer scores.
            print('Overall match score:  ' + str(totalscore // len(diffs)) + '%\n')
    else:
        try:
            # command line arguments are both files
            score = ssdeep.compare(ssdeep.hash_from_file(dir1),
                                   ssdeep.hash_from_file(dir2))
        except Exception:
            # ``Exception`` rather than a bare ``except`` so Ctrl-C and
            # SystemExit are not reported as invalid input.
            print('Invalid Files/Folders: Aborting...')
            sys.exit(1)
        else:
            print('Overall match score:  ' + str(score) + '%\n')
Example #31
0
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load process lists that come from a trusted source.
with open(filename1, 'rb') as fp:
    process_list = pickle.load(fp)

# Single-argument print calls: same output on Python 2, valid on Python 3.
print(process_list)

# ssdeep hash of every process name.
baseSSdeep = [ssdeep.hash(prosesses) for prosesses in process_list]

print("\n ----- Levenshtein distance in process names ----- \n")

# ssdeep score for every pair of process-name hashes.
scores = [ssdeep.compare(a, b)
          for a, b in itertools.combinations(baseSSdeep, 2)]
levScores = []

# Report every pair of names whose edit distance is 1 or 2.
for a, b in itertools.combinations(process_list, 2):
    score = levenshtein(a, b)
    if score <= 2 and score != 0:
        print("Close match found")
        print(str(a) + " - " + str(b) + "- Score: " + str(score) + "\n")
Example #32
0
         shivanotifyerrors.notifydeveloper("[-] Error (Module shivamaindb.py) - executing fetchfromdb %s" % e)
     
 temprecords = tempDb.fetchall()
 mainrecords = mainDb.fetchall()
 
 # Match every temp record against the main records: a match triggers
 # update(), and a temp record that matched nothing is passed to insert().
 # NOTE(review): from the indexing below, field 0 looks like a record id,
 # field 1 an ssdeep hash and field 2 a length — confirm against the query.
 for t_record in temprecords:
     # Only main records whose field 2 is within +/-10% are compared.
     maxlen, minlen = int(t_record[2] * 1.10), int(t_record[2] * 0.90)
     # Number of main records that did NOT match this temp record.
     count = 0
     
     for m_record in mainrecords:
         if m_record[2] >= minlen and m_record[2] <= maxlen:
             if t_record[0] == m_record[0]:
                 update(t_record[0], m_record[0])
             
             else:
                 ratio = ssdeep.compare(t_record[1], m_record[1])
                 # Increase the comparison ratio when length is smaller
                 if (int(t_record[2]) <= 150 and ratio >= 95) or (int(t_record[2]) > 150 and ratio >= 80):
                     update(t_record[0], m_record[0])
                 else:
                     count += 1
         else:
             count += 1
     
     # Every main record was a miss, so this temp record is new.
     if count == len(mainrecords):
         insert(t_record[0])
         
 # At last update whitelist recipients
 group_concat_max_len = "SET SESSION group_concat_max_len = 20000"
 #whitelist = "INSERT INTO `whitelist` (`id`, `recipients`) VALUES ('1', (SELECT GROUP_CONCAT(DISTINCT `to`) FROM `spam` WHERE `totalCounter` < 30)) ON DUPLICATE KEY UPDATE `recipients` = (SELECT GROUP_CONCAT(DISTINCT `to`) FROM `spam` WHERE `totalCounter` < 30)"
 
Example #33
0
    def searchFiles(self):
        """Sort confirmed and probable samples by ssdeep similarity.

        A random file from ``self.confirmPath`` is used as the reference.
        Every confirmed file that scores at least
        ``self.confirmFilesPercent`` against the reference is copied to
        ``self.inputFilesPath`` and its hash is remembered in
        ``self.confirmPathFileHash``; the others are copied to
        ``self.probablePath``.  Each file in ``self.probablePath`` is then
        copied to ``self.inputFilesPath`` when it scores at least
        ``self.probableFilesPercent`` against any remembered hash.

        Raises:
            Exception: when either folder is empty, or wrapping any error
                raised while listing, hashing or copying.
        """
        try:
            candidates = os.listdir(self.confirmPath)
            if not candidates:
                raise Exception("Empty confirm virus sample folder.")
            fileName = random.choice(candidates)
            filePath = os.path.join(self.confirmPath, fileName)

            print("Reference File For Fuzzy Hash: {}".format(filePath))
            refHash = ssdeep.hash_from_file(filePath)
            print("Fuzzy Hash Of Reference File: {}\n".format(refHash))

            # List the folder once, under its own name.  The earlier counting
            # loop reused ``filePath`` as its loop variable and so replaced
            # the reference path with the last listed entry.
            # NOTE(review): ``listdir`` is a helper from outside this block;
            # presumably it yields full paths — confirm.
            confirmFiles = list(listdir(self.confirmPath))

            if len(confirmFiles) == 1:
                self.confirmPathFileHash.append(refHash)
                shutil.copy(filePath, self.inputFilesPath)
            else:
                with tqdm(total=len(confirmFiles),
                          unit="files",
                          desc="Fuzzy find in confirm path: ") as pbar:
                    for traverseFilePath in confirmFiles:
                        pbar.update(1)
                        # Show the file currently being processed.
                        pbar.set_postfix(
                            file=traverseFilePath.split(os.path.sep)[-1:])
                        if filePath == traverseFilePath:
                            # The reference itself: reuse its known hash.
                            self.confirmPathFileHash.append(refHash)
                            shutil.copy(traverseFilePath, self.inputFilesPath)
                            continue
                        tmpHash = ssdeep.hash_from_file(traverseFilePath)
                        if ssdeep.compare(refHash,
                                          tmpHash) >= self.confirmFilesPercent:
                            self.confirmPathFileHash.append(tmpHash)
                            shutil.copy(traverseFilePath, self.inputFilesPath)
                        else:
                            shutil.copy(traverseFilePath, self.probablePath)
            print("\n")

            probableFiles = list(listdir(self.probablePath))
            if not probableFiles:
                raise Exception("Empty probable virus sample folder.")

            with tqdm(total=len(probableFiles),
                      unit="files",
                      desc="Fuzzy find in probable path: ") as pbar:
                for traverseFilePath in probableFiles:
                    pbar.update(1)
                    pbar.set_postfix(
                        file=traverseFilePath.split(os.path.sep)[-1:])
                    tmpHash = ssdeep.hash_from_file(traverseFilePath)
                    # One sufficiently similar confirmed hash is enough.
                    for fileHash in self.confirmPathFileHash:
                        if ssdeep.compare(
                                fileHash,
                                tmpHash) >= self.probableFilesPercent:
                            shutil.copy(traverseFilePath,
                                        self.inputFilesPath)
                            break
            print("\n")
        except Exception as error:
            # The former ``sys.exit(1)`` after this raise could never run and
            # has been dropped.
            raise Exception(error)
Example #34
0
	def run(self):
		"""Take domain dicts from ``self.jobs`` and enrich them in place.

		Loops until ``self.kill_received`` is set.  For each domain it fills
		in, when available: ``ns``/``a``/``aaaa``/``mx`` (dnspython when
		``MODULE_DNSPYTHON`` is truthy, otherwise ``socket.getaddrinfo``),
		whois ``created``/``updated``, GeoIP ``country``, HTTP/SMTP banners
		and an ``ssdeep`` score against ``self.orig_domain_ssdeep``.  Every
		lookup is best-effort: exceptions are swallowed and the key is left
		unset.  ``self.jobs.task_done()`` is called once per domain.
		"""
		while not self.kill_received:
			domain = self.jobs.get()
			if MODULE_DNSPYTHON:
				resolv = dns.resolver.Resolver()
				resolv.lifetime = REQUEST_TIMEOUT_DNS
				resolv.timeout = REQUEST_TIMEOUT_DNS

				try:
					ans = resolv.query(domain['domain'], 'SOA')
					# First space-separated field of the first sorted SOA
					# record, last character dropped and lower-cased.
					domain['ns'] = str(sorted(ans)[0]).split(' ')[0][:-1].lower()
				except Exception:
					pass

				# The remaining record types are only queried when the SOA
				# lookup produced a name server.
				if 'ns' in domain:
					try:
						ans = resolv.query(domain['domain'], 'A')
						domain['a'] = str(sorted(ans)[0])
					except Exception:
						pass

					try:
						ans = resolv.query(domain['domain'], 'AAAA')
						domain['aaaa'] = str(sorted(ans)[0])
					except Exception:
						pass

					try:
						ans = resolv.query(domain['domain'], 'MX')
						mx = str(sorted(ans)[0].exchange)[:-1].lower()
						if mx: domain['mx'] = mx
					except Exception:
						pass
			else:
				# Fallback without dnspython: system resolver only.
				try:
					ip = socket.getaddrinfo(domain['domain'], 80)
				except Exception:
					pass
				else:
					# First address containing '.' is stored as 'a', first
					# one containing ':' as 'aaaa'.
					for j in ip:
						if '.' in j[4][0]:
							domain['a'] = j[4][0]
							break
					for j in ip:
						if ':' in j[4][0]:
							domain['aaaa'] = j[4][0]
							break

			if MODULE_WHOIS and args.whois:
				if 'ns' in domain and 'a' in domain:
					try:
						whoisdb = whois.query(domain['domain'])
						# Replace the space in the stringified dates with 'T'.
						domain['created'] = str(whoisdb.creation_date).replace(' ', 'T')
						domain['updated'] = str(whoisdb.last_updated).replace(' ', 'T')
					except Exception:
						pass

			if MODULE_GEOIP and DB_GEOIP and args.geoip:
				if 'a' in domain:
					gi = GeoIP.open(FILE_GEOIP, GeoIP.GEOIP_INDEX_CACHE | GeoIP.GEOIP_CHECK_CACHE)
					try:
						country = gi.country_name_by_addr(domain['a'])
					except Exception:
						pass
					else:
						if country:
							# Keep only the text before the first comma.
							domain['country'] = country.split(',')[0]

			if args.banners:
				if 'a' in domain:
					banner = self.__banner_http(domain['a'], domain['domain'])
					if banner:
						domain['banner-http'] = banner
				if 'mx' in domain:
					banner = self.__banner_smtp(domain['mx'])
					if banner:
						domain['banner-smtp'] = banner

			if args.ssdeep and MODULE_REQUESTS and MODULE_SSDEEP and self.orig_domain_ssdeep:
				if 'a' in domain:
					try:
						req = requests.get(self.uri_scheme + '://' + domain['domain'] + self.uri_path + self.uri_query, timeout=REQUEST_TIMEOUT_HTTP)
						fuzz_domain_ssdeep = ssdeep.hash(req.text)
					except Exception:
						pass
					else:
						# Score of the fetched page against the original
						# domain's hash.
						domain['ssdeep'] = ssdeep.compare(self.orig_domain_ssdeep, fuzz_domain_ssdeep)

			self.jobs.task_done()
from pymongo import MongoClient
import ssdeep

# Reference ssdeep hash that every stored document is compared against.
fuzzy_to_compare = "12288:lTurEUKhROhnCzrwsrsNuRIHZB62atXtjBIuMAI0VpnJJyeVxy5la8AJv:lqrEJhROh8rwKsNrDK9xM3cJyeg0Jv"

client = MongoClient(envget('metadata.host'), envget('metadata.port'))
db = client[envget('db_metadata_name')]
coll_meta = db[envget('db_metadata_collection')]
print("loading")
# Fetch only the file id and fuzzy hash of every document, all in memory.
f1 = coll_meta.find({}, {"file_id": 1, "fuzzy_hash": 1})
results = list(f1)
print("compearing")
count = 0
reset = 0
for a in results:
    try:
        res = ssdeep.compare(a["fuzzy_hash"], fuzzy_to_compare)
    except Exception as e:
        # ``except ... as`` and the print call are valid on Python 2.6+ and
        # Python 3; a document that cannot be compared is reported, skipped.
        print(str(e))
        continue
    # Report documents scoring 50 or more against the reference hash.
    if res >= 50:
        print("%s - %s" % (res, a["file_id"]))

    # print count
    # reset+=1; count+=1
    # if(reset>=1000):
    #    print(str(count)+" procesados")
    #    reset=0
Example #36
0
def hardcode():
    """Print the ssdeep comparison of two executables at hard-coded paths.

    Both path literals are placeholders and must be filled in before use.
    """
    # be sure to fill in the location of each executable
    first_exe = '{location of first executable}'
    second_exe = '{location of second executable}'
    print(ssdeep.compare(ssdeep.hash_from_file(first_exe),
                         ssdeep.hash_from_file(second_exe)))
Example #37
0
	def run(self):
		"""Take domain dicts from ``self.jobs`` and enrich them in place.

		Loops until ``self.kill_received`` is set.  Depending on the
		``self.option_*`` flags it fills in, when available:
		``ns``/``a``/``aaaa``/``mx`` (dnspython when ``self.option_extdns``,
		otherwise ``socket.getaddrinfo``), ``mx-spy``, whois
		``created``/``updated``, GeoIP ``country``, HTTP/SMTP banners and an
		``ssdeep`` score against ``self.ssdeep_orig``.  Every lookup is
		best-effort: exceptions are swallowed and the key is left unset.
		``self.jobs.task_done()`` is called once per domain.
		"""
		while not self.kill_received:
			domain = self.jobs.get()

			if self.option_extdns:
				resolv = dns.resolver.Resolver()
				resolv.lifetime = REQUEST_TIMEOUT_DNS
				resolv.timeout = REQUEST_TIMEOUT_DNS

				try:
					ans = resolv.query(domain['domain'], 'SOA')
					# First space-separated field of the first sorted SOA
					# record, last character dropped and lower-cased.
					domain['ns'] = str(sorted(ans)[0]).split(' ')[0][:-1].lower()
				except Exception:
					pass

				# The remaining record types are only queried when the SOA
				# lookup produced a name server.
				if 'ns' in domain:
					try:
						ans = resolv.query(domain['domain'], 'A')
						domain['a'] = str(sorted(ans)[0])
					except Exception:
						pass

					try:
						ans = resolv.query(domain['domain'], 'AAAA')
						domain['aaaa'] = str(sorted(ans)[0])
					except Exception:
						pass

					try:
						ans = resolv.query(domain['domain'], 'MX')
						mx = str(sorted(ans)[0].exchange)[:-1].lower()
						if mx: domain['mx'] = mx
					except Exception:
						pass
			else:
				# Fallback without dnspython: system resolver only.
				try:
					ip = socket.getaddrinfo(domain['domain'], 80)
				except Exception:
					pass
				else:
					# First address containing '.' is stored as 'a', first
					# one containing ':' as 'aaaa'.
					for j in ip:
						if '.' in j[4][0]:
							domain['a'] = j[4][0]
							break
					for j in ip:
						if ':' in j[4][0]:
							domain['aaaa'] = j[4][0]
							break

			if self.option_mxcheck:
				if 'mx' in domain:
					# Compare by value: ``is not`` tests object identity and
					# is true for equal strings held in different objects, so
					# the original domain was not reliably skipped.
					if domain['domain'] != self.domain_orig:
						if self.__mxcheck(domain['mx'], self.domain_orig, domain['domain']):
							domain['mx-spy'] = True

			if self.option_whois:
				if 'ns' in domain and 'a' in domain:
					try:
						whoisdb = whois.query(domain['domain'])
						# Replace the space in the stringified dates with 'T'.
						domain['created'] = str(whoisdb.creation_date).replace(' ', 'T')
						domain['updated'] = str(whoisdb.last_updated).replace(' ', 'T')
					except Exception:
						pass

			if self.option_geoip:
				if 'a' in domain:
					gi = GeoIP.open(FILE_GEOIP, GeoIP.GEOIP_INDEX_CACHE | GeoIP.GEOIP_CHECK_CACHE)
					try:
						country = gi.country_name_by_addr(domain['a'])
					except Exception:
						pass
					else:
						if country:
							# Keep only the text before the first comma.
							domain['country'] = country.split(',')[0]

			if self.option_banners:
				if 'a' in domain:
					banner = self.__banner_http(domain['a'], domain['domain'])
					if banner:
						domain['banner-http'] = banner
				if 'mx' in domain:
					banner = self.__banner_smtp(domain['mx'])
					if banner:
						domain['banner-smtp'] = banner

			if self.option_ssdeep:
				if 'a' in domain:
					try:
						req = requests.get(self.uri_scheme + '://' + domain['domain'] + self.uri_path + self.uri_query, timeout=REQUEST_TIMEOUT_HTTP, headers={'User-Agent': 'Mozilla/5.0 (dnstwist)'})
						#ssdeep_fuzz = ssdeep.hash(req.text.replace(' ', '').replace('\n', ''))
						ssdeep_fuzz = ssdeep.hash(req.text)
					except Exception:
						pass
					else:
						# Floor division so every 2xx status qualifies under
						# true division as well (e.g. 204 / 100 is 2.04).
						if req.status_code // 100 == 2:
							domain['ssdeep'] = ssdeep.compare(self.ssdeep_orig, ssdeep_fuzz)

			self.jobs.task_done()
Example #38
0
            # Load every dictionary file; today's dictionary is already in
            # memory and is reused instead of being read from disk.
            for dico in dico_path_set:
                # Opening dico
                if dico == filedicopath_today:
                    opened_dico.append([dico, today_dico])
                else:
                    with open(dico, 'r') as fp:
                        opened_dico.append([dico, json.load(fp)])

            # Retrieve the hash of the current paste.
            paste_hash = PST._get_p_hash()

            # Go through the database of each dico (of the month)
            # Only scores strictly above this value count as duplicates.
            threshold_dup = 99
            for dico_name, dico in opened_dico:
                for dico_key, dico_hash in dico.items():
                    percent = ssdeep.compare(dico_hash, paste_hash)
                    if percent > threshold_dup:
                        # The last six characters of the dictionary path
                        # select the redis connection to use.
                        db = dico_name[-6:]
                        # Go through the database of the dico filter (month)
                        r_serv_dico = dico_redis[db]

                        # Two lookups: hash -> index, then index -> paste path.
                        index_current = r_serv_dico.get(dico_hash)
                        paste_path = r_serv_dico.get(index_current)
                        if paste_path != None:
                            hash_dico[dico_hash] = (paste_path, percent)

                        #print 'comparing: ' + str(dico_hash[:20]) + '  and  ' + str(paste_hash[:20]) + ' percentage: ' + str(percent)
                        # NOTE(review): this print is outside the None guard
                        # above, so a None paste_path would fail on the slice
                        # — confirm whether that can happen.
                        print '   ' + PST.p_path[44:] + ', ' + paste_path[
                            44:] + ', ' + str(percent)
print("%s documentos encontrados"%(f,))

#for a in f:
#    print(a["file_id"])


f=coll_meta.count({"particular_header.packer_detection":"False"})
print("%s documentos falsos"%(f,))


f=coll_meta.count({"particular_header.packer_detection":"Unknown"})
print("%s documentos desconocidos"%(f,))
"""

# Pull the file id and fuzzy hash of every document into memory.
f1 = coll_meta.find({}, {"file_id": 1, "fuzzy_hash": 1})
l = []
l.extend(f1)

# Compare each document with every document that follows it and print the
# pairs with a non-zero score, then a progress marker per document.
count = 0
for count, a in enumerate(l, 1):
    for b in l[count:]:
        res = ssdeep.compare(a["fuzzy_hash"], b["fuzzy_hash"])
        if res > 0:
            print("%s - %s - %s" % (res, a["file_id"], b["file_id"]))

    print("***** %s ******" % (count,))

    # raw_input()
Example #40
0
 def compare(self, hash1, hash2):
     """Return the ``ssdeep.compare`` result for the two given hashes."""
     result = ssdeep.compare(hash1, hash2)
     return result
Example #41
0
    def run_ssdeep_scans(self):
        """Scan stored files against every unprocessed ssdeep indicator.

        For each indicator id in ``self.crits_data['indicators']`` that is
        not yet ``completed``, up to ``self.scan_count`` files found under
        ``/mnt/storage`` are hashed and compared with the indicator's
        ``value``.  Every file scoring above ``self.match_threshold`` is
        appended to the indicator's ``results`` (with its JSON report when
        one is available).  The indicator is then marked ``completed`` with
        status ``'In Progress'`` if anything matched, ``'Analyzed'``
        otherwise.  All access to ``self.crits_data`` happens under
        ``self.data_lock``.
        """
        with self.data_lock:
            cid_list = list(self.crits_data['indicators'].keys())
        for cid in cid_list:
            # Ignore processed indicators
            with self.data_lock:
                if self.crits_data['indicators'][cid]['completed']:
                    continue

            log.info('Running ssdeep scan on indicator {}'.format(cid))
            # Equivalent of: find ... -not -name '*.*' | head -n <scan_count>
            # No shell is involved, so the pattern must not carry its own
            # quote characters; with them find looked for names containing
            # literal double quotes and the filter excluded nothing.
            find_p = Popen([
                'find', '/mnt/storage', '-maxdepth', '2', '-mindepth', '2',
                '-type', 'f', '-not', '-name', '*.*'
            ],
                           stdout=subprocess.PIPE)
            # Popen arguments must be strings, whatever type scan_count has.
            head_p = Popen(['head', '-n', str(self.scan_count)],
                           stdin=find_p.stdout,
                           stdout=subprocess.PIPE)
            # Drop our copy of the pipe so find gets SIGPIPE once head exits.
            find_p.stdout.close()
            stdout, stderr = head_p.communicate()
            # Reap find so it does not linger as a zombie process.
            find_p.wait()
            files = stdout.decode('ascii').splitlines()
            failed = False
            for f in files:
                file_hash = ssdeep.hash_from_file(f)
                with self.data_lock:
                    indicator_hash = self.crits_data['indicators'][cid][
                        'value']
                percent_match = ssdeep.compare(file_hash, indicator_hash)
                if percent_match <= self.match_threshold:
                    continue

                # This indicator fails FAQueue
                log.info(
                    'Indicator {} failed with percentage of {}'.format(
                        cid, percent_match))
                failed = True
                # CarbonBlack returns a json report of the file with details
                report = self._get_json_report(f)
                # Built step by step so 'report', when present, keeps its
                # original position between 'score' and 'total_hits'.
                result = {'file_matched': f, 'score': percent_match}
                if report:
                    result['report'] = report
                result['total_hits'] = 1
                with self.data_lock:
                    self.crits_data['indicators'][cid]['results'].append(
                        result)
                if report:
                    self._attach_json_report_observables(cid, report)

            with self.data_lock:
                indicator = self.crits_data['indicators'][cid]
                indicator['status'] = 'In Progress' if failed else 'Analyzed'
                indicator['completed'] = True
sys.path.insert(0, path)
import ssdeep

# Reference ssdeep hash that every stored document is compared against.
fuzzy_to_compare = "12288:lTurEUKhROhnCzrwsrsNuRIHZB62atXtjBIuMAI0VpnJJyeVxy5la8AJv:lqrEJhROh8rwKsNrDK9xM3cJyeg0Jv"

client = MongoClient(env["metadata"]["host"], env["metadata"]["port"])
db = client[env["db_metadata_name"]]
coll_meta = db[env["db_metadata_collection"]]
print("loading")
# Fetch only the file id and fuzzy hash of every document, all in memory.
f1 = coll_meta.find({}, {"file_id": 1, "fuzzy_hash": 1})
l = list(f1)
print("compearing")
count = 0
reset = 0
for a in l:
    try:
        res = ssdeep.compare(a["fuzzy_hash"], fuzzy_to_compare)
    except Exception as e:
        # ``except ... as`` and the print call are valid on Python 2.6+ and
        # Python 3; a document that cannot be compared is reported, skipped.
        print(str(e))
        continue
    # Report documents scoring 50 or more against the reference hash.
    if res >= 50:
        print("%s - %s" % (res, a["file_id"]))

    #print count
    #reset+=1; count+=1
    #if(reset>=1000):
    #    print(str(count)+" procesados")
    #    reset=0
Example #43
0
def get_ssdeep_comparison(first, second):
    """Return the result of ``ssdeep.compare`` for the two given hashes.

    Thin wrapper: both arguments are forwarded unchanged and the score
    produced by ``ssdeep.compare`` is handed straight back to the caller.
    """
    return ssdeep.compare(first, second)
Example #44
0
    def run(self):
        """Process queued domain records until the job queue is empty.

        Each job is a dict keyed by ``'domain-name'`` that is enriched in
        place.  Depending on the ``option_*`` switches of this worker the
        record gains DNS answers (``'dns-ns'``, ``'dns-a'``, ``'dns-aaaa'``,
        ``'dns-mx'``), ``'mx-spy'``, WHOIS dates, ``'geoip-country'``,
        HTTP/SMTP banners and an ``'ssdeep-score'``.

        The loop stops, and ``kill_received`` is set, as soon as a
        non-blocking ``get`` finds the queue empty.  Lookup failures are
        deliberately ignored so one bad domain does not stop the worker.
        """
        while not self.kill_received:
            try:
                domain = self.jobs.get(block=False)
            except queue.Empty:
                self.kill_received = True
                return

            # Query with the IDNA-encoded name; the decoded form is restored
            # at the bottom of the loop.
            domain['domain-name'] = domain['domain-name'].encode(
                'idna').decode()

            # Reset the per-domain result flags for BOTH resolver paths.
            # They used to be initialised only inside the extdns branch, so
            # the getaddrinfo path either hit UnboundLocalError in the
            # option checks below or reused values from the previous job.
            nxdomain = False
            dns_ns = False
            dns_a = False
            dns_aaaa = False
            dns_mx = False

            if self.option_extdns:
                if self.nameservers:
                    resolv = dns.resolver.Resolver(configure=False)
                    resolv.nameservers = self.nameservers
                else:
                    resolv = dns.resolver.Resolver()

                resolv.lifetime = REQUEST_TIMEOUT_DNS * REQUEST_RETRIES_DNS
                resolv.timeout = REQUEST_TIMEOUT_DNS

                try:
                    domain['dns-ns'] = self.__answer_to_list(
                        resolv.query(domain['domain-name'],
                                     rdtype=dns.rdatatype.NS))
                    dns_ns = True
                except dns.resolver.NXDOMAIN:
                    # No such domain: skip every further DNS query for it.
                    nxdomain = True
                except dns.resolver.NoNameservers:
                    domain['dns-ns'] = ['!ServFail']
                except DNSException:
                    pass

                if nxdomain is False:
                    try:
                        domain['dns-a'] = self.__answer_to_list(
                            resolv.query(domain['domain-name'],
                                         rdtype=dns.rdatatype.A))
                        dns_a = True
                    except dns.resolver.NoNameservers:
                        domain['dns-a'] = ['!ServFail']
                    except DNSException:
                        pass

                    try:
                        domain['dns-aaaa'] = self.__answer_to_list(
                            resolv.query(domain['domain-name'],
                                         rdtype=dns.rdatatype.AAAA))
                        dns_aaaa = True
                    except dns.resolver.NoNameservers:
                        domain['dns-aaaa'] = ['!ServFail']
                    except DNSException:
                        pass

                # MX is only looked up for domains that returned NS records.
                if nxdomain is False and dns_ns is True:
                    try:
                        domain['dns-mx'] = self.__answer_to_list(
                            resolv.query(domain['domain-name'],
                                         rdtype=dns.rdatatype.MX))
                        dns_mx = True
                    except dns.resolver.NoNameservers:
                        domain['dns-mx'] = ['!ServFail']
                    except DNSException:
                        pass
            else:
                # Fallback path: resolve through the system resolver.
                try:
                    ip = socket.getaddrinfo(domain['domain-name'], 80)
                except socket.gaierror as e:
                    # NOTE(review): -3 looks like EAI_AGAIN (temporary
                    # resolution failure) on Linux - confirm for other
                    # platforms.
                    if e.errno == -3:
                        domain['dns-a'] = ['!ServFail']
                except Exception:
                    pass
                else:
                    domain['dns-a'] = list()
                    domain['dns-aaaa'] = list()
                    # Split the returned addresses into IPv4 and IPv6 by
                    # their separator character.
                    for j in ip:
                        if '.' in j[4][0]:
                            domain['dns-a'].append(j[4][0])
                        if ':' in j[4][0]:
                            domain['dns-aaaa'].append(j[4][0])
                    domain['dns-a'] = sorted(domain['dns-a'])
                    domain['dns-aaaa'] = sorted(domain['dns-aaaa'])
                    dns_a = True
                    dns_aaaa = True

            if self.option_mxcheck:
                if dns_mx is True:
                    # Compare by value: an identity test (`is not`) on two
                    # equal strings is not a reliable inequality check.
                    if domain['domain-name'] != self.domain_orig:
                        if self.__mxcheck(domain['dns-mx'][0],
                                          self.domain_orig,
                                          domain['domain-name']):
                            domain['mx-spy'] = True

            if self.option_whois:
                if nxdomain is False and dns_ns is True:
                    try:
                        whoisdb = whois.query(domain['domain-name'])
                        # Keep only the part before the first space.
                        domain['whois-created'] = str(
                            whoisdb.creation_date).split(' ')[0]
                        domain['whois-updated'] = str(
                            whoisdb.last_updated).split(' ')[0]
                    except Exception:
                        pass

            if self.option_geoip:
                if dns_a is True:
                    gi = GeoIP.new(GeoIP.GEOIP_MEMORY_CACHE)
                    try:
                        country = gi.country_name_by_addr(domain['dns-a'][0])
                    except Exception:
                        pass
                    else:
                        if country:
                            domain['geoip-country'] = country.split(',')[0]

            if self.option_banners:
                if dns_a is True:
                    banner = self.__banner_http(domain['dns-a'][0],
                                                domain['domain-name'])
                    if banner:
                        domain['banner-http'] = banner
                if dns_mx is True:
                    banner = self.__banner_smtp(domain['dns-mx'][0])
                    if banner:
                        domain['banner-smtp'] = banner

            if self.option_ssdeep:
                if dns_a is True or dns_aaaa is True:
                    try:
                        # NOTE: certificate verification is switched off
                        # for this request (verify=False).
                        req = requests.get(
                            self.uri_scheme + '://' + domain['domain-name'] +
                            self.uri_path + self.uri_query,
                            timeout=REQUEST_TIMEOUT_HTTP,
                            headers={'User-Agent': self.useragent},
                            verify=False)
                    except Exception:
                        pass
                    else:
                        # Only score 2xx responses whose final URL (query
                        # string stripped) differs from the stored
                        # effective URL of the original page.
                        if req.status_code // 100 == 2 and req.url.split(
                                '?')[0] != self.ssdeep_effective_url:
                            # Hash the page with all whitespace removed
                            # and lower-cased.
                            ssdeep_curr = ssdeep.hash(''.join(
                                req.text.split()).lower())
                            domain['ssdeep-score'] = ssdeep.compare(
                                self.ssdeep_init, ssdeep_curr)

            # Restore the decoded (Unicode) form of the name.
            domain['domain-name'] = domain['domain-name'].encode().decode(
                'idna')
            self.jobs.task_done()
Example #45
0
    def run(self):
        """Worker loop: take domain dicts off ``self.jobs`` and enrich them.

        For every job the record (keyed by ``"domain"``) is extended in
        place with whichever of ``"ns"``, ``"a"``, ``"aaaa"``, ``"mx"``,
        ``"created"``, ``"updated"``, ``"country"``, ``"banner-http"``,
        ``"banner-smtp"`` and ``"ssdeep"`` could be obtained.  Which steps
        run depends on the module-availability flags and the parsed
        command-line ``args``.  Every lookup is best effort: failures are
        ignored and simply leave the key unset.  The loop runs until
        ``self.kill_received`` becomes true.
        """
        while not self.kill_received:
            domain = self.jobs.get()

            if not module_dnspython:
                # Without dnspython only A/AAAA can be obtained, through
                # the system resolver.
                try:
                    addrinfo = socket.getaddrinfo(domain["domain"], 80)
                except Exception:
                    pass
                else:
                    # First address containing a dot is recorded as IPv4.
                    first_v4 = next(
                        (item[4][0] for item in addrinfo
                         if "." in item[4][0]), None)
                    if first_v4 is not None:
                        domain["a"] = first_v4
                    # First address containing a colon is recorded as IPv6.
                    first_v6 = next(
                        (item[4][0] for item in addrinfo
                         if ":" in item[4][0]), None)
                    if first_v6 is not None:
                        domain["aaaa"] = first_v6
            else:
                resolver = dns.resolver.Resolver()
                # resolver.lifetime = REQUEST_TIMEOUT_DNS
                resolver.timeout = REQUEST_TIMEOUT_DNS

                try:
                    ns_answer = resolver.query(domain["domain"], "NS")
                    # Drop the trailing character of the answer's text form.
                    domain["ns"] = str(ns_answer[0])[:-1].lower()
                except Exception:
                    pass

                # Remaining record types are only queried when NS resolved.
                if "ns" in domain:
                    for rdtype, key in (("A", "a"), ("AAAA", "aaaa")):
                        try:
                            answer = resolver.query(domain["domain"], rdtype)
                            domain[key] = str(answer[0])
                        except Exception:
                            pass

                    try:
                        mx_answer = resolver.query(domain["domain"], "MX")
                        domain["mx"] = str(
                            mx_answer[0].exchange)[:-1].lower()
                    except Exception:
                        pass

            if (module_whois and args.whois
                    and "ns" in domain and "a" in domain):
                try:
                    whois_record = whois.query(domain["domain"])
                    domain["created"] = str(
                        whois_record.creation_date).replace(" ", "T")
                    domain["updated"] = str(
                        whois_record.last_updated).replace(" ", "T")
                except Exception:
                    pass

            if module_geoip and args.geoip and "a" in domain:
                geo = GeoIP.new(GeoIP.GEOIP_MEMORY_CACHE)
                try:
                    located = geo.country_name_by_addr(domain["a"])
                except Exception:
                    located = None
                if located:
                    domain["country"] = located

            if args.banners:
                if "a" in domain:
                    http_text = self.__banner_http(
                        domain["a"], domain["domain"])
                    if http_text:
                        domain["banner-http"] = http_text
                if "mx" in domain:
                    smtp_text = self.__banner_smtp(domain["mx"])
                    if smtp_text:
                        domain["banner-smtp"] = smtp_text

            if (args.ssdeep and module_requests and module_ssdeep
                    and self.orig_domain_ssdeep and "a" in domain):
                try:
                    response = requests.get(
                        "http://" + domain["domain"],
                        timeout=REQUEST_TIMEOUT_HTTP)
                    page_hash = ssdeep.hash(response.text)
                except Exception:
                    pass
                else:
                    # Score the fetched page against the original's hash.
                    domain["ssdeep"] = ssdeep.compare(
                        self.orig_domain_ssdeep, page_hash)

            self.jobs.task_done()
Example #46
0
def is_malware_to_focus(elffile):
    """Tell whether *elffile* resembles one of three reference samples.

    The ``'lstrfuzzy'`` entry of ``elffile.features`` is compared with each
    hard-coded ssdeep hash in turn; the result is True as soon as one
    comparison scores 50 or more, otherwise False.
    """
    reference_hashes = (
        '12:TKLJUWLLSQzisKFl1oXNt87U9fPG9K1pSzMT:WfzirnYVPG7gT',
        '12:lQn5o+ZirjsgAk3MRW7ll8+XS+wPKs0fI:OndZirjbp5l3Ex0fI',
        '12:GXkVn5o+ZirjsgAk3MRW7ll8+XS+wPKs04:GXAndZirjbp5l3Ex04',
    )
    return any(
        ssdeep.compare(elffile.features['lstrfuzzy'], known) >= 50
        for known in reference_hashes
    )
#! /usr/bin/env python3
# Hash the two files given on the command line and print the ssdeep
# comparison score of the resulting hashes.
# Usage: ./file_ssdeep_compare.py FILE1 FILE2
import ssdeep
import sys

first_hash = ssdeep.hash_from_file(sys.argv[1])
second_hash = ssdeep.hash_from_file(sys.argv[2])
print(ssdeep.compare(first_hash, second_hash))
Example #48
0
 def decide(self, data):
     """Return True when *data* scores above the configured threshold.

     The feature is extracted with ``self.feature.get_feature``; a falsy
     feature short-circuits to False without any comparison.  Otherwise
     the feature is compared with ``self.fuzzyhash`` via ``ssdeep.compare``
     and the result must be strictly greater than ``self.threshold``.
     """
     extracted = self.feature.get_feature(data)
     if extracted:
         similarity = ssdeep.compare(extracted, self.fuzzyhash)
         return similarity > self.threshold
     return False
Example #49
0
        exeSql.execute(fetchfromdb)
    except mdb.Error, e:
        print e
        
    dbrecords = exeSql.fetchall()
    
    maxlen, minlen = int(record['len'] * 1.10), int(record['len'] * 0.90)
    count = 0
    
    for d_record in dbrecords:
        if d_record[2] >= minlen and d_record[2] <= maxlen:
            if record['s_id'] == d_record[0]:
                update(record, d_record[0])
            
            else:
                ratio = ssdeep.compare(record['ssdeep'], d_record[1])
                #if ratio >= 85:
                if (int(record['len']) <= 150 and ratio >=95) or (int(record['len']) > 150 and ratio >= 80):
                    update(record, d_record[0])
                else:
                    count += 1
        else:
            count += 1
    
    if count == len(dbrecords):
        insert(record)

def insert(record):
        
    # Inserting data in main db
    print "Inserting new spam!"
Example #50
0
            # Open selected dico range
            opened_dico = []
            for dico_name in dico_range_list:
                opened_dico.append([dico_name, dico_redis[dico_name]])

            # retrieve hash from paste
            paste_hashes = PST._get_p_hash()

            # Go throught the Database of the dico (of the month)
            for curr_dico_name, curr_dico_redis in opened_dico:
                for hash_type, paste_hash in paste_hashes.items():
                    for dico_hash in curr_dico_redis.smembers('HASHS_'+hash_type):

                        try:
                            if hash_type == 'ssdeep':
                                percent = 100-ssdeep.compare(dico_hash, paste_hash)
                            else:
                                percent = tlsh.diffxlen(dico_hash, paste_hash)
                                if percent > 100:
                                    percent = 100

                            threshold_duplicate = threshold_set[hash_type]
                            if percent < threshold_duplicate:
                                percent = 100 - percent if hash_type == 'ssdeep' else percent #recovert the correct percent value for ssdeep
                                # Go throught the Database of the dico filter (month)
                                r_serv_dico = dico_redis[curr_dico_name]

                                # index of paste
                                index_current = r_serv_dico.get(dico_hash)
                                index_current = index_current
                                paste_path = r_serv_dico.get(index_current)
Example #51
0
    return status, newhash


if __name__ == "__main__":
    args = docopt(__doc__, version="0.1")

    if args["hash"]:
        status, newhash = fetchpage(args["<url>"])
        print("[ {} ] for {}: {}".format(status, args["<url>"], newhash))
    elif args["check"]:
        catcherror = 0
        try:
            status, newhash = fetchpage(args["<url>"])
            verdict = "Verdict Never Set."
            color = "green"
            compare = ssdeep.compare(newhash, args["<lasthash>"])
            if compare == 0:
                verdict = "Site Changed Completely."
                color = "magenta"
            elif compare == 100:
                verdict = "Site Unchanged."
                color = "green"
            elif compare < 100 and compare >= 80:
                verdict = "Site Modified Slightly."
                color = "yellow"
            elif compare < 80 and compare >= 50:
                verdict = "Site Modified Significantly."
                color = "red"
            elif compare < 50 and compare > 0:
                verdict = "Site Modified Heavily."
                color = "magenta"
Example #52
0
 def compare(self, h1, h2):
     """Return the ``ssdeep.compare`` score of *h1* and *h2*.

     Both arguments are converted with ``str()`` before being compared.
     """
     score = ssdeep.compare(str(h1), str(h2))
     return score
 def compare(self, other):
     """Return the ``ssdeep.compare`` score between this object and *other*.

     The ``hash_value`` attribute of each object is what gets compared.
     """
     score = ssdeep.compare(self.hash_value, other.hash_value)
     return score
Example #54
0
def main():
	"""Command-line entry point: fuzz a domain name and report what resolves.

	Parses the command line into the module-global ``args``, validates the
	domain, generates its variations with ``fuzz_domain`` and enriches each
	variation with DNS records and, when the matching option and module are
	available, WHOIS dates, GeoIP country, HTTP/SMTP banners and an ssdeep
	score against the original domain's web page.  Results are written
	through ``display`` and ``display_csv``.

	Returns 0 on completion.  Exits with status 0 after printing help when
	no argument is given, and with status -1 when the domain is invalid.
	"""
	parser = argparse.ArgumentParser(
	description='''Find similar-looking domains that adversaries can use to attack you.  
	Can detect fraud, phishing attacks and corporate espionage. Useful as an additional 
	source of targeted threat intelligence.''',
	epilog='''Questions? Complaints? You can reach the author at <*****@*****.**>'''
	)

	parser.add_argument('domain', help='domain name to check')
	parser.add_argument('-c', '--csv', action='store_true', help='print output in CSV format')
	parser.add_argument('-r', '--registered', action='store_true', help='show only registered domain names')
	parser.add_argument('-w', '--whois', action='store_true', help='perform lookup for WHOIS creation/modification date (slow)')
	parser.add_argument('-g', '--geoip', action='store_true', help='perform lookup for GeoIP location')
	parser.add_argument('-b', '--banners', action='store_true', help='determine HTTP and SMTP service banners')
	parser.add_argument('-s', '--ssdeep', action='store_true', help='fetch web pages and compare fuzzy hashes to evaluate similarity')

	if len(sys.argv) < 2:
		parser.print_help()
		sys.exit(0)

	global args
	args = parser.parse_args()

	# Raw string: the banner is full of backslashes that are not escape
	# sequences; the resulting text is identical to the non-raw literal.
	display(ST_BRIGHT + FG_MAGENTA +
r'''     _           _            _     _   
  __| |_ __  ___| |___      _(_)___| |_ 
 / _` | '_ \/ __| __\ \ /\ / / / __| __|
| (_| | | | \__ \ |_ \ V  V /| \__ \ |_ 
 \__,_|_| |_|___/\__| \_/\_/ |_|___/\__| %s

''' % __version__ + FG_RESET)

	if not validate_domain(args.domain):
		sys.stderr.write('ERROR: invalid domain name!\n')
		sys.exit(-1)

	domains = fuzz_domain(args.domain.lower())

	if not module_dnspython:
		sys.stderr.write('NOTICE: Missing module: dnspython - DNS features limited!\n')
	if not module_geoip and args.geoip:
		sys.stderr.write('NOTICE: Missing module: GeoIP - geographical location not available!\n')
	if not module_whois and args.whois:
		sys.stderr.write('NOTICE: Missing module: whois - database not accessible!\n')
	if not module_ssdeep and args.ssdeep:
		sys.stderr.write('NOTICE: Missing module: ssdeep - fuzzy hashes not available!\n')
	if not module_requests and args.ssdeep:
		sys.stderr.write('NOTICE: Missing module: Requests - web page downloads not possible!\n')

	if args.ssdeep and module_ssdeep and module_requests:
		display('Fetching web page from: http://' + args.domain.lower() + '/ [following redirects] ... ')
		try:
			req = requests.get('http://' + args.domain.lower(), timeout=2)
		except Exception:
			# Without the original page there is nothing to compare
			# against, so switch the ssdeep feature off for this run.
			display('Failed!\n')
			args.ssdeep = False
		else:
			display('%d %s (%d bytes)\n' % (req.status_code, req.reason, len(req.text)))
			orig_domain_ssdeep = ssdeep.hash(req.text)

	display('Processing %d domains ' % len(domains))

	signal.signal(signal.SIGINT, sigint_handler)

	total_hits = 0

	# Every lookup below is best effort: ordinary failures leave the key
	# unset.  `except Exception` (rather than a bare `except`) lets
	# KeyboardInterrupt and SystemExit propagate instead of being swallowed.
	for domain in domains:
		if module_dnspython:
			resolv = dns.resolver.Resolver()
			resolv.lifetime = 1
			resolv.timeout = 1

			try:
				ns = resolv.query(domain['domain'], 'NS')
				# Drop the trailing character of the answer's text form.
				domain['ns'] = str(ns[0])[:-1].lower()
			except Exception:
				pass

			# Remaining record types are only queried when NS resolved.
			if 'ns' in domain:
				try:
					ns = resolv.query(domain['domain'], 'A')
					domain['a'] = str(ns[0])
				except Exception:
					pass

				try:
					ns = resolv.query(domain['domain'], 'AAAA')
					domain['aaaa'] = str(ns[0])
				except Exception:
					pass

				try:
					mx = resolv.query(domain['domain'], 'MX')
					domain['mx'] = str(mx[0].exchange)[:-1].lower()
				except Exception:
					pass
		else:
			# Fallback without dnspython: system resolver, A/AAAA only.
			try:
				ip = socket.getaddrinfo(domain['domain'], 80)
			except Exception:
				pass
			else:
				for j in ip:
					if '.' in j[4][0]:
						domain['a'] = j[4][0]
						break
				for j in ip:
					if ':' in j[4][0]:
						domain['aaaa'] = j[4][0]
						break

		if module_whois and args.whois:
			if 'ns' in domain or 'a' in domain:
				try:
					whoisdb = whois.query(domain['domain'])
					domain['created'] = str(whoisdb.creation_date).replace(' ', 'T')
					domain['updated'] = str(whoisdb.last_updated).replace(' ', 'T')
				except Exception:
					pass

		if module_geoip and args.geoip:
			if 'a' in domain:
				gi = GeoIP.new(GeoIP.GEOIP_MEMORY_CACHE)
				try:
					country = gi.country_name_by_addr(domain['a'])
				except Exception:
					pass
				else:
					if country:
						domain['country'] = country

		if args.banners:
			if 'a' in domain:
				banner = http_banner(domain['a'], domain['domain'])
				if banner:
					domain['banner-http'] = banner
			if 'mx' in domain:
				banner = smtp_banner(domain['mx'])
				if banner:
					domain['banner-smtp'] = banner

		if module_ssdeep and module_requests and args.ssdeep:
			if 'a' in domain:
				try:
					req = requests.get('http://' + domain['domain'], timeout=1)
					fuzz_domain_ssdeep = ssdeep.hash(req.text)
				except Exception:
					pass
				else:
					# orig_domain_ssdeep is always bound here: args.ssdeep
					# is reset to False when the original fetch failed.
					domain['ssdeep'] = ssdeep.compare(orig_domain_ssdeep, fuzz_domain_ssdeep)

		# Progress marker: '!' for a domain that resolved, '.' otherwise.
		if 'a' in domain or 'ns' in domain:
			display(FG_YELLOW + '!' + FG_RESET)
			total_hits += 1
		else:
			display('.')

	display(' %d hit(s)\n\n' % total_hits)

	display_csv('Generator,Domain,A,AAAA,MX,NS,Country,Created,Updated,SSDEEP\n')

	for entry in domains:
		info = ''

		if 'a' in entry:
			info += entry['a']
			if 'country' in entry:
				info += FG_CYAN + '/' + entry['country'] + FG_RESET
			if 'banner-http' in entry:
				info += ' %sHTTP:%s"%s"%s' % (FG_GREEN, FG_CYAN, entry['banner-http'], FG_RESET)
		elif 'ns' in entry:
			info += '%sNS:%s%s%s' % (FG_GREEN, FG_CYAN, entry['ns'], FG_RESET)

		if 'aaaa' in entry:
			info += ' ' + entry['aaaa']

		if 'mx' in entry:
			info += ' %sMX:%s%s%s' % (FG_GREEN, FG_CYAN, entry['mx'], FG_RESET)
			if 'banner-smtp' in entry:
				info += ' %sSMTP:%s"%s"%s' % (FG_GREEN, FG_CYAN, entry['banner-smtp'], FG_RESET)

		# Collapse the two dates into one field when they are identical.
		if 'created' in entry and 'updated' in entry and entry['created'] == entry['updated']:
			info += ' %sCreated/Updated:%s%s%s' % (FG_GREEN, FG_CYAN, entry['created'], FG_RESET)
		else:
			if 'created' in entry:
				info += ' %sCreated:%s%s%s' % (FG_GREEN, FG_CYAN, entry['created'], FG_RESET)
			if 'updated' in entry:
				info += ' %sUpdated:%s%s%s' % (FG_GREEN, FG_CYAN, entry['updated'], FG_RESET)

		# A zero score is not shown in the console line.
		if 'ssdeep' in entry:
			if entry['ssdeep'] > 0:
				info += ' %sSSDEEP:%s%d%%%s' % (FG_GREEN, FG_CYAN, entry['ssdeep'], FG_RESET)

		if not info:
			info = '-'

		# With --registered, rows without any collected info are hidden.
		if (args.registered and info != '-') or not args.registered:
			display('%s%-15s%s %-15s %s\n' % (FG_BLUE, entry['type'], FG_RESET, entry['domain'], info))
			display_csv(
			'%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n' % (entry.get('type'), entry.get('domain'), entry.get('a', ''),
			entry.get('aaaa', ''), entry.get('mx', ''), entry.get('ns', ''), entry.get('country', ''),
			entry.get('created', ''), entry.get('updated', ''), str(entry.get('ssdeep', '')))
			)

	display(FG_RESET + ST_RESET)

	return 0
            soup = BeautifulSoup(doc2, "lxml")
            doc2 = soup.get_text()
            doc2_hash = ssdeep.hash(doc2)
        except:
            doc2 = rawdoc2
            doc2_hash = ssdeep.hash(doc2)

        entropy1 = H(doc2.replace(' ', ''), range_printable)
        entropy2 = Entropy(doc2)

        doc1TokenList = re.split(r'(\d+|\W+)', doc1)
        doc2TokenList = re.split(r'(\d+|\W+)', doc2)
        kldScore = KLD(doc1TokenList, doc2TokenList)
        kldScore = 0
        simScore = compare(doc1, doc2)
        hashSimScore = ssdeep.compare(doc1_hash, doc2_hash)
        normalizedSimScore = ((90 - simScore) / 90) * 100

        try:
            print('insert into ' + dbname + ' values (NULL,\'' + url +
                  '\',\'' + citedByNode + '\',\'' + initialDate +
                  '\',\'copy\',' + str(count) + ',\'' + htype + '\',\'' +
                  laststatus + '\',\'' + statuschain + '\',\'' + citeDate +
                  '\',' + size + ',\'' + sim + '\',\'' + hash + '\',' +
                  str(normalizedSimScore) + ',' + str(entropy1) + ',' +
                  str(entropy2) + ',' + str(kldScore) + ');')
        except:
            print(' a copy failure occurred ')

        # try:
        #   print ("\"%s\",\"%s\",\"%s\",%f,%f,%f" % (url, original, copy, simScore, hashSimScore, normalizedSimScore))