def compare(self, hash1, hash2):
    if hash1 == '-' or hash2 == '-':
        return '-'
    distance = tlsh.diffxlen(hash1, hash2)
    #if distance > TLSH_MAX_SIMILARITY:
    #    return '-'
    return distance
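As a point of reference, here is a minimal sketch of how the hashes consumed by a compare method like the one above could be produced with the py-tlsh bindings. The byte strings are invented for illustration, and TLSH needs a reasonable amount of input with enough byte variety before tlsh.hash() returns a usable digest.

import tlsh

# Illustrative inputs only: repeat a varied byte pattern so TLSH has
# enough data and variance to produce a valid digest.
base = bytes(range(256)) * 4
data_a = base + b" sample payload A"
data_b = base + b" sample payload B"

hash_a = tlsh.hash(data_a)
hash_b = tlsh.hash(data_b)

# diffxlen() compares the digests while ignoring the encoded input length;
# near-identical inputs give a small distance (0 means effectively identical).
print(tlsh.diffxlen(hash_a, hash_b))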
def match_hash(self, signal_str: str) -> t.List[signal_base.SignalMatch]:
    matches = []
    if len(signal_str) == EXPECT_TLSH_HASH_LENGTH:
        for x in TEMP_MATCH_IMPLEMNTATION_CHECK_DB:
            if tlsh.diffxlen(x[0], signal_str) <= TLSH_CONFIDENT_MATCH_THRESHOLD:
                matches.append(signal_base.SignalMatch(x[1][0], x[1][1]))
    return matches
def compare_hash(
    cls, hash1: str, hash2: str, distance_threshold: t.Optional[int] = None
) -> signal_base.HashComparisonResult:
    if distance_threshold is None:
        distance_threshold = TLSH_CONFIDENT_MATCH_THRESHOLD
    dist = tlsh.diffxlen(hash1, hash2)
    return signal_base.HashComparisonResult.from_dist(dist, distance_threshold)
def TO_OVERWRITE_compute_distance(self, pic1: picture_class.Picture, pic2: picture_class.Picture):
    dist = None
    if self.conf.ALGO == configuration.ALGO_TYPE.TLSH:
        dist = tlsh.diff(pic1.hash, pic2.hash)
    elif self.conf.ALGO == configuration.ALGO_TYPE.TLSH_NO_LENGTH:
        dist = tlsh.diffxlen(pic1.hash, pic2.hash)
    else:
        raise Exception(
            "Invalid algorithm type for TLSH execution handler during distance computing : "
            + str(self.conf.ALGO.name))
    return dist
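A small sketch (invented inputs, not part of the handler above) of why the two branches differ: tlsh.diff() adds a penalty when the two inputs have very different lengths, while tlsh.diffxlen() drops that length term, so similar content at different sizes still scores as close.

import tlsh

pattern = bytes(range(256))
h_small = tlsh.hash(pattern * 4)    # ~1 KiB of a varied byte pattern
h_large = tlsh.hash(pattern * 64)   # ~16 KiB of the same pattern

print(tlsh.diff(h_small, h_large))      # includes a penalty for the length gap
print(tlsh.diffxlen(h_small, h_large))  # same comparison with the length term removed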
def tlsh_score(response, site, alert):
    """
    Calculate TLSH score.

    :param response: HTTP response.
    :param site: Site object.
    :param alert: Alert integer.
    :return: alert, score
    :rtype: int, int
    """
    fuzzy_hash = tlsh.hash(bytes(response.text, 'utf-8'))
    score = tlsh.diffxlen(site.content_fuzzy_hash, fuzzy_hash)
    if score > 160:
        alert += 4
    Site.objects.filter(pk=site.pk).update(content_fuzzy_hash=fuzzy_hash)
    return alert, score
def match_hash(self, signal_str: str) -> t.List[signal_base.SignalMatch]:
    matches = []
    try:
        import tlsh
    except ImportError:
        warnings.warn(
            "Matching a tlsh hash requires additional libraries to be installed; "
            "install threatexchange with the [pdf] extra",
            category=UserWarning,
        )
        return []
    if len(signal_str) == EXPECT_TLSH_HASH_LENGTH:
        for tlsh_hash, signal_attr in self.state.items():
            if tlsh.diffxlen(tlsh_hash, signal_str) <= TLSH_CONFIDENT_MATCH_THRESHOLD:
                matches.append(
                    signal_base.SignalMatch(
                        signal_attr.labels, signal_attr.first_descriptor_id))
    return matches
def compare(self, h1, h2):
    return tlsh.diffxlen(h1, h2)
for dico_name in dico_range_list:
    opened_dico.append([dico_name, dico_redis[dico_name]])

# retrieve hash from paste
paste_hashes = PST._get_p_hash()

# Go through the database of the dico (of the month)
for curr_dico_name, curr_dico_redis in opened_dico:
    for hash_type, paste_hash in paste_hashes.items():
        for dico_hash in curr_dico_redis.smembers('HASHS_'+hash_type):
            try:
                if hash_type == 'ssdeep':
                    percent = 100 - ssdeep.compare(dico_hash, paste_hash)
                else:
                    percent = tlsh.diffxlen(dico_hash, paste_hash)
                    if percent > 100:
                        percent = 100

                threshold_duplicate = threshold_set[hash_type]
                if percent < threshold_duplicate:
                    # recover the correct percent value for ssdeep
                    percent = 100 - percent if hash_type == 'ssdeep' else percent

                    # Go through the database of the dico filter (month)
                    r_serv_dico = dico_redis[curr_dico_name]

                    # index of paste
                    index_current = r_serv_dico.get(dico_hash)
                    paste_path = r_serv_dico.get(index_current)
                    paste_date = r_serv_dico.get(index_current+'_date')
    print('Across ' + str(txn_count) + ' transactions (occurrences).')
else:
    print('An error has occurred while summing amounts by account: ' + n)
    print(response.status_code)

'''Everything we just did is now an API endpoint: pivot/by/btc'''
response = requests.get(
    'https://ransomcoindb.concinnity-risks.com/api/v2/pivot/by/btc/13AM4VW2dhxYgXeQepoHkHSQuy6NgaEb94?currency=usd&limit=1000',
    headers={
        'accept': 'application/json',
        'x-api-key': api_key
    },
)
print('Everything we just did is now an API endpoint: pivot/by/btc')
trx_function_data = pd.DataFrame.from_dict(response.json())
print(trx_function_data)

'''Let us go back to TLSH though to show you some magic'''
wcry_bindist = []
for i in df.tlsh:
    for j in df.tlsh:
        if i != j:
            wcry_bindist.append(tlsh.diffxlen(i, j))

print('Let us look for new information with binary distance tricks.')
print('A good threshold for this set of binaries would be: ' + str(sum(wcry_bindist) / len(wcry_bindist)))
print('Min distance is: ' + str(min(wcry_bindist)))
print('Max distance is: ' + str(max(wcry_bindist)))
print(
    'Thus further work could identify similar malware based on the TLSH distance, even if it does not have the BTC addresses within it.'
)
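The nested loop above compares every ordered pair twice and assumes every row holds a valid digest; a sketch of the same statistic over unique pairs, assuming the df.tlsh column from above and skipping placeholder digests, could look like this.

from itertools import combinations

valid_hashes = [h for h in df.tlsh if h and h not in ('TNULL', '-')]
wcry_bindist = [tlsh.diffxlen(a, b) for a, b in combinations(valid_hashes, 2)]

print('Pairs compared: ' + str(len(wcry_bindist)))
print('Mean distance: ' + str(sum(wcry_bindist) / len(wcry_bindist)))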
opened_dico = []
for dico_name in dico_range_list:
    opened_dico.append([dico_name, dico_redis[dico_name]])

# retrieve hash from paste
paste_hashes = PST._get_p_hash()

# Go through the database of the dico (of the month)
for curr_dico_name, curr_dico_redis in opened_dico:
    for hash_type, paste_hash in paste_hashes.items():
        for dico_hash in curr_dico_redis.smembers('HASHS_'+hash_type):
            try:
                if hash_type == 'ssdeep':
                    percent = 100 - ssdeep.compare(dico_hash, paste_hash)
                else:
                    percent = tlsh.diffxlen(dico_hash, paste_hash)

                threshold_duplicate = threshold_set[hash_type]
                if percent < threshold_duplicate:
                    # recover the correct percent value for ssdeep
                    percent = 100 - percent if hash_type == 'ssdeep' else percent

                    # Go through the database of the dico filter (month)
                    r_serv_dico = dico_redis[curr_dico_name]

                    # index of paste
                    index_current = r_serv_dico.get(dico_hash)
                    paste_path = r_serv_dico.get(index_current)
                    if paste_path is not None:
                        hash_dico[dico_hash] = (hash_type, paste_path, percent)
                        print('['+hash_type+'] comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent))
            except Exception as e:
def simTLSH(h1, h2):
    return tlsh.diffxlen(h1, h2)
def detector(inputDict, inputRepo):
    componentDB = {}
    componentDB = readComponentDB()

    fres = open(resultPath + "result_" + inputRepo, 'w')
    aveFuncs = getAveFuncs()

    for OSS in componentDB:
        commonFunc = []
        repoName = OSS.split('_sig')[0]
        totOSSFuncs = float(aveFuncs[repoName])
        if totOSSFuncs == 0.0:
            continue

        comOSSFuncs = 0.0
        for hashval in componentDB[OSS]:
            if hashval in inputDict:
                commonFunc.append(hashval)
                comOSSFuncs += 1.0

        if (comOSSFuncs / totOSSFuncs) >= theta:
            verPredictDict = {}
            allVerList, idx2Ver = readAllVers(repoName)
            for eachVersion in allVerList:
                verPredictDict[eachVersion] = 0.0

            weightDict = readWeigts(repoName)

            with open(initialDBPath + OSS, 'r', encoding="UTF-8") as fi:
                jsonLst = json.load(fi)
                for eachHash in jsonLst:
                    hashval = eachHash["hash"]
                    verlist = eachHash["vers"]
                    if hashval in commonFunc:
                        for addedVer in verlist:
                            verPredictDict[idx2Ver[addedVer]] += weightDict[hashval]

            sortedByWeight = sorted(verPredictDict.items(), key=lambda x: x[1], reverse=True)
            predictedVer = sortedByWeight[0][0]

            predictOSSDict = {}
            with open(repoFuncPath + repoName + '/fuzzy_' + predictedVer + '.hidx', 'r', encoding="UTF-8") as fo:
                body = ''.join(fo.readlines()).strip()
                for eachLine in body.split('\n')[1:]:
                    ohash = eachLine.split('\t')[0]
                    opath = eachLine.split('\t')[1]
                    predictOSSDict[ohash] = opath.split('\t')

            used = 0
            unused = 0
            modified = 0
            strChange = False

            for ohash in predictOSSDict:
                flag = 0
                for thash in inputDict:
                    if ohash == thash:
                        used += 1
                        nflag = 0
                        for opath in predictOSSDict[ohash]:
                            for tpath in inputDict[thash]:
                                if opath in tpath:
                                    nflag = 1
                        if nflag == 0:
                            strChange = True
                        flag = 1
                    else:
                        score = tlsh.diffxlen(ohash, thash)
                        if int(score) <= 30:
                            modified += 1
                            nflag = 0
                            for opath in predictOSSDict[ohash]:
                                for tpath in inputDict[thash]:
                                    if opath in tpath:
                                        nflag = 1
                            if nflag == 0:
                                strChange = True
                            flag = 1
                if flag == 0:
                    unused += 1

            fres.write('\t'.join([
                inputRepo, repoName, predictedVer,
                str(used), str(unused), str(modified), str(strChange)
            ]) + '\n')

    fres.close()