Exemple #1
0
 def compare(self, hash1, hash2):
     """Return the TLSH distance between two hashes, or '-' when one is missing.

     The string '-' is treated as a placeholder for "no hash": if either
     argument equals it, '-' is returned and no comparison is attempted.
     Otherwise the result of ``tlsh.diffxlen(hash1, hash2)`` is returned
     unchanged; no upper cut-off is applied to the distance.
     """
     if '-' in (hash1, hash2):
         return '-'
     return tlsh.diffxlen(hash1, hash2)
Exemple #2
0
 def match_hash(self, signal_str: str) -> t.List[signal_base.SignalMatch]:
     """Return a SignalMatch for every stored entry close enough to signal_str.

     Inputs whose length differs from EXPECT_TLSH_HASH_LENGTH yield an empty
     list. Otherwise each entry of TEMP_MATCH_IMPLEMNTATION_CHECK_DB is
     compared with ``tlsh.diffxlen``; entries whose distance is at most
     TLSH_CONFIDENT_MATCH_THRESHOLD produce a match.
     """
     if len(signal_str) != EXPECT_TLSH_HASH_LENGTH:
         return []
     # entry[0] is the stored hash; entry[1][0] and entry[1][1] are forwarded
     # to SignalMatch in that order (presumably labels and a descriptor id —
     # TODO confirm against the DB definition).
     return [
         signal_base.SignalMatch(entry[1][0], entry[1][1])
         for entry in TEMP_MATCH_IMPLEMNTATION_CHECK_DB
         if tlsh.diffxlen(entry[0], signal_str) <= TLSH_CONFIDENT_MATCH_THRESHOLD
     ]
Exemple #3
0
 def compare_hash(
     cls,
     hash1: str,
     hash2: str,
     distance_threshold: t.Optional[int] = None
 ) -> signal_base.HashComparisonResult:
     """Compare two TLSH hashes and wrap the distance in a comparison result.

     :param hash1: first TLSH hash string.
     :param hash2: second TLSH hash string.
     :param distance_threshold: threshold handed to ``from_dist``; when None,
         TLSH_CONFIDENT_MATCH_THRESHOLD is used.
     :return: ``HashComparisonResult.from_dist(distance, threshold)``.
     """
     threshold = (TLSH_CONFIDENT_MATCH_THRESHOLD
                  if distance_threshold is None else distance_threshold)
     distance = tlsh.diffxlen(hash1, hash2)
     return signal_base.HashComparisonResult.from_dist(distance, threshold)
Exemple #4
0
    def TO_OVERWRITE_compute_distance(self, pic1: picture_class.Picture,
                                      pic2: picture_class.Picture):
        """Return the TLSH distance between the hashes of two pictures.

        The variant is selected by ``self.conf.ALGO``: ``ALGO_TYPE.TLSH`` uses
        ``tlsh.diff`` and ``ALGO_TYPE.TLSH_NO_LENGTH`` uses ``tlsh.diffxlen``.

        :param pic1: picture whose ``hash`` attribute is the first operand.
        :param pic2: picture whose ``hash`` attribute is the second operand.
        :return: the value returned by the selected tlsh function.
        :raises Exception: if the configured algorithm is neither of the two.
        """
        algo = self.conf.ALGO
        if algo == configuration.ALGO_TYPE.TLSH:
            return tlsh.diff(pic1.hash, pic2.hash)
        if algo == configuration.ALGO_TYPE.TLSH_NO_LENGTH:
            return tlsh.diffxlen(pic1.hash, pic2.hash)

        raise Exception(
            "Invalid algorithm type for TLSH execution handler during distance computing : "
            + str(algo.name))
Exemple #5
0
def tlsh_score(response, site, alert):
    """
    Calculate the TLSH score of a response against the site's stored hash.

    The response text is hashed with TLSH and compared (``tlsh.diffxlen``)
    with ``site.content_fuzzy_hash``. When the score is above 160 the alert
    value is raised by 4 and the stored hash is replaced with the new one.

    :param response: Http response (its ``text`` is hashed as UTF-8).
    :param site: Site Object.
    :param alert: Alert Integer.
    :return: alert, score
    :rtype: int, int
    """
    current_hash = tlsh.hash(bytes(response.text, encoding='utf-8'))
    score = tlsh.diffxlen(site.content_fuzzy_hash, current_hash)
    if score <= 160:
        # Content is still close enough to the stored hash: nothing to update.
        return alert, score

    alert += 4
    # Persist the new hash so later scores are measured against this content.
    Site.objects.filter(pk=site.pk).update(content_fuzzy_hash=current_hash)
    return alert, score
Exemple #6
0
 def match_hash(self, signal_str: str) -> t.List[signal_base.SignalMatch]:
     """Return a SignalMatch for every hash in ``self.state`` close to signal_str.

     The optional ``tlsh`` dependency is imported lazily. If it is not
     installed, a UserWarning is emitted and an empty list is returned.
     Inputs whose length differs from EXPECT_TLSH_HASH_LENGTH also yield an
     empty list. Otherwise every ``(tlsh_hash, signal_attr)`` pair in
     ``self.state`` whose ``tlsh.diffxlen`` distance to ``signal_str`` is at
     most TLSH_CONFIDENT_MATCH_THRESHOLD produces a match built from
     ``signal_attr.labels`` and ``signal_attr.first_descriptor_id``.
     """
     try:
         import tlsh
     except ImportError:
         # Only a missing/broken install is expected here; a bare ``except``
         # would also swallow KeyboardInterrupt and SystemExit.
         warnings.warn(
             "Matching a tlsh hash requires additional libraries already be installed; install threatexchange with the [pdf] extra",
             category=UserWarning,
         )
         return []
     if len(signal_str) != EXPECT_TLSH_HASH_LENGTH:
         return []
     return [
         signal_base.SignalMatch(signal_attr.labels,
                                 signal_attr.first_descriptor_id)
         for tlsh_hash, signal_attr in self.state.items()
         if tlsh.diffxlen(tlsh_hash, signal_str) <= TLSH_CONFIDENT_MATCH_THRESHOLD
     ]
Exemple #7
0
 def compare(self, h1, h2):
     """Return ``tlsh.diffxlen(h1, h2)`` for the two given TLSH hashes."""
     distance = tlsh.diffxlen(h1, h2)
     return distance
Exemple #8
0
            for dico_name in dico_range_list:
                opened_dico.append([dico_name, dico_redis[dico_name]])

            # retrieve hash from paste
            paste_hashes = PST._get_p_hash()

            # Go throught the Database of the dico (of the month)
            for curr_dico_name, curr_dico_redis in opened_dico:
                for hash_type, paste_hash in paste_hashes.items():
                    for dico_hash in curr_dico_redis.smembers('HASHS_'+hash_type):

                        try:
                            if hash_type == 'ssdeep':
                                percent = 100-ssdeep.compare(dico_hash, paste_hash)
                            else:
                                percent = tlsh.diffxlen(dico_hash, paste_hash)
                                if percent > 100:
                                    percent = 100

                            threshold_duplicate = threshold_set[hash_type]
                            if percent < threshold_duplicate:
                                percent = 100 - percent if hash_type == 'ssdeep' else percent #recovert the correct percent value for ssdeep
                                # Go throught the Database of the dico filter (month)
                                r_serv_dico = dico_redis[curr_dico_name]

                                # index of paste
                                index_current = r_serv_dico.get(dico_hash)
                                index_current = index_current
                                paste_path = r_serv_dico.get(index_current)
                                paste_path = paste_path
                                paste_date = r_serv_dico.get(index_current+'_date')
        print('Across ' + str(txn_count) + ' transactions (occurrences).')
    else:
        print('An error has occurred while summing amounts by account:' + n)
        print(response.status_code)
'''Everything we just did, is now a API Endpoint: pivot/by/btc'''
# Query the pivot endpoint for one BTC address, authenticating with api_key.
pivot_headers = {
    'accept': 'application/json',
    'x-api-key': api_key
}
response = requests.get(
    'https://ransomcoindb.concinnity-risks.com/api/v2/pivot/by/btc/13AM4VW2dhxYgXeQepoHkHSQuy6NgaEb94?currency=usd&limit=1000',
    headers=pivot_headers,
)
print('Everything we just did, is now a API Endpoint: pivot/by/btc')
trx_function_data = pd.DataFrame.from_dict(response.json())
print(trx_function_data)
'''Let us go back to TLSH though to show you some magic'''
# Distance for every ordered pair of differing TLSH hashes in df.tlsh
# (each unordered pair therefore appears twice).
wcry_bindist = [
    tlsh.diffxlen(first_hash, second_hash)
    for first_hash in df.tlsh
    for second_hash in df.tlsh
    if first_hash != second_hash
]

print('Let us look for new information with binary distance tricks.')
mean_distance = sum(wcry_bindist) / len(wcry_bindist)
print('A good threshold for this set of binaries would be: ' +
      str(mean_distance))
smallest_distance = min(wcry_bindist)
print('Min distance is: ' + str(smallest_distance))
largest_distance = max(wcry_bindist)
print('Max distance is: ' + str(largest_distance))
print(
    'Thus further work could identify similar malware based on the TLSH distance, even if it does not have the BTC addresses within it.'
)
Exemple #10
0
            opened_dico = []
            for dico_name in dico_range_list:
                opened_dico.append([dico_name, dico_redis[dico_name]])

            # retrieve hash from paste
            paste_hashes = PST._get_p_hash()

            # Go throught the Database of the dico (of the month)
            for curr_dico_name, curr_dico_redis in opened_dico:
                for hash_type, paste_hash in paste_hashes.iteritems():
                    for dico_hash in curr_dico_redis.smembers('HASHS_'+hash_type):
                        try:
                            if hash_type == 'ssdeep':
                                percent = 100-ssdeep.compare(dico_hash, paste_hash)  
                            else:
                                percent = tlsh.diffxlen(dico_hash, paste_hash)

                            threshold_duplicate = threshold_set[hash_type]
                            if percent < threshold_duplicate:
                                percent = 100 - percent if hash_type == 'ssdeep' else percent #recovert the correct percent value for ssdeep
                                # Go throught the Database of the dico filter (month)
                                r_serv_dico = dico_redis[curr_dico_name]

                                # index of paste
                                index_current = r_serv_dico.get(dico_hash)
                                paste_path = r_serv_dico.get(index_current)
                                if paste_path != None:
                                    hash_dico[dico_hash] = (hash_type, paste_path, percent)

                                print '['+hash_type+'] '+'comparing: ' + str(PST.p_path[44:]) + '  and  ' + str(paste_path[44:]) + ' percentage: ' + str(percent)
                        except Exception,e:
Exemple #11
0
def simTLSH(h1,h2):
  """Return ``tlsh.diffxlen(h1, h2)`` for the two given TLSH hashes."""
  distance = tlsh.diffxlen(h1, h2)
  return distance
Exemple #12
0
def _pathsOverlap(ossPaths, targetPaths):
    """Return True when any path in ossPaths is a substring of a path in targetPaths."""
    return any(opath in tpath for opath in ossPaths for tpath in targetPaths)


def detector(inputDict, inputRepo):
    """
    Detect reused OSS components in a target repository and write a report.

    For every component in the component DB whose share of functions also
    present in ``inputDict`` reaches ``theta``, the most likely version is
    predicted from per-function weights, and each function of that version is
    classified exactly once as:

    * used     -- its hash is a key of ``inputDict``;
    * modified -- no identical hash, but some hash in ``inputDict`` has a
                  ``tlsh.diffxlen`` score of at most 30;
    * unused   -- neither of the above.

    ``strChange`` becomes True when a used/modified function's OSS path is not
    a substring of any path of its counterpart in the target.

    One tab-separated line per detected component is written to
    ``resultPath + "result_" + inputRepo``:
    inputRepo, repoName, predictedVer, used, unused, modified, strChange.

    :param inputDict: mapping from function hash to an iterable of paths in
        the target repository.
    :param inputRepo: name of the target repository; used in the result file
        name and as the first output column.
    """
    componentDB = readComponentDB()
    aveFuncs = getAveFuncs()

    # ``with`` guarantees the result file is flushed and closed even if a
    # later step raises.
    with open(resultPath + "result_" + inputRepo, 'w') as fres:
        for OSS in componentDB:
            repoName = OSS.split('_sig')[0]
            totOSSFuncs = float(aveFuncs[repoName])
            if totOSSFuncs == 0.0:
                continue

            # Keep the list so duplicates in the DB still count towards the
            # ratio; the set gives O(1) membership tests further down.
            commonFunc = [
                hashval for hashval in componentDB[OSS] if hashval in inputDict
            ]
            if (len(commonFunc) / totOSSFuncs) < theta:
                continue
            commonSet = set(commonFunc)

            # --- Version prediction: sum the weights of shared functions. ---
            allVerList, idx2Ver = readAllVers(repoName)
            verPredictDict = {eachVersion: 0.0 for eachVersion in allVerList}
            if not verPredictDict:
                # No known versions: nothing to predict for this component.
                continue

            weightDict = readWeigts(repoName)

            with open(initialDBPath + OSS, 'r', encoding="UTF-8") as fi:
                jsonLst = json.load(fi)

            for eachHash in jsonLst:
                hashval = eachHash["hash"]
                if hashval not in commonSet:
                    continue
                for addedVer in eachHash["vers"]:
                    verPredictDict[idx2Ver[addedVer]] += weightDict[hashval]

            # max() returns the first maximal key, which matches taking the
            # head of a stable descending sort.
            predictedVer = max(verPredictDict, key=verPredictDict.get)

            # --- Load the function index of the predicted version. ---
            predictOSSDict = {}
            hidxPath = (repoFuncPath + repoName + '/fuzzy_' + predictedVer +
                        '.hidx')
            with open(hidxPath, 'r', encoding="UTF-8") as fo:
                body = fo.read().strip()

            # The first line is skipped (presumably a header -- TODO confirm).
            for eachLine in body.split('\n')[1:]:
                fields = eachLine.split('\t')
                if len(fields) < 2:
                    # Malformed or blank line: no path column to read.
                    continue
                # NOTE(review): only the first path column is kept, as in the
                # original code; confirm whether later columns matter.
                predictOSSDict[fields[0]] = [fields[1]]

            # --- Classify every OSS function exactly once. ---
            used = 0
            unused = 0
            modified = 0
            strChange = False

            for ohash, ossPaths in predictOSSDict.items():
                if ohash in inputDict:
                    used += 1
                    matched = ohash
                else:
                    # The first sufficiently similar target function decides;
                    # further candidates must not inflate ``modified``.
                    matched = next(
                        (thash for thash in inputDict
                         if int(tlsh.diffxlen(ohash, thash)) <= 30),
                        None)
                    if matched is None:
                        unused += 1
                        continue
                    modified += 1

                if not _pathsOverlap(ossPaths, inputDict[matched]):
                    strChange = True

            fres.write('\t'.join([
                inputRepo, repoName, predictedVer,
                str(used),
                str(unused),
                str(modified),
                str(strChange)
            ]) + '\n')