Esempio n. 1
0
def statNGramFrequency(dir_path, N, class_dir=True, log_dump_path=None):
    """Compute the relative frequency of every API N-gram over a dataset.

    Every report visited by ``datasetTraverse`` contributes all of its
    contiguous windows of ``N`` API names; each window is keyed by its
    names joined with ``'/'``. When the traversal finishes, every count is
    divided by the total number of windows counted and, if
    ``log_dump_path`` is given, the resulting table is written with
    ``dumpJson``.

    Args:
        dir_path: Dataset directory, forwarded to ``datasetTraverse``.
        N: Window length; expected to be a positive integer.
        class_dir: Forwarded unchanged to ``datasetTraverse``.
        log_dump_path: Where to dump the frequency table, or ``None`` to
            skip dumping.
    """
    def statNGramFrequencyInner(count_, filep_, report_, list_, dict_,
                                **kwargs):
        """Add one report's N-gram counts to ``dict_`` and ``list_[0]``."""
        apis = report_['apis']
        print('# %d' % count_, filep_, 'len=%d' % len(apis), end=' ')

        # A sequence of length L holds L - N + 1 full windows, the last one
        # starting at index L - N. The range is empty when L < N.
        window_total = 0
        for start in range(len(apis) - N + 1):
            ngram = '/'.join(apis[start:start + N])
            dict_[ngram] = dict_.get(ngram, 0) + 1
            window_total += 1

        # list_[0] holds the running number of windows counted; the slot is
        # only created once at least one window has been seen.
        if window_total:
            if list_:
                list_[0] += window_total
            else:
                list_.append(window_total)

        return list_, dict_

    def statNGramFrequencyFNcb(reporter_, list_, dict_):
        """Turn raw counts into relative frequencies and optionally dump."""
        # When no window was ever counted dict_ is empty, so list_[0] is
        # never evaluated on an empty list.
        for k in dict_:
            dict_[k] = dict_[k] / list_[0]
        if log_dump_path is not None:
            dumpJson(dict_, log_dump_path)

    datasetTraverse(dir_path=dir_path,
                    exec_kernel=statNGramFrequencyInner,
                    class_dir=class_dir,
                    final_callback=statNGramFrequencyFNcb)
Esempio n. 2
0
def extractAPISequenceFromRaw(dir_path, dst_path, log_dump_path=None):
    """Reduce raw reports to their file identity plus a flat API call list.

    For each report visited by ``datasetTraverse`` a new dict is built from
    the hashes and name under ``report['target']['file']`` and the ``'api'``
    field of every call of every process under
    ``report['behavior']['processes']``. It is written with ``dumpJson`` to
    ``dst_path + md5 + '.json'``.

    Args:
        dir_path: Directory of raw reports, forwarded to ``datasetTraverse``.
        dst_path: String prefix prepended to ``<md5>.json`` for the output.
        log_dump_path: If not ``None``, the traversal reporter is dumped to
            this path once the traversal is over.
    """

    def extractAPISequenceFromRawInner(count_, file_path_, report_, list_, dict_, **kwargs):
        """Write the condensed version of one raw report."""
        print("# %d"%count_, end=' ')

        file_info = report_['target']['file']
        # Field order matches the key order of the dumped dict.
        condensed = {
            field: file_info[field]
            for field in ('sha1', 'name', 'sha256', 'sha512', 'md5')
        }

        # Flatten the calls of all processes into one sequence.
        condensed['apis'] = [
            call['api']
            for process in report_['behavior']['processes']
            for call in process['calls']
        ]

        dumpJson(condensed, dst_path + condensed['md5'] + '.json')
        return list_, dict_

    def extractAPISequenceFromRawFNcb(reporter_, list_, dict_):
        """Dump the traversal reporter when a log path was requested."""
        if log_dump_path is None:
            return
        reporter_.dump(log_dump_path)

    datasetTraverse(dir_path,
                    extractAPISequenceFromRawInner,
                    class_dir=False,
                    name_prefix='report',
                    name_suffix='.json',
                    final_callback=extractAPISequenceFromRawFNcb)
Esempio n. 3
0
def convertDir2Image(dir_path, dst_path, width=256):
    """Convert every file of a class-structured dataset with ``convert``.

    For each item visited by ``datasetTraverse`` the output goes to
    ``dst_path + <folder> + '/' + <item>``, where ``folder`` and ``item``
    are taken from the keyword arguments the traversal passes to the
    kernel. Missing output folders are created on demand.

    Args:
        dir_path: Source dataset directory, forwarded to ``datasetTraverse``.
        dst_path: String prefix under which per-folder output directories
            are created.
        width: Forwarded to ``convert`` as ``img_width``.
    """

    def convertDir2ImageInner(count_, filep_, report_, list_, dict_, **kwargs):
        """Convert one file into its mirrored output folder."""
        print("# %d"%count_, filep_, end=' ')

        folder_path = dst_path+kwargs["folder"]+'/'
        # makedirs also creates missing parents (e.g. dst_path itself), and
        # exist_ok avoids the race of a separate exists()-then-mkdir check.
        os.makedirs(folder_path, exist_ok=True)

        convert(filep_, folder_path+kwargs['item'], img_width=width)

        return list_, dict_

    datasetTraverse(dir_path=dir_path,
                    exec_kernel=convertDir2ImageInner,
                    class_dir=True,
                    load_func=None)
Esempio n. 4
0
def renamePEbyMD5fromApi(api_dir_path, pe_dir_path):
    """Rename PE files to the MD5 recorded in their matching API report.

    For each report visited by ``datasetTraverse`` the file
    ``pe_dir_path + <folder> + '/' + <name>`` is renamed to
    ``pe_dir_path + <folder> + '/' + <md5>``, where ``<folder>`` is the
    first three dot-separated components of the report's ``'name'``.

    Args:
        api_dir_path: Directory of API reports, forwarded to
            ``datasetTraverse``.
        pe_dir_path: String prefix of the directory holding the PE files.
    """
    def renamePEbyMD5fromApiInner(count_, filep_, report_, list_, dict_,
                                  **kwargs):
        """Rename the PE file that belongs to one report."""
        print("# %d" % count_, filep_, end=' ')

        digest = report_['md5']
        pe_name = report_['name']
        # The containing folder is named after the first three
        # dot-separated parts of the file name.
        name_parts = pe_name.split('.')
        parent = pe_dir_path + '.'.join(name_parts[:3]) + '/'

        os.rename(parent + pe_name, parent + digest)

        return list_, dict_

    datasetTraverse(dir_path=api_dir_path,
                    exec_kernel=renamePEbyMD5fromApiInner,
                    class_dir=True)
Esempio n. 5
0
def mapAndExtractTopKNgram(dir_path,
                           ngram_stat_log_path,
                           K,
                           N,
                           class_dir=True,
                           map_dump_path=None):
    """Replace each report's API list with indices of its top-K N-grams.

    The N-gram statistics loaded from ``ngram_stat_log_path`` are sorted by
    value in descending order and the first ``K`` keys are numbered
    ``1..K``; index ``0`` is reserved for ``'<PAD>'``. Every report visited
    by ``datasetTraverse`` is then rewritten in place: its ``'apis'`` entry
    becomes the index sequence of those of its N-grams that are in the
    top-K table (other N-grams are dropped).

    Args:
        dir_path: Dataset directory, forwarded to ``datasetTraverse``.
        ngram_stat_log_path: JSON file mapping ``'/'``-joined N-grams to a
            sortable score.
        K: Number of highest-scoring N-grams to keep.
        N: Window length; expected to be a positive integer.
        class_dir: Forwarded unchanged to ``datasetTraverse``.
        map_dump_path: Where to dump the N-gram -> index table, or ``None``
            to skip dumping.
    """

    ngram_fre = loadJson(ngram_stat_log_path)
    sorted_ngrams = sorted(ngram_fre.items(), key=lambda x: x[1],
                           reverse=True)[:K]
    # Map each kept N-gram to its 1-based rank.
    topk_ngrams = {
        ngram: index
        for index, (ngram, _) in enumerate(sorted_ngrams, start=1)
    }
    topk_ngrams['<PAD>'] = 0  # index 0 is the padding token

    def mapAndExtractTopKNgramInner(count_, filep_, report_, list_, dict_,
                                    **kwargs):
        """Rewrite one report with its N-grams mapped to top-K indices."""
        print('# %d' % count_, end=' ')
        apis = report_['apis']

        # A sequence of length L holds L - N + 1 full windows, the last one
        # starting at index L - N. The range is empty when L < N.
        new_seq = []
        for start in range(len(apis) - N + 1):
            ngram = '/'.join(apis[start:start + N])
            if ngram in topk_ngrams:
                new_seq.append(topk_ngrams[ngram])

        new_report = dict(report_)
        new_report['apis'] = new_seq

        # The report file is overwritten with the mapped sequence.
        dumpJson(new_report, filep_)
        return list_, dict_

    datasetTraverse(dir_path=dir_path,
                    exec_kernel=mapAndExtractTopKNgramInner,
                    class_dir=class_dir)

    if map_dump_path is not None:
        dumpJson(topk_ngrams, map_dump_path)
Esempio n. 6
0
def removeAPIRedundancy(dir_path, class_dir=True):
    """Collapse runs of identical consecutive API calls in every report.

    Each report visited by ``datasetTraverse`` is rewritten in place with
    ``dumpJson``: all other keys are copied as they are, and ``'apis'``
    keeps only the first call of every run of equal adjacent calls.

    Args:
        dir_path: Dataset directory, forwarded to ``datasetTraverse``.
        class_dir: Forwarded unchanged to ``datasetTraverse``.
    """
    def removeAPIRedundancyInner(count_, filep_, report_, list_, dict_,
                                 **kwargs):
        """Deduplicate adjacent calls of one report and overwrite it."""
        print('# %d' % count_, end=' ')

        deduped_report = dict(report_.items())
        collapsed = []
        for api in report_['apis']:
            # Skip a call that repeats the one that opened the current run.
            if collapsed and api == collapsed[-1]:
                continue
            collapsed.append(api)

        deduped_report['apis'] = collapsed
        dumpJson(deduped_report, filep_)
        return list_, dict_

    datasetTraverse(dir_path=dir_path,
                    exec_kernel=removeAPIRedundancyInner,
                    class_dir=class_dir)
Esempio n. 7
0
def statExceptionReport(dir_path, class_dir=False,
                        name_prefix=None,
                        exception_call_patience=20,
                        dump_noexp_path=None):
    """Count reports whose API sequence shows an exception-driven abort.

    Each report visited by ``datasetTraverse`` is put into one of two
    groups based on its ``'apis'`` list: "exception" when an
    ``'__exception__'`` entry is either followed by a final
    ``'NtTerminateProcess'`` call or starts a long enough run of
    ``'__exception__'`` entries, and "no exception" otherwise. A summary is
    printed at the end and the two file lists can be dumped as JSON.

    Args:
        dir_path: Dataset directory, forwarded to ``datasetTraverse``.
        class_dir: Forwarded unchanged to ``datasetTraverse``.
        name_prefix: Forwarded unchanged to ``datasetTraverse``.
        exception_call_patience: Run length of consecutive
            ``'__exception__'`` entries that counts as a detection.
        dump_noexp_path: If not ``None``, path where the lists of files
            with and without exceptions are dumped via ``dumpJson``.
    """

    def statExceptionReportInner(count_, filep_, report_, list_, dict_, **kwargs):
        """Classify one report and update the counters in ``dict_``."""
        print('# %d'%count_, filep_, end=' ')

        # Create the counters on the first call. This rebinds the local
        # name, so the new dict reaches the caller only through the return
        # value.
        if len(dict_) == 0:
            dict_ = {
                'noexc': 0,
                'exc': 0,
                'err': 0,
                'exc_list': [],
                'noexc_list': []
            }

        apis = report_['apis']
        for i in range(len(apis)):
            # Only positions holding an exception that is not the last call
            # are of interest.
            if apis[i] == '__exception__' and i+1 < len(apis):
                # Exception immediately followed by a terminate call that is
                # also the final call of the sequence: detected.
                if apis[i+1] == 'NtTerminateProcess' and i+2==len(apis):
                    print('terminate', end=' ')
                # A run of consecutive exceptions starts here.
                elif apis[i+1] == '__exception__':
                    j = 1
                    flag = False

                    # Walk forward to see whether the run reaches the
                    # patience value. The patience check only happens while
                    # i+j is still inside the list.
                    while i+j < len(apis):
                        # The run reached the patience value: detected.
                        if j == exception_call_patience:
                            flag = True
                            print('successive exceptions', end=' ')
                            break
                        elif apis[i+j] != '__exception__':
                            break
                        else:
                            j += 1

                    # Run too short: keep scanning from the next index.
                    if not flag:
                        continue
                else:               # every other case is treated as not detected
                    continue

                # Reached only by the two detected cases above.
                dict_['exc'] += 1
                dict_['exc_list'].append(filep_)
                print('Exception')
                return list_, dict_
        # No position in the sequence matched a detection pattern.
        dict_['noexc'] += 1
        dict_['noexc_list'].append(filep_)
        print('Normal')
        return list_,dict_

    def statExceptionReportFcb(e, list_, dict_):
        """Count one failed report."""
        # NOTE(review): the counters are only created inside the kernel, so
        # this raises KeyError if dict_ is still empty here - confirm how
        # datasetTraverse passes dict_ to the fail callback.
        dict_['err'] += 1
        print("Error")

    def statExceptionReportFNcb(reporter_, list_, dict_):
        """Print the summary and optionally dump the two file lists."""
        print('*'*50)
        print("Total:", dict_['noexc']+dict_['exc']+dict_['err'])
        print("No Exception:", dict_['noexc'])
        print('Exception:', dict_['exc'])
        print('Error:', dict_['err'])
        print('*' * 50)

        if dump_noexp_path is not None:
            dumpJson({'has_exception': dict_['exc_list'],
                      'no_exception': dict_['noexc_list']},
                     dump_noexp_path)


    datasetTraverse(dir_path=dir_path,
                    exec_kernel=statExceptionReportInner,
                    class_dir=class_dir,
                    name_prefix=name_prefix,
                    success_callback=lambda x,y:None,         # suppress the default success printing
                    fail_callback=statExceptionReportFcb,
                    final_callback=statExceptionReportFNcb)