Ejemplo n.º 1
0
def main():
    """Vectorize one PE file with EMBER and create a SageMaker runtime client.

    Command-line arguments:
        -v / --featureversion: EMBER feature version passed to the feature
            extractor (default 2).
        BINARIES: one or more PE file paths; only the first one is read.
    """
    import json
    import ember

    from sklearn.preprocessing import RobustScaler
    rs = RobustScaler()

    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--featureversion", type=int, default=2, help="EMBER feature version")
    parser.add_argument("binaries", metavar="BINARIES", type=str, nargs="+", help="PE files to classify")
    args = parser.parse_args()

    # Read the PE file through a context manager so the handle is closed
    # promptly instead of being left to the garbage collector.
    with open(args.binaries[0], 'rb') as pe_file:
        testpe = pe_file.read()

    # Honour --featureversion; previously the option was parsed but ignored.
    extract = ember.PEFeatureExtractor(args.featureversion)
    data = extract.feature_vector(testpe)  # vectorize the extracted features

    # NOTE(review): the scaler is fitted on this single sample only, so the
    # transform carries no information from any training set. Presumably a
    # scaler fitted on the training data should be loaded instead - confirm.
    scaled_data = rs.fit_transform([data])
    # One row of 2381 values, converted to nested lists.
    Xdata = np.reshape(scaled_data, (1, 2381))
    Xdata = Xdata.tolist()

    client = boto3.client('runtime.sagemaker',
                          region_name='us-east-1',
                          # enter ids from AWS CLI
                          aws_access_key_id='XXXXXXXXXXXXXX',
                          aws_secret_access_key='XXXXXXXXXXXXX',
                          aws_session_token='XXXXXXXXXXXXX')
def main():
    """Classify a single PE file by querying a SageMaker endpoint.

    Command-line arguments:
        -v / --featureversion: EMBER feature version passed to the feature
            extractor (default 2).
        BINARIES: one or more PE file paths; only the first one is classified.

    Prints "Malicious" when the first returned score is >= 0.5, otherwise
    "Benign".
    """
    prog = "predict_sample"
    descr = "Using the model to predict a single PE's binary."
    parser = argparse.ArgumentParser(prog=prog, description=descr)
    parser.add_argument("-v",
                        "--featureversion",
                        type=int,
                        default=2,
                        help="EMBER feature version")
    parser.add_argument("binaries",
                        metavar="BINARIES",
                        type=str,
                        nargs="+",
                        help="PE files to classify")
    args = parser.parse_args()

    extractor = ember.PEFeatureExtractor(args.featureversion)
    # Context manager guarantees the file handle is released after reading.
    with open(args.binaries[0], 'rb') as pe_file:
        sample_data = pe_file.read()
    sample_data = extractor.feature_vector(sample_data)
    sample_data = np.array(sample_data, dtype=np.float32)
    # `mms` is defined outside this function - presumably a scaler already
    # fitted on the training data; confirm against the rest of the file.
    sample_data = mms.transform([sample_data])
    sample_data = np.reshape(sample_data, (-1, 1, 2381))
    sample_data = sample_data.tolist()

    client = boto3.client('runtime.sagemaker',
                          region_name='us-east-1',
                          aws_access_key_id='<put access key id here>',
                          aws_secret_access_key='<put secret key here>',
                          aws_session_token='<put token here>')
    response = client.invoke_endpoint(
        EndpointName='sagemaker-tensorflow-2020-04-12-12-12-55-002',
        Body=json.dumps(sample_data))
    raw_body = response['Body'].read()
    # Parse the payload as JSON: ast.literal_eval rejects JSON literals such
    # as true/false/null and is not a JSON parser.
    payload = json.loads(raw_body.decode("UTF-8"))
    scores = payload['outputs']['score']['floatVal']

    if scores[0] >= .5:
        print("Malicious")
    else:
        print("Benign")
Ejemplo n.º 3
0
def main():
    """Dump raw features for every binary listed in a CSV file.

    Command-line arguments:
        SOURCE_DIR (source_csv): CSV file whose rows are ``path,label``.
        DEST_DIR (dest_json): output file, one JSON object per line;
            optional, defaults to ``data.json``.

    Each listed binary is read, passed to the extractor's ``raw_features``
    together with its path and integer label, and the result is written as
    one JSON line.
    """
    prog = "python3 extractor_from_bin.py"
    descr = "extract high level features from binary files"
    parser = argparse.ArgumentParser(prog=prog, description=descr)
    parser.add_argument("source_csv",
                        metavar="SOURCE_DIR",
                        type=str,
                        help="CSV file with one (path, label) row per binary")
    # nargs="?" makes the positional optional; without it argparse always
    # requires the argument and the default is never used.
    parser.add_argument("dest_json",
                        metavar="DEST_DIR",
                        type=str,
                        nargs="?",
                        default="data.json",
                        help="Output file for raw features, one JSON object "
                             "per line (default: data.json)")
    args = parser.parse_args()

    extractor = ember.PEFeatureExtractor()

    # Read the whole listing first so a bad source path fails before the
    # destination file is created. Blank rows are skipped because they
    # cannot be unpacked into (path, label).
    with open(args.source_csv, newline='') as csv_file:
        docs = [row for row in csv.reader(csv_file) if row]

    with open(args.dest_json, 'w') as j_file:
        for path, label in docs:
            with open(path, 'rb') as binary:
                bytes_array = np.array(bytearray(binary.read()), dtype="uint8")
            raw = extractor.raw_features(bytes_array, path, label=int(label))

            j_file.write(json.dumps(raw) + "\n")
Ejemplo n.º 4
0
import ember
import numpy as np
import lightgbm as lgb
from utils import exe_util

# Read the sample executable; the context manager closes the handle as soon
# as the bytes are in memory.
with open("/home/bohan/lief_test2.exe", 'rb') as exe_file:
    file_data = exe_file.read()
extractor = ember.PEFeatureExtractor()
# Feature vector of the sample as a float32 array.
features = np.array(extractor.feature_vector(file_data), dtype=np.float32)

file_path = '/home/bohan/res/ml_dataset/lynx_benign_exes/ChecksumMultiGuard_MD5_packing.exe'
# NOTE(review): `bytes` shadows the builtin of the same name for the rest of
# the module; the name is kept because later code may depend on it - consider
# renaming once all uses are known.
mrl, bytes = exe_util.get_modifiable_range_in_exports(file_path, functions_cnt=10)
print(mrl)


# model = lgb.Booster(model_file="/home/bohan/res/ml_dataset/ember2018/ember_model_2018.txt")
# vd = "/home/bohan/res/ml_dataset/virusshare/"
# d = open(vd + "VirusShare_06f1c1bc8ad03a43633807618a8e3158", "rb").read()
# pred = ember.predict_sample(model, d)
#
# successful_file_paths = [
#     'VirusShare_3c8c59d25ecb9bd91e7b933113578e40',
#     'VirusShare_3a4fac1796f0816d7567abb9bf0a9440',
#     'VirusShare_01cd58ba6e5f9d1e1f718dfba7478d30',
#     'VirusShare_40fd3647c44239df91fc5d7765dd0d9f',
#     'VirusShare_22fd8d088ef3ccadc6baa44dc8cb7490',
# ]
#
# stubborn_files = [
#     'VirusShare_1e4997bc0fced91b25632c3151f91710',
#     'VirusShare_01dd838da5efd739579f412e4f56b180',
#     'VirusShare_21d3b6c1cd1873add493e0675fbd8220',