Code Example #1
def abstract_code(code, build_dir=None, build_src_path=None):
    # Prefer a tokenizer that knows about the build; if constructing or
    # running it fails for any reason, fall back to a context-free CTokenizer.
    try:
        c_tokenizer = CTokenizer(build_dir=build_dir, build_src_path=build_src_path)
        tokens = c_tokenizer.tokenize(code)
    except Exception:
        c_tokenizer = CTokenizer()
        tokens = c_tokenizer.tokenize(code)
    # Detokenizing re-renders the (abstracted) token stream as code.
    return c_tokenizer.detokenize(tokens)
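
For reference, a minimal usage sketch (the sample path is a placeholder, and CTokenizer is assumed to already be importable in this scope, as in the snippet above):

with open("example.c") as f:        # placeholder input file
    source = f.read()
normalized = abstract_code(source)  # tokenize + detokenize round trip
print(normalized)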
Code Example #2

import base64
import os

import tqdm

# CTokenizer's import is omitted in the original snippet; it comes from the
# project's tokenizer module.


def abstract_code(code, build_dir=None, build_src_path=None):
    try:
        c_tokenizer = CTokenizer(build_dir=build_dir,
                                 build_src_path=build_src_path)
        tokens = c_tokenizer.tokenize(code)
    except Exception:
        c_tokenizer = CTokenizer()
        tokens = c_tokenizer.tokenize(code)
    return c_tokenizer.detokenize(tokens)


#INPUT_DIR = "/dev/shm/output/abstracted_decompiled_code"
#OUTPUT_DIR = "/dev/shm/output/abstracted_base64_decompiled_code"
INPUT_DIR = "/dev/shm/output/abstracted_src_code"
OUTPUT_DIR = "/dev/shm/output/abstracted_base64_src_code"
os.system("mkdir -p %s" % OUTPUT_DIR)
# Tokenize every abstracted source file and write it back out as
# space-separated, base64-encoded tokens.
for fname in tqdm.tqdm(os.listdir(INPUT_DIR), smoothing=0, dynamic_ncols=True):
    fpath = os.path.join(INPUT_DIR, fname)
    fid = int(fname.split(".")[0])  # numeric file id from the file name (unused here)
    with open(fpath) as f:
        code = f.read()
    c_tokenizer = CTokenizer()
    tokens = c_tokenizer.tokenize(code)
    encoded_tokens = [base64.b64encode(x.encode()).decode() for x in tokens]

    output_fpath = os.path.join(OUTPUT_DIR, fname)
    with open(output_fpath, 'w') as f:
        f.write(" ".join(encoded_tokens))
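
The per-token base64 step matters because a C token can itself contain whitespace (a string literal, for instance), so joining raw tokens with spaces would be ambiguous. A self-contained round-trip check, using only the standard library:

import base64

tokens = ['int', 'main', '(', ')', '{', '"hello world"', '}']
encoded = " ".join(base64.b64encode(t.encode()).decode() for t in tokens)
decoded = [base64.b64decode(t).decode() for t in encoded.split()]
assert decoded == tokens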
Code Example #3
                        default='',
                        help='The binary to tokenize.')
    parser.add_argument('-n',
                        '--func-name',
                        default='',
                        help='The function name in the binary to tokenize.')
    args = parser.parse_args()

    sanitize_input(args)

    # Choose the appropriate tokenizer for the input type (source file vs. binary).
    if args.input_file:
        with io.open(args.input_file, encoding='utf-8') as f:
            source = f.read()

        tokenizer = CTokenizer(build_dir=args.build_dir,
                               build_src_path=args.build_src_path)
        tokens = tokenizer.tokenize(source)
        print(source)
        print("------")
    elif args.input_bin:
        tokenizer = IDATokenizer()
        tokens = tokenizer.tokenize(args.input_bin, args.func_name)
    else:
        # A bare `raise` here has no active exception to re-raise and would
        # itself crash; fail with an explicit error instead.
        parser.error('an input file or an input binary is required')

    print(tokens)
    print("------")

    new_code = tokenizer.detokenize(tokens)
    print(new_code)
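
sanitize_input is called above but its body is not part of this snippet. A plausible sketch of what it might enforce, purely as an assumption rather than the original implementation:

import os


def sanitize_input(args):
    # Assumed checks: exactly one input source, and the input file must exist.
    if bool(args.input_file) == bool(args.input_bin):
        raise ValueError('specify exactly one input (file or binary)')
    if args.input_file and not os.path.isfile(args.input_file):
        raise ValueError('no such file: %s' % args.input_file)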
Code Example #4
import base64
import os

import tqdm

# CTokenizer's import is again omitted in the original snippet.

#INPUT_DIR = "/dev/shm/output/abstracted_decompiled_code"
#OUTPUT_DIR = "/dev/shm/output/abstracted_base64_decompiled_code"


INPUT_DIR_BASE = "./full_base64"
OUTPUT_DIR_BASE = "./full_base64_decoded"
for sub_dir in ["sinput", "abstracted_base64_src_code", "result"]:
    input_dir = os.path.join(INPUT_DIR_BASE, sub_dir)
    output_dir = os.path.join(OUTPUT_DIR_BASE, sub_dir)
    # Ensure both directory trees exist before reading and writing.
    os.system("mkdir -p %s" % input_dir)
    os.system("mkdir -p %s" % output_dir)
    for fname in tqdm.tqdm(os.listdir(input_dir), smoothing=0, dynamic_ncols=True):
        fpath = os.path.join(input_dir, fname)
        fid = int(fname.split(".")[0])  # numeric file id from the file name (unused here)
        with open(fpath) as f:
            code = f.read()

        # Decode each base64 field back into a token; anything that fails
        # to decode is replaced with <unk>.
        tokens = []
        for x in code.split():
            try:
                y = base64.b64decode(x).decode('utf8', 'ignore')
            except Exception:
                y = '<unk>'
            tokens.append(y)
        c_tokenizer = CTokenizer()
        output_code = c_tokenizer.detokenize(tokens)

        output_fpath = os.path.join(output_dir, fname)
        with open(output_fpath, 'w') as f:
            f.write(output_code)
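
Incidentally, the os.system("mkdir -p %s" % ...) calls in these scripts can be replaced with the standard library, which avoids spawning a shell and handles unusual path names safely:

import os

os.makedirs(output_dir, exist_ok=True)  # drop-in equivalent of "mkdir -p"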