Example #1
def test_branches(program, df, infile, args):
    # get labels:
    df_l = get_labeled(df)

    _, ext = os.path.splitext(infile)
    tmpfile = 'input' + ext

    df_l_g = None

    work_dir = '/dev/shm/'
    # if program == 'jpeg':
        # work_dir = '/tmp/'

    infile = os.path.abspath(infile)
    fsize = os.path.getsize(infile)

    cwd = os.getcwd()
    with tempfile.TemporaryDirectory(prefix=work_dir) as run_dir:
        os.chdir(run_dir)
        
        # get trace:
        print(infile, tmpfile)

        run(['cp', infile, tmpfile])
        # run([program, "taint", '1000000', tmpfile])

        env = os.environ.copy()
        env['DFSAN_OPTIONS'] = "func_logfile='',always_record_branches=1" # turn off func recording
        print(programs.get_cmd(program, programs.GRAD, tmpfile))
        # capture stderr so the error branch below can print it
        res = run(programs.get_cmd(program, programs.GRAD, tmpfile), env=env, stdout=DEVNULL, stderr=PIPE)

        if res.returncode:
            print('ERROR RUNNING BASELINE:', res.returncode)
            print(res.stderr.decode('utf-8', 'ignore'))
        # baseline_trace_df = pd.read_csv('branches.csv')

        baseline_trace = pd.read_csv('branches.csv')
        
        result = []
        
        df_l_g = df_l.groupby(['file_id', 'inst_id']).agg(lambda x: tuple(x))

        for (f_id, b_id), row in tqdm(df_l_g.iterrows(), total=df_l_g.shape[0]):
            
            # test_bytes = row.deriv_byte
            # if args.all_bytes:
            test_bytes = list(range(fsize))
            # print(test_bytes, row)
            # print(len(test_bytes), len(fsize))
            
            valid_bytes = test_branch(program, f_id, b_id, baseline_trace, test_bytes, infile)
            result.append(valid_bytes)


        df_l_g['valid_bytes'] = pd.Series(result, df_l_g.index)

    os.chdir(cwd)
    return df_l_g
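
These snippets rely on module-level imports and project helpers that are not shown on this page. A minimal sketch of what the preamble presumably looks like; the `programs` module, `get_labeled`, `get_branch`, and `MAX_ITER` are project-specific names taken from the surrounding code, not standard libraries:

# Assumed preamble for these snippets (not part of the original excerpt).
import os
import time
import tempfile

import pandas as pd
from tqdm import tqdm
from subprocess import run, DEVNULL, PIPE

# Project-specific pieces referenced below but not shown in this excerpt:
# import programs                                  # provides get_cmd(), TAINT, GRAD
# from <project module> import get_labeled, get_branch   # trace helpers used here
# MAX_ITER = ...                                   # cap on per-byte iterations in eval_file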
Example #2
def test_branch(program, f_id, b_id, orig_trace, byte_inds, infile):
    changes = [0, 255] + [1,2,4,8,16,32,64,128]# + list(range(1,254))
    valid_byte_inds = []
    
    orig_trace_b = orig_trace[(orig_trace.file_id == f_id) & (orig_trace.inst_id == b_id)]

    _, ext = os.path.splitext(infile)
    tmpfile = 'input' + ext
    # for byte index

    orig_blob_data = None
    with open(infile, 'rb') as tf:
        orig_blob_data = bytearray(tf.read())
 

    for byte_ind in byte_inds:
        # for 0, 255, other changes?
        found = False
        for change in changes:
            # read and modify template, write output
            blob_data = bytearray(orig_blob_data)
            if change in [0, 255]:
                blob_data[byte_ind] = change
            else:
                blob_data[byte_ind] = blob_data[byte_ind] ^ change
            
            with open(tmpfile, 'wb') as outfile:
                outfile.write(blob_data)
            
            # run([program, "taint", '0', tmpfile])

            env = os.environ.copy()
            env['DFSAN_OPTIONS'] = "func_logfile='',always_record_branches=1" # turn off func recording
            run(programs.get_cmd(program, programs.GRAD, tmpfile), env=env, 
                    stdout=DEVNULL, stderr=DEVNULL)
            
            # load branches, check if any branch values changed
            trace = pd.read_csv('branches.csv')
            trace_b = trace[(trace.file_id == f_id) & (trace.inst_id == b_id)]

            for i in range(len(trace_b)):
                if i < len(orig_trace_b):
                    row = trace_b.iloc[i]
                    origrow = orig_trace_b.iloc[i]

                    if (row.lhs_val != origrow.lhs_val or 
                        row.rhs_val != origrow.rhs_val):
                        valid_byte_inds.append(byte_ind)
                        found = True
                        break
            if found:
                break
                
    return valid_byte_inds
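
The `changes` list mixes two perturbation kinds: 0 and 255 overwrite the byte, while the powers of two are XORed in to flip a single bit. The same rule appears again in `test_byte` below; a standalone sketch for clarity, with the function name `perturb_byte` invented purely for illustration:

# Illustration only: the byte-perturbation rule shared by test_branch and test_byte.
def perturb_byte(data, byte_ind, change):
    out = bytearray(data)
    if change in (0, 255):
        out[byte_ind] = change       # overwrite with all-zero / all-one byte
    else:
        out[byte_ind] ^= change      # flip a single bit (1, 2, 4, ..., 128)
    return out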
Example #3
def test_bytes(program, infile, working_dir='/tmp/'):

    cwd = os.getcwd()
    print(infile)
    infile = os.path.abspath(infile)
    print(infile)

    df_actual = pd.DataFrame()

    with tempfile.TemporaryDirectory(prefix=working_dir) as run_dir:
        os.chdir(run_dir)

        _, ext = os.path.splitext(infile)
        tmpfile = 'input' + ext
        
        # get trace:
        run(['cp', infile, tmpfile])
        
        
        result = []
                     
        infile_len = os.path.getsize(infile)
        print('evaluating input bytes for', infile)
        for byte_ind in tqdm(range(infile_len)):
            cmd = programs.get_cmd(program, programs.TAINT, infile, byte_ind)
            run(cmd, stdout=DEVNULL, stderr=DEVNULL)
            baseline_trace = pd.read_csv('branches.csv')
            
            actual_branches = test_byte(program, byte_ind, baseline_trace, infile)
            result.extend(actual_branches)
            
        df_actual = pd.DataFrame(result)
        df_actual.columns = ['deriv_byte', 'file_id', 'inst_id', 'actual']


    os.chdir(cwd)

    return df_actual
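
`test_bytes` returns one row per (byte, branch) pair whose comparison operands actually changed, with columns `deriv_byte`, `file_id`, `inst_id`, `actual`. A hypothetical call site; the program name and input path are placeholders:

# Hypothetical usage; 'prog' and 'inputs/sample.bin' are placeholders.
if __name__ == '__main__':
    df_actual = test_bytes('prog', 'inputs/sample.bin')
    print(df_actual.head())
    df_actual.to_csv('actual_branches.csv', index=False)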
Example #4
def test_byte(program, byte_ind, orig_trace, infile):
    changes = [0, 255] + [1,2,4,8,16,32,64,128]# + list(range(1,254))
    actual_branches = []
    
    
    # unique (file_id, inst_id) pairs seen in the baseline trace
    test_branches = [key for key, _ in orig_trace.groupby(['file_id', 'inst_id'])]
    

    _, ext = os.path.splitext(infile)
    tmpfile = 'input' + ext
    
    for change in changes:
        
        with open(infile, 'rb') as tf:
            blob_data = bytearray(tf.read())
        if change in [0, 255]:
            blob_data[byte_ind] = change
        else:
            blob_data[byte_ind] = blob_data[byte_ind] ^ change

        with open(tmpfile, 'wb') as outfile:
            outfile.write(blob_data)

        # run([program, "taint", '0', tmpfile], stdout=DEVNULL, stderr=DEVNULL)

        env = os.environ.copy()
        env['DFSAN_OPTIONS'] = "func_logfile=''" # turn off func recording
        # print(programs.get_cmd(program, programs.TAINT, byte_ind))
        run(programs.get_cmd(program, programs.TAINT, tmpfile, byte_ind), env=env, 
                stdout=DEVNULL, stderr=DEVNULL)
        


        # load branches, check if any branch values changed
        try:
            trace = pd.read_csv('branches.csv')

            for (f_id, b_id) in test_branches:

                if (f_id, b_id) in actual_branches:
                    continue  # already found this branch

                orig_trace_b = get_branch(orig_trace, f_id, b_id)
                trace_b = get_branch(trace, f_id, b_id)

                for i in range(len(trace_b)):
                    if i < len(orig_trace_b):
                        row = trace_b.iloc[i]
                        origrow = orig_trace_b.iloc[i]

                        if (row.lhs_val != origrow.lhs_val or
                            row.rhs_val != origrow.rhs_val):
                            actual_branches.append((f_id, b_id))
                            break
        except Exception as e:
            print('Branch check error!', e)
               
    actual_branches = [(byte_ind, f_id, b_id, 1) for (f_id, b_id) in actual_branches]
    return actual_branches
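
`get_branch` is not defined in this excerpt; judging from the inline filter in Example #2, it presumably selects the trace rows belonging to a single branch. A sketch under that assumption:

# Assumed helper, mirroring the inline filter used in test_branch above.
def get_branch(trace, f_id, b_id):
    return trace[(trace.file_id == f_id) & (trace.inst_id == b_id)]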
Example #5
def eval_file(program, infile, mode, label=True, opt=True):

    env = os.environ.copy()
    env['GR_MODE_PERF'] = '1'

    if opt:
        env['DFSAN_OPTIONS'] = "reuse_labels=0"


    _, ext = os.path.splitext(infile)

    tmpfile = 'input'+ext
    run(['cp', infile, tmpfile])

    cmd = programs.get_cmd(program, mode, infile, 0)
    if mode in [programs.TAINT, programs.GRAD]:
        if label:
            cmd[-2] = "$i"
        else:
            del cmd[-2]
            del cmd[-2]

    # del cmd[-2]
    # del cmd[-2]
    cmd = " ".join(cmd)

    size = os.path.getsize(infile)
    iters = min(size-1, MAX_ITER)
    print(infile, iters)

    try:
        fullcmd =  "for i in {0.."+str(iters)+"}; do "+cmd+">/dev/null; done;"
        if iters < 1000:
            # repeat the inner loop 5 times so the iteration count matches iters*5 below
            fullcmd = "for j in {1..5}; do " + fullcmd + "done;"
            iters = iters*5
            nsamples = 5
        else:
            nsamples = 2
        # fullcmd = cmd
        # fullcmd =  "for i in {0.."+str(iters)+"}; do "+cmd+"; done;"
        print(fullcmd)
        total_time = 0.0

        for i in range(nsamples):
            # prep any cache
            run_res = run(cmd, shell=True, env=env, executable='/bin/bash', capture_output=True)

            start = time.time()
            run_res = run(fullcmd, shell=True, env=env, executable='/bin/bash', capture_output=True)
            elapsed = time.time() - start
            total_time += elapsed

            if run_res.returncode:
                print('Error',str(run_res.returncode)+':', " ".join(programs.get_cmd(program, mode, infile)))
                # print('stdout:', run_res.stdout.decode('utf-8', errors='ignore'))
                print('stderr:', run_res.stderr.decode('utf-8', errors='ignore'))

    except Exception as e:
        print('Error:', " ".join(programs.get_cmd(program, mode, infile)))
        print(e)
        return 0

    # sanity check we're not recording
    # assert(not (os.path.exists('branches.csv')))
    # assert(not (os.path.exists('func_args.csv')))
    meantime = total_time / nsamples * 1000.0 # convert to ms
    per_byte_time = meantime / iters

    return per_byte_time
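
`eval_file` returns the mean wall-clock time per labeled byte in milliseconds. A hypothetical driver comparing the taint and gradient modes; the program name and input path are placeholders, and `programs.TAINT`/`programs.GRAD` come from the project module assumed earlier:

# Hypothetical driver for eval_file; 'prog' and 'inputs/sample.bin' are placeholders.
for mode in (programs.TAINT, programs.GRAD):
    per_byte_ms = eval_file('prog', 'inputs/sample.bin', mode, label=True, opt=True)
    print(mode, 'per-byte time:', round(per_byte_ms, 3), 'ms')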