Ejemplo n.º 1
0
def do_stuff(data, input_text):
    names = tuple(sorted([(file_byte_index(key), key) for key in data.keys()]))
    if len(names) == 0:
        return tuple()

    assert [name[0] for name in names] == range(len(names))
    contexts = []
    text_compartments = []

    cur_start = 0
    for i in range(1, len(names) - 1):
        before = names[i - 1][1]
        current = names[i][1]
        after = names[i + 1][1]

        pred_sim = data[before][current]['similarity']
        self_sim = data[current][current]['similarity']
        succ_sim = data[current][after]['similarity']

        should_be_explored = self_sim > succ_sim
        more_interesting_than_pred = self_sim > data[before][before]['similarity']
        more_interesting_than_succ = self_sim > data[before][before]['similarity']

        unique_checks = self_sim > max(pred_sim, succ_sim)
        seperator = succ_sim == pred_sim and unique_checks
        poi = unique_checks and pred_sim != succ_sim

        compartment_start = pred_sim < succ_sim
        #compartment_start2 = data[before][before][0] < data[current][current]['similarity']
        #compartment_start3 = data[before][before][0] != data[current][current]['similarity']
        #compartment_start4 = data[before][before][0] > data[current][current]['similarity']
        #compartment_start4 = data[current][current][0] > data[after][after]['similarity']
        compartment_end = succ_sim < self_sim

        context = {'val': input_text[i:i+1],
                   'pred': pred_sim,
                   'self': self_sim,
                   'succ': succ_sim,
                   'compart_start': 1 if compartment_start else 0,
                   'sep': 1 if seperator else 0,
                   }

        contexts.append(context)

    return tuple(contexts), tuple(text_compartments)
Ejemplo n.º 2
0
def dump_table(data, input_text):
    def print_entries(entries, fmt = '{:>4}'):
        print ', '.join([fmt.format(e) for e in entries])

    print_entries([''] * 3 + ['{:02x}'.format(ord(c)) for c in  input_text])
    print_entries([''] * 3 + [repr(c) for c in input_text])
    print

    for one in sorted(data.keys(), key=file_byte_index):
        entries = []
        input_char = input_text[file_byte_index(one)]
        entries.append('{}'.format(repr(input_char)))
        entries.append('{:02x}'.format(ord(input_char)))
        entries.append('')
        for two in sorted(data[one].keys(), key=file_byte_index):
            val = data[one][two][0]
            entries.append(val)

        print_entries(entries)
Ejemplo n.º 3
0
def make_unique_compartment_constraint_sequence(var_constraints, start_inc,
                                                end_exc):
    comp_const_seq = []
    for var in sorted(var_constraints.keys()):
        if not (start_inc <= file_byte_index(var) < end_exc):
            continue

        prev = comp_const_seq[-1] if len(comp_const_seq) != 0 else None
        # No previous entry to compare against, unique!
        if prev is not None and len(prev) == len(var_constraints[var]):

            unique_const = False
            for a, b in zip(prev, var_constraints[var]):
                if hash(a) != hash(b):
                    unique_const = True

            if not unique_const:
                continue

        comp_const_seq.append(tuple(var_constraints[var]))

    return tuple(comp_const_seq)
Ejemplo n.º 4
0
def dump_compartments(data, input_text):
    names = tuple(sorted([(file_byte_index(key), key) for key in data.keys()]))
    if len(names) == 0:
        return tuple()

    assert [name[0] for name in names] == range(len(names))

    #compartment_start =
    #for i in range(1, )

    compartments = {}
    for i_one, one in names:
        zipped = []
        for i_two, two in names:
            if i_two < i_one:
                continue

        partial_compartments = tuple(split(lambda a, b: a[2] < b[2], zipped))
        compartments.append(tuple(partial_compartments))

    compartments = list(set(compartments))

    return compartments
Ejemplo n.º 5
0
def extract_input_constraint_similarities(path, input_text):
    constraints = defaultdict(set)

    vars = set()
    for c in path.guards:
        if c.is_true():
            continue

        if not is_reasonable_constraint(c):
            print "IGNORING unreasonable constraint of depth {}".format(
                c.depth)
            continue

        print "HANDLING reasonable constraint of depth {}: {}".format(
            c.depth, c)

        vars.update(c.variables)
        for var in c.variables:
            mapping, dummied_constraints = dummy_out_vars((c, ))
            constraints[var].update(dummied_constraints)

    if len(vars) == 0:
        return constraints, []

    var_names = [None] * len(input_text)

    for v in vars:
        var_names[file_byte_index(v)] = v
    #assert all(file_byte_index(vars[i]) == i for i in range(len(vars)))

    similarity = tuple(
        tuple(
            len(constraints[one].intersection(constraints[two]))
            for two in var_names) for one in var_names)
    if None in constraints:
        del constraints[None]
    return constraints, similarity
Ejemplo n.º 6
0
def extract_input_characteristics(proj, final_path, input_text,
                                  var_before_touch_state_map,
                                  string_classification_data):
    var_constraints, similarities, comp_descriptors = extract_input_compartment_descriptors(
        final_path, input_text)
    comp_set = set([c['const_seq'] for c in comp_descriptors])
    print len(comp_set), '/', len(comp_descriptors)

    other_opts = {}
    for comp in comp_descriptors:
        if comp['const_seq'] in other_opts and other_opts[
                comp['const_seq']] is not None:
            continue

        var_idx_to_state_mapping = {
            file_byte_index(k): v
            for k, v in var_before_touch_state_map.iteritems()
        }

        #import ipdb; ipdb.set_trace()

        by_runs = defaultdict(list)
        for var in range(comp['start'], comp['end']):
            if var in var_idx_to_state_mapping:
                for r, g, s in var_idx_to_state_mapping[var]:
                    by_runs[r].append((g, s))

        first_idx = -1
        for idx in range(comp['start'], comp['end']):
            if idx in var_idx_to_state_mapping:
                first_idx = idx
                break

        if first_idx == -1:
            other_opts[comp['const_seq']] = None
            continue  # We can't do anything with this compartment!

        interesting_guard_creators = var_idx_to_state_mapping[first_idx]

        run, guard, state = interesting_guard_creators[-1]
        removed_guards = []
        for r in sorted(by_runs.keys(), reverse=True):
            if r > run:
                continue

            still_matching = False
            for g, s in by_runs[r]:
                #if any(comp['start'] <= file_byte_index(v) < comp['end'] for v in g.variables):
                if any(comp['start'] == file_byte_index(v)
                       for v in g.variables):
                    # still constraining our input, we're good!
                    run, guard, state = r, g, s
                    still_matching = True
                    break

            if not still_matching:
                break

            removed_guards.append(guard)

        #import ipdb; ipdb.set_trace()

        #print "Finding other options when ignoring {}".format(removed_guards)
        other_string_options = extract_other_options(
            proj, state, comp, input_text, string_classification_data)
        other_opts[comp['const_seq']] = other_string_options

    for comp in comp_descriptors:
        comp['other_options'] = other_opts[comp['const_seq']]

    return var_constraints, similarities, comp_descriptors, other_opts
Ejemplo n.º 7
0
    def is_interesting_guard(g):
        if g is None or g.is_true():
            return False

        return any(comp_descriptor['start'] <= file_byte_index(v) <
                   comp_descriptor['end'] for v in g.variables)
Ejemplo n.º 8
0
def get_chr_idx_to_constraints_mapping(guards):
    chr_idx_to_constraints = defaultdict(list)
    for c in guards:
        for var in c.variables:
            chr_idx_to_constraints[file_byte_index(var)].append(c)
    return chr_idx_to_constraints