Ejemplo n.º 1
0
def extract_file_strings(buf, **kwargs):
    """
    yield a (String, offset) pair for every ASCII string found in `buf`,
    followed by one for every UTF-16 LE string.

    extra keyword arguments are accepted and ignored.
    """
    scanners = capa.features.extractors.strings

    # ASCII results are emitted in full before any unicode results.
    for scan in (scanners.extract_ascii_strings, scanners.extract_unicode_strings):
        for found in scan(buf):
            yield String(found.s), found.offset
Ejemplo n.º 2
0
def extract_file_strings(pe, file_path):
    """
    yield (String, offset) pairs for the ASCII and UTF-16 LE strings
    found in the raw bytes of the file at `file_path`.

    `pe` is not used here; the file is read from disk in binary mode.
    all ASCII strings are emitted before any UTF-16 LE strings.
    """
    with open(file_path, "rb") as handle:
        contents = handle.read()

    scanners = capa.features.extractors.strings
    for scan in (scanners.extract_ascii_strings, scanners.extract_unicode_strings):
        for found in scan(contents):
            yield String(found.s), found.offset
Ejemplo n.º 3
0
def test_rule_yaml_count_string():
    """a `count(string(foo)): 2` feature matches only when there are exactly two locations."""
    yaml_text = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
            features:
                - count(string(foo)): 2
        """
    )
    parsed = capa.rules.Rule.from_yaml(yaml_text)

    # (locations recorded for the string, expected evaluation result)
    cases = (
        ({}, False),
        ({1}, False),
        ({1, 2}, True),
        ({1, 2, 3}, False),
    )
    for locations, expected in cases:
        assert parsed.evaluate({String("foo"): locations}) == expected
Ejemplo n.º 4
0
def test_explicit_string_values_int():
    """quoted numeric-looking values under `string:` are kept as String features."""
    yaml_text = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
            features:
                - or:
                    - string: "123"
                    - string: "0x123"
        """
    )
    parsed = capa.rules.Rule.from_yaml(yaml_text)
    features = list(parsed.statement.get_children())

    for literal in ("123", "0x123"):
        assert String(literal) in features
Ejemplo n.º 5
0
Archivo: insn.py Proyecto: H1d3r/capa
def extract_insn_string_features(f, bb, insn):
    """
    yield the strings referenced by the operands of the given instruction.

    a value is taken from each immediate, immediate-memory, SIB, or
    RIP-relative operand and expanded via `derefs`; every resulting address
    at which `read_string` succeeds is emitted as a
    (String, instruction va) pair, with trailing NUL characters removed.
    operands of any other kind are skipped.
    """
    # example:
    #
    #     push    offset aAcr     ; "ACR  > "

    for operand in insn.opers:
        if isinstance(operand, envi.archs.i386.disasm.i386ImmOper):
            candidate = operand.getOperValue(operand)
        elif isinstance(operand, envi.archs.i386.disasm.i386ImmMemOper):
            # like 0x10056CB4 in `lea eax, dword [0x10056CB4]`
            candidate = operand.imm
        elif isinstance(operand, envi.archs.i386.disasm.i386SibOper):
            # like 0x401000 in `mov eax, 0x401000[2 * ebx]`
            candidate = operand.imm
        elif isinstance(operand, envi.archs.amd64.disasm.Amd64RipRelOper):
            candidate = operand.getOperAddr(insn)
        else:
            continue

        for address in derefs(f.vw, candidate):
            try:
                text = read_string(f.vw, address)
            except ValueError:
                # read_string reports failure with ValueError; try the next address.
                continue

            yield String(text.rstrip("\x00")), insn.va
Ejemplo n.º 6
0
def test_string_values_special_characters():
    """
    escape sequences inside double-quoted YAML strings end up as real
    CR/LF characters in the parsed String features, including when the
    feature also carries a description.
    """
    yaml_text = textwrap.dedent(
        """
        rule:
            meta:
                name: test rule
            features:
                - or:
                    - string: "hello\\r\\nworld"
                    - string: "bye\\nbye"
                      description: "test description"
        """
    )
    parsed = capa.rules.Rule.from_yaml(yaml_text)
    features = list(parsed.statement.get_children())

    for decoded in ("hello\r\nworld", "bye\nbye"):
        assert String(decoded) in features
Ejemplo n.º 7
0
def extract_file_strings():
    """yield (String, address) pairs for ASCII and UTF-16 LE strings in every segment

    for each segment, ASCII strings are emitted first, then UTF-16 LE ones;
    the address is the segment start plus the string's offset in the segment buffer.

    IDA must load resource sections for this to be complete
        - '-R' from console
        - Check 'Load resource sections' when opening binary in IDA manually
    """
    ida_helpers = capa.features.extractors.ida.helpers
    scanners = capa.features.extractors.strings

    for segment in ida_helpers.get_segments():
        data = ida_helpers.get_segment_buffer(segment)

        for scan in (scanners.extract_ascii_strings, scanners.extract_unicode_strings):
            for found in scan(data):
                yield String(found.s), (segment.start_ea + found.offset)
Ejemplo n.º 8
0
def extract_insn_string_features(f, bb, insn):
    """parse string features from the given instruction."""
    # example:
    #
    #     push    offset aAcr     ; "ACR  > "
    for data_ref in insn.getDataRefs():
        for v in derefs(f.smda_report, data_ref):
            string_read = read_string(f.smda_report, v)
            if string_read:
                yield String(string_read.rstrip("\x00")), insn.offset
Ejemplo n.º 9
0
def extract_insn_string_features(
        f: CilMethodBody, bb: CilMethodBody,
        insn: Instruction) -> Iterator[Tuple[String, int]]:
    """
    yield the user string loaded by the given instruction, if any.

    nothing is produced unless the instruction is an `ldstr` whose operand
    is a StringToken and `read_dotnet_user_string` returns a value for it;
    otherwise a single (String, instruction offset) pair is emitted.
    """
    # is_ldstr() is checked first, so the operand is only inspected for ldstr.
    if not (insn.is_ldstr() and isinstance(insn.operand, StringToken)):
        return

    text: Optional[str] = read_dotnet_user_string(f.ctx["pe"], insn.operand)
    if text is not None:
        yield String(text), insn.offset
Ejemplo n.º 10
0
def extract_insn_string_features(f, bb, insn):
    """yield the string referenced by the given instruction, if any

    the data reference found for the instruction is looked up as a string;
    a reference equal to the instruction's own address is ignored.
    at most one (String, instruction address) pair is emitted.

    args:
        f (IDA func_t)
        bb (IDA BasicBlock)
        insn (IDA insn_t)

    example:
        push offset aAcr     ; "ACR  > "
    """
    ida_helpers = capa.features.extractors.ida.helpers

    target = ida_helpers.find_data_reference_from_insn(insn)
    if target == insn.ea:
        return

    text = ida_helpers.find_string_at(target)
    if not text:
        return

    yield String(text), insn.ea