Example #1
0
def process(oid, opts):
    """Store the contents passed in ``opts['file_contents']`` for ``oid``.

    When the contents are empty nothing is logged or stored.  Otherwise the
    contents are written through ``api.store`` as ``{"data": <contents>}``
    under ``name``.

    Always returns True.
    """
    contents = opts['file_contents']
    if len(contents) > 0:
        logger.debug("Processing file %s", oid)
        api.store(name, oid, {"data": contents}, opts)
    return True
Example #2
0
def process(oid, opts):
    """Record import metadata for the file ``oid``.

    Reads ``opts["file_location"]`` and ``opts["stat"]`` and either creates
    a new record or extends the one already stored under ``name``:

    * ``metadata`` maps the import time to ``{file name: stat}``
    * ``names`` is the set of file names seen for this oid
    * ``size`` is taken from ``stat["size"]`` (only set if missing)

    The oid is finally tagged with ``import_time``.  Always returns True.
    """
    logger.debug("Processing file %s", oid)
    now = int(time.time())
    # Keep only the final path component of the imported file
    base_name = os.path.basename(opts["file_location"])
    stat_info = opts["stat"]
    file_size = stat_info["size"]

    # Fetch the previously stored record, if there is one
    record = None
    if api.exists(name, oid, opts):
        record = api.retrieve(name, oid, opts, True)

    if record:
        # Extend the existing record with this import
        if "size" not in record:
            record["size"] = file_size
        record["metadata"][now] = {base_name: stat_info}
        record["names"].add(base_name)
    else:
        # First import of this oid: build a fresh record
        record = {
            "metadata": {now: {base_name: stat_info}},
            "names": set([base_name]),
            "size": file_size,
        }

    api.store(name, oid, record, opts)

    # Tag the oid with the time of this import
    api.apply_tags(oid, {"import_time": now})

    return True
Example #3
0
def process(oid, opts):
    """Find runs of consecutive ``nop`` instructions for ``oid``.

    The instructions come from the "insns" field of the "disassembly"
    module and are walked in ascending offset order.  The stored result maps
    the offset of the first ``nop`` of each run to the number of ``nop``
    instructions in that run.

    Returns False when no instructions are available, True otherwise.
    """
    logger.debug("process()")

    insns = api.get_field("disassembly", oid, "insns")
    if insns is None:
        return False

    nop_run = {}
    run_length = 0
    run_start = 0
    # sorted() accepts any iterable of keys, so this does not rely on
    # keys() returning a list that can be sorted in place.
    for offset in sorted(insns):
        if insns[offset]["mnem"] == "nop":
            if run_length == 0:
                run_start = offset
            run_length += 1
        elif run_length > 0:
            # A non-nop instruction ends the current run
            nop_run[run_start] = run_length
            run_length = 0

    # Flush a run that reaches the last instruction
    if run_length > 0:
        nop_run[run_start] = run_length

    api.store(name, oid, nop_run, opts)
    return True
Example #4
0
def process(oid, opts):
    """Disassemble ``oid`` starting from one of its entry points.

    Only objects whose ``src_type`` source is "files" are handled.  The
    entry address is popped from ``header.get_entries()``.  If the
    "function_extract" module already holds a function at that address its
    instructions are reused; otherwise
    ``disassemble_entry.disassemble_entry`` is called.

    The stored result holds the file type, the instructions, their count
    and the header's ``insn_mode``.

    Returns False when the source is not "files", no header is available or
    the header has no entry points; True once the result is stored.
    """
    logger.debug("process()")
    src_type = api.retrieve("src_type", oid)
    source = src_type["source"]
    if source != "files":
        return False
    file_data = api.get_field(source, oid, "data")
    header = api.get_field("object_header", [oid], "header")

    # do not disassemble under certain conditions
    if not header:
        # Use the module logger like every other message in this function
        logger.info("Not processing oid %s: unrecognized file type", oid)
        return False

    logger.info("calling (python-based) disassemble_first_run on %s", oid)
    entries = header.get_entries()
    if not entries:
        logger.info("No entry points found for %s", oid)
        return False

    entry_address = entries.pop()
    functions = api.retrieve("function_extract", oid)
    if functions and entry_address in functions:
        insns = functions[entry_address]["insns"]
    else:
        insns = disassemble_entry.disassemble_entry(file_data, header, entry_address, logger)

    data = {"type": src_type["type"], "insns": insns,
            "num_insns": len(insns), "insn_mode": header.insn_mode}
    api.store(name, oid, data, opts)
    return True
Example #5
0
def process(oid, opts):
    """Collect per-function features for ``oid``.

    ``features`` is keyed by function address.  Each function entry holds:

    * one entry per basic block, keyed by the block's ``first_insn``, with
      empty ``functions`` and ``conditional_structure`` lists (call
      instructions are handed to ``analyzeCall`` together with the address
      of the block that contains them);
    * the counters ``conditionals``, ``loops``, ``ifs`` and ``math``.

    A conditional counts as an ``if`` when ``forwardJump`` is truthy for
    its next jump and as a loop otherwise.

    Returns False when either "function_extract" or "basic_blocks" yields
    nothing, True once the features are stored.
    """
    logger.debug("process()")
    asm = api.retrieve("function_extract", [oid])
    if not asm:
        return False
    basic_blocks = api.retrieve("basic_blocks", [oid])
    if not basic_blocks:
        # Without basic blocks no function entry can be built
        return False

    features = {}
    for func_address, blocks in sorted(basic_blocks.items()):
        func_features = features[func_address] = {}
        for block in blocks:
            func_features[block["first_insn"]] = {"functions": [], "conditional_structure": []}

    for address, func in sorted(asm.items()):
        insns = func["insns"]
        func_features = features[address]
        for line in insns:
            if line["mnem"] == "call":
                # Enclosing block: the greatest block start not after this insn
                block_address = max(first for first in func_features if first <= line["addr"])
                analyzeCall(oid, func_features, address, block_address, insns, line)
            # NOTE: per-block analysis of conditional instructions is
            # currently disabled, so "conditional_structure" stays empty here.

        conditionals = [instr for instr in insns if instr["group"] == "cond" and correctCMP(instr)]
        # Evaluate the jump direction once per conditional
        forward = [forwardJump(nextJump(insns, instr["addr"]), instr["addr"]) for instr in conditionals]
        ifs = sum(1 for is_forward in forward if is_forward)
        loops = len(conditionals) - ifs
        maths = sum(1 for instr in insns if instr["group"] == "arith")
        func_features.update({"conditionals": len(conditionals), "loops": loops, "ifs": ifs, "math": maths})

    checkDynamicCalls(oid, features)
    api.store(name, oid, features, opts)
    return True
Example #6
0
def process(oid, opts):
    """Store the expanded, de-duplicated oid list of collection ``oid``.

    ``opts["oid_list"]`` is expanded with ``api.expand_oids`` and duplicates
    are removed before storing ``{"oid_list": [...]}``.  Nothing is stored
    when the list is empty or None.

    Always returns True.
    """
    if not opts["oid_list"]:
        return True
    # Lazy %-style arguments: the message is only formatted if it is emitted
    logger.debug("Processing collection %s", oid)
    oid_list = list(set(api.expand_oids(opts["oid_list"])))
    api.store(name, oid, {"oid_list": oid_list}, opts)
    return True
Example #7
0
def process(oid, opts):
    """Extract the opcodes from the disassembly of ``oid``.

    The "insns" field of the "disassembly" module is passed to
    ``get_opcodes`` and the result is stored as ``{"opcodes": ...}``.

    Returns False when there is no disassembly, True otherwise.
    """
    logger.debug("process()")
    disasm = api.get_field("disassembly", [oid], "insns")
    # Identity test: only a missing disassembly (None) is rejected here
    if disasm is None:
        return False
    opcodes = get_opcodes(disasm)
    api.store(name, oid, {"opcodes": opcodes}, opts)
    return True
Example #8
0
def reducer(intermediate_output, opts, jobid):
    """Merge the per-oid histograms into one and store it under ``jobid``.

    Falsy entries of ``intermediate_output`` are skipped.  For every other
    oid the histogram stored under ``name`` is retrieved and folded into the
    running total with ``merge_histo``.

    Returns the merged histogram.
    """
    logger.debug("reducer()")
    combined = defaultdict(int)
    for oid in intermediate_output:
        if not oid:
            continue
        combined = merge_histo(api.retrieve(name, oid, opts), combined)
    api.store(name, jobid, combined, opts)
    return combined
Example #9
0
def process(oid, opts):
    """Store a representation of the ELF header of ``oid``.

    Only objects whose ``src_type`` type is "ELF" are handled.  The result
    of the "elf_parse" module is converted with ``elf_repr`` and stored as
    ``{"header": ...}``.

    Returns True when a header was stored, False otherwise.
    """
    logger.debug("Processing oid %s", oid)
    if api.get_field("src_type", oid, "type") != "ELF":
        return False
    parsed = api.retrieve("elf_parse", oid)
    if parsed:
        api.store(name, oid, {"header": elf_repr(parsed)}, opts)
        return True
    return False
Example #10
0
def process(oid, opts):
    """Store the descriptive fields of collection ``oid`` and tag it.

    The ``notes``, ``num_oids`` and ``name`` entries of ``opts`` are stored,
    then the oid is tagged with ``creation_time`` (current time in whole
    seconds).

    Always returns True.
    """
    logger.debug("Processing collection %s", oid)
    summary = dict((key, opts[key]) for key in ("notes", "num_oids", "name"))
    api.store(name, oid, summary, opts)

    # Tag the collection with its creation time
    api.apply_tags(oid, {"creation_time": int(time.time())})

    return True
Example #11
0
def process(oid, opts):
    """Compute opcode collocations for ``oid``.

    Retrieves the opcode histogram and the opcode n-grams (requested with
    ``n`` = 2) and passes both to ``collocations``.

    Returns False when either input is missing or empty, True once the
    result is stored.
    """
    logger.debug("process()")

    histogram = api.retrieve("opcode_histogram", oid, {})
    ngrams = api.retrieve("opcode_ngrams", oid, {"n": 2})
    if histogram and ngrams:
        api.store(name, oid, collocations(histogram, ngrams), opts)
        return True
    return False
Example #12
0
def mapper(oid, opts, jobid=False):
    """Build and store the n-grams of the data of ``oid``.

    Returns None when the documentation of the oid's source has a truthy
    ``set`` entry.  If a result already exists under ``name`` it is left
    untouched; otherwise ``build_ngrams`` is run on the source data with
    ``opts["n"]``.  In both of those cases ``oid`` is returned.
    """
    logger.debug("mapper()")
    src = api.source(oid)
    if api.documentation(src)["set"]:
        return None
    if not api.exists(name, oid, opts):
        raw = api.retrieve(src, oid, opts)["data"]
        api.store(name, oid, build_ngrams(raw, opts["n"]), opts)
    return oid
Example #13
0
def process(oid, opts):
    """Store a representation of the PE header of ``oid``.

    Only objects whose ``src_type`` type is "PE" are handled.  The result of
    the "pe_parse" module is converted with ``pe_repr`` and stored as
    ``{"header": ...}``.

    Returns True when a header was stored, False otherwise.
    """
    logger.debug("process()")
    if api.get_field("src_type", oid, "type") != "PE":
        return False

    parsed = api.retrieve("pe_parse", oid)
    if parsed:
        api.store(name, oid, {"header": pe_repr(parsed)}, opts)
        return True
    return False
Example #14
0
def mapper(oid, opts, jobid=False):
    """Build and store a histogram of the data of ``oid``.

    If a result already exists under ``name`` it is reused.  Otherwise the
    "data" field of the oid's source is passed to ``build_histo``.

    Returns ``oid``, or None when the source has no data.
    """
    logger.debug("mapper()")
    src = api.source(oid)
    if not api.exists(name, oid, opts):
        payload = api.get_field(src, oid, "data")
        if not payload:
            return None
        api.store(name, oid, build_histo(payload), opts)
    return oid
Example #15
0
def mapper(oid, opts, jobid=False):
    """Build and store a histogram of the opcodes of ``oid``.

    Returns None when the documentation of the oid's source has a truthy
    ``set`` entry, or when no opcodes are available.  An existing result
    under ``name`` is reused; otherwise ``build_histo`` is run on the
    "opcodes" field of the "opcodes" module.  ``oid`` is returned in both
    of those cases.
    """
    logger.debug("mapper()")
    src = api.source(oid)
    if api.documentation(src)["set"]:
        return None
    if not api.exists(name, oid, opts):
        opcode_list = api.get_field("opcodes", oid, "opcodes")
        if not opcode_list:
            return None
        api.store(name, oid, build_histo(opcode_list), opts)
    return oid
Example #16
0
def process(oid, opts=None):
    """Store the source and the type of ``oid``.

    The type is "collection" when the source is "collections"; otherwise it
    is whatever ``file_type`` reports for the object's data.

    Always returns True.
    """
    logger.debug("process()")
    src = api.source(oid)
    logger.debug("Processing file %s", str(oid))
    if src == "collections":
        kind = "collection"
    else:
        kind = file_type(api.get_field(src, oid, "data"))

    api.store(name, oid, {"source": src, "type": kind}, opts)
    return True
Example #17
0
def mapper(oid, opts, jobid=False):
    """Build and store a histogram from the "nops" result of ``oid``.

    Returns None when the documentation of the oid's source has a truthy
    ``set`` entry, or when the "nops" module yields nothing.  An existing
    result under ``name`` is reused; otherwise ``nop_histo`` is run on the
    retrieved data.  ``oid`` is returned in both of those cases.
    """
    logger.debug("mapper()")
    src = api.source(oid)
    if api.documentation(src)["set"]:
        return None
    if not api.exists(name, oid, opts):
        nop_data = api.retrieve("nops", oid, opts)
        if not nop_data:
            return None
        api.store(name, oid, nop_histo(nop_data), opts)
    return oid
Example #18
0
def process(oid, opts):
    """Store the operands found at ``opts["position"]`` for ``oid``.

    The instructions come from the "insns" field of the "disassembly"
    module and are handed to ``get_operands``.  When there are no
    instructions an empty list is stored instead.

    Always returns True.
    """
    logger.debug("process()")
    insns = api.get_field("disassembly", [oid], "insns", {})
    pos = opts["position"]

    # No instructions means no operands
    operands = get_operands(insns, pos) if insns else []

    api.store(name, oid, {"operands": operands}, opts)
    return True
Example #19
0
def process(oid, opts):
    """Store one SHA-1 hash per function of ``oid``.

    For every function returned by the "function_extract" module the
    mnemonics of its instructions are concatenated and hashed.  The set of
    hex digests is stored as ``{"hashes": ...}``.

    Returns False when there are no functions, True otherwise.
    """
    logger.debug("process()")
    functions = api.retrieve("function_extract", oid)
    if not functions:
        return False
    hashes = set()
    for f in functions:
        mnems = "".join(insn["mnem"] for insn in functions[f]["insns"])
        # hashlib only accepts bytes; encode text so hashing also works
        # where str is a unicode type.
        if not isinstance(mnems, bytes):
            mnems = mnems.encode("utf-8")
        hashes.add(hashlib.sha1(mnems).hexdigest())

    api.store(name, oid, {"hashes": hashes}, opts)
    return True
Example #20
0
def apply_tags(oid_list, new_tags):
    """Merge ``new_tags`` into the stored tags of one or more oids.

    ``oid_list`` may be a single oid or a (possibly nested) list of oids.
    Existing tags with the same key are overwritten; other tags are kept.
    """
    if isinstance(oid_list, list):
        for entry in oid_list:
            apply_tags(entry, new_tags)
        return

    oid = oid_list
    if api.exists("tags", oid):
        current = api.retrieve("tags", oid, {}, True)
    else:
        current = {}
    current.update(new_tags)
    api.store("tags", oid, current)
Example #21
0
def mapper(oid, opts, jobid=False):
    """Build and store a histogram from the calls of the functions of ``oid``.

    An existing result under ``name`` is reused.  Otherwise the output of
    ``calls`` for every function from "function_extract" is folded into one
    histogram with ``merge_histo``.

    Returns ``oid``, or None when there are no functions.
    """
    logger.debug("mapper()")

    if not api.exists(name, oid, opts):
        functions = api.retrieve("function_extract", oid)
        if not functions:
            return None
        histogram = defaultdict(int)
        for address in functions:
            histogram = merge_histo(histogram, calls(functions[address]))
        api.store(name, oid, histogram, opts)
    return oid
Example #22
0
def mapper(oid, opts, jobid=False):
    """Build and store a histogram of basic-block sizes for ``oid``.

    An existing result under ``name`` is reused.  Otherwise every basic
    block from the "basic_blocks" module is counted under its ``num_insns``
    value.

    Returns ``oid``, or None when there are no basic blocks.
    """
    logger.debug("mapper()")

    if not api.exists(name, oid, opts):
        blocks_by_func = api.retrieve("basic_blocks", oid)
        if not blocks_by_func:
            return None
        size_counts = defaultdict(int)
        for block in (b for func in blocks_by_func for b in blocks_by_func[func]):
            size_counts[block["num_insns"]] += 1
        api.store(name, oid, size_counts, opts)
    return oid
Example #23
0
def mapper(oid, opts, jobid=False):
    """Build and store a histogram of the system calls of ``oid``.

    An existing result under ``name`` is reused.  Otherwise every value of
    the "system_calls" field of the "map_calls" module is counted.

    Returns ``oid``, or None when there are no system calls.
    """
    logger.debug("mapper()")

    # NOTE(review): the result is not used below; the lookup is kept in case
    # callers rely on it being performed - confirm before removing.
    src = api.source(oid)
    if not api.exists(name, oid, opts):
        calls_by_addr = api.get_field("map_calls", oid, "system_calls")
        if not calls_by_addr:
            return None
        call_counts = defaultdict(int)
        for addr in calls_by_addr:
            call_counts[calls_by_addr[addr]] += 1
        api.store(name, oid, call_counts, opts)
    return oid
Example #24
0
def process(oid, opts):
    """Parse ``oid`` with ``parse_dex`` and store the result.

    Only objects whose ``src_type`` type is "DEX" are handled.

    Returns True when a truthy parse result was stored; False when the type
    does not match, the data is missing or parsing yields nothing.
    """
    logger.debug("process()")
    if api.get_field("src_type", oid, "type") != "DEX":
        return False
    raw = api.get_field(api.source(oid), oid, "data")
    if not raw:
        logger.debug("Not able to process %s",oid)
        return False
    parsed = parse_dex(raw, oid)
    if not parsed:
        return False
    api.store(name, oid, parsed, opts)
    return True
Example #25
0
def process(oid, opts):
    """Store one SHA-1 hash per basic block of ``oid``.

    For every basic block of every function, the mnemonics of the
    instructions whose address lies between the block's ``first_insn`` and
    ``last_insn`` (inclusive) are concatenated and hashed.  The set of hex
    digests is stored as ``{"hashes": ...}``.

    Returns False when basic blocks or functions are missing, True
    otherwise.
    """
    logger.debug("process()")
    basic_blocks = api.retrieve("basic_blocks", oid)
    functions = api.retrieve("function_extract", oid)
    if not basic_blocks or not functions:
        return False
    hashes = set()
    for f in functions:
        insns = functions[f]["insns"]
        for b in basic_blocks[f]:
            first, last = b["first_insn"], b["last_insn"]
            mnems = "".join(i["mnem"] for i in insns if first <= i["addr"] <= last)
            # hashlib only accepts bytes; encode text so hashing also works
            # where str is a unicode type.
            if not isinstance(mnems, bytes):
                mnems = mnems.encode("utf-8")
            hashes.add(hashlib.sha1(mnems).hexdigest())
    api.store(name, oid, {"hashes": hashes}, opts)
    return True

            
Example #26
0
def process(oid, opts):
    """Run ``detect_packer`` on the PE file ``oid`` and store the result.

    Only objects whose ``src_type`` type is "PE" are handled.  The detector
    receives the "pe_parse" result and the raw data from the "files"
    module.

    Returns True when a truthy result was stored; False when the type does
    not match, the parse result or the data is missing, or nothing was
    detected.
    """
    logger.debug("process()")
    src_type = api.get_field("src_type", oid, "type")
    if src_type != "PE":
        return False
    file_meta = api.retrieve('pe_parse', oid)
    if not file_meta:
        logger.debug("Not able to process %s",oid)
        # Indented with spaces so the early return is unambiguously part
        # of this branch.
        return False
    data = api.get_field("files", oid, "data")
    if not data:
        return False
    result = detect_packer(file_meta, data)
    if result:
        api.store(name, oid, result, opts)
        return True
    return False
Example #27
0
def process(oid, opts):
    """Store the printable characters of the data of ``oid`` by offset.

    Every element of the data whose ordinal lies in the range 32..126
    (inclusive) is recorded as ``{offset: character}``.

    Returns False when the object has no data, True otherwise.
    """
    logger.debug("process()")

    data = api.get_field(api.source(oid), oid, "data")
    if not data:
        return False

    printable = dict(
        (position, char)
        for position, char in enumerate(data)
        if 32 <= ord(char) <= 126
    )

    api.store(name, oid, printable, opts)
    return True
Example #28
0
def process(oid, opts):
    """Parse ``oid`` with ``process_macho`` and store the result.

    Only objects whose ``src_type`` type is "MACHO" or
    "OSX Universal Binary" are handled.

    Returns True when a truthy result was stored; False when the type does
    not match, the data is missing or parsing yields nothing.
    """
    logger.debug("process()")
    src_type = api.retrieve("src_type", oid, {})
    if src_type["type"] not in ("MACHO", "OSX Universal Binary"):
        return False

    raw = api.get_field(api.source(oid), oid, "data", {})
    if not raw:
        logger.debug("Not able to process %s",oid)
        return False

    parsed = process_macho(raw, oid)
    if not parsed:
        return False

    api.store(name, oid, parsed, opts)
    return True
Example #29
0
def process(oid, opts):
    """Parse ``oid`` with ``parse_elf`` and store the result.

    Only objects whose ``src_type`` type is "ELF" are handled.

    Returns True when a truthy result was stored; False when the type does
    not match, the data is missing or parsing yields nothing.
    """
    logger.debug("process()")
    type_info = api.retrieve("src_type", oid, {})
    if type_info["type"] != "ELF":
        return False

    raw = api.get_field(api.source(oid), oid, "data", {})
    if not raw:
        logger.debug("Not able to process %s",oid)
        return False

    parsed = parse_elf(raw, oid)
    if not parsed:
        return False

    api.store(name, oid, parsed, opts)
    return True
Example #30
0
def process(oid, opts):
    """Build the basic blocks of ``oid`` and store them.

    ``build_basic_blocks`` is given the functions from "function_extract",
    the header from "object_header" and the raw data of the object.

    Returns False when any of these three inputs is missing, True once the
    result is stored.
    """
    logger.debug("process()")
    functions = api.retrieve("function_extract", oid)
    if not functions:
        return False

    header = api.get_field("object_header", oid, "header")
    if not header:
        return False

    data = api.get_field(api.source(oid), oid, "data")
    if not data:
        return False

    g = build_basic_blocks(functions, header, data)

    api.store(name, oid, g, opts)
    return True