Example #1
def GetIDForHosts(fileFullPathList, DB):
    # Returns a list of (filePath, instanceID, hostName, hostID, ingest_plugin) tuples
    hostsTest = {}
    hostsProcess = []
    progress_total = 0
    progress_current = 0

    # Determine plugin_type and hostname
    for file_name_fullpath in fileFullPathList:
        hostName = None
        ingest_type = None
        loop_counter = 0
        while True:
            if loop_counter >= len(ingest_plugins_types_stack):
                # Silently skip small/empty files from hosts with no appcompat data
                # todo: Disable this suppression in verbose mode
                tmp_file_size = file_size(file_name_fullpath)
                if tmp_file_size > 500:
                    logger.warning("No ingest plugin could process: %s (skipping file) [size: %d]" %
                                   (ntpath.basename(file_name_fullpath), tmp_file_size))
                break
            ingest_type = ingest_plugins_types_stack[0]
            if ingest_plugins[ingest_type].matchFileNameFilter(file_name_fullpath):
                # Check magic:
                try:
                    magic_check = ingest_plugins[ingest_type].checkMagic(file_name_fullpath)
                    if isinstance(magic_check, tuple):
                        # Unexpected return shape; treat as no-match rather than hitting a NameError below
                        logger.error("checkMagic returned a tuple for %s (please report this bug)" % file_name_fullpath)
                        magic_check_res = False
                    else:
                        magic_check_res = magic_check
                    if magic_check_res:
                        # Magic OK, go with this plugin
                        hostName = ingest_plugins[ingest_type].getHostName(file_name_fullpath)
                        break
                except Exception as e:
                    logger.exception("Error processing: %s (%s)" % (file_name_fullpath, str(e)))
            # Emulate a stack with a list: rotate the failed plugin to the bottom so other plugins are tried first
            ingest_plugins_types_stack.remove(ingest_type)
            ingest_plugins_types_stack.append(ingest_type)
            loop_counter += 1
        if hostName is not None:
            hostsTest.setdefault(hostName, []).append((file_name_fullpath, ingest_plugins[ingest_type]))

    progress_total = len(hostsTest)
    # Iterate over hosts. If host exists in DB grab rowID else create and grab rowID.
    conn = DB.appGetConn()
    with closing(conn.cursor()) as c:
        for hostName in hostsTest:
            assert hostName
            logger.debug("Processing host: %s" % hostName)
            # Check if Host exists
            c.execute("SELECT count(*) FROM Hosts WHERE HostName = '%s'" % hostName)
            data = c.fetchone()[0]
            if (data != 0):
                # Host already has at least one instance in the DB
                c.execute("SELECT HostID, Instances FROM Hosts WHERE HostName = '%s'" % hostName)
                data = c.fetchone()
                tmpHostID = data[0]
                tmpInstances = ast.literal_eval(data[1])  # safer than eval(); assumes 'import ast' at module level
                for (file_fullpath, ingest_plugin) in hostsTest[hostName]:
                    logger.debug("Grabbing instanceID from file: %s" % file_fullpath)
                    try:
                        instance_ID = CalculateInstanceID(file_fullpath, ingest_plugin)
                    except Exception:
                        logger.error("Error parsing: %s (skipping)" % file_fullpath)
                        traceback.print_exc(file=sys.stdout)
                    else:
                        if str(instance_ID) not in tmpInstances:
                            tmpInstances.append(str(instance_ID))
                            hostsProcess.append((file_fullpath, instance_ID, hostName, tmpHostID, ingest_plugin))
                        else:
                            logger.debug("Duplicate host and instance found: %s" % hostName)
                # Save updated Instances list
                c.execute("UPDATE Hosts SET Instances = %s, InstancesCounter = %d WHERE HostName = '%s'" % ('"' + str(repr(tmpInstances)) + '"', len(tmpInstances), hostName))
            else:
                # Host does not exist. Add instance and grab the host ID.
                tmpInstances = []
                newInstances = []
                for (file_fullpath, ingest_plugin) in hostsTest[hostName]:
                    try:
                        instance_ID = CalculateInstanceID(file_fullpath, ingest_plugin)
                    except Exception:
                        logger.error("Error parsing: %s (skipping)" % file_fullpath)
                        traceback.print_exc(file=sys.stdout)
                    else:
                        if str(instance_ID) not in tmpInstances:
                            tmpInstances.append(str(instance_ID))
                            newInstances.append((file_fullpath, instance_ID, ingest_plugin))

                c.execute("INSERT INTO Hosts VALUES (NULL,%s,%s,%d,%d,%d)" % ('"' + hostName + '"', '"' + str(repr(tmpInstances)) + '"', len(tmpInstances), 0, 0))
                tmpHostID = c.lastrowid
                for (file_fullpath, instance_ID, ingest_plugin) in newInstances:
                    # todo: Do we want/need each row to track from what instance it came?
                    hostsProcess.append((file_fullpath, instance_ID, hostName, tmpHostID, ingest_plugin))
            # Update progress
            progress_current += 1
            if settings.logger_getDebugMode():
                status_extra_data = " [RAM: %d%%]" % psutil_phymem_usage()
            else:
                status_extra_data = ""
            # logger.debug("Pre-process new hosts/instances%s" % status_extra_data)
            logger.info(update_progress(min(1, float(progress_current) / float(progress_total)),
                                        "Calculate IDs for new hosts/instances%s" % status_extra_data, True))
        conn.commit()

    # Return hosts to be processed
    return hostsProcess
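
The deduplication above hinges on CalculateInstanceID returning a stable identifier for a given (file, plugin) pair. The project's real implementation is not shown here; the following is a minimal sketch under that assumption, using a hypothetical per-plugin calculateID hook with a content-hash fallback:

import hashlib

def CalculateInstanceID(file_fullpath, ingest_plugin):
    # Sketch only, not the project's implementation: prefer a plugin-provided
    # ID; otherwise hash the file so re-ingested data maps to the same ID.
    if hasattr(ingest_plugin, 'calculateID'):  # hypothetical plugin hook
        return ingest_plugin.calculateID(file_fullpath)
    hasher = hashlib.md5()
    with open(file_fullpath, 'rb') as f:
        for chunk in iter(lambda: f.read(65536), b''):
            hasher.update(chunk)
    return hasher.hexdigest()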
Example #2
def appSearchMP(dbfilenameFullPath, searchType, search_space, options):
    (outputFile, maxCores) = (options.outputFile, options.maxCores)
    known_bad_data = None
    # Start timer
    t0 = time.time()

    DB = appDB.DBClass(dbfilenameFullPath, True, settings.__version__)
    conn = DB.appConnectDB()

    # If possible use the available indexes
    if (hasattr(options, 'field_name') and searchType == 'LITERAL'
            and options.searchLiteral[0][0] not in ['=', '>', '<']
            and DB.appIndexExistsDB(options.field_name)):
        # Lightweight stand-ins that mimic the .value interface of the
        # multiprocessing.Value counters used in the parallel branch below
        num_hits = namedtuple('hits', 'value')
        num_hits_suppressed = namedtuple('hits', 'value')
        (num_hits.value, num_hits_suppressed.value,
         results) = runIndexedSearch(dbfilenameFullPath, search_space, options)

    else:
        # Get total number of entries to search
        entriesCount = DB.CountEntries()
        logger.debug("Total entries in search space: %d" % entriesCount)

        # Pre-load known_bad if required
        if searchType == 'KNOWNBAD':
            known_bad_data = LoadRegexBulkSearch(options.knownbad_file)

        # Establish communication queues
        tasks = multiprocessing.JoinableQueue()
        resultsProducers = multiprocessing.Queue()
        resultsConsumers = multiprocessing.Queue()
        hitHistogram_queue = multiprocessing.Queue()

        # Start producers/consumers
        num_consumers = 1
        num_producers = max(1, maxCores - 1)

        # Prep lock for progress update Producers
        progProducers = multiprocessing.Value('i', 0)
        # Prep lock for progress update Consumers
        progConsumers = multiprocessing.Value('i', 0)
        # Prep Consumers return values
        num_hits = multiprocessing.Value('i', 0)
        num_hits_suppressed = multiprocessing.Value('i', 0)

        logger.debug(
            'Using %d cores for searching / %d cores for dumping results' %
            (num_producers, num_consumers))

        # Queue tasks for Producers
        # Cap rowsPerJob to constrain memory use and keep progress updates frequent;
        # floor it at 1 so range() below never gets a zero step on tiny databases
        rowsPerJob = max(1, min(entriesCount // 8, 5000))
        logger.debug("RowsPerJob: %d" % rowsPerJob)
        num_tasks = 0
        for startingRowID in range(0, entriesCount - rowsPerJob, rowsPerJob):
            tasks.put(Task(startingRowID, rowsPerJob - 1))
            logger.debug(
                "Creating search job %d: [%d - %d]" %
                (num_tasks, startingRowID, startingRowID + rowsPerJob - 1))
            num_tasks += 1
        logger.debug("Creating search job %d: [%d - %d]" %
                     (num_tasks, num_tasks * (rowsPerJob),
                      ((num_tasks * rowsPerJob) +
                       (entriesCount - (num_tasks * (rowsPerJob) - 1)))))
        # Special consideration for the last one:
        tasks.put(
            Task(num_tasks * (rowsPerJob),
                 (entriesCount - ((num_tasks * rowsPerJob) - 1))))
        logger.debug("Number of tasks: %d" % num_tasks)

        # Add a poison pill for each producer
        for i in xrange(num_producers):
            tasks.put(None)

        # Start producer threads
        producers = [Producer(tasks, resultsProducers, dbfilenameFullPath, progProducers, num_consumers, \
                              searchType, search_space, options, num_hits, known_bad_data) for i in xrange(num_producers)]
        for producer in producers:
            producer.daemon = True  # Remove for debugging
            producer.start()

        # Start consumer threads
        consumers = [Consumer(resultsProducers, resultsConsumers, progConsumers, num_producers, outputFile, \
                              dbfilenameFullPath, searchType, search_space, options, num_hits, \
                              num_hits_suppressed, hitHistogram_queue, known_bad_data) for i in xrange(num_consumers)]
        for consumer in consumers:
            consumer.daemon = True  # Remove for debugging
            consumer.start()

        # Producer progress loop
        while (num_tasks > progProducers.value and progProducers.value >= 0):
            logger.debug("Producer num_tasks: %d - v.value: %d" %
                         (num_tasks, progProducers.value))
            update_progress(
                min(1,
                    float(progProducers.value) / float(num_tasks)),
                "Searching [%d]" %
                (num_hits.value - num_hits_suppressed.value))
            time.sleep(0.5)
        update_progress(
            1, "Searching [%d]" % (num_hits.value - num_hits_suppressed.value))

        # Wait for consumers dumping results to finish too
        while (num_hits.value > progConsumers.value
               and progConsumers.value >= 0):
            logger.debug("Consuming hit: %d / %d" %
                         (progConsumers.value, num_hits.value))
            update_progress(
                min(1,
                    float(progConsumers.value) / float(num_hits.value)),
                "Dumping results to disk [%d]" % progConsumers.value)
            time.sleep(0.5)

        # Make sure we dumped as many hits as we found
        assert (num_hits.value == progConsumers.value)
        update_progress(1,
                        "Dumping results to disk [%d]" % progConsumers.value)

        # Track Consumers deaths
        logger.debug("Waiting for consumer reverse-poison pills")
        while num_consumers > 0:
            tmp = resultsConsumers.get()
            # Check for reverse-poison pill
            if tmp is None:
                num_consumers -= 1
                logger.debug("Consumer finished!")
        logger.debug("All consumers accounted for")

        # Wait for consumer threads to finish
        logger.debug("Waiting for consumer threads to finish")
        for consumer in consumers:
            consumer.join()
        logger.debug("Consumer threads finished")

        # Print hit histogram:
        results = []
        results.append(('cyan', ("Hit histogram:", "", "")))
        while not hitHistogram_queue.empty():
            (name, regex, regex_hits) = hitHistogram_queue.get()
            results.append(('white', (name, regex, regex_hits)))
        if len(results) > 1:
            outputcolum(results)

    # Stop timer
    t1 = time.time()

    logger.info("Search hits: %d" % num_hits.value)
    logger.info("Suppresed duplicate hits: %d" % num_hits_suppressed.value)
    logger.info("Search time: %s" % (str(timedelta(seconds=(t1 - t0)))))

    if num_hits.value:
        logger.info("Head:")
        # Dump the head of the output file and count its lines in one pass
        # (file_size returns bytes, so it cannot be used as a line count):
        from itertools import islice
        with open(options.outputFile) as myfile:
            head = list(islice(myfile, 5))
            num_lines = len(head) + sum(1 for _ in myfile)
        for line in head:
            logger.info(line.strip('\n\r'))
        logger.info("(%d lines suppressed)" % max(0, (num_lines - 5)))

    return (num_hits.value, num_hits_suppressed.value, results)
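
The parallel branch above shuts down through poison pills in both directions: the main process queues one None task per producer, and the main loop later counts one None "reverse-poison pill" per consumer on resultsConsumers. A minimal sketch of worker classes that fit this handshake (shapes assumed; the real Producer/Consumer also carry the DB handle, search parameters and progress counters):

import multiprocessing

class Producer(multiprocessing.Process):
    # Sketch only: scans assigned row ranges and forwards hits to consumers
    def __init__(self, tasks, results):
        multiprocessing.Process.__init__(self)
        self.tasks = tasks
        self.results = results

    def run(self):
        while True:
            task = self.tasks.get()
            if task is None:               # poison pill from the main process
                self.tasks.task_done()
                self.results.put(None)     # signal consumers this producer is done
                break
            # ... search the rows described by task, put hits on self.results ...
            self.tasks.task_done()

class Consumer(multiprocessing.Process):
    # Sketch only: dumps hits until every producer has signed off
    def __init__(self, results, done_queue, num_producers):
        multiprocessing.Process.__init__(self)
        self.results = results
        self.done_queue = done_queue
        self.num_producers = num_producers

    def run(self):
        finished_producers = 0
        while finished_producers < self.num_producers:
            hit = self.results.get()
            if hit is None:
                finished_producers += 1
                continue
            # ... write the hit to the output file ...
        self.done_queue.put(None)          # reverse-poison pill for the main loop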
Example #3
def parseManifestAuditFileName(jsondata, zip_archive_filename):
    # Parse manifest.json data and return files which will need to be processed
    file_list = []
    m = re.match(r'^.*(?:\\|\/)(.*)[-_].{22}\..{3}$', zip_archive_filename)
    if m:
        hostname = m.group(1)
        data = json.load(jsondata)
        if 'audits' in data:
            for audit in data['audits']:
                generator = audit['generator']
                if 'sysinfo' in generator:
                    continue
                if 'install' not in generator:
                    # The four original branches only differed in which result
                    # MIME types they kept; collapse them into a single filter.
                    if 'registry-api' in generator or 'w32registryapi' in generator:
                        keep_result = lambda r: 'application/xml' in r['type']
                    elif ('plugin-execute' in generator
                          or 'w32scripting-persistence' in generator
                          or 'file-acquisition' in generator):
                        keep_result = lambda r: 'application/vnd.mandiant.issues+xml' not in r['type']
                    else:
                        keep_result = None
                    if keep_result is not None:
                        for result in audit['results']:
                            if keep_result(result):
                                file_list.append(
                                    (os.path.join(zip_archive_filename, result['payload']),
                                     os.path.join(zip_archive_filename,
                                                  hostname + "_" + result['payload'] + ".xml")))
                    # elif 'plugin' not in audit['generator'] and len(audit['results']) == 1:
                    #     file_list.append((os.path.join(zip_archive_filename, audit['results'][0]['payload']), os.path.join(zip_archive_filename, hostname+"_"+audit['results'][0]['payload']+".xml")))
                    # elif 'plugin' in audit['generator'] and len(audit['results']) <= 1:
                    #     pass
                    # elif 'plugin' in audit['generator'] and len(audit['results']) == 2:
                    #     file_list.append((os.path.join(zip_archive_filename, audit['results'][1]['payload']), os.path.join(zip_archive_filename, hostname+"_"+audit['results'][0]['payload']+".xml")))
                    # else:
                    #     logger.error("Unknown result type/format on HX audit manifest.json: %s (please report!)" % zip_archive_filename)
        else:
            logger.warning(
                "HX script execution failed for host: %s, ignoring" % hostname)
    else:
        logger.error(
            "Unable to extract hostname on parseManifestAuditFileName: %s" %
            zip_archive_filename)

    if not file_list:
        logger.warning(
            "No processable files found in manifest.json (likely a failed script run) for: %s [%d bytes]"
            % (zip_archive_filename, file_size(zip_archive_filename)))
    return file_list
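
A usage sketch follows; the archive path is hypothetical (shaped so the hostname regex above matches), and manifest.json is read directly from the zip:

import zipfile

archive = '/cases/WS-HOST01-1234567890abcdefghijkl.zip'  # hypothetical path
with zipfile.ZipFile(archive) as z:
    manifest = z.open('manifest.json')
    for payload_path, target_name in parseManifestAuditFileName(manifest, archive):
        print("%s -> %s" % (payload_path, target_name))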