예제 #1
0
 def PrintEntryRowID(self, RowID):
     """Fetch the entry matching RowID and print it as a column table.

     The entry is selected together with its host name by joining Entries
     against Hosts. Every returned row is rendered in white; no header row
     is emitted.
     """
     query = "SELECT HostName, LastModified, LastUpdate, FilePath, FileName, Size, ExecFlag FROM \
         Entries INNER JOIN Hosts ON Entries.HostID = Hosts.HostID WHERE RowID = '%s'"
     # A cursor is held open for the duration of the lookup, although the
     # query itself goes through self.Query.
     with closing(self.conn.cursor()):
         rows = self.Query(query % RowID)
         table = [('white', row) for row in rows]
     outputcolum(table)
예제 #2
0
 def PrintEntryRowIDList(self, rowIDList):
     """Print all entries whose RowID is listed in rowIDList as a column table.

     rowIDList is joined with commas into the IN (...) clause, so its items
     must already be strings. When rows come back, a cyan header built from
     the first row's _fields precedes the white data rows; when nothing
     comes back an error is logged instead.
     """
     # A cursor is held open for the duration of the lookup, although the
     # query itself goes through self.Query.
     with closing(self.conn.cursor()):
         rows = self.Query(
             "SELECT HostName, LastModified, LastUpdate, FilePath, FileName, Size, ExecFlag FROM \
         Entries INNER JOIN Hosts ON Entries.HostID = Hosts.HostID WHERE RowID IN (%s)"
             % ",".join(rowIDList))
         if len(rows) == 0:
             logger.error("PrintEntryRowIDList - nothing to print!")
         else:
             # Header first, then one white line per row.
             table = [('cyan', list(rows[0]._fields))]
             table.extend(('white', row) for row in rows)
             outputcolum(table)
예제 #3
0
def appSearchMP(dbfilenameFullPath, searchType, search_space, options):
    """Run a search over the database and report the hits.

    When the search is a LITERAL one whose options.searchLiteral[0][0] is not
    '=', '>' or '<', and an index exists for options.field_name, the work is
    delegated to runIndexedSearch. Otherwise the entries are split into
    row-range jobs that are queued for Producer workers, one Consumer worker
    (which receives the output file) is started, and this function reports
    progress until both sides are done.

    Args:
        dbfilenameFullPath: Database path handed to appDB.DBClass and to the
            workers.
        searchType: Search mode. 'LITERAL' and 'KNOWNBAD' get special
            handling here; the value is also forwarded to the workers.
        search_space: Forwarded untouched to runIndexedSearch / the workers.
        options: Options object. outputFile and maxCores are always read;
            field_name and searchLiteral are read for the indexed path and
            knownbad_file for KNOWNBAD searches.

    Returns:
        Tuple (num_hits, num_hits_suppressed, results). results is whatever
        runIndexedSearch returned on the indexed path, otherwise the hit
        histogram rows collected from the consumers.

    Raises:
        AssertionError: if the number of hits dumped by the consumers does
            not match the number of hits found.
    """
    (outputFile, maxCores) = (options.outputFile, options.maxCores)
    known_bad_data = None
    # Start timer
    t0 = time.time()

    DB = appDB.DBClass(dbfilenameFullPath, True, settings.__version__)
    # The connection object itself is not used below; the call is kept for
    # whatever side effects appConnectDB has.
    conn = DB.appConnectDB()

    # If possible use the available indexes
    if hasattr(
            options, 'field_name'
    ) and searchType == 'LITERAL' and options.searchLiteral[0][0] not in [
            '=', '>', '<'
    ] and DB.appIndexExistsDB(options.field_name):
        # The namedtuple classes are used only as attribute holders so the
        # `.value` reads at the end of the function work for both branches.
        num_hits = namedtuple('hits', 'value')
        num_hits_suppressed = namedtuple('hits', 'value')
        (num_hits.value, num_hits_suppressed.value,
         results) = runIndexedSearch(dbfilenameFullPath, search_space, options)

    else:
        # Get total number of entries to search
        entriesCount = DB.CountEntries()
        logger.debug("Total entries in search space: %d" % entriesCount)

        # Pre-load known_bad if required
        if searchType == 'KNOWNBAD':
            known_bad_data = LoadRegexBulkSearch(options.knownbad_file)

        # Establish communication queues
        tasks = multiprocessing.JoinableQueue()
        resultsProducers = multiprocessing.Queue()
        resultsConsumers = multiprocessing.Queue()
        hitHistogram_queue = multiprocessing.Queue()

        # Start producers/consumers
        num_consumers = 1
        num_producers = max(1, maxCores - 1)

        # Prep lock for progress update Producers
        progProducers = multiprocessing.Value('i', 0)
        # Prep lock for progress update Consumers
        progConsumers = multiprocessing.Value('i', 0)
        # Prep Consumers return values
        num_hits = multiprocessing.Value('i', 0)
        num_hits_suppressed = multiprocessing.Value('i', 0)

        logger.debug(
            'Using %d cores for searching / %d cores for dumping results' %
            (num_producers, num_consumers))

        # Queue tasks for Producers
        # Limit rowsPerJob to constrain memory use and ensure reasonable progress updates.
        # Floor division keeps the value an integer, and the lower bound of 1
        # avoids a zero range() step (ValueError) when there are fewer than
        # 8 entries.
        rowsPerJob = max(1, min((entriesCount // 8), 5000))
        logger.debug("RowsPerJob: %d" % rowsPerJob)
        num_tasks = 0
        for startingRowID in range(0, entriesCount - rowsPerJob, rowsPerJob):
            tasks.put(Task(startingRowID, rowsPerJob - 1))
            logger.debug(
                "Creating search job %d: [%d - %d]" %
                (num_tasks, startingRowID, startingRowID + rowsPerJob - 1))
            num_tasks += 1
        logger.debug("Creating search job %d: [%d - %d]" %
                     (num_tasks, num_tasks * (rowsPerJob),
                      ((num_tasks * rowsPerJob) +
                       (entriesCount - (num_tasks * (rowsPerJob) - 1)))))
        # Special consideration for the last one:
        # NOTE(review): num_tasks is not incremented for this final job, so
        # the count logged and used by the progress loop is one less than the
        # number of jobs queued - confirm against how Producer updates
        # progProducers.
        tasks.put(
            Task(num_tasks * (rowsPerJob),
                 (entriesCount - ((num_tasks * rowsPerJob) - 1))))
        logger.debug("Number of tasks: %d" % num_tasks)

        # Add a poison pill for each producer
        for i in xrange(num_producers):
            tasks.put(None)

        # Start producer threads
        producers = [Producer(tasks, resultsProducers, dbfilenameFullPath, progProducers, num_consumers, \
                              searchType, search_space, options, num_hits, known_bad_data) for i in xrange(num_producers)]
        for producer in producers:
            producer.daemon = True  # Remove for debugging
            producer.start()

        # Start consumer threads
        consumers = [Consumer(resultsProducers, resultsConsumers, progConsumers, num_producers, outputFile, \
                              dbfilenameFullPath, searchType, search_space, options, num_hits, \
                              num_hits_suppressed, hitHistogram_queue, known_bad_data) for i in xrange(num_consumers)]
        for consumer in consumers:
            consumer.daemon = True  # Remove for debugging
            consumer.start()

        # Producer progress loop: poll twice a second until the producers'
        # counter reaches num_tasks. The loop also ends if the counter goes
        # negative.
        while (num_tasks > progProducers.value and progProducers.value >= 0):
            logger.debug("Producer num_tasks: %d - v.value: %d" %
                         (num_tasks, progProducers.value))
            update_progress(
                min(1,
                    float(progProducers.value) / float(num_tasks)),
                "Searching [%d]" %
                (num_hits.value - num_hits_suppressed.value))
            time.sleep(0.5)
        update_progress(
            1, "Searching [%d]" % (num_hits.value - num_hits_suppressed.value))

        # Wait for consumers dumping results to finish too
        while (num_hits.value > progConsumers.value
               and progConsumers.value >= 0):
            logger.debug("Consuming hit: %d / %d" %
                         (progConsumers.value, num_hits.value))
            update_progress(
                min(1,
                    float(progConsumers.value) / float(num_hits.value)),
                "Dumping results to disk [%d]" % progConsumers.value)
            time.sleep(0.5)

        # Make sure we dumped as many hits as we found
        assert (num_hits.value == progConsumers.value)
        update_progress(1,
                        "Dumping results to disk [%d]" % progConsumers.value)

        # Track Consumers deaths
        logger.debug("Waiting for consumer reverse-poison pills")
        while num_consumers > 0:
            tmp = resultsConsumers.get()
            # Check for reverse-poison pill
            if tmp is None:
                num_consumers -= 1
                logger.debug("Consumer finished!")
        logger.debug("All consumers accounted for")

        # Wait for consumer threads to finish
        logger.debug("Waiting for consumer threads to finish")
        for consumer in consumers:
            consumer.join()
        logger.debug("Consumer threads finished")

        # Print hit histogram:
        results = []
        results.append(('cyan', ("Hit histogram:", "", "")))
        while not hitHistogram_queue.empty():
            (name, regex, regex_hits) = hitHistogram_queue.get()
            results.append(('white', (name, regex, regex_hits)))
        # Only print when something besides the header row was collected
        if len(results) > 1:
            outputcolum(results)

    # Stop timer
    t1 = time.time()

    logger.info("Search hits: %d" % num_hits.value)
    logger.info("Suppresed duplicate hits: %d" % num_hits_suppressed.value)
    logger.info("Search time: %s" % (str(timedelta(seconds=(t1 - t0)))))

    if num_hits.value:
        logger.info("Head:")
        # Dump head of output file:
        # file_size's result is used as a line count below
        num_lines = file_size(options.outputFile)
        from itertools import islice
        with open(options.outputFile) as myfile:
            head = list(islice(myfile, 5))
        for line in head:
            logger.info(line.strip('\n\r'))
        logger.info("(%d lines suppressed)" % max(0, (num_lines - 5)))

    return (num_hits.value, num_hits_suppressed.value, results)