Example #1
0
    def test_MPEngine_dropProducers(self):
        """Exercise dynamic producer add/remove on MPEngineProdCons.

        Starts 4 producers, adds two and removes two (net count back to 4),
        then runs a single consumer to completion, rebalancing once a second.
        Any exception from the engine fails the test with a full traceback.
        """
        try:
            # print() call form for consistency with the progress line below
            # (identical output for a single argument on Python 2).
            print("Starting test")
            maxCores = 6

            mpe = MPEngineProdCons(maxCores, WkrTestProd, WkrTestCons)
            # Queue 9 dummy tasks (1..9); list() instead of a copy-comprehension.
            task_list = list(xrange(1, 10))
            mpe.addTaskList(task_list)

            mpe.startProducers(4)
            mpe.addProducer()
            mpe.addProducer()
            mpe.removeProducer()
            mpe.removeProducer()
            mpe.startConsumers(1)

            # Poll until all tasks have flowed through producers and consumers.
            while mpe.working():
                print("Prod: %d / Cons: %d | %s -> %s -> %s" %
                      mpe.getProgress())
                time.sleep(1)
                mpe.rebalance()

            print("Test ended")
        except Exception:
            traceback.print_exc(file=sys.stdout)
            self.fail("Exception triggered")

        # Pass marker. assertEqual: assertEquals is a deprecated alias
        # (removed in Python 3.12).
        self.assertEqual(1, 1, "test_MPEngine")
Example #2
0
    def test_MPEngine_regressionBug(self):
        """Regression test: end, re-add, then restart producers/consumers.

        Drives MPEngineProdCons through an end -> add -> restart sequence
        (with sleeps to let worker processes settle), then waits for all
        work to drain. Any exception from the engine fails the test.
        """
        try:
            # print() call form for consistency with the progress line below
            # (identical output for a single argument on Python 2).
            print("Starting test")
            maxCores = 4

            mpe = MPEngineProdCons(maxCores, WkrTestProd, WkrTestCons)
            # Queue 7 dummy tasks (1..7); list() instead of a copy-comprehension.
            task_list = list(xrange(1, 8))
            mpe.addTaskList(task_list)

            mpe.startProducers(3)
            time.sleep(2)
            mpe.startConsumers(1)
            time.sleep(2)
            # Tear workers down mid-run, then bring them back one at a time.
            mpe.endProducers()
            mpe.endConsumers()
            time.sleep(2)
            mpe.addConsumer()
            mpe.addProducer()
            time.sleep(2)
            mpe.addProducer()
            time.sleep(2)
            logger.debug("restartConsumers")
            mpe.restartConsumers()
            logger.debug("restartProducers")
            mpe.restartProducers()
            time.sleep(2)

            # Poll until all tasks have flowed through producers and consumers.
            while mpe.working():
                print("Prod: %d / Cons: %d | %s -> %s -> %s" %
                      mpe.getProgress())
                time.sleep(1)
                # mpe.rebalance()

            # Explicitly drop the engine so its teardown runs inside the try.
            del mpe
            print("Test ended")
        except Exception:
            traceback.print_exc(file=sys.stdout)
            self.fail("Exception triggered")

        # Pass marker. assertEqual: assertEquals is a deprecated alias
        # (removed in Python 3.12).
        self.assertEqual(1, 1, "test_MPEngine")
Example #3
0
def appLoadMP(pathToLoad, dbfilenameFullPath, maxCores, governorOffFlag):
    """Scan pathToLoad for ingestable files and load them into the DB in parallel.

    pathToLoad: file or directory to scan (a 'RedlineAudits' directory gets
        special handling; a non-directory is treated as an archive).
    dbfilenameFullPath: path to the application DB file; the DB is
        initialized here and handed to consumer workers.
    maxCores: core budget passed to MPEngineProdCons.
    governorOffFlag: passed through to MPEngineProdCons
        (presumably disables its throttling governor -- TODO confirm).
    """
    # NOTE(review): this function rebinds the module-level _tasksPerJob for
    # small jobs below; the change persists across subsequent calls.
    global _tasksPerJob

    files_to_process = []
    conn = None

    # Start timer
    t0 = datetime.now()

    logger.debug("Starting appLoadMP")
    # Calculate aggreagate file_filter for all ingest types supported:
    file_filter = '|'.join([v.getFileNameFilter() for k,v in ingest_plugins.iteritems()])
    # Add zip extension
    file_filter += "|.*\.zip"

    # Check if we're loading Redline data
    # (case-insensitive match on the directory's base name)
    if os.path.isdir(pathToLoad) and os.path.basename(pathToLoad).lower() == 'RedlineAudits'.lower():
        files_to_process = searchRedLineAudits(pathToLoad)
    else:
        # Search for all files to be processed
        if os.path.isdir(pathToLoad):
            files_to_process = searchFolders(pathToLoad, file_filter)
        else:
            # Not a directory: assume pathToLoad is an archive to expand/scan.
            files_to_process = processArchives(pathToLoad, file_filter)

    if files_to_process:
        # Init DB if required
        DB = appDB.DBClass(dbfilenameFullPath, True, settings.__version__)
        conn = DB.appConnectDB()

        # Extract hostnames, grab existing host IDs from DB and calculate instance ID for new IDs to be ingested:
        instancesToProcess = []
        instancesToProcess += GetIDForHosts(files_to_process, DB)
        countInstancesToProcess = len(instancesToProcess)
        logger.info("Found %d new instances" % (countInstancesToProcess))

        # Setup producers/consumers initial counts
        num_consumers = 1
        num_producers = 1

        # Setup MPEngine
        mpe = MPEngineProdCons(maxCores, appLoadProd, appLoadCons, governorOffFlag)

        # Reduce _tasksPerJob for small jobs
        # NOTE(review): drops the global to 1 (not to countInstancesToProcess)
        # whenever the job is smaller than one chunk -- confirm intended.
        if countInstancesToProcess < _tasksPerJob: _tasksPerJob = 1

        # Create task list
        # Each Task carries pathToLoad plus one chunk of instance IDs.
        task_list = []
        instancesPerJob = _tasksPerJob
        num_tasks = 0
        for chunk in chunks(instancesToProcess, instancesPerJob):
            # todo: We no longer need pathToLoad as tasks include the fullpath now
            task_list.append(Task(pathToLoad, chunk))
            num_tasks += 1

        if num_tasks > 0:
            # Check if we have to drop indexes to speedup insertions
            # todo: Research ratio of existing hosts to new hosts were this makes sense
            if countInstancesToProcess > 1000 or DB.CountHosts() < 1000:
                DB.appDropIndexesDB()

            # Queue tasks for Producers
            mpe.addTaskList(task_list)

            # Start procs
            # Consumers receive the DB path so each worker opens its own connection.
            mpe.startProducers(num_producers)
            mpe.startConsumers(num_consumers, [dbfilenameFullPath])
            # mpe.addProducer()

            # Control loop
            # Polls progress once a second, logs an ETA, and rebalances workers.
            while mpe.working():
                time.sleep(1.0)
                # getProgress() rebinds num_producers/num_consumers/num_tasks
                # with the engine's live counts.
                (num_producers,num_consumers,num_tasks,progress_producers,progress_consumers) = mpe.getProgress()
                elapsed_time = datetime.now() - t0
                # max(1, ...) guards the division before any consumer finishes.
                mean_loadtime_per_host = (elapsed_time) / max(1, _tasksPerJob * progress_consumers)
                pending_hosts = ((num_tasks * _tasksPerJob) - (_tasksPerJob * progress_consumers))
                etr = (mean_loadtime_per_host * pending_hosts)  # estimated time remaining
                eta = t0 + elapsed_time + etr                   # wall-clock finish estimate
                ett = (eta - t0)                                # estimated total time
                # Estimates are only shown once >100 hosts are done; before
                # that the mean is too noisy, so "N/A" is printed instead.
                if settings.logger_getDebugMode(): status_extra_data = " Prod: %s Cons: %s (%d -> %d -> %d: %d) [RAM: %d%% / Obj: %d / ETH: %s / ETA: %s / ETT: %s]" % \
                                                                       (num_producers, num_consumers, num_tasks, progress_producers, progress_consumers, progress_producers - progress_consumers,
                     psutil_phymem_usage(), len(gc.get_objects()),
                     mean_loadtime_per_host if progress_consumers * _tasksPerJob > 100 else "N/A",
                     str(eta.time()).split(".")[0] if progress_consumers * _tasksPerJob > 100 else "N/A",
                     str(ett).split(".")[0] if progress_consumers * _tasksPerJob > 100 else "N/A")
                else: status_extra_data = ""
                # logger.info("Parsing files%s" % status_extra_data)

                # min(1, ...) caps the progress bar fraction at 100%.
                logger.info(update_progress(min(1,float(progress_consumers) / float(num_tasks)), "Parsing files%s" % status_extra_data, True))
                mpe.rebalance()

            del mpe

        # Stop timer
        elapsed_time = datetime.now() - t0
        mean_loadtime_per_host = (elapsed_time) / max(1, countInstancesToProcess)
        logger.info("Load speed: %s seconds / file" % (mean_loadtime_per_host))
        logger.info("Load time: %s" % (str(elapsed_time).split(".")[0]))
    else:
        logger.info("Found no files to process!")