Example #1
0
def runIndexedSearch(dbfilenameFullPath, search_space, options):
    """Run an indexed literal search against the Entries_FilePaths view.

    Args:
        dbfilenameFullPath: path of the sqlite database to open.
        search_space: column name to match against (e.g. FileName, FilePath).
            NOTE(review): interpolated into SQL unescaped — presumably always
            an internally supplied column name; confirm callers never pass
            user input here.
        options: parsed CLI options; uses options.searchLiteral[0] as the
            search term and options.outputFile as the text output path (a
            sibling ".mmd" markdown file is written next to it).

    Returns:
        Tuple (num_hits, num_hits_suppressed, results). Suppression is not
        implemented yet, so the second element is always 0 and results is [].
    """
    # todo: Handle duplicate hit supression
    logger.info("Performing indexed search")
    DB = appDB.DBClass(dbfilenameFullPath, True, settings.__version__)
    DB.appInitDB()
    DB.appConnectDB()

    searchTerm = options.searchLiteral[0]
    numHits = 0
    # Double embedded single quotes (SQL escaping) so a term containing "'"
    # cannot terminate the literal and break/alter the query.
    escapedTerm = searchTerm.replace("'", "''")
    # Run actual indexed query
    data = DB.Query("SELECT RowID FROM Entries_FilePaths WHERE %s == '%s';" %
                    (search_space, escapedTerm))
    if not data:
        return (0, 0, [])

    # Write the plain-text report and its markdown twin side by side.
    with open(options.outputFile, "w") as text_file:
        with open(
                os.path.join(
                    ntpath.dirname(options.outputFile),
                    ntpath.splitext(options.outputFile)[0] + ".mmd"),
                "w") as markdown_file:
            for row in data:
                record = retrieveSearchData(row[0], DB, search_space)
                saveSearchData(record, None, None, text_file,
                               markdown_file)
                numHits += 1

    return (numHits, 0, [])
    def test_Stack_Generic01(self):
        # Cross-check the "stack" command against "search"/"fsearch": every
        # sampled FileName bucket from stacking must report the same count
        # when searched for directly, and the bucket counts must sum to the
        # total entry count reported by "status".
        with appDB.DBClass(self.testset1, settings.__version__) as DB:
            DB.appInitDB()
            conn = DB.appConnectDB()

            # Run
            (db_filenameFullPath, db_version, db_count, num_instances,
             num_entries) = main([self.testset1, "status"])
            ret = main([self.testset1, "stack", "FileName"])

        # ret[1:] skips the first row (presumably a header); from the
        # remaining (count, file_name) pairs, sample indexes 1..9 only.
        for item_count, item_file_name in [(int(i[1][0]), i[1][1])
                                           for i in ret[1:]][1:10]:
            print "Checking: " + item_file_name
            # Literal search with a leading backslash anchors the exact name.
            (num_hits, num_hits_suppressed, results) = main(
                [self.testset1, "search", "-F", '\\' + item_file_name])
            self.assertEquals(num_hits, item_count,
                              "test_Stack_Generic01 failed!")
            # Indexed search on FileName must agree with the stack count too.
            (num_hits2, num_hits_suppressed2, results2) = main([
                self.testset1, "fsearch", "FileName", "-F",
                "=" + item_file_name
            ])
            self.assertEquals(num_hits2, item_count,
                              "test_Stack_Generic01 failed!")

        # Check total entry count from stacking on FileName = total # entries.
        count = sum([int(i[1][0]) for i in ret[1:]])
        self.assertEquals(count, num_entries, "test_Stack_Generic01 failed!")
Example #3
0
    def test_Filehitcount1(self):
        # "filehitcount" should count FileName matches only: an entry whose
        # FilePath merely contains the searched string must not register.
        with appDB.DBClass(self.testset1, settings.__version__) as DB:
            DB.appInitDB()
            conn = DB.appConnectDB()

            entry_fields = settings.EntriesFields(EntryType=settings.__APPCOMPAT__, FilePath='C:\Temp', FileName='test123.exe')
            add_entry(DB, "TestHost01", entry_fields)
            # Near-miss name: must not count toward the exact-match total.
            entry_fields = settings.EntriesFields(EntryType=settings.__APPCOMPAT__, FilePath='C:\Temp', FileName='test1234.exe')
            add_entry(DB, "TestHost01", entry_fields)
            # Decoy: the searched string appears in FilePath only.
            entry_fields = settings.EntriesFields(EntryType=settings.__APPCOMPAT__, FilePath='C:\\test123.exe', FileName='nohit.exe')
            add_entry(DB, "TestHost01", entry_fields)

            # Get temp db name for the test
            # close() deletes the NamedTemporaryFile; its name is then reused
            # below for a throwaway input file holding the name(s) to count.
            temp_file = tempfile.NamedTemporaryFile(suffix='.db', prefix='testCase', dir=tempfile.gettempdir())
            temp_file.close()
            with open(temp_file.name, 'w') as fh:
                fh.write('test123.exe')

            try:
                ret = main([self.testset1, "filehitcount", temp_file.name])
            except Exception as e:
                print traceback.format_exc()
                self.fail(e.message + "\n" + traceback.format_exc())

            # Remove temp file
            os.remove(temp_file.name)

            num_hits = len(ret)
            self.assertEquals(num_hits, 2, sys._getframe().f_code.co_name)
            # Exactly one hit for 'test123.exe': the exact FileName match.
            self.assertEquals(ret[1][1][1][0], 'test123.exe', "test_Tstomp1 failed!")
            self.assertEquals(int(ret[1][1][1][1]), 1, "test_Tstomp1 failed!")
Example #4
0
    def test_AppCompat_LiteralSearchNoHits(self):
        # Build a random 20-char uppercase name that should not exist in the
        # test database.
        name_chars = []
        for _ in range(20):
            name_chars.append(random.choice(string.ascii_uppercase))
        missing_name = ''.join(name_chars)

        # Touch the test database so it is initialized before searching.
        with appDB.DBClass(self.testset1, settings.__version__) as DB:
            DB.appInitDB()
            conn = DB.appConnectDB()

        # Get temp file name for the DB
        with tempfile.NamedTemporaryFile(
                suffix='.txt',
                prefix='test_AppCompat_LiteralSearch',
                dir=tempfile.gettempdir()) as temp_file:
            # Search for the nonexistent literal.
            search_result = main([
                "-o", temp_file.name, self.testset1, "search", "-F",
                missing_name
            ])
            (num_hits, num_hits_suppressed, results) = search_result
            # Check we got at least as many as we added into the DB
            self.assertTrue(
                num_hits == 0,
                sys._getframe().f_code.co_name + " num_hits: %d" % num_hits)
            # Check output has the expected result
            self.assertEquals(
                num_hits - num_hits_suppressed,
                self.count_lines_regex(temp_file.name, missing_name),
                sys._getframe().f_code.co_name +
                " Output regex count doesn't match num_hits!")
    def test_Stack(self):
        # Stack on FileName and verify the bucket for our random file holds
        # exactly the ten entries inserted below.
        stacked_name = ''.join(
            random.choice(string.ascii_uppercase) for _ in range(15))
        with appDB.DBClass(self.testset1, settings.__version__) as DB:
            DB.appInitDB()
            conn = DB.appConnectDB()

            # Add stuff to stack
            for size in xrange(0, 10):
                fields = settings.EntriesFields(
                    EntryType=settings.__APPCOMPAT__,
                    FilePath='C:\Windows',
                    FileName=stacked_name,
                    Size=size,
                    ExecFlag='True')
                add_entry(DB, "TestHost01", fields)

            # Run
            ret = main([
                self.testset1, "stack", "FileName", "FilePath = 'c:\Windows'"
            ])

        # Check status count == db count
        matching_counts = [row[1][0] for row in ret if stacked_name in row[1]]
        count = int(matching_counts[0])
        self.assertEquals(count, 10, "test_Stack failed!")
    def test_TStack(self):
        # Exercise "tstack" (temporal stacking): entries whose LastModified
        # falls inside the supplied window count as "in", others as "out".
        rndFileName = 'randomfilename.rnd'
        with appDB.DBClass(self.testset1, settings.__version__) as DB:
            DB.appInitDB()
            conn = DB.appConnectDB()

            # Add stuff to stack
            # Ten entries stamped 1000-01-01 00:00:00 .. 00:00:09, all inside
            # the window queried below.
            for i in xrange(0, 10):
                entry_fields = settings.EntriesFields(
                    EntryType=settings.__APPCOMPAT__,
                    FilePath='C:\Windows',
                    FileName=rndFileName,
                    Size=i,
                    LastModified='1000-01-01 00:00:0' + str(i))
                add_entry(DB, "TestHost01", entry_fields)

            # Run
            ret = main([self.testset1, "tstack", '1000-01-01', '1000-01-02'])

        # Check we found the right file
        self.assertEquals(ret[1][1][0], rndFileName, "test_TStack failed!")
        # Check expected in count
        self.assertEquals(int(ret[1][1][1]), 10, "test_TStack failed!")
        # Check expected out count
        self.assertEquals(int(ret[1][1][2]), 0, "test_TStack failed!")
Example #7
0
    def test_AmCache_LiteralSearch(self):
        # Literal search must also surface AmCache-typed entries (not only
        # AppCompat ones).
        with appDB.DBClass(self.testset1, settings.__version__) as DB:
            DB.appInitDB()
            conn = DB.appConnectDB()

            # Seed ten AmCache entries for calc.exe (distinct sizes so they
            # are not collapsed as duplicates).
            for i in xrange(0, 10):
                entry_fields = settings.EntriesFields(
                    EntryType=settings.__AMCACHE__,
                    FilePath='C:\Temp',
                    FileName='calc.exe',
                    Size=i,
                    ExecFlag='True')
                add_entry(DB, "TestHost01", entry_fields)

        # Get temp file name for the DB
        with tempfile.NamedTemporaryFile(
                suffix='.txt', prefix='Output',
                dir=tempfile.gettempdir()) as temp_file:
            # Search
            (num_hits, num_hits_suppressed, results) = main([
                "-o", temp_file.name, self.testset1, "search", "-F", "calc.exe"
            ])
            # Check we got at least as many as we added into the DB
            # (>= because the fake test set may already contain calc.exe).
            self.assertTrue(
                num_hits >= 10,
                sys._getframe().f_code.co_name + " num_hits: %d" % num_hits)
            # Check output has the expected result
            self.assertEquals(
                num_hits, self.count_lines_regex(temp_file.name, "calc\.exe"),
                sys._getframe().f_code.co_name +
                " Output regex count doesn't match num_hits!")
Example #8
0
    def test_MPEngine_ConsumerSimple(self):
        # Smoke-test MPEngineProdCons consumer lifecycle: add a DB-backed
        # consumer, let it spin briefly, then remove it. Passing means no
        # exception was raised along the way.
        try:
            # Get temp db name for the test
            tempdb = tempfile.NamedTemporaryFile(suffix='.db',
                                                 prefix='testCase',
                                                 dir=tempfile.gettempdir())
            tempdb.close()
            dbfilenameFullPath = tempdb.name
            with appDB.DBClass(dbfilenameFullPath, settings.__version__) as DB:
                DB.appInitDB()

            print "Starting test"
            # NOTE(review): first argument presumably the worker count —
            # confirm against MPEngineProdCons.
            mpe = MPEngineProdCons(6, WkrTestProd, WkrTestConsDB)
            # Add tasks
            task_list = [i for i in xrange(1, 5)]
            mpe.addTaskList(task_list)

            # Give the consumer a second to come up before tearing it down.
            mpe.addConsumer([dbfilenameFullPath])
            time.sleep(1)
            mpe.removeConsumer()

            del mpe
            print "Test ended"
        except Exception:
            traceback.print_exc(file=sys.stdout)
            self.fail("Exception triggered")

        # Pass
        self.assertEquals(1, 1, "test_MPEngine")
Example #9
0
    def test_AppCompat_IndexedSearch2(self):
        # Indexed ("fsearch") and standard ("search") literal searches must
        # agree: same hit counts and byte-identical output files.
        rndFileName = ''.join(
            random.choice(string.ascii_uppercase) for _ in range(20))
        with appDB.DBClass(self.testset1, settings.__version__) as DB:
            DB.appInitDB()
            conn = DB.appConnectDB()

            # Twenty entries with a unique random FileName to search for.
            for i in xrange(0, 20):
                entry_fields = settings.EntriesFields(
                    EntryType=settings.__APPCOMPAT__,
                    FilePath='C:\Temp',
                    FileName=rndFileName,
                    Size=i,
                    ExecFlag='True')
                add_entry(DB, "TestHost01", entry_fields)

        # Get temp file name for the DB
        with tempfile.NamedTemporaryFile(
                suffix='.txt',
                prefix='test_AppCompat_IndexedSearch',
                dir=tempfile.gettempdir()) as temp_file_indexed:
            with tempfile.NamedTemporaryFile(
                    suffix='.txt',
                    prefix='test_AppCompat_NormalSearch',
                    dir=tempfile.gettempdir()) as temp_file_normal:
                # Indexed Search
                (num_hits, num_hits_suppressed, results) = main([
                    "-o", temp_file_indexed.name, self.testset1, "fsearch",
                    "FileName", "-F", rndFileName
                ])
                # Standard Search
                # Leading backslash anchors the literal to the file name.
                (num_hits2, num_hits_suppressed2, results2) = main([
                    "-o", temp_file_normal.name, self.testset1, "search", "-F",
                    "\\" + rndFileName
                ])
                # Check we got the same number of hits
                self.assertTrue(
                    num_hits == num_hits2,
                    sys._getframe().f_code.co_name +
                    " num_hits: %d" % num_hits)
                # Check output has the expected results
                self.assertEquals(
                    num_hits - num_hits_suppressed,
                    self.count_lines_regex(temp_file_indexed.name,
                                           rndFileName),
                    sys._getframe().f_code.co_name +
                    " Output regex count doesn't match num_hits!")
                # Check output has the expected results
                self.assertEquals(
                    num_hits2 - num_hits_suppressed2,
                    self.count_lines_regex(temp_file_normal.name, rndFileName),
                    sys._getframe().f_code.co_name +
                    " Output regex count doesn't match num_hits!")
                # Check standard and indexed search produced the same results
                self.assertTrue(
                    self.compare_output_files(temp_file_normal.name,
                                              temp_file_indexed.name),
                    "Results differ!")
Example #10
0
 def dumpCSV(self, dbfilenameFullPath, dumpfilenameFullPath):
     """Dump every row of the Csv_Dump table to a comma-separated file."""
     DB = appDB.DBClass(dbfilenameFullPath, True, settings.__version__)
     DB.appInitDB()
     conn = DB.appConnectDB()
     rows = DB.Query("SELECT * FROM Csv_Dump")
     with open(dumpfilenameFullPath, "w") as out_handle:
         for record in rows:
             # Stringify each column and emit one CSV line per row.
             out_handle.write("%s\n" % ','.join(str(field) for field in record))
         out_handle.flush()
Example #11
0
    def test_AppCompat_LiteralSearch_Suppressed(self):
        # Verify duplicate suppression: ten identical entries (same size)
        # must collapse to a single reported hit, while ten distinct-size
        # entries all survive.
        rndFileName = ''.join(
            random.choice(string.ascii_uppercase) for _ in range(15))
        with appDB.DBClass(self.testset1, settings.__version__) as DB:
            DB.appInitDB()
            conn = DB.appConnectDB()

            # Add 10 entries
            for i in xrange(0, 10):
                entry_fields = settings.EntriesFields(
                    EntryType=settings.__APPCOMPAT__,
                    FilePath='C:\Temp',
                    FileName=rndFileName,
                    Size=i,
                    ExecFlag='True')
                add_entry(DB, "TestHost01", entry_fields)

            # Add 10 entries which will be deduped to 1 on search
            for i in xrange(0, 10):
                entry_fields = settings.EntriesFields(
                    EntryType=settings.__APPCOMPAT__,
                    FilePath='C:\Temp',
                    FileName=rndFileName,
                    Size=1000,
                    ExecFlag='True')
                add_entry(DB, "TestHost01", entry_fields)

        # Get temp file name for the DB
        with tempfile.NamedTemporaryFile(
                suffix='.txt', prefix='Output',
                dir=tempfile.gettempdir()) as temp_file:
            # Search
            (num_hits, num_hits_suppressed, results) = main([
                "-o", temp_file.name, self.testset1, "search", "-F",
                rndFileName
            ])
            # Check we got as many hits as we expect
            self.assertTrue(
                num_hits == 10 + 10,
                sys._getframe().f_code.co_name + " num_hits: %d - %s" %
                (num_hits, self.testset1))
            # Check supression worked as expected
            # 10 duplicates collapse to 1 visible hit -> 9 suppressed.
            self.assertTrue(
                num_hits_suppressed == 9,
                sys._getframe().f_code.co_name + " num_hits: %d" % num_hits)
            # Check output has the expected result
            self.assertEquals(
                num_hits - num_hits_suppressed,
                self.count_lines_regex(temp_file.name, rndFileName),
                sys._getframe().f_code.co_name +
                " Output regex count doesn't match num_hits!")
Example #12
0
    def test_Leven2(self):
        # Seed a typo-squatted system binary name and verify the "leven"
        # (Levenshtein distance) command reports it.
        with appDB.DBClass(self.testset1, settings.__version__) as DB:
            DB.appInitDB()
            conn = DB.appConnectDB()

            # Add stuff
            suspicious_entry = settings.EntriesFields(
                EntryType=settings.__APPCOMPAT__,
                FilePath='C:\Windows\System32',
                FileName='svchosts.exe')
            add_entry(DB, "TestHost01", suspicious_entry)

            # Run
            ret = main([self.testset1, "leven"])
            # Check we found the right file
            was_flagged = 'svchosts.exe' in ret[1][1][1]
            self.assertEquals(was_flagged, True, "test_Leven2 failed!")
Example #13
0
    def test_Leven(self):
        # Insert a random file name, then run "leven" against a one-edit
        # variant of it and check the original is reported as the neighbor.
        base_name = ''.join(
            random.choice(string.ascii_uppercase) for _ in range(15))
        with appDB.DBClass(self.testset1, settings.__version__) as DB:
            DB.appInitDB()
            conn = DB.appConnectDB()

            # Add stuff
            seeded_entry = settings.EntriesFields(
                EntryType=settings.__APPCOMPAT__,
                FilePath='C:\Windows\System32',
                FileName=base_name)
            add_entry(DB, "TestHost01", seeded_entry)

            # Run
            leven_fileName = 'a' + base_name
            ret = main([self.testset1, "leven", leven_fileName])
            # Check we found the right file
            expected = "'" + base_name + "'"
            self.assertEquals(ret[1][1][1], expected, "test_Leven failed!")
Example #14
0
    def test_MPEngine_DatabaseLocked(self):
        # End-to-end produce/consume run against a sqlite-backed consumer,
        # forcing worker restarts mid-flight to simulate rebalancing under DB
        # contention; all tasks must still complete, in order.
        try:
            logger.info("Starting test_MPEngine_end2end_BalanceSimulation")
            # Get temp db name for the test
            tempdb = tempfile.NamedTemporaryFile(suffix='.db',
                                                 prefix='testCase',
                                                 dir=tempfile.gettempdir())
            tempdb.close()
            dbfilenameFullPath = tempdb.name
            with appDB.DBClass(dbfilenameFullPath, settings.__version__) as DB:
                DB.appInitDB()

            num_tasks = 50
            # NOTE(review): first argument presumably the worker count —
            # confirm against MPEngineProdCons.
            mpe = MPEngineProdCons(4, WkrTestProdFast, WkrTestConsDB)
            # Add tasks
            task_list = [i for i in xrange(1, num_tasks + 1)]
            mpe.addTaskList(task_list)

            mpe.addConsumer([dbfilenameFullPath])
            mpe.addProducer()

            # loop_test_num is decremented each pass but never checked; it
            # appears to be a leftover loop-limit counter.
            loop_test_num = num_tasks
            while mpe.working():
                (num_prod, num_cons, task1, task2, task3) = mpe.getProgress()
                print("Prod: %d / Cons: %d | %s -> %s -> %s" %
                      mpe.getProgress())
                time.sleep(1)
                # Restart workers while mid-run (task3 in [20, 30]) to
                # exercise recovery with the DB file in use.
                if task3 >= 20 and task3 <= 30:
                    logger.info("Simulating rebalance (task3: %d task1/2: %d" %
                                (task3, task1 / 2))
                    mpe.restartConsumers()
                    mpe.restartProducers()
                loop_test_num -= 1

            # Despite the restarts, every task result must be present and the
            # last result must be the final task number.
            results = mpe.grabResults()
            self.assertEquals(len(results), num_tasks, "test_MPEngine_end2end")
            self.assertEquals(results[-1], num_tasks, "test_MPEngine_end2end")

            del mpe
            print "Test ended"
        except Exception:
            traceback.print_exc(file=sys.stdout)
            self.fail("Exception triggered")

        # Pass
        self.assertEquals(1, 1, "test_MPEngine_end2end")
Example #15
0
    def run(self):
        """Consumer worker entry point: open the sqlite connection inside the
        worker process, run the base worker loop, then close the DB."""
        # Note: __init__ runs on multiprocessing's main thread and as such we can't use that to init a sqlite connection
        assert(len(self.extra_arg_list) == 1)
        # Sole extra argument is the database path.
        self.dbfilenameFullPath = self.extra_arg_list[0]
        self.DB = None
        self.conn = None

        # Init DB access to DB
        self.DB = appDB.DBClass(self.dbfilenameFullPath, True, settings.__version__)
        # self.DB.appInitDB()
        self.conn = self.DB.appConnectDB()

        # Call super run to continue with the natural worker flow
        super(appLoadCons, self).run()

        # Close DB connection
        self.logger.debug("%s - closing down DB" % self.proc_name)
        self.conn.close()
        del self.DB
Example #16
0
    def run(self):
        """Test consumer worker: open the sqlite connection inside the worker
        process, run the base worker loop, then release the DB object.

        Unlike the production consumer, the connection is deliberately left
        unclosed here (see the commented-out close below)."""
        self.logger.info("WorkerTestConsumerDB: Run")
        # Sole extra argument is the database path.
        self.dbfilenameFullPath = self.extra_arg_list[0]
        self.DB = None
        self.conn = None

        # Init DB access to DB
        self.DB = appDB.DBClass(self.dbfilenameFullPath, True,
                                settings.__version__)
        self.conn = self.DB.appConnectDB()
        self.logger.info("WorkerTestConsumerDB: appConnectDB done")

        # Call super run to continue with the natural worker flow
        super(WkrTestConsDB, self).run()

        # Close DB connection
        self.logger.info("%s - closing down DB" % self.proc_name)

        # # Simulate a very log pending queue of data that needs to be dumped to the DB before we can exit:
        # self.write_to_DB(10, 20)
        # self.conn.close()
        self.logger.info("%s - deleting DB object" % self.proc_name)
        del self.DB
Example #17
0
    def test_AppCompat_IndexedSearchFilePath(self):
        # Indexed search keyed on FilePath (rather than FileName) must find
        # exactly the entries inserted under the random directory.
        rndFileName = ''.join(
            random.choice(string.ascii_uppercase) for _ in range(20))
        with appDB.DBClass(self.testset1, settings.__version__) as DB:
            DB.appInitDB()
            conn = DB.appConnectDB()

            # Twenty entries sharing a unique random FilePath.
            for i in xrange(0, 20):
                entry_fields = settings.EntriesFields(
                    EntryType=settings.__APPCOMPAT__,
                    FilePath='C:\\' + rndFileName,
                    FileName="calc.exe",
                    Size=i,
                    ExecFlag='True')
                add_entry(DB, "TestHost01", entry_fields)

        # Get temp file name for the DB
        with tempfile.NamedTemporaryFile(
                suffix='.txt',
                prefix='test_AppCompat_IndexedSearch',
                dir=tempfile.gettempdir()) as temp_file:
            # Search
            (num_hits, num_hits_suppressed, results) = main([
                "-o", temp_file.name, self.testset1, "fsearch", "FilePath",
                "-F", "C:\\" + rndFileName
            ])
            # Check we got at least as many as we added into the DB
            self.assertTrue(
                num_hits == 20,
                sys._getframe().f_code.co_name + " num_hits: %d" % num_hits)
            # Check output has the expected result
            self.assertEquals(
                num_hits - num_hits_suppressed,
                self.count_lines_regex(temp_file.name, rndFileName),
                sys._getframe().f_code.co_name +
                " Output regex count doesn't match num_hits!")
Example #18
0
    def test_StatusAppCompat(self):
        # "status" output must agree with a direct host count from the DB and
        # with the known parameters of the generated fake test set.
        with appDB.DBClass(self.testset1, settings.__version__) as DB:
            DB.appInitDB()
            conn = DB.appConnectDB()

            # Get host list
            (db_filenameFullPath2, db_version2, db_count2, num_instances2,
             num_entries2) = main([self.testset1, "status"])
            db_count_query = DB.CountHosts()

        # Check status count == db count
        self.assertEquals(db_count2, db_count_query,
                          "test_StatusAmCache failed!")
        # Check status count == known host #
        self.assertEquals(db_count2, self.fake_bd_num_hosts,
                          "test_StatusAmCache failed!")
        # Check reported path == known path
        self.assertEquals(db_filenameFullPath2, self.testset1,
                          "test_StatusAmCache failed!")
        # Check entries count is with expected parameters
        # The fake DB generator creates 400-800 entries per host.
        self.assertTrue(
            num_entries2 > 400 * self.fake_bd_num_hosts
            and num_entries2 < 800 * self.fake_bd_num_hosts,
            "test_StatusAmCache failed!")
Example #19
0
def build_fake_DB(hosts=10, seed=random.randint(0, 10000), database_file=None):
    hostnames_set = set()
    filePaths_dict = defaultdict(int)
    filePaths_dict_ID = 0
    filePaths_dict_ID_skip = 0

    random.seed(seed)
    fake.seed(seed)
    fake_ES.seed(seed)

    if database_file == None:
        # Get temp db name for the test
        tempdb = tempfile.NamedTemporaryFile(suffix='.db',
                                             prefix='testCase',
                                             dir=tempfile.gettempdir())
        tempdb.close()
        database_file = tempdb.name

    if os.path.isfile(database_file):
        logger.warning("Adding hosts to existing database")
        with appDB.DBClass(database_file, "False", settings.__version__) as DB:
            conn = DB.appConnectDB()
            # Load existing hosts
            data = DB.Query("SELECT HostName FROM Hosts")
            for hostName in data:
                hostnames_set.add(hostName[0])
            # Load existing paths
            data = DB.Query("SELECT FilePathID, FilePath FROM FilePaths")
            for filePathID, FilePath in data:
                filePaths_dict[FilePath] = (filePathID)
                filePaths_dict_ID += 1
            filePaths_dict_ID_skip = filePaths_dict_ID

    else:
        with appDB.DBClass(database_file, "True", settings.__version__) as DB:
            DB.appInitDB()
            DB.appSetIndex()
            conn = DB.appConnectDB()
            DB.appRequireIndexesDB(
                "index_EntriesHostName",
                "CREATE INDEX index_EntriesHostName on Hosts(HostName)")
            DB.appRequireIndexesDB(
                "index_FilePathsFilePath",
                "CREATE INDEX index_FilePathsFilePath on FilePaths(FilePath)")

    with appDB.DBClass(database_file, "False", settings.__version__) as DB:
        conn = DB.appConnectDB()

        # Start creating hosts and data:
        rowList = []
        insertList = []
        numFields = 29 - 3
        valuesQuery = "(NULL," + "?," * numFields + "0, 0)"

        progressCurrent = 0
        progressTotal = hosts
        for i in xrange(0, hosts):
            progressCurrent += 1
            update_progress(float(progressCurrent) / float(progressTotal))

            HostName = ""
            while True:
                HostName = strip_accents(
                    (fake_ES.color_name() + fake_ES.country()).replace(
                        ' ', ''))
                HostName = strip_non_ascii(HostName)
                HostName += "_" + str(random.randint(000, 999))
                if HostName not in hostnames_set:
                    hostnames_set.add(HostName)
                    break

            print "Creating appcompat/amcache data for host: %s" % HostName
            Instances = ['dummy']
            InstancesCounter = 1
            Recon = 0
            ReconScoring = 0

            DB.ExecuteMany("INSERT INTO Hosts VALUES (NULL,?,?,?,?,?)",
                           [(HostName, str(repr(Instances)), InstancesCounter,
                             Recon, ReconScoring)])
            HostID = DB.Query(
                "SELECT HostID FROM Hosts WHERE HostName = '%s'" %
                HostName)[0][0]

            # Sampled 2K hosts, this should statistically provide a somewhat realistic amount of entries (for AppCompat)
            for i in xrange(1, random.randint(400, 800)):
                # EntryType = random.choice([settings.__APPCOMPAT__,settings.__AMCACHE__])
                EntryType = settings.__APPCOMPAT__
                RowNumber = 0
                LastModified = str(fake.date_time_between('-1y')) + "." + str(
                    random.randint(1, 9999))
                LastUpdate = str(fake.date_time_between('-4y')) + "." + str(
                    random.randint(1, 9999))
                filePathID = 0
                # todo: FilePath retains final backslash on root paths (c:\, d:\ ...) remove.
                FilePath, FileName = ntpath.split(fake.path())
                FilePath = FilePath.lower()
                FileName = FileName.lower()
                Size = random.randint(1, 100000)
                if EntryType == settings.__APPCOMPAT__:
                    ExecFlag = random.choice(['True', 'False'])
                else:
                    ExecFlag = 'True'

                if EntryType == settings.__AMCACHE__:
                    SHA1 = fake.sha1()
                    FileDescription = random.choice(
                        ['', '', '', '', '', '', '', '', '', '',
                         fake.text()])
                    FirstRun = str(fake.date_time_between('-1y')) + "." + str(
                        random.randint(1, 9999))
                    Created = str(fake.date_time_between('-5y')) + "." + str(
                        random.randint(1, 9999))
                    Modified1 = str(fake.date_time_between('-5y')) + "." + str(
                        random.randint(1, 9999))
                    Modified2 = str(fake.date_time_between('-5y')) + "." + str(
                        random.randint(1, 9999))
                    LinkerTS = str(fake.date_time_between('-10y'))
                    Company = fake.company()
                    PE_sizeofimage = random.randint(1, 10000)

                    # Redo re-assignment of date we do on load for AmCache
                    LastUpdate = FirstRun
                    LastModified = Modified2
                else:
                    SHA1 = ''
                    FileDescription = ''
                    FirstRun = ''
                    Created = ''
                    Modified1 = ''
                    Modified2 = ''
                    LinkerTS = ''
                    Company = ''
                    PE_sizeofimage = ''

                Product = 0
                Version_number = 0
                Version = 0
                Language = 0
                Header_hash = 0
                PE_checksum = 0
                SwitchBackContext = 0
                InstanceID = 0

                # # Add FilePath if not there yet
                # DB.Execute("INSERT OR IGNORE INTO FilePaths VALUES (NULL, '%s')" % FilePath)
                # # Get FilePathID
                # FilePathID = DB.QueryInt("SELECT FilePathID FROM FilePaths WHERE FilePath = '%s'" % FilePath)
                if FilePath not in filePaths_dict:
                    filePaths_dict[FilePath] = (filePaths_dict_ID)
                    filePathID = filePaths_dict_ID
                    filePaths_dict_ID += 1
                else:
                    filePathID = filePaths_dict[FilePath]

                insertList.append(
                    (HostID, EntryType, RowNumber, LastModified, LastUpdate,
                     filePathID, FileName, Size, ExecFlag, SHA1,
                     FileDescription, FirstRun, Created, Modified1, Modified2,
                     LinkerTS, Product, Company, PE_sizeofimage,
                     Version_number, Version, Language, Header_hash,
                     PE_checksum, SwitchBackContext, InstanceID))

                # Dump every now and then:
                if len(insertList) > 1000000:
                    logger.info("Dumping data to DB")
                    DB.ExecuteMany("INSERT INTO Entries VALUES " + valuesQuery,
                                   insertList)
                    insertList = []

        # Insert last bucket
        logger.info("Dumping last bucket to DB")
        DB.ExecuteMany("INSERT INTO Entries VALUES " + valuesQuery, insertList)

        # Insert new FilePaths
        list_FilePath_ID = [(v, k) for k, v in filePaths_dict.items()]
        list_FilePath_ID.sort(key=lambda tup: tup[0])
        DB.ExecuteMany("INSERT INTO FilePaths VALUES (?,?)",
                       list_FilePath_ID[filePaths_dict_ID_skip:])

    return database_file
Example #20
0
    def test_TcorrTest_prog1(self):
        """Verify temporal correlation ('tcorr') around DDD.exe.

        Populates two identical hosts with the same ordered execution
        sequence AAA..GGG, runs tcorr with windows 1-3 and checks the
        correlated neighbors (name, before/after counts, inverse bond,
        total count and relative weights).  A third identical host is then
        added and the window-1 counts are re-checked (they grow to 3).
        """
        msg = "test_TcorrTest_prog1 - Name failed!"
        file_names = ['AAA.exe', 'BBB.exe', 'CCC.exe', 'DDD.exe',
                      'EEE.exe', 'FFF.exe', 'GGG.exe']

        def populate_host(DB, host_name):
            # Insert the full, ordered execution sequence for one host.
            for file_name in file_names:
                entry_fields = settings.EntriesFields(
                    EntryType=settings.__APPCOMPAT__,
                    FilePath='C:\Temp',
                    FileName=file_name,
                    Size=1,
                    ExecFlag='True')
                add_entry(DB, host_name, entry_fields)

        def run_tcorr(window_flag):
            # Run the tcorr module, failing the test on any exception.
            try:
                return main([self.testset1, "tcorr", "DDD.exe", window_flag])
            except Exception as e:
                self.fail(str(e) + "\n" + traceback.format_exc())

        def check_rows(data, expected_rows):
            # expected_rows: (row_index, name, before_count, after_count,
            # total_count).  InvBond (column 9) must be "True" on every
            # correlated row.
            for idx, name, before, after, total in expected_rows:
                self.assertEquals(data[idx][3], name, msg)
                self.assertEquals(data[idx][6], before, msg)
                self.assertEquals(data[idx][7], after, msg)
                self.assertEquals(data[idx][9], "True", msg)
                self.assertEquals(data[idx][10], total, msg)

        def check_weights(data, greater_pairs, equal_pairs):
            # Weight (column 8): closer neighbors must outweigh farther
            # ones; rows at the same distance must weigh the same.
            for a, b in greater_pairs:
                self.assertTrue(data[a][8] > data[b][8], msg)
            for a, b in equal_pairs:
                self.assertTrue(data[a][8] == data[b][8], msg)

        with appDB.DBClass(self.testset1, settings.__version__) as DB:
            DB.appInitDB()
            conn = DB.appConnectDB()

            populate_host(DB, "TestHost01")
            populate_host(DB, "TestHost02")

            # Window 1: only the immediate neighbors CCC/EEE correlate.
            directCorrelationData = run_tcorr("-w 1")
            check_rows(directCorrelationData,
                       [(1, "CCC.exe", 0, 2, 2),
                        (0, "EEE.exe", 2, 0, 2)])

            # Window 2: BBB/FFF appear too, with lower weight.
            directCorrelationData = run_tcorr("-w 2")
            check_rows(directCorrelationData,
                       [(0, "CCC.exe", 0, 2, 2),
                        (1, "EEE.exe", 2, 0, 2),
                        (2, "BBB.exe", 0, 2, 2),
                        (3, "FFF.exe", 2, 0, 2)])
            check_weights(directCorrelationData,
                          [(0, 2), (0, 3), (1, 2), (1, 3)],
                          [(0, 1), (2, 3)])

            # Window 3: AAA/GGG appear as the farthest, lightest rows.
            directCorrelationData = run_tcorr("-w 3")
            check_rows(directCorrelationData,
                       [(0, "CCC.exe", 0, 2, 2),
                        (1, "EEE.exe", 2, 0, 2),
                        (2, "BBB.exe", 0, 2, 2),
                        (3, "FFF.exe", 2, 0, 2),
                        (4, "AAA.exe", 0, 2, 2),
                        (5, "GGG.exe", 2, 0, 2)])
            check_weights(directCorrelationData,
                          [(0, 2), (0, 3), (0, 4), (0, 5),
                           (1, 2), (1, 3), (1, 4), (1, 5)],
                          [(0, 1), (2, 3), (4, 5)])

            # Third identical host bumps every count from 2 to 3.
            populate_host(DB, "TestHost03")
            directCorrelationData = run_tcorr("-w 1")
            check_rows(directCorrelationData,
                       [(0, "CCC.exe", 0, 3, 3),
                        (1, "EEE.exe", 3, 0, 3)])
Exemple #21
0
    def test_Dump(self):
        """Dump every host in testset10 to a temporary text file.

        Each appcompat line is matched against a CSV regex; the two
        timestamps are round-tripped through strptime/strftime (which
        validates their format) and the sentinel date 0001-01-01 00:00:00
        is rewritten as N/A.  The dump file is removed afterwards; any
        exception fails the test.
        """
        try:
            # Init DB if required
            with appDB.DBClass(self.testset10, settings.__version__) as DB:
                DB.appInitDB()
                conn = DB.appConnectDB()

                # Get host list, highest ReconScoring first
                data = DB.Query(
                    "SELECT HostID, HostName, Recon, ReconScoring FROM Hosts ORDER BY ReconScoring DESC"
                )

                # Matches: "LastModified","LastUpdate","Path\Name","Size","ExecFlag"
                # (timestamps may be the literal N/A).  Loop-invariant, so
                # compiled once instead of once per host row.
                appCompatREGEX = re.compile(
                    r'"((?:\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})|N\/A)","((?:\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})|N\/A)","(.*)\\([^\\]*)","(N\/A|\d*)","(N\/A|True|False)"'
                )

                # Dump all hosts
                for row in data:
                    hostname = row[1]
                    # Reserve a unique temp filename; the file is closed
                    # immediately and re-created by open() below.
                    temp = tempfile.NamedTemporaryFile(
                        suffix='.txt',
                        prefix='testCase',
                        dir=tempfile.gettempdir())
                    dump_filename = temp.name
                    temp.close()

                    # Dump host ('out_file' avoids shadowing the builtin 'file')
                    dump = appDumpHost(DB, hostname, None)
                    with open(dump_filename, "wb") as out_file:
                        for item in dump:
                            if item == 'Last Modified,Last Update,Path,File Size,Exec Flag':
                                out_file.write("%s\r\n" % item)
                            else:
                                m = appCompatREGEX.match(item)
                                if m:
                                    # Sentinel date means "no data available"
                                    if m.group(1) == '0001-01-01 00:00:00':
                                        LastModified = 'N/A'
                                    else:
                                        # Round-trip validates the timestamp format
                                        LastModified = datetime.datetime.strptime(
                                            unicode(m.group(1)),
                                            '%Y-%m-%d %H:%M:%S').strftime(
                                                '%Y-%m-%d %H:%M:%S')
                                    if m.group(2) == '0001-01-01 00:00:00':
                                        LastUpdate = 'N/A'
                                    else:
                                        LastUpdate = datetime.datetime.strptime(
                                            unicode(m.group(2)),
                                            '%Y-%m-%d %H:%M:%S').strftime(
                                                '%Y-%m-%d %H:%M:%S')
                                    out_file.write("%s,%s,%s\\%s,%s,%s\r\n" %
                                                   (LastModified, LastUpdate,
                                                    unicode(m.group(3)),
                                                    unicode(m.group(4)),
                                                    unicode(m.group(5)),
                                                    unicode(m.group(6))))

                    # Remove dumped host
                    os.remove(dump_filename)

        except Exception:
            traceback.print_exc(file=sys.stdout)
            self.fail("Exception triggered")
Exemple #22
0
def appSearchMP(dbfilenameFullPath, searchType, search_space, options):
    """Run a (possibly multi-process) search over the entries DB.

    If the literal search can be served by an existing index, delegates to
    runIndexedSearch.  Otherwise partitions the entry rows into jobs,
    fans them out to Producer processes (searchers) and a Consumer process
    (result writer), reports progress, prints a hit histogram and a head
    of the output file.

    Returns a tuple (num_hits, num_hits_suppressed, results).
    """
    (outputFile, maxCores) = (options.outputFile, options.maxCores)
    known_bad_data = None
    # Start timer
    t0 = time.time()

    DB = appDB.DBClass(dbfilenameFullPath, True, settings.__version__)
    conn = DB.appConnectDB()

    # If possible use the available indexes
    if hasattr(
            options, 'field_name'
    ) and searchType == 'LITERAL' and options.searchLiteral[0][0] not in [
            '=', '>', '<'
    ] and DB.appIndexExistsDB(options.field_name):
        # namedtuple stand-ins mimic the .value interface of the
        # multiprocessing.Value counters used in the parallel branch.
        num_hits = namedtuple('hits', 'value')
        num_hits_suppressed = namedtuple('hits', 'value')
        (num_hits.value, num_hits_suppressed.value,
         results) = runIndexedSearch(dbfilenameFullPath, search_space, options)

    else:
        # Get total number of entries to search
        entriesCount = DB.CountEntries()
        logger.debug("Total entries in search space: %d" % entriesCount)

        # Pre-load known_bad if required
        if searchType == 'KNOWNBAD':
            known_bad_data = LoadRegexBulkSearch(options.knownbad_file)

        # Establish communication queues
        tasks = multiprocessing.JoinableQueue()
        resultsProducers = multiprocessing.Queue()
        resultsConsumers = multiprocessing.Queue()
        hitHistogram_queue = multiprocessing.Queue()

        # Start producers/consumers
        num_consumers = 1
        num_producers = max(1, maxCores - 1)

        # Prep lock for progress update Producers
        progProducers = multiprocessing.Value('i', 0)
        # Prep lock for progress update Consumers
        progConsumers = multiprocessing.Value('i', 0)
        # Prep Consumers return values
        num_hits = multiprocessing.Value('i', 0)
        num_hits_suppressed = multiprocessing.Value('i', 0)

        logger.debug(
            'Using %d cores for searching / %d cores for dumping results' %
            (num_producers, num_consumers))

        # Queue tasks for Producers
        # Limit rowsPerJob to constrain memory use and ensure reasonable progress updates.
        # Clamp to >= 1: with an empty/tiny DB, entriesCount / 8 is 0 and a
        # step of 0 would make range() raise ValueError.
        rowsPerJob = max(1, min((entriesCount / 8), 5000))
        logger.debug("RowsPerJob: %d" % rowsPerJob)
        num_tasks = 0
        for startingRowID in range(0, entriesCount - rowsPerJob, rowsPerJob):
            tasks.put(Task(startingRowID, rowsPerJob - 1))
            logger.debug(
                "Creating search job %d: [%d - %d]" %
                (num_tasks, startingRowID, startingRowID + rowsPerJob - 1))
            num_tasks += 1
        logger.debug("Creating search job %d: [%d - %d]" %
                     (num_tasks, num_tasks * (rowsPerJob),
                      ((num_tasks * rowsPerJob) +
                       (entriesCount - (num_tasks * (rowsPerJob) - 1)))))
        # Special consideration for the last one:
        tasks.put(
            Task(num_tasks * (rowsPerJob),
                 (entriesCount - ((num_tasks * rowsPerJob) - 1))))
        logger.debug("Number of tasks: %d" % num_tasks)

        # Add a poison pill for each producer
        for i in xrange(num_producers):
            tasks.put(None)

        # Start producer threads
        producers = [Producer(tasks, resultsProducers, dbfilenameFullPath, progProducers, num_consumers, \
                              searchType, search_space, options, num_hits, known_bad_data) for i in xrange(num_producers)]
        for producer in producers:
            producer.daemon = True  # Remove for debugging
            producer.start()

        # Start consumer threads
        consumers = [Consumer(resultsProducers, resultsConsumers, progConsumers, num_producers, outputFile, \
                              dbfilenameFullPath, searchType, search_space, options, num_hits, \
                              num_hits_suppressed, hitHistogram_queue, known_bad_data) for i in xrange(num_consumers)]
        for consumer in consumers:
            consumer.daemon = True  # Remove for debugging
            consumer.start()

        # Producer progress loop (progProducers < 0 signals an abort)
        while (num_tasks > progProducers.value and progProducers.value >= 0):
            logger.debug("Producer num_tasks: %d - v.value: %d" %
                         (num_tasks, progProducers.value))
            update_progress(
                min(1,
                    float(progProducers.value) / float(num_tasks)),
                "Searching [%d]" %
                (num_hits.value - num_hits_suppressed.value))
            time.sleep(0.5)
        update_progress(
            1, "Searching [%d]" % (num_hits.value - num_hits_suppressed.value))

        # Wait for consumers dumping results to finish too
        while (num_hits.value > progConsumers.value
               and progConsumers.value >= 0):
            logger.debug("Consuming hit: %d / %d" %
                         (progConsumers.value, num_hits.value))
            update_progress(
                min(1,
                    float(progConsumers.value) / float(num_hits.value)),
                "Dumping results to disk [%d]" % progConsumers.value)
            time.sleep(0.5)

        # Make sure we dumped as many hits as we found
        assert (num_hits.value == progConsumers.value)
        update_progress(1,
                        "Dumping results to disk [%d]" % progConsumers.value)

        # Track Consumers deaths (each consumer sends back one None pill)
        logger.debug("Waiting for consumer reverse-poison pills")
        while num_consumers > 0:
            tmp = resultsConsumers.get()
            # Check for reverse-poison pill
            if tmp is None:
                num_consumers -= 1
                logger.debug("Consumer finished!")
        logger.debug("All consumers accounted for")

        # Wait for consumer threads to finish
        logger.debug("Waiting for consumer threads to finish")
        for consumer in consumers:
            consumer.join()
        logger.debug("Consumer threads finished")

        # Print hit histogram:
        results = []
        results.append(('cyan', ("Hit histogram:", "", "")))
        while not hitHistogram_queue.empty():
            (name, regex, regex_hits) = hitHistogram_queue.get()
            results.append(('white', (name, regex, regex_hits)))
        if len(results) > 1:
            outputcolum(results)

    # Stop timer
    t1 = time.time()

    logger.info("Search hits: %d" % num_hits.value)
    logger.info("Suppresed duplicate hits: %d" % num_hits_suppressed.value)
    logger.info("Search time: %s" % (str(timedelta(seconds=(t1 - t0)))))

    if num_hits.value:
        logger.info("Head:")
        # Dump head of output file:
        num_lines = file_size(options.outputFile)
        from itertools import islice
        with open(options.outputFile) as myfile:
            head = list(islice(myfile, 5))
        for line in head:
            logger.info(line.strip('\n\r'))
        logger.info("(%d lines suppressed)" % max(0, (num_lines - 5)))

    return (num_hits.value, num_hits_suppressed.value, results)
Exemple #23
0
    def run(self):
        proc_name = self.name
        exitFlag = False
        hit_dict = {}
        logger.debug("%s - Starting consumer process" % (self.proc_name))

        # Init DB if required
        self.DB = appDB.DBClass(self.dbfilenameFullPath, True,
                                settings.__version__)
        self.conn = self.DB.appConnectDB()

        # Load known_bad if required
        if self.searchType == 'KNOWNBAD':
            (searchTermRegex, searchTermRegexFilters,
             known_bad_search_terms) = self.known_bad_data
            for x in known_bad_search_terms:
                hit_dict[x.regex] = [0, x.name, x.regex]

        # Open output files:
        tmp_counter = 0
        with open(self.outputFile, "w") as text_file:
            with open(
                    os.path.join(ntpath.dirname(self.outputFile),
                                 ntpath.splitext(self.outputFile)[0] + ".mmd"),
                    "w") as markdown_file:

                # While there are results to be processed we grab them and process them
                # todo: [High] We're holding all hits in memory now, stage file dumping activity?
                rowID_list = []
                while not exitFlag:
                    # Grab next result from queue
                    rowID = self.task_queue.get()
                    # Check for poison pill from Producers
                    if rowID is None:
                        self.num_producers -= 1
                        logger.debug(
                            "%s - Found one poison pill %d Producers left" %
                            (self.proc_name, self.num_producers))
                        # Check if all Producers have finished
                        if self.num_producers == 0:
                            # Reverse poison pill
                            self.result_queue.put(None)
                            logger.debug("%s - Exiting process" %
                                         (self.proc_name))
                            exitFlag = True
                            continue
                    else:
                        tmp_counter += 1
                        # logger.debug("%s - consuming hit #%d: %d" % (self.proc_name, tmp_counter, rowID))
                        rowID_list.append(rowID)

                # Finished grabbing rowID, now we dump them all:
                dumped_set = set()
                for rowID in rowID_list:
                    # Grab entry data we want to save to the output file:
                    record = retrieveSearchData(rowID, self.DB,
                                                self.search_space)

                    # De-dup results:
                    entryMD5 = hashlib.md5(''.join([
                        str(e) for e in [
                            record[0], record[1], record[2], record[3],
                            record[4], record[5], record[9]
                        ]
                    ])).hexdigest()
                    if entryMD5 in dumped_set:
                        # print("Suppressing row %d" % entry[6])
                        with self.num_hits_suppressed.get_lock():
                            self.num_hits_suppressed.value += 1
                    else:
                        dumped_set.add(entryMD5)
                        # Re-filter against known bad individually to build histogram and highlight
                        regex_hit_name = None
                        search_space = None
                        if self.searchType == 'KNOWNBAD':
                            # Search for known_bad one by one and filter if required
                            for x in list(known_bad_search_terms):
                                if re.compile(x.regex, re.IGNORECASE).search(
                                        str(record.Search_Space)) is not None:
                                    if x.filter is not None:
                                        if re.compile(
                                                x.filter,
                                                re.IGNORECASE).search(
                                                    str(record.Search_Space)
                                                ) is not None:
                                            regex_hit_name = x.name
                                            continue
                                    # 'u200b' is a zero width unicode character I have to use to avoid messy markdown highlighting:
                                    search_space = re.compile(
                                        '(.*)(' + x.regex + ')(.*)', re.I).sub(
                                            r'\1' + u'\u200b' + r'**' +
                                            u'\u200b' + r'\2' + u'\u200b' +
                                            '**' + u'\u200b' + r'\3',
                                            record.Search_Space, re.IGNORECASE)
                                    # Add hit to know_bad hit counter:
                                    regex_hit_name = x.name
                                    hit_dict[x.regex][0] += 1

                                    # We only report the match with the first regex from our set
                                    break
                            # Program flow should never really make it here :)
                            # assert(False, "We're in trouble")
                        else:
                            search_space = record.Search_Space
                            # search_space will be None if Producer hit but Consumer did not:
                            if search_space is None:
                                if regex_hit_name:
                                    logger.error(
                                        "Producer/Consumer hit mismatch (consumer filtered) ! (report bug please) sig: %s - %s"
                                        %
                                        (regex_hit_name, record.Search_Space))
                                else:
                                    logger.error(
                                        "Producer/Consumer hit mismatch! (report bug please) - %s"
                                        % record.Search_Space)
                                pass

                        # We dump the data to the output file/s
                        saveSearchData(record, self.searchType, regex_hit_name,
                                       text_file, markdown_file)

                    # Update progress counter
                    with self.val.get_lock():
                        self.val.value += 1

        # Dump hit histogram
        time.sleep(0.5)
        for x in sorted(hit_dict.values(),
                        key=operator.itemgetter(0),
                        reverse=True):
            if x[0] > 0:
                self.hitHistogram_queue.put((x[1], x[2], x[0]))
Example #24
0
    def run(self):
        """Producer process main loop.

        Pulls row-range tasks from ``self.task_queue``, runs the configured
        search (LITERAL / REGEX / COMBINED / KNOWNBAD) against the
        ``Entries_FilePaths`` table for that RowID range, and reports each
        matching RowID via ``self.addHit()``.  Terminates when it receives a
        poison pill (``None``) task, at which point it forwards one poison
        pill per consumer on ``self.result_queue`` and returns.

        Progress is reported by incrementing the shared counter ``self.val``
        (one tick per completed task) under its lock.
        """
        # Each producer opens its own DB connection; sqlite connections must
        # not be shared across processes.
        DB = appDB.DBClass(self.dbfilenameFullPath, True, settings.__version__)
        DB.appInitDB()
        conn = DB.appConnectDB()
        # Rows that matched a KNOWNBAD regex but were then discarded by that
        # regex's associated filter; logged at shutdown to gauge how loose
        # the known-bad regexes are (see the todo-style comment below).
        filter_skipped = 0

        # While there are tasks to be ran we grab and run them
        while True:
            # Start timer
            t0 = time.time()
            # NOTE(review): taskRows is never appended to, so the
            # "Task results: %d" log line below always reports 0.
            taskRows = []

            # Grab next job from job queue
            next_task = self.task_queue.get()
            if next_task is None:
                # Poison pill means shutdown
                self.task_queue.task_done()
                # Pass poison pills: one per consumer so every consumer
                # eventually sees its own shutdown marker.
                for _ in xrange(self.num_consumers):
                    self.result_queue.put(None)
                    logger.debug("%s - Adding poison pill for consumer" %
                                 (self.proc_name))
                logger.debug("%s - Exiting process" % (self.proc_name))
                # We're skipping way to much stuff improve filter skipper counter to detect what regexes have to be tightened
                logger.debug("filter_skipped: %d" % filter_skipped)
                return

            # Grab job data: the task callable yields the RowID range
            # (start, count) this producer should scan.
            (startingRowID, entriesPerJob) = next_task()
            with closing(conn.cursor()) as c:
                # Start timer
                t0 = time.time()
                logger.debug("%s - Starting query [%d / %d]. SearchSpace: %s" %
                             (self.proc_name, startingRowID, entriesPerJob,
                              self.search_space))
                # NOTE(review): these queries are assembled with string
                # concatenation / %-interpolation of self.search_space and
                # self.searchTermLiteral rather than bound parameters.  If
                # either can contain user-supplied quotes this is an SQL
                # injection / breakage risk — consider parameterized queries.
                if self.searchType == 'REGEX' or self.searchType == 'KNOWNBAD':
                    # Regex filtering happens in Python below, so here we only
                    # fetch the raw search space for the row range.
                    results = c.execute(
                        "SELECT RowID, " + self.search_space +
                        " AS SearchSpace FROM Entries_FilePaths \
                                        WHERE RowID >= %d AND RowID <= %d" %
                        (startingRowID, startingRowID + entriesPerJob))
                elif self.searchType == 'LITERAL' or self.searchType == 'COMBINED':
                    if self.search_modifier_Literal in [">", "<"]:
                        # Comparison search: let sqlite apply the </> operator.
                        results = c.execute(
                            "SELECT RowID, " + self.search_space +
                            " AS SearchSpace FROM Entries_FilePaths \
                                            WHERE RowID >= %d AND RowID <= %d \
                                            AND SearchSpace %s '%s'" %
                            (startingRowID, startingRowID + entriesPerJob, self
                             .search_modifier_Literal, self.searchTermLiteral))
                    else:
                        # Default literal search uses LIKE (searchTermLiteral
                        # presumably already carries any % wildcards).
                        results = c.execute(
                            "SELECT RowID, " + self.search_space +
                            " AS SearchSpace FROM Entries_FilePaths \
                                            WHERE RowID >= %d AND RowID <= %d \
                                            AND SearchSpace LIKE '%s'" %
                            (startingRowID, startingRowID + entriesPerJob,
                             self.searchTermLiteral))
                else:
                    # NOTE(review): on an unknown searchType we only log; the
                    # fetchall() below then runs on a cursor with no executed
                    # statement — confirm intended behavior.
                    logger.error("Unknown searchType %s" % (self.searchType))

                t1 = time.time()
                logger.debug("%s - Execute time: %s seconds" %
                             (self.proc_name, "{0:.4f}".format(t1 - t0)))
                rows = c.fetchall()
                t2 = time.time()
                logger.debug("%s - Fetchall time: %s seconds (%s / %s)" %
                             (self.proc_name, "{0:.4f}".format(t2 - t1),
                              startingRowID, entriesPerJob))

                # Process row per row: row[0] is RowID, row[1] the SearchSpace
                # text (may be NULL for rows with no data in that column).
                for row in rows:
                    if row[1] is not None:
                        if self.searchType == 'LITERAL':
                            # SQL already filtered; every row is a hit.
                            self.addHit(int(row[0]))
                        elif self.searchType == 'REGEX' or self.searchType == 'COMBINED':
                            # re_fn presumably performs a (case-insensitive)
                            # regex match — defined elsewhere, confirm.
                            if re_fn(self.searchTermRegex, str(row[1])):
                                self.addHit(int(row[0]))
                        elif self.searchType == 'KNOWNBAD':
                            # Search for known bads with no filters:
                            # "()" is the empty aggregate regex, meaning no
                            # filter-less known-bad terms were configured.
                            if self.searchTermRegex != "()":
                                if re_fn(self.searchTermRegex, str(row[1])):
                                    self.addHit(int(row[0]))
                            # Search for known bads which have a filter associated:
                            for x in list(self.known_bad_with_filter):
                                assert (x.filter is not None)
                                if re.compile(x.regex, re.IGNORECASE).search(
                                        str(row[1])) is not None:
                                    # Hit counts only if the filter regex does
                                    # NOT also match (filter = exclusion).
                                    if re.compile(x.filter,
                                                  re.IGNORECASE).search(
                                                      str(row[1])) is None:
                                        self.addHit(int(row[0]))
                                        # One hit is enough for us
                                        break
                                    # fixme:
                                    else:
                                        filter_skipped += 1
                        else:
                            logger.error("Unknown searchType %s" %
                                         (self.searchType))

            t3 = time.time()
            logger.debug("%s - REGEX filtering time: %s seconds (%s / %s)" %
                         (self.proc_name, "{0:.4f}".format(t3 - t2),
                          startingRowID, entriesPerJob))
            # NOTE(review): this only warns — no actual throttling is applied
            # despite the message text.
            if (t3 - t2) > 30:
                logger.warning(
                    "Warning: Producer queues clogged, throttling down.")
            logger.debug(
                "%s Task results: %d execution time: %s seconds" %
                (self.proc_name, len(taskRows), "{0:.4f}".format(t3 - t0)))

            # Update progress counter (shared multiprocessing.Value).
            with self.val.get_lock():
                self.val.value += 1
            self.task_queue.task_done()
        # NOTE(review): unreachable — the while True loop above only exits
        # via the poison-pill `return`.
        logger.warning("%s - Abnormal exit" % (self.proc_name))
Example #25
0
def appLoadMP(pathToLoad, dbfilenameFullPath, maxCores, governorOffFlag):
    """Ingest all supported artifact files under pathToLoad into the DB
    using a multiprocess producer/consumer pipeline.

    Args:
        pathToLoad: File or directory to ingest. A directory named
            'RedlineAudits' (case-insensitive) is treated as Redline data;
            other directories are walked with searchFolders(); a single
            file goes through processArchives().
        dbfilenameFullPath: Path to the sqlite database to load into.
        maxCores: Upper bound on worker processes, passed to MPEngineProdCons.
        governorOffFlag: Passed through to MPEngineProdCons (presumably
            disables its throttling governor — confirm in MPEngine).

    Side effects: may shrink the module-level global _tasksPerJob for small
    jobs, may drop DB indexes to speed up bulk insertion, and logs progress
    including an ETA estimate.  Returns None.
    """
    # _tasksPerJob is module state; it is overwritten below when the job is
    # small, so the change persists across calls.
    global _tasksPerJob

    files_to_process = []
    conn = None

    # Start timer
    t0 = datetime.now()

    logger.debug("Starting appLoadMP")
    # Calculate aggreagate file_filter for all ingest types supported:
    file_filter = '|'.join([v.getFileNameFilter() for k,v in ingest_plugins.iteritems()])
    # Add zip extension
    file_filter += "|.*\.zip"

    # Check if we're loading Redline data
    if os.path.isdir(pathToLoad) and os.path.basename(pathToLoad).lower() == 'RedlineAudits'.lower():
        files_to_process = searchRedLineAudits(pathToLoad)
    else:
        # Search for all files to be processed
        if os.path.isdir(pathToLoad):
            files_to_process = searchFolders(pathToLoad, file_filter)
        else:
            files_to_process = processArchives(pathToLoad, file_filter)

    if files_to_process:
        # Init DB if required
        # NOTE(review): unlike other call sites, appInitDB() is not invoked
        # here — presumably DBClass(..., True, ...) handles initialization;
        # confirm.
        DB = appDB.DBClass(dbfilenameFullPath, True, settings.__version__)
        conn = DB.appConnectDB()

        # Extract hostnames, grab existing host IDs from DB and calculate instance ID for new IDs to be ingested:
        instancesToProcess = []
        instancesToProcess += GetIDForHosts(files_to_process, DB)
        countInstancesToProcess = len(instancesToProcess)
        logger.info("Found %d new instances" % (countInstancesToProcess))

        # Setup producers/consumers initial counts; MPEngine.rebalance()
        # adjusts these dynamically during the control loop below.
        num_consumers = 1
        num_producers = 1

        # Setup MPEngine
        mpe = MPEngineProdCons(maxCores, appLoadProd, appLoadCons, governorOffFlag)

        # Reduce _tasksPerJob for small jobs
        if countInstancesToProcess < _tasksPerJob: _tasksPerJob = 1

        # Create task list: one Task per chunk of instances.
        task_list = []
        instancesPerJob = _tasksPerJob
        num_tasks = 0
        for chunk in chunks(instancesToProcess, instancesPerJob):
            # todo: We no longer need pathToLoad as tasks include the fullpath now
            task_list.append(Task(pathToLoad, chunk))
            num_tasks += 1

        if num_tasks > 0:
            # Check if we have to drop indexes to speedup insertions
            # todo: Research ratio of existing hosts to new hosts were this makes sense
            if countInstancesToProcess > 1000 or DB.CountHosts() < 1000:
                DB.appDropIndexesDB()

            # Queue tasks for Producers
            mpe.addTaskList(task_list)

            # Start procs
            mpe.startProducers(num_producers)
            mpe.startConsumers(num_consumers, [dbfilenameFullPath])
            # mpe.addProducer()

            # Control loop: poll progress once a second, log an ETA, and let
            # the engine rebalance producers vs consumers.
            while mpe.working():
                time.sleep(1.0)
                (num_producers,num_consumers,num_tasks,progress_producers,progress_consumers) = mpe.getProgress()
                elapsed_time = datetime.now() - t0
                # max(1, ...) guards against division by zero before the
                # first consumer finishes a task.
                mean_loadtime_per_host = (elapsed_time) / max(1, _tasksPerJob * progress_consumers)
                pending_hosts = ((num_tasks * _tasksPerJob) - (_tasksPerJob * progress_consumers))
                etr = (mean_loadtime_per_host * pending_hosts)
                eta = t0 + elapsed_time + etr
                ett = (eta - t0)
                # ETA/ETT figures are only shown once >100 hosts are done,
                # since earlier estimates are too noisy.
                if settings.logger_getDebugMode(): status_extra_data = " Prod: %s Cons: %s (%d -> %d -> %d: %d) [RAM: %d%% / Obj: %d / ETH: %s / ETA: %s / ETT: %s]" % \
                                                                       (num_producers, num_consumers, num_tasks, progress_producers, progress_consumers, progress_producers - progress_consumers,
                     psutil_phymem_usage(), len(gc.get_objects()),
                     mean_loadtime_per_host if progress_consumers * _tasksPerJob > 100 else "N/A",
                     str(eta.time()).split(".")[0] if progress_consumers * _tasksPerJob > 100 else "N/A",
                     str(ett).split(".")[0] if progress_consumers * _tasksPerJob > 100 else "N/A")
                else: status_extra_data = ""
                # logger.info("Parsing files%s" % status_extra_data)

                logger.info(update_progress(min(1,float(progress_consumers) / float(num_tasks)), "Parsing files%s" % status_extra_data, True))
                mpe.rebalance()

            del mpe

        # Stop timer
        elapsed_time = datetime.now() - t0
        mean_loadtime_per_host = (elapsed_time) / max(1, countInstancesToProcess)
        logger.info("Load speed: %s seconds / file" % (mean_loadtime_per_host))
        logger.info("Load time: %s" % (str(elapsed_time).split(".")[0]))
    else:
        logger.info("Found no files to process!")