def test_backgroundIndexTask2(self):
        """Index six queued messages in two batches, simulating an engine restart in between.

        After the first batch, docarchive.idCounter is replaced with a fresh
        IdCounter instance to mimic a restart.  The second batch is then
        expected to continue numbering after the document archived by the
        first batch.
        """
        luke_doc = ("000999000", "Luke - Lucene Index Toolbox", "uri: http://www.getopt.org/luke/")

        # first batch
        first_batch = [
            "200(getopt_org).mlog",  # 1
            "gif.qlog",  # 2 - weed
            "empty_response.mlog",  # 3 - bad
        ]
        _queued = self._fetch_qlogs(first_batch)

        n_transformed, n_indexed, n_discarded = qmsg_processor.backgroundIndexTask(True)
        self.assertEqual((n_transformed, n_indexed, n_discarded), (1, 1, 2))

        self._check_archive_doc(*luke_doc)

        # simulate restarting the engine by re-instantiating idCounter
        docarchive.idCounter = docarchive.IdCounter()

        # second batch
        second_batch = [
            "gzipped(slashdot).mlog",  # 4
            "favicon.ico_text(nutch).mlog",  # 5 - weed txt
            "plaintext.mlog",  # 6
        ]
        _queued = self._fetch_qlogs(second_batch)

        n_transformed, n_indexed, n_discarded = qmsg_processor.backgroundIndexTask(True)
        self.assertEqual((n_transformed, n_indexed, n_discarded), (2, 2, 1))

        # the first document must still be there, followed by the two new ones
        expected_docs = (
            luke_doc,
            ("000999001", "Slashdot: News for nerds, stuff that matters"),
            ("000999002", "All rights reserved."),
        )
        for doc_args in expected_docs:
            self._check_archive_doc(*doc_args)
    def test_backgroundIndexTask1(self):
        """Index six queued messages in two consecutive batches of three.

        Each batch is fetched and then processed with a forced
        backgroundIndexTask run; the returned counts and the archived
        documents are checked after every batch.
        """
        luke_doc = ('000999000', 'Luke - Lucene Index Toolbox', 'uri: http://www.getopt.org/luke/')

        # first batch
        first_batch = [
            '200(getopt_org).mlog',         # 1
            'gif.qlog',                     # 2 - weed
            'empty_response.mlog',          # 3 - bad
        ]
        _queued = self._fetch_qlogs(first_batch)

        n_transformed, n_indexed, n_discarded = qmsg_processor.backgroundIndexTask(True)
        self.assertEqual((n_transformed, n_indexed, n_discarded), (1, 1, 2))

        self._check_archive_doc(*luke_doc)

        # second batch
        second_batch = [
            'gzipped(slashdot).mlog',       # 4
            'favicon.ico_text(nutch).mlog', # 5 - weed txt
            'plaintext.mlog',               # 6
        ]
        _queued = self._fetch_qlogs(second_batch)

        n_transformed, n_indexed, n_discarded = qmsg_processor.backgroundIndexTask(True)
        self.assertEqual((n_transformed, n_indexed, n_discarded), (2, 2, 1))

        # the first document must still be there, followed by the two new ones
        expected_docs = (
            luke_doc,
            ('000999001', 'Slashdot: News for nerds, stuff that matters'),
            ('000999002', 'All rights reserved.'),
        )
        for doc_args in expected_docs:
            self._check_archive_doc(*doc_args)
def main(rfile, wfile, env):
    """Run a forced background index pass and report the outcome through a template.

    The request is parsed with cgi.FieldStorage, qmsg_processor.backgroundIndexTask
    is invoked with forceIndex=True, a no-cache text/html header block is
    written to wfile, and a summary of the indexed/discarded counts is handed
    to app_httpserver.forwardTmpl together with 'template.html' and templateTmpl.
    """
    # The parsed form is never consulted below.
    # NOTE(review): the call is kept because parsing may consume rfile - confirm before removing.
    _form = cgi.FieldStorage(environ=env, fp=rfile)

    _transformed, n_indexed, n_discarded = qmsg_processor.backgroundIndexTask(forceIndex=True)
    summary = 'Indexed %s discarded %s' % (n_indexed, n_discarded)

    # Header block terminated by an empty line.
    header_block = (
        'Content-type: text/html\r\n'
        'Cache-control: no-cache\r\n'
        '\r\n'
    )
    wfile.write(header_block)

    # NOTE(review): imported at call time rather than at module top -
    # presumably to avoid a circular import; confirm.
    from minds import app_httpserver
    app_httpserver.forwardTmpl(wfile, env, 'template.html', templateTmpl, summary)
    def test_backgroundIndexTask(self):
        """Index all six queued messages in a single forced backgroundIndexTask run.

        Expects counts of (3, 3, 3) for transformed, indexed and discarded,
        and three archived documents numbered 000999000 through 000999002.
        """
        qlog_names = [
            "200(getopt_org).mlog",  # 1
            "gif.qlog",  # 2 - weed
            "empty_response.mlog",  # 3 - bad
            "gzipped(slashdot).mlog",  # 4
            "favicon.ico_text(nutch).mlog",  # 5 - weed txt
            "plaintext.mlog",  # 6
        ]
        _queued = self._fetch_qlogs(qlog_names)

        n_transformed, n_indexed, n_discarded = qmsg_processor.backgroundIndexTask(True)
        self.assertEqual((n_transformed, n_indexed, n_discarded), (3, 3, 3))

        # archived documents, in the order they are checked
        expected_docs = (
            ("000999000", "Luke - Lucene Index Toolbox", "uri: http://www.getopt.org/luke/"),
            ("000999001", "Slashdot: News for nerds, stuff that matters"),
            ("000999002", "All rights reserved."),
        )
        for doc_args in expected_docs:
            self._check_archive_doc(*doc_args)
def doIndexNow(wfile, req):
    """Force an indexing run, then call response.redirect with '/history'.

    The three counts returned by backgroundIndexTask are unpacked but not
    used, and req is not read by this function.
    """
    _transformed, _indexed, _discarded = qmsg_processor.backgroundIndexTask(forceIndex=True)
    response.redirect(wfile, '/history')