def test_backgroundIndexTask2(self):
    # Index the fixtures in two batches and recreate the id counter between
    # them to simulate an engine restart.
    first_batch = [
        "200(getopt_org).mlog",  # 1
        "gif.qlog",  # 2 - weed
        "empty_response.mlog",  # 3 - bad
    ]
    self._fetch_qlogs(first_batch)
    n_transformed, n_indexed, n_discarded = qmsg_processor.backgroundIndexTask(True)
    self.assertEqual((n_transformed, n_indexed, n_discarded), (1, 1, 2))
    self._check_archive_doc(
        "000999000",
        "Luke - Lucene Index Toolbox",
        "uri: http://www.getopt.org/luke/",
    )

    # Simulate restarting the engine by re-instantiating the id counter.
    docarchive.idCounter = docarchive.IdCounter()

    second_batch = [
        "gzipped(slashdot).mlog",  # 4
        "favicon.ico_text(nutch).mlog",  # 5 - weed txt
        "plaintext.mlog",  # 6
    ]
    self._fetch_qlogs(second_batch)
    n_transformed, n_indexed, n_discarded = qmsg_processor.backgroundIndexTask(True)
    self.assertEqual((n_transformed, n_indexed, n_discarded), (2, 2, 1))

    # The document from the first batch is checked again, followed by the
    # two documents expected from the second batch.
    expected_docs = [
        ("000999000", "Luke - Lucene Index Toolbox", "uri: http://www.getopt.org/luke/"),
        ("000999001", "Slashdot: News for nerds, stuff that matters"),
        ("000999002", "All rights reserved."),
    ]
    for doc_args in expected_docs:
        self._check_archive_doc(*doc_args)
def test_backgroundIndexTask1(self):
    # Index the fixtures in two consecutive batches and verify the counters
    # and archived documents after each one.
    first_batch = [
        '200(getopt_org).mlog',             # 1
        'gif.qlog',                         # 2 - weed
        'empty_response.mlog',              # 3 - bad
    ]
    self._fetch_qlogs(first_batch)
    n_transformed, n_indexed, n_discarded = qmsg_processor.backgroundIndexTask(True)
    self.assertEqual((n_transformed, n_indexed, n_discarded), (1, 1, 2))
    self._check_archive_doc(
        '000999000',
        'Luke - Lucene Index Toolbox',
        'uri: http://www.getopt.org/luke/',
    )

    second_batch = [
        'gzipped(slashdot).mlog',           # 4
        'favicon.ico_text(nutch).mlog',     # 5 - weed txt
        'plaintext.mlog',                   # 6
    ]
    self._fetch_qlogs(second_batch)
    n_transformed, n_indexed, n_discarded = qmsg_processor.backgroundIndexTask(True)
    self.assertEqual((n_transformed, n_indexed, n_discarded), (2, 2, 1))

    # The first-batch document is re-checked together with the two documents
    # expected from the second batch.
    expected_docs = [
        ('000999000', 'Luke - Lucene Index Toolbox', 'uri: http://www.getopt.org/luke/'),
        ('000999001', 'Slashdot: News for nerds, stuff that matters'),
        ('000999002', 'All rights reserved.'),
    ]
    for doc_args in expected_docs:
        self._check_archive_doc(*doc_args)
def main(rfile, wfile, env):
    """Force an indexing run and report the result through the HTML template.

    Parameters:
        rfile: request input stream handed to ``cgi.FieldStorage``.
        wfile: response output stream; receives the header block, then the
            rendered template.
        env: CGI-style environment mapping for the request.

    The message passed to the template reports the indexed and discarded
    counts returned by ``qmsg_processor.backgroundIndexTask``.
    """
    # No form field is read here; the parse is kept because it may consume
    # the request body from rfile -- TODO confirm whether it is still needed.
    cgi.FieldStorage(fp=rfile, environ=env)

    _transformed, indexed, discarded = qmsg_processor.backgroundIndexTask(forceIndex=True)
    msg = 'Indexed %s discarded %s' % (indexed, discarded)

    # Each header line ends with an explicit CRLF, and an empty CRLF line
    # closes the header block.
    wfile.write(
        'Content-type: text/html\r\n'
        'Cache-control: no-cache\r\n'
        '\r\n'
    )

    # Imported at call time, as in the original code.
    from minds import app_httpserver
    app_httpserver.forwardTmpl(wfile, env, 'template.html', templateTmpl, msg)
def test_backgroundIndexTask(self):
    # Index all six fixtures in one batch: three are expected to be
    # transformed and indexed, three discarded.
    fixture_names = [
        "200(getopt_org).mlog",  # 1
        "gif.qlog",  # 2 - weed
        "empty_response.mlog",  # 3 - bad
        "gzipped(slashdot).mlog",  # 4
        "favicon.ico_text(nutch).mlog",  # 5 - weed txt
        "plaintext.mlog",  # 6
    ]
    self._fetch_qlogs(fixture_names)
    n_transformed, n_indexed, n_discarded = qmsg_processor.backgroundIndexTask(True)
    self.assertEqual((n_transformed, n_indexed, n_discarded), (3, 3, 3))

    # Archive ids and the text each archived document is checked against.
    expected_docs = [
        ("000999000", "Luke - Lucene Index Toolbox", "uri: http://www.getopt.org/luke/"),
        ("000999001", "Slashdot: News for nerds, stuff that matters"),
        ("000999002", "All rights reserved."),
    ]
    for doc_args in expected_docs:
        self._check_archive_doc(*doc_args)
def doIndexNow(wfile, req):
    """Run a forced background index pass, then redirect to ``/history``.

    ``req`` is accepted but not used. The counters returned by the indexing
    task are unpacked and otherwise ignored.
    """
    counts = qmsg_processor.backgroundIndexTask(forceIndex=True)
    # Unpack into exactly three values, as the original did.
    _transformed, _indexed, _discarded = counts
    response.redirect(wfile, '/history')