# Fragment: setup and first stage of the incremental-indexer polling loop
# (Python 2 syntax; note the `print` statements).
#
# NOTE(review): this fragment has been flattened onto one physical line, so
# the original indentation is lost.  The nesting described here is read off
# the statement order and the inline comments only -- confirm against an
# unflattened copy before relying on it.
#
# Setup, as visible below:
#   * doInitialWarming / deleteAfterIndexing are the integer values of
#     INCREMENTAL_INDEXER_INITIAL_WARMING and
#     DELETE_MARC_RECORDS_FROM_INDEX_QUEUE.
#   * lastRunTime, lastOptimizeTime and lastFacetWarmTime all start at the
#     current time.time(); firstTime and keepgoing start at 1, count at 0.
#   * startDate is the current day of the month (time.strftime("%d")) as int.
#
# Loop (`while keepgoing:`), first stage ("1st do incremental indexing"):
#   * Runs when firstTime is set or at least checkInterval seconds have
#     elapsed since lastRunTime; otherwise prints how long ago it last ran.
#   * a. When firstTime and doInitialWarming are both set, calls
#     facetWarmer.warmFacets() and records lastFacetWarmTime.
#   * b. Calls horizonIncrementalIndexer.processFilesInDirectory on
#     "./indexer-queue" and adds its return value to count.
#     NOTE(review): the inline NOTE says this check only happens the first
#     time through; presumably the call is nested under a firstTime
#     condition, but the flattened text cannot confirm that.
#   * c. When doItemStatusIndexing is set, calls
#     horizonIncrementalIndexer.incrementalIndexingJob(commitNonblocking=1)
#     and adds its return value to count.
#   * Finally updates lastRunTime and clears firstTime.
#
# checkInterval, converter, numThreads and doItemStatusIndexing are not
# defined in this fragment.  The loop body continues past the end of the
# fragment (the trailing "2nd do optimize" comment introduces the next stage).
doInitialWarming = int( INCREMENTAL_INDEXER_INITIAL_WARMING) deleteAfterIndexing = int( DELETE_MARC_RECORDS_FROM_INDEX_QUEUE ) # NOTE: we *cannot* use a job scheduler like Quartz here because we don't want overlapping jobs or jobs building up. lastRunTime = lastOptimizeTime = lastFacetWarmTime = time.time() firstTime = 1 keepgoing = 1 count = 0 startDate = int( time.strftime("%d") ) while keepgoing: now = time.time() ## 1st do incremental indexing if nec. if firstTime or ( (now - lastRunTime) >= checkInterval ): if firstTime and doInitialWarming: #a. warm facets if the very first time it has run facetWarmer.warmFacets() lastFacetWarmTime = time.time() # b. check for PMS/MARC files in indexer-queue directory and handle if necessary. # NOTE: it only does this check the first time through, so setting deleteAfterIndexing to false # will not cause the same files to keep getting reindexed. count += horizonIncrementalIndexer.processFilesInDirectory("./indexer-queue", anselUnicodeConverter = converter, numThreads=numThreads, deleteAfterIndexing=deleteAfterIndexing) print "[%s] done processing indexer-queue contents" % time.ctime() # c. do status changes. if doItemStatusIndexing: count += horizonIncrementalIndexer.incrementalIndexingJob(commitNonblocking=1) lastRunTime = time.time() firstTime = 0 else: print "not time to run yet, last ran %.4f ago" % (now-lastRunTime) ## 2nd do optimize if nec.
# Fragment: tail of a file-indexing function (its header is not visible
# here) followed by the script entry point.  Python 2 syntax.
#
# NOTE(review): this fragment has been flattened onto one physical line, so
# the original indentation is lost; the nesting described here is inferred
# from statement order -- confirm against an unflattened copy.
#
# Function tail:
#   * Wraps `data` in <add>...</add> and POSTs it to SOLR_UPDATE_URL via
#     solrConnection.postURL.
#   * If the response text contains '<result status="1"', prints an error
#     message that includes the full response.
#   * Prints "committing...", then calls solrConnection.commitNonblocking()
#     when `nonblocking` is true, otherwise solrConnection.commit().
#   * Closes inStream and returns count.
#   NOTE(review): nothing visible here stops the commit after a POST error
#   has been reported -- confirm that committing anyway is intended.
#
# Entry point (`if __name__ == "__main__":`):
#   * Builds an AnselToUnicode converter.
#   * Requires len(sys.argv) == 2, i.e. exactly one command-line argument,
#     which is passed to processFile together with the converter.  Otherwise
#     prints a usage message; sys.exit(1) presumably belongs to that branch.
#   * Then calls optimize(), and afterwards calls
#     facetWarmer.warmFacets(server=SOLR_QUERY_URL) three times (the inline
#     comment says repeated warming appears to improve performance).
#   NOTE(review): `from facetWarmer import *` does not by itself bind the
#   name `facetWarmer`, so the `facetWarmer.warmFacets` call depends on that
#   name being provided some other way -- verify.
#   NOTE(review): the inline comment ending "used to do a ran" is cut off in
#   the source as received.
resp = solrConnection.postURL(SOLR_UPDATE_URL, "<add>%s</add>" % data) if resp.find('<result status="1"') > -1: print "\nError POSTing documents! Response from Solr was\n\n%s\n\n" % resp print "committing..." if nonblocking: solrConnection.commitNonblocking() else: solrConnection.commit() inStream.close() return count if __name__ == "__main__": anselUnicodeConverter = AnselToUnicode() # 2 arguments to command line are used to do a ran if len(sys.argv) == 2: processFile(sys.argv[1], anselUnicodeConverter) else: print "incorrect usage -- specify file to be processed." sys.exit(1) print "done indexing, now optimizing" optimize() print "done optimizing, now warming facets" from facetWarmer import * for i in range(3): # running warmFacets more than once appears to improve performance. facetWarmer.warmFacets(server=SOLR_QUERY_URL) print "all done!"
# Fragment: tail of a batch-indexing function (its header is not visible
# here) followed by the script entry point.  Python 2 syntax.
#
# NOTE(review): this fragment has been flattened onto one physical line, so
# the original indentation is lost; the nesting described here is inferred
# from statement order -- confirm against an unflattened copy.
#
# Function tail:
#   * Resets recordBatch to an empty list and prints a "+<bibOn>+" progress
#     marker (the trailing comma suppresses the newline under the Python 2
#     print statement).
#   * "Last batch": if recordBatch is non-empty, joins it into one unicode
#     string, wraps it in <add>...</add> and POSTs it to SOLR_UPDATE_URL.
#     NOTE(review): the response `resp` is not inspected anywhere in the
#     visible code.
#   * If bibCount > 0, commits via solrConnection.commitNonblocking() when
#     commitNonblocking is true, otherwise solrConnection.commit().  An
#     IOError during the commit is caught: a message is printed, the code
#     sleeps 10 seconds, and no retry is visible here.
#   * If bibCount is 0, prints that no bibs were updated.
#   * Returns bibCount.
#
# Entry point (`if __name__ == '__main__':`):
#   * Calls processFilesInDirectory(HORIZON_BASE_DIR).
#   * When DO_OPTIMIZE is true, prints a message and calls
#     solrConnection.optimize().
#   * Calls facetWarmer.warmFacets().
#     NOTE(review): whether this call, and the "# csdebug" marker before it,
#     sits inside the DO_OPTIMIZE branch cannot be determined from the
#     flattened text.
recordBatch = [] print("+%s+" % bibOn), # now do last batch if len(recordBatch) > 0: data = u''.join(recordBatch) resp = solrConnection.postURL(SOLR_UPDATE_URL, "<add>%s</add>" % data) if bibCount > 0: print "\n[%s] done updating bibs, now committing" % time.ctime() try: if commitNonblocking: solrConnection.commitNonblocking() else: solrConnection.commit() except IOError: print "Connection reset when talking to Solr, skipping this commit and sleeping 10 sec." time.sleep(10) print "[%s] done committing" % time.ctime() else: print "[%s] no bibs updated, exiting" % time.ctime() return bibCount if __name__ == '__main__': processFilesInDirectory(HORIZON_BASE_DIR) # finally, do an optimize here if DO_OPTIMIZE: print "starting final optimize" solrConnection.optimize() # csdebug facetWarmer.warmFacets()
# Fragment: tail of a file-indexing function (its header is not visible
# here) followed by the script entry point.  Python 2 syntax.
#
# NOTE(review): this fragment has been flattened onto one physical line, so
# the original indentation is lost; the nesting described here is inferred
# from statement order -- confirm against an unflattened copy.
#
# Function tail:
#   * Wraps `data` in <add>...</add> and POSTs it to SOLR_UPDATE_URL via
#     solrConnection.postURL.
#   * If the response text contains '<result status="1"', prints an error
#     message that includes the full response.
#   * Prints "committing...", then calls solrConnection.commitNonblocking()
#     when `nonblocking` is true, otherwise solrConnection.commit().
#   * Closes inStream and returns count.
#   NOTE(review): nothing visible here stops the commit after a POST error
#   has been reported -- confirm that committing anyway is intended.
#
# Entry point (`if __name__ == '__main__':`):
#   * Builds an AnselToUnicode converter.
#   * Requires len(sys.argv) == 2, i.e. exactly one command-line argument,
#     which is passed to processFile together with the converter.  Otherwise
#     prints a usage message; sys.exit(1) presumably belongs to that branch.
#   * Then calls optimize(), and afterwards calls
#     facetWarmer.warmFacets(server=SOLR_QUERY_URL) three times (the inline
#     comment says repeated warming appears to improve performance).
#   NOTE(review): `from facetWarmer import *` does not by itself bind the
#   name `facetWarmer`, so the `facetWarmer.warmFacets` call depends on that
#   name being provided some other way -- verify.
#   NOTE(review): the inline comment ending "used to do a ran" is cut off in
#   the source as received.
resp = solrConnection.postURL(SOLR_UPDATE_URL, "<add>%s</add>" % data) if resp.find('<result status="1"') > -1: print "\nError POSTing documents! Response from Solr was\n\n%s\n\n" % resp print "committing..." if nonblocking: solrConnection.commitNonblocking() else: solrConnection.commit() inStream.close() return count if __name__ == '__main__': anselUnicodeConverter = AnselToUnicode() # 2 arguments to command line are used to do a ran if len(sys.argv) == 2: processFile(sys.argv[1], anselUnicodeConverter) else: print "incorrect usage -- specify file to be processed." sys.exit(1) print "done indexing, now optimizing" optimize() print "done optimizing, now warming facets" from facetWarmer import * for i in range(3): # running warmFacets more than once appears to improve performance. facetWarmer.warmFacets(server=SOLR_QUERY_URL) print "all done!"