doInitialWarming = int( INCREMENTAL_INDEXER_INITIAL_WARMING)
 deleteAfterIndexing = int( DELETE_MARC_RECORDS_FROM_INDEX_QUEUE )
 
 # NOTE: we *cannot* use a job scheduler like Quartz here because we don't want overlapping jobs or jobs building up.
 lastRunTime = lastOptimizeTime = lastFacetWarmTime = time.time()
 firstTime = 1
 keepgoing = 1
 count = 0
 startDate = int( time.strftime("%d") )
 while keepgoing:
     now = time.time()
     ## 1st do incremental indexing if nec.
     if firstTime or ( (now - lastRunTime) >= checkInterval ):
         if firstTime and doInitialWarming:
             #a. warm facets if the very first time it has run
             facetWarmer.warmFacets()
             lastFacetWarmTime = time.time()
         # b. check for PMS/MARC files in indexer-queue directory and handle if necessary.
         # NOTE: it only does this check the first time through, so setting deleteAfterIndexing to false
         # will not cause the same files to keep getting reindexed.
         count += horizonIncrementalIndexer.processFilesInDirectory("./indexer-queue", anselUnicodeConverter = converter, numThreads=numThreads, deleteAfterIndexing=deleteAfterIndexing)
         print "[%s] done processing indexer-queue contents" % time.ctime()
         
         # c. do status changes.
         if doItemStatusIndexing:
             count += horizonIncrementalIndexer.incrementalIndexingJob(commitNonblocking=1)    
         lastRunTime = time.time()
         firstTime = 0
     else:
         print "not time to run yet, last ran %.4f ago" % (now-lastRunTime)
     ## 2nd do optimize if nec.
        resp = solrConnection.postURL(SOLR_UPDATE_URL, "<add>%s</add>" % data)
        if resp.find('<result status="1"') > -1:
            print "\nError POSTing documents!  Response from Solr was\n\n%s\n\n" % resp
    print "committing..."
    if nonblocking:
        solrConnection.commitNonblocking()
    else:
        solrConnection.commit()
    inStream.close()
    return count


if __name__ == "__main__":
    # Command-line entry point: run processFile() on the file named on the
    # command line, call optimize(), then warm the facets.
    anselUnicodeConverter = AnselToUnicode()

    # Exactly one command-line argument (the file to process) is required;
    # anything else is reported as a usage error and exits with status 1.
    if len(sys.argv) != 2:
        print("incorrect usage -- specify file to be processed.")
        sys.exit(1)
    processFile(sys.argv[1], anselUnicodeConverter)

    print("done indexing, now optimizing")
    optimize()

    print("done optimizing, now warming facets")
    # NOTE(review): a star import does not itself bind the name
    # `facetWarmer`; the calls below rely on that name being available from
    # elsewhere (an earlier import, or a name the module exports) -- confirm.
    from facetWarmer import *

    # running warmFacets more than once appears to improve performance.
    for warmPass in range(3):
        facetWarmer.warmFacets(server=SOLR_QUERY_URL)

    print("all done!")
Ejemplo n.º 3
0
            recordBatch = []
        print("+%s+" % bibOn),
    # now do last batch
    if len(recordBatch) > 0:
        data = u''.join(recordBatch)
        resp = solrConnection.postURL(SOLR_UPDATE_URL, "<add>%s</add>" % data)
    if bibCount > 0:
        print "\n[%s] done updating bibs, now committing" % time.ctime()
        try:
            if commitNonblocking:
                solrConnection.commitNonblocking()
            else:
                solrConnection.commit()
        except IOError:
            print "Connection reset when talking to Solr, skipping this commit and sleeping 10 sec."
            time.sleep(10)
        print "[%s] done committing" % time.ctime()
    else:
        print "[%s] no bibs updated, exiting" % time.ctime()
    return bibCount


if __name__ == '__main__':
    # Script entry point: hand HORIZON_BASE_DIR to processFilesInDirectory(),
    # then optionally finish with an optimize and a facet warm-up.
    processFilesInDirectory(HORIZON_BASE_DIR)

    # The final optimize, and the facet warming that follows it, only run
    # when DO_OPTIMIZE is truthy.
    if DO_OPTIMIZE:
        print("starting final optimize")
        solrConnection.optimize()  # csdebug
        facetWarmer.warmFacets()
Ejemplo n.º 4
0
        resp = solrConnection.postURL(SOLR_UPDATE_URL, "<add>%s</add>" % data)
        if resp.find('<result status="1"') > -1:
            print "\nError POSTing documents!  Response from Solr was\n\n%s\n\n" % resp
    print "committing..."
    if nonblocking:
        solrConnection.commitNonblocking()
    else:
        solrConnection.commit()
    inStream.close()
    return count


if __name__ == '__main__':
    # Script entry point: process the one file given on the command line,
    # then call optimize() and warm the facets.
    anselUnicodeConverter = AnselToUnicode()

    # The script takes a single argument, the file to be processed. Any
    # other argument count prints a usage message and exits with status 1.
    if len(sys.argv) != 2:
        print("incorrect usage -- specify file to be processed.")
        sys.exit(1)
    processFile(sys.argv[1], anselUnicodeConverter)

    print("done indexing, now optimizing")
    optimize()

    print("done optimizing, now warming facets")
    # NOTE(review): `from facetWarmer import *` does not bind the module name
    # `facetWarmer` on its own; the loop below assumes that name is already
    # in scope (or is exported by the module) -- confirm.
    from facetWarmer import *

    # running warmFacets more than once appears to improve performance.
    for warmRound in range(3):
        facetWarmer.warmFacets(server=SOLR_QUERY_URL)

    print("all done!")