Exemple #1
0
def run(solr_url, query, repetitions=1):
    """Run one or more queries against ``solr_url`` and print their timings.

    Args:
        solr_url: URL handed to ``req`` for every search request.
        query: Either a path to an existing file (its queries are then read
            with ``load_queries``) or a single query string.
        repetitions: How many times the whole query set is executed;
            coerced with ``int()`` so a string value is accepted.

    Each request is sent with ``rows=0``. Responses whose
    ``responseHeader.status`` is missing or non-zero are logged as errors
    and skipped. For the remaining responses, ``QTime`` and ``numFound``
    are accumulated per query in a ``DataPoint``.

    Prints a tab-separated header line followed by ``str()`` of each
    ``DataPoint``, ordered by ``data[0]`` of the data point.
    """
    repetitions = int(repetitions)

    if os.path.exists(query):
        queries = load_queries(query)
        log.info("Loaded %s queries from: %s" % (len(queries), query))
    else:
        queries = [query]

    results = {}

    for i in range(repetitions):
        log.info("Starting iteration: #%s" % i)
        for q in queries:
            log.info("%s" % q)
            rsp = req(solr_url, q=q, rows=0)

            header = rsp['responseHeader']
            # `in` replaces dict.has_key(), which no longer exists on Python 3.
            if 'status' not in header or header['status'] != 0:
                log.error("Error searching: %s" % str(rsp))
                continue

            qtime = header['QTime']
            num_found = rsp['response']['numFound']
            if q in results:
                results[q].add(qtime, num_found)
            else:
                results[q] = DataPoint(q, qtime, num_found)

    # Single-argument print(...) behaves identically on Python 2 and 3.
    columns = ("Query", "QTime", "numFound", "minQTime", "maxQTime",
               "#invocations", "return consistent")
    print("\t".join(columns))

    # sorted() is stable, so ties keep the dict's iteration order exactly as
    # they did when sorting the (key, value) pairs.
    for point in sorted(results.values(), key=lambda p: p.data[0]):
        print(str(point))
Exemple #2
0
def recreate_index(solr_url, 
                   max_time=3600,
                   delay=5,
                   handler_name='/invenio/update',
                   maximport=500,
                   batchsize=2000,
                   startfrom=-1,
                   inveniourl='python://search',
                   importurl='/invenio/import?command=full-import&dirs=',
                   updateurl='/invenio/import?command=full-import&dirs=',
                   deleteurl='blankrecords',
                   doctor_handler='/invenio-doctor'
                   ):
    """Drive a full index rebuild through the update handler, then verify it.

    Sequence of requests (all issued through ``req``):

    1. One initial request to ``solr_url + handler_name`` with
       ``last_recid=startfrom``.
    2. Repeated requests to the same handler, each carrying a fresh
       ``idtoken`` ('#1', '#2', ...), until the response has no
       ``idtoken`` key or ``max_time`` seconds have elapsed. A response
       echoing a different token triggers a ``delay``-second sleep and a
       retry with the next token.
    3. ``start`` the doctor handler, wait for it to report ``idle``, log its
       ``detailed-info`` and commit via ``solr_url + "/update"``.
    4. ``discover`` + ``start`` on the doctor handler, wait for ``idle``
       again, log ``show-missing`` and commit once more.

    Args:
        solr_url: Base URL; handler paths are appended to it.
        max_time: Overall time budget in seconds (coerced with ``int()``).
        delay: Seconds to sleep between polls (coerced with ``int()``).
        handler_name: Path of the update handler.
        maximport, inveniourl, importurl, updateurl, deleteurl: Forwarded
            unchanged as request parameters to the update handler.
        batchsize: Forwarded to the update handler (after ``int()``) and
            also used to estimate the processed record count in the
            progress log (one batch is assumed per completed round).
        startfrom: Sent as ``last_recid`` on the initial request.
        doctor_handler: Path of the doctor handler.
    """
    up_url = solr_url + handler_name
    doctor_url = solr_url + doctor_handler

    delay = int(delay)
    max_time = int(max_time)
    batchsize = int(batchsize)

    start = time.time()

    log.info("Starting index (re)build from the scratch")
    log.info("""
    solr_url=%s
    max_time=%s
    delay=%s
    handler_name=%s
    maximport=%s
    batchsize=%s
    inveniourl=%s
    importurl=%s
    updateurl=%s
    deleteurl=%s
    startfrom=%s
    doctor_handler=%s
    """ % (solr_url, max_time, delay, handler_name, maximport, batchsize, inveniourl, importurl, updateurl,
           deleteurl, startfrom, doctor_handler))

    params = dict(maximport=maximport, batchsize=batchsize,
                  inveniourl=inveniourl, importurl=importurl,
                  updateurl=updateurl, deleteurl=deleteurl)

    # Initial request; the response is not inspected.
    req(up_url, last_recid=startfrom, **params)

    rounds_done = 0  # named to avoid shadowing the builtin round()
    recs = 0
    last_round = time.time()
    now = time.time()
    i = 0

    while (now - start) < max_time:
        i += 1
        idtoken = '#%s' % i

        rsp = req(up_url, idtoken=idtoken, **params)
        now = time.time()

        # A response without an idtoken ends the indexing phase.
        if 'idtoken' not in rsp:
            break

        # The handler answered with a different token: back off and retry
        # with the next token.
        if rsp['idtoken'] != idtoken:
            time.sleep(delay)
            continue

        rounds_done += 1
        recs += batchsize

        # Sample the clock once so all three figures refer to the same
        # instant, and guard the rate against a zero elapsed time.
        stamp = time.time()
        elapsed = stamp - start
        rate = recs / elapsed if elapsed > 0 else 0.0
        log.info('Indexing (round/recs/last-round-s/total-s/avg-recs-per-sec): %s./%s/%.3f/%.3f/%.3f' 
                 % (rounds_done, recs, stamp - last_round, elapsed, rate))

        last_round = stamp

    log.info('Stopped at round: %s, total time: %s' % (rounds_done, time.time() - start))

    # NOTE: both waits below draw on the same max_time budget as the indexing
    # loop; if that budget is already spent, they return without polling.
    req(doctor_url, command="start")
    time.sleep(1)
    now = _wait_for_doctor_idle(doctor_url, start, now, max_time, delay)

    rsp = req(doctor_url, command="detailed-info")
    log.info("Indexing finished, here is status info from: %s" % doctor_url)
    log.info(pprint.pformat(rsp, indent=2, width=200))

    req(solr_url + "/update", commit="true")

    req(doctor_url, command="discover")
    req(doctor_url, command="start")
    time.sleep(1)
    _wait_for_doctor_idle(doctor_url, start, now, max_time, delay)

    rsp = req(doctor_url, command="show-missing")
    log.info("Did we get any missing records? %s" % doctor_url)
    log.info(pprint.pformat(rsp, indent=2, width=200))

    req(solr_url + "/update", commit="true")
    log.info("commit was called")


def _wait_for_doctor_idle(doctor_url, start, now, max_time, delay):
    """Poll the doctor handler until it reports 'idle' or the budget runs out.

    ``now`` is the caller's last clock sample; the (possibly refreshed)
    sample is returned so consecutive waits share one time budget.
    """
    while (now - start) < max_time:
        rsp = req(doctor_url, command="info")
        if rsp['status'] == 'idle':
            break
        now = time.time()
        time.sleep(delay)
    return now