def run(solr_url, query, repetitions=1):
    """Run one or more queries against ``solr_url`` and print timing stats.

    Args:
        solr_url: URL handed to ``req`` for every search request.
        query: Either a path to an existing file (its queries are then read
            with ``load_queries``) or a single query string.
        repetitions: How many times the whole query set is executed;
            coerced with ``int()`` so string values are accepted.

    For every successful response (``responseHeader.status == 0``) the
    ``QTime`` and ``numFound`` values are recorded in a ``DataPoint`` keyed
    by the query. Failed responses are logged and skipped. At the end a
    tab-separated header and one line per query are printed, ordered by
    ``DataPoint.data[0]``.
    """
    repetitions = int(repetitions)

    if os.path.exists(query):
        queries = load_queries(query)
        log.info("Loaded %s queries from: %s" % (len(queries), query))
    else:
        queries = [query]

    results = {}
    for iteration in range(repetitions):
        log.info("Starting iteration: #%s" % iteration)
        for q in queries:
            log.info("%s" % q)
            rsp = req(solr_url, q=q, rows=0)

            header = rsp['responseHeader']
            # `in` replaces dict.has_key(), which no longer exists in Python 3.
            if 'status' not in header or header['status'] != 0:
                log.error("Error searching: %s" % str(rsp))
                continue

            qtime = header['QTime']
            num_found = rsp['response']['numFound']
            if q in results:
                results[q].add(qtime, num_found)
            else:
                results[q] = DataPoint(q, qtime, num_found)

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("%s\t%s\t%s\t%s\t%s\t%s\t%s" % (
        "Query", "QTime", "numFound", "minQTime", "maxQTime",
        "#invocations", "return consistent"))

    for point in sorted(results.values(), key=lambda p: p.data[0]):
        print(str(point))
def _wait_until_doctor_idle(doctor_url, start, now, max_time, delay):
    """Poll the doctor handler until it reports 'idle' or time runs out.

    Sends ``command="info"`` to ``doctor_url`` while ``now - start`` is below
    ``max_time``, sleeping ``delay`` seconds between polls.

    Returns:
        The last timestamp taken, so the caller can keep sharing one time
        budget across several waits.
    """
    while (now - start) < max_time:
        rsp = req(doctor_url, command="info")
        if rsp['status'] == 'idle':
            break
        now = time.time()
        time.sleep(delay)
    return now


def recreate_index(solr_url,
                   max_time=3600,
                   delay=5,
                   handler_name='/invenio/update',
                   maximport=500,
                   batchsize=2000,
                   startfrom=-1,
                   inveniourl='python://search',
                   importurl='/invenio/import?command=full-import&dirs=',
                   updateurl='/invenio/import?command=full-import&dirs=',
                   deleteurl='blankrecords',
                   doctor_handler='/invenio-doctor'
                   ):
    """Drive a full index (re)build through the update and doctor handlers.

    Steps, all bounded by one shared ``max_time`` budget (seconds, measured
    from the start of this call):

    1. Send an initial request to ``solr_url + handler_name`` with
       ``last_recid=startfrom``.
    2. Repeatedly call the same handler with a fresh ``idtoken``. The loop
       stops when the response has no ``idtoken`` key; when the response
       carries a different token the call sleeps ``delay`` seconds and
       retries (presumably the handler is still busy - confirm against the
       handler implementation). Each matching response counts as one round
       of ``batchsize`` records (an estimate, not a value reported back).
    3. Start the doctor handler, wait until it is idle, log its
       ``detailed-info`` and commit.
    4. Run ``discover`` + ``start`` on the doctor, wait again, log
       ``show-missing`` and commit once more.

    Args:
        solr_url: Base URL; handler paths are appended to it.
        max_time: Overall time budget in seconds (coerced with ``int``).
        delay: Seconds to sleep between polls (coerced with ``int``).
        handler_name: Path of the update handler.
        maximport, batchsize, inveniourl, importurl, updateurl, deleteurl:
            Passed through unchanged as request parameters (``batchsize``
            is coerced with ``int`` first).
        startfrom: Value sent as ``last_recid`` with the initial request.
        doctor_handler: Path of the doctor handler.
    """
    up_url = solr_url + handler_name
    doctor_url = solr_url + doctor_handler

    delay = int(delay)
    max_time = int(max_time)
    batchsize = int(batchsize)

    start = time.time()

    log.info("Starting index (re)build from the scratch")
    log.info((
        " solr_url=%s max_time=%s delay=%s handler_name=%s"
        " maximport=%s batchsize=%s inveniourl=%s importurl=%s"
        " updateurl=%s deleteurl=%s startfrom=%s doctor_handler=%s "
    ) % (solr_url, max_time, delay, handler_name, maximport, batchsize,
         inveniourl, importurl, updateurl, deleteurl, startfrom,
         doctor_handler))

    params = dict(maximport=maximport,
                  batchsize=batchsize,
                  inveniourl=inveniourl,
                  importurl=importurl,
                  updateurl=updateurl,
                  deleteurl=deleteurl)

    # Initial request carrying the starting record id.
    req(up_url, last_recid=startfrom, **params)

    rounds_done = 0  # renamed from `round` to avoid shadowing the builtin
    recs = 0
    last_round = time.time()
    now = time.time()
    attempt = 0
    while (now - start) < max_time:
        attempt += 1
        idtoken = '#%s' % attempt
        rsp = req(up_url, idtoken=idtoken, **params)
        now = time.time()

        if 'idtoken' not in rsp:
            break
        if rsp['idtoken'] != idtoken:
            time.sleep(delay)
            continue

        rounds_done += 1
        recs += batchsize

        stamp = time.time()
        elapsed = stamp - start
        # Guard the average against a zero elapsed time (coarse clocks).
        rate = recs / elapsed if elapsed > 0 else 0.0
        log.info('Indexing (round/recs/last-round-s/total-s/avg-recs-per-sec): %s./%s/%.3f/%.3f/%.3f'
                 % (rounds_done, recs, stamp - last_round, elapsed, rate))
        last_round = time.time()

    log.info('Stopped at round: %s, total time: %s' % (rounds_done, time.time() - start))

    req(doctor_url, command="start")
    time.sleep(1)
    now = _wait_until_doctor_idle(doctor_url, start, now, max_time, delay)

    rsp = req(doctor_url, command="detailed-info")
    log.info("Indexing finished, here is status info from: %s" % doctor_url)
    log.info(pprint.pformat(rsp, indent=2, width=200))

    req(solr_url + "/update", commit="true")

    req(doctor_url, command="discover")
    req(doctor_url, command="start")
    time.sleep(1)
    _wait_until_doctor_idle(doctor_url, start, now, max_time, delay)

    rsp = req(doctor_url, command="show-missing")
    log.info("Did we get any missing records? %s" % doctor_url)
    log.info(pprint.pformat(rsp, indent=2, width=200))

    req(solr_url + "/update", commit="true")
    log.info("commit was called")