def sciencedata(self):
    """Query the local Solr cores for science-data collections.

    Returns a list of dicts (one per collection) with display metadata plus
    a ``datasetcount`` computed via ``self.countDatasets``, sorted by
    collection name.
    """
    sciencedata_prefix = "https://labcas-dev.jpl.nasa.gov/collections/collections/"
    results = []
    solr_collection = Solr(base_url='http://localhost:8983/solr/collections', version=4)
    solr_dataset = Solr(base_url='http://localhost:8983/solr/datasets', version=4)
    collection_query = {'q': '*:*'}
    collection_response = solr_collection.search(**collection_query)
    for obj in collection_response.documents:
        # Skip collection documents missing either field we key on.
        if obj.get("CollectionName") and obj.get("id"):
            dataset_query = {'q': '*:*', 'fq': "CollectionId='{}'".format(obj.get("id"))}
            dataset_response = solr_dataset.search(**dataset_query)
            datasetcount = self.countDatasets(dataset_response.documents)
            results.append(dict(
                collectionname=obj["CollectionName"],
                description=obj.get("CollectionDescription", "None"),
                url=sciencedata_prefix + obj["id"],
                leadpi=obj.get("LeadPI", ["None"]),
                organ=obj.get("OrganSite", ["No Organ info"]),
                discipline=obj.get("Discipline", ["None"]),
                protocol=obj.get("ProtocolId", ["None"]),
                qastate=obj.get("QAState", ["None"]),
                species=obj.get("Species", ["None"]),
                datasetcount=datasetcount,
            ))
    # BUG FIX: sort(cmp=...) and the cmp() builtin were removed in Python 3;
    # a key function is equivalent here and works on both 2 and 3.
    results.sort(key=lambda entry: entry['collectionname'])
    return results
def test_search_persistent(self):
    """A persistent (keep-alive) connection must serve repeated GET searches."""
    solr = Solr(os.getenv('SOLR_URL'), persistent=True, use_get=True)
    # BUG FIX: xrange() does not exist in Python 3; range(10) is equivalent
    # here (tiny, fully consumed loop) and works on both versions.
    for _ in range(10):
        response = solr.search(q='*:*')
        self.assertEqual(response.status, 200)
        self.assertEqual(response.total_results, 4563722)
        self.assertEqual(len(response.documents), 10)
def query_solr():
    """Translate the posted JSON request body into a Solr query and return
    the matching documents serialized as JSON."""
    q = parse_json(json.loads(request.data))
    client = Solr("http://52.76.188.127:8983/solr/clickstream_event_shard1_replica1/")
    return json.dumps(client.search(q=q).documents)
class VIVOService(object):
    """Thin wrapper around the VIVO Solr index for name-lookup queries."""

    def __init__(self):
        from mysolr import Solr
        self.solr = Solr(get_env('SOLR_URL'))

    def get(self, query, class_type):
        """Search ``acNameStemmed`` for ``query``, restricted to ``class_type``.

        Returns a list of ``{uri, id, text}`` dicts suitable for a picker
        widget; ``text`` is "<name> - <preferred title>".
        """
        # Will use acNameStemmed for now.  Can construct a more intelligent
        # query later if necessary.
        params = {
            'q': u'acNameStemmed:{0} type:{1}'.format(query, class_type),
            'fl': 'URI,nameRaw,PREFERRED_TITLE',
            'rows': 20,
        }
        response = self.solr.search(**params)
        # Massage the Solr response into the shape the caller expects.
        return [
            {
                'uri': doc['URI'],
                'id': doc['URI'],
                'text': "{} - {}".format(
                    doc['nameRaw'][0],
                    doc['PREFERRED_TITLE'][0]
                ),
            }
            for doc in response.documents
        ]
class call_number_app(object):
    """Client for the Aristotle Library Apps call-number app's JSON interface."""

    def __init__(self, **kwargs):
        """ The `call_number_app` takes a number of optional
        parameters including an URL where the Aristotle Library Apps
        instance is currently running.

        :param url: URL of Aristotle Library Apps path to the call number app,
                    defaults to http://0.0.0.0/apps/call_number/json/.
        """
        # dict.has_key() was removed in Python 3; `in` is equivalent.
        if "url" in kwargs:
            self.call_number_url = kwargs.get("url")
        else:
            self.call_number_url = "http://0.0.0.0/apps/call_number/json/"
        self.solr = Solr(base_url=settings.SOLR_URL)

    def json_search(self, request):
        """ Performs a call number search using JSON interface to
        the call number app. Results are returned as JSON.

        :param request: Django request
        """
        call_number = request.REQUEST.get('q')
        if "number_type" in request.REQUEST:
            number_type = request.REQUEST.get('number_type')
        else:
            number_type = 'lccn'
        context = {'docs': None}
        json_search_url = os.path.join(self.call_number_url, 'term_search')
        json_search_url = "{0}?call_number={1}&slice-size={2}&type={3}".format(
            json_search_url,
            call_number.strip(),
            int(settings.ITEMS_PER_PAGE) - 3,
            number_type)
        # BUG FIX: the original fetched the URL twice (once into an unused
        # variable, once for json.load); fetch it once and parse that payload.
        results = json.loads(urllib2.urlopen(json_search_url).read())
        bib_numbers = results.get("bib_numbers")
        if len(bib_numbers) > 0:
            context['docs'] = []
            for bib_num in bib_numbers:
                query = {"q": bib_num, "qt": "dismax", "fl": "*"}
                response = self.solr.search(**query)
                for doc in response.documents:
                    context["docs"].append(doc)
            # Iterate through and create record_urls
            for doc in context['docs']:
                doc['record_url'] = settings.CATALOG_RECORD_URL.format(doc['id'])
        context['current_sort'] = None
        context['sorts'] = [x[0] for x in settings.SORTS]
        context['start_number'] = 1
        # BUG FIX: the original computed min(results, ITEMS_PER_PAGE) on the
        # whole results *dict*; the intent is the size of the current slice.
        context['end_number'] = min(len(bib_numbers), settings.ITEMS_PER_PAGE)
        return context
def solr_search(self, query):
    """Do the solr search and pass back results.

    :param query: dict of Solr query parameters, passed straight to mysolr.
    :returns: {PDB_ID_UPPERCASED: {'description': first molecule name}}
    """
    output_dict = {}
    # Setup connections
    solr = Solr(self.server, version=4)
    # UNLIMITED_ROWS = 10000000 # necessary because default in mysolr is mere 10
    # Run the search
    search_results = solr.search(**query)
    # Format results.  ROBUSTNESS FIX: skip documents missing either field
    # instead of crashing on None (.upper() / [0] on a missing value).
    for pdb in search_results.documents:
        pdb_id = pdb.get('pdb_id')
        names = pdb.get('molecule_name')
        if not pdb_id or not names:
            continue
        output_dict[pdb_id.upper()] = {'description': names[0]}
    return output_dict
def getSingleObjects(id_list, start):
    # Python 2 code (print statements).
    # Collects the ids of all discoverable Fedora objects from Solr into
    # id_list and hands them to writeSitemapXML.
    # NOTE(review): the `start` parameter is never used here (the query is
    # hard-coded to start=0) — confirm against callers.
    smCount = 1   # sitemap sequence number passed to writeSitemapXML
    tcount = 0    # running count of ids appended
    solr = Solr('http://localhost:8080/solr4/fedobjs')
    # One bulk query: up to 50000 discoverable objects, fetching only `id`.
    query = {'q' : 'rels_isDiscoverable:True',
             'fl' : 'id',
             'rows' : 50000,
             'start' : 0}
    response = solr.search(**query)
    print "Num Results:", response.total_results
    for each in response.documents:
        # print "adding:",each['id']
        id_list.append(each['id'])
        tcount += 1
    print "Writing", tcount, "results..."
    writeSitemapXML(id_list, smCount)
def solr_search(self, query):
    """Do the solr search and pass back results.

    :param query: dict of Solr query parameters, passed straight to mysolr.
    :returns: {PDB_ID_UPPERCASED: {'description': first molecule name}}
    """
    output_dict = {}
    # Setup connections
    solr = Solr(self.server, version=4)
    # UNLIMITED_ROWS = 10000000 # necessary because default in mysolr is mere 10
    # Run the search
    search_results = solr.search(**query)
    # Format results.  ROBUSTNESS FIX: skip documents missing either field
    # instead of crashing on None (.upper() / [0] on a missing value).
    for pdb in search_results.documents:
        pdb_id = pdb.get('pdb_id')
        names = pdb.get('molecule_name')
        if not pdb_id or not names:
            continue
        output_dict[pdb_id.upper()] = {'description': names[0]}
    return output_dict
def _readLabcasSolr(self, labcasurl, labcas_sourceurl_prefix):
    u'''Fetch every document from the LabCAS Solr core at ``labcasurl`` and
    return them in a dictionary keyed by document ``id``.  Each document is
    annotated in place with a ``sourceurl`` field built by prepending
    ``labcas_sourceurl_prefix`` to its id.'''
    connection = Solr(base_url=labcasurl, version=4)
    response = connection.search(q='*:*')
    by_id = {}
    for document in response.documents:
        doc_id = document.get("id")
        document['sourceurl'] = labcas_sourceurl_prefix + doc_id
        by_id[doc_id] = document
    return by_id
# Python 2 command-line script: full-text search over the fashion_ip corpus
# with a concordance display of stemmed matches.
sys.exit(1)  # NOTE(review): presumably guarded by a failed-connection check above this fragment — confirm
db = db_conn['fashion_ip']
# Connection to Solr for faster full text searching
solr = Solr('http://localhost:8080/solr')
qstring = sys.argv[1]  # search term from the command line
# NOTE(review): pir_re appears unused in this fragment — confirm downstream use.
pir_re = re.compile(r'.* ' + qstring + '.*', re.IGNORECASE)
porter = nltk.PorterStemmer()
# Scan one year at a time so each year's hits are printed together.
for year in range(1900,2013):
    print '\nYEAR: ', year
    response = solr.search(q=qstring + ' year:' + str(year), fl='_id,score', rows=10000, start=0)
    documents = response.documents
    for doc in documents:
        # Pull the full text and summary for each Solr hit from MongoDB.
        ref = db.docs.find_one({'_id':ObjectId(doc['_id'])},{'content':True,'ref_summary':True})
        s = ref['content']
        print
        print ref['ref_summary']
        tokens = nltk.word_tokenize(s.lower().encode('utf-8'))
        # Stemmed version
        text = IndexedText(porter, tokens)
        text.concordance(qstring)
class QueryResultTestCase(unittest.TestCase):
    """Integration tests for mysolr against a live local Solr holding
    exactly 4 documents (the hard-coded totals below assume that fixture)."""

    def setUp(self):
        self.solr = Solr('http://localhost:8983/solr')

    def test_search(self):
        # A match-all query must report every indexed document.
        response = self.solr.search(q='*:*')
        self.assertEqual(response.status, 200)
        self.assertEqual(response.total_results, 4)
        self.assertEqual(len(response.documents), 4)

    def test_search_cursor(self):
        # Fetching 1 row per round trip should need 4 iterations...
        cursor = self.solr.search_cursor(q='*:*')
        i = 0
        for response in cursor.fetch(1):
            self.assertEqual(response.status, 200)
            i += 1
        self.assertEqual(i, 4)
        # ...and fetching 4 rows at once exactly one.
        cursor = self.solr.search_cursor(q='*:*')
        i = 0
        for response in cursor.fetch(4):
            self.assertEqual(response.status, 200)
            i += 1
        self.assertEqual(i, 1)

    def test_commit(self):
        response = self.solr.commit()
        self.assertEqual(response.status, 200)

    def test_optimize(self):
        response = self.solr.optimize()
        self.assertEqual(response.status, 200)

    def test_ping(self):
        response = self.solr.ping()
        self.assertEqual(response.status, 200)

    def test_is_up(self):
        response = self.solr.is_up()
        self.assertEqual(response, True)

    def test_update_delete(self):
        # Get total results before modifying the index.
        response = self.solr.search(q='*:*')
        self.assertEqual(response.status, 200)
        total_results = response.total_results
        # Post one document using json
        documents = [{'id' : 1}]
        response = self.solr.update(documents, input_type='json')
        self.assertEqual(response.status, 200)
        # Post another document using xml
        documents = [{'id' : 2}]
        response = self.solr.update(documents, input_type='xml')
        self.assertEqual(response.status, 200)
        # Compare total results: both posts should now be visible.
        response = self.solr.search(q='*:*')
        self.assertEqual(response.status, 200)
        self.assertEqual(response.total_results, total_results + 2)
        # Now delete the two documents posted above — one by query, one by key.
        query = 'id:1'
        key = 2
        response = self.solr.delete_by_query(query)
        self.assertEqual(response.status, 200)
        response = self.solr.delete_by_key(key)
        self.assertEqual(response.status, 200)
        response = self.solr.search(q='*:*')
        self.assertEqual(response.status, 200)
        self.assertEqual(response.total_results, total_results)

    def tearDown(self):
        pass

    def test_query(self):
        pass
db = db_conn['fashion_ip'] # Connection to Solr for faster full text searching solr = Solr('http://localhost:8080/solr') qstring = sys.argv[1] pir_re = re.compile(r'.* ' + qstring + '.*', re.IGNORECASE) porter = nltk.PorterStemmer() for year in range(1900, 2013): print '\nYEAR: ', year response = solr.search(q=qstring + ' year:' + str(year), fl='_id,score', rows=10000, start=0) documents = response.documents for doc in documents: ref = db.docs.find_one({'_id': ObjectId(doc['_id'])}, { 'content': True, 'ref_summary': True }) s = ref['content'] print print ref['ref_summary'] tokens = nltk.word_tokenize(s.lower().encode('utf-8'))
class eBsolr:
    """Convenience wrapper around a mysolr.Solr connection.

    Python 2 code: uses ``basestring`` and ``except Exception, e`` syntax.
    """

    # Shared mysolr.Solr handle; set per instance in __init__.
    cursor = None

    def __init__(self, urls, config, version=4):
        # NOTE(review): ``config`` is accepted but never used here — confirm callers.
        self.cursor = Solr(urls, version=version)

    def update(self, documents, input_type='json', commit=False):
        # Push documents into the index; mysolr handles the encoding.
        self.cursor.update(documents, input_type, commit)

    def deleteById(self, tid, commit=False):
        return self.cursor.delete_by_key(tid, commit=commit)

    def deleteByQuery(self, query, commit=False):
        return self.cursor.delete_by_query(query=query, commit=commit)

    def deleteAll(self, commit=False):
        # Match-all delete wipes the entire core.
        return self.cursor.delete_by_query("*:*", commit=commit)

    def getResponse(self, search, fields=None, start=0, rows=None, sort=None, fq=None):
        """Run a Solr search and return the raw mysolr response.

        When ``rows`` is None the first pass is capped at _MAXROWS; if more
        results exist, the query is re-issued with rows=total_results so
        *all* matches come back.  Raises Exception on Solr status >= 400.
        """
        query = {'q': search}
        if fields:
            # Accept either a comma-joined string or a list of field names.
            if isinstance(fields, basestring):
                query['fl'] = fields
            else:
                query['fl'] = ",".join(fields)
        if sort:
            query['sort'] = sort
        if fq:
            query['fq'] = fq
        # Default to 10000 rows
        limit = rows
        if rows is None:
            limit = _MAXROWS
        query['start'] = start
        query['rows'] = limit
        response = self.cursor.search(**query)
        if int(response.status) >= 400:
            raise Exception('Error Solr {}: {}'.format(response.status, response.extract_errmessage()))
        if rows is None and response.total_results > limit:
            # query['start'] = response.total_results
            query['rows'] = response.total_results
            response = self.cursor.search(**query)
        return response

    def get_language_query(self, language):
        """Build an OR'ed ``language:`` filter from a ';'-separated string;
        returns None when ``language`` is empty or None."""
        q_temp = None
        if language is not None and language != "":
            langArray = language.split(';')
            if len(langArray) > 0:
                lang = langArray[0]
                q_temp = "language:%s" % lang
                for lang in langArray[1:]:
                    q_temp = "%s OR language:%s" % (q_temp, lang)
        return q_temp

    def getDocs(self, search, fields=None, start=0, rows=None, sort=None, fq=None):
        """search: query syntax, e.g. "field:keys,field2:keys2"
        fields: fields to retrieve (list), e.g. ['field', 'field2']
        start: start row
        rows: max / limit rows
        sort: row ordering, e.g. "field asc, field2 desc"
        """
        # Get documents
        response = self.getResponse(search, fields, start, rows, sort, fq)
        return {"docs": response.documents, "count": response.total_results}

    def getFacetList(self, facets, facetField):
        # Extract the requested facet fields from a mysolr facets payload.
        ff = {}
        if not isinstance(facetField, list):
            facetField = facetField.split(",")
        for facet in facetField:
            if facet:
                ff[facet] = facets['facet_fields'][facet]
        return ff

    def getFacetPivotGeneral(self, query, facetField, pivotField, limit=None, fq=None):
        """Fetch facet-pivot counts for ``facetField,pivotField`` directly
        over HTTP with requests (pivot faceting bypasses mysolr); returns
        None on any error."""
        try:
            # NOTE(review): this first url is immediately overwritten below.
            url = "{0}select?q={1}&rows=1&wt=json&indent=true&facet=true&facet.pivot={2},{3}".format(
                self.cursor.base_url, query.replace("+", "%2B"), facetField, pivotField)
            url = '{}select'.format(self.cursor.base_url)
            params = {'q': query,
                      'rows': 0,
                      'wt': 'json',
                      'indent': 'true',
                      'facet': 'true',
                      'facet.pivot': '{},{}'.format(facetField, pivotField)}
            if limit:
                params['facet.limit'] = limit
            if fq:
                params['fq'] = fq
            # url = "%s&facet.limit=%d" % (url, limit)
            http_response = requests.get(url, params=params)
            # print url
            # http_response = requests.get(url)
            return http_response.json()['facet_counts']['facet_pivot']['{0},{1}'.format(facetField, pivotField)]
        except Exception, e:
            print("Error parsing facet pivot...")
            print e
            return None
from mysolr import Solr

# Connect to the "barcore" core.
solr = Solr("http://localhost:8983/solr/barcore")

# Any Solr parameter can be passed through to search(); parameters
# containing a dot are supplied via an unpacked dict.
response = solr.search(q='*:*', facet='true', **{'facet.field': 'zip'})

# Stamp a fixed rating onto every returned document...
for doc in response.documents:
    doc['rating'] = 2.0

# ...then write the modified batch back, committing immediately.
solr.update(response.documents, commit=True)
class QueryResultTestCase(unittest.TestCase):
    """Integration tests for mysolr against a live local Solr holding
    exactly 4 documents (the hard-coded totals below assume that fixture)."""

    def setUp(self):
        self.solr = Solr('http://localhost:8983/solr')

    def test_search(self):
        # A match-all query must report every indexed document.
        response = self.solr.search(q='*:*')
        self.assertEqual(response.status, 200)
        self.assertEqual(response.total_results, 4)
        self.assertEqual(len(response.documents), 4)

    def test_search_cursor(self):
        # Fetching 1 row per round trip should need 4 iterations...
        cursor = self.solr.search_cursor(q='*:*')
        i = 0
        for response in cursor.fetch(1):
            self.assertEqual(response.status, 200)
            i += 1
        self.assertEqual(i, 4)
        # ...and fetching 4 rows at once exactly one.
        cursor = self.solr.search_cursor(q='*:*')
        i = 0
        for response in cursor.fetch(4):
            self.assertEqual(response.status, 200)
            i += 1
        self.assertEqual(i, 1)

    def test_commit(self):
        response = self.solr.commit()
        self.assertEqual(response.status, 200)

    def test_optimize(self):
        response = self.solr.optimize()
        self.assertEqual(response.status, 200)

    def test_ping(self):
        response = self.solr.ping()
        self.assertEqual(response.status, 200)

    def test_is_up(self):
        response = self.solr.is_up()
        self.assertEqual(response, True)

    def test_update_delete(self):
        # Get total results before modifying the index.
        response = self.solr.search(q='*:*')
        self.assertEqual(response.status, 200)
        total_results = response.total_results
        # Post one document using json
        documents = [{'id': 1}]
        response = self.solr.update(documents, input_type='json')
        self.assertEqual(response.status, 200)
        # Post another document using xml
        documents = [{'id': 2}]
        response = self.solr.update(documents, input_type='xml')
        self.assertEqual(response.status, 200)
        # Compare total results: both posts should now be visible.
        response = self.solr.search(q='*:*')
        self.assertEqual(response.status, 200)
        self.assertEqual(response.total_results, total_results + 2)
        # Now delete the two documents posted above — one by query, one by key.
        query = 'id:1'
        key = 2
        response = self.solr.delete_by_query(query)
        self.assertEqual(response.status, 200)
        response = self.solr.delete_by_key(key)
        self.assertEqual(response.status, 200)
        response = self.solr.search(q='*:*')
        self.assertEqual(response.status, 200)
        self.assertEqual(response.total_results, total_results)

    def tearDown(self):
        pass

    def test_query(self):
        pass
from mysolr import Solr

# Connect to the "barcore" core.
solr = Solr("http://localhost:8983/solr/barcore")

# All Solr params are supported; dotted parameter names are supplied
# through an unpacked dict since they are not valid keyword names.
response = solr.search(q='*:*', facet='true', **{'facet.field': 'zip'})

# Overwrite the rating field on each hit...
for doc in response.documents:
    doc['rating'] = 2.0

# ...and push the whole modified set back with an immediate commit.
solr.update(response.documents, commit=True)
class QueryResultTestCase(unittest.TestCase):
    """Integration tests for mysolr against the Solr instance named by the
    SOLR_URL environment variable; assumes an index of exactly 4 documents."""

    def setUp(self):
        self.solr = Solr(os.getenv("SOLR_URL"))

    def test_search(self):
        # A match-all query must report every indexed document.
        response = self.solr.search(q="*:*")
        self.assertEqual(response.status, 200)
        self.assertEqual(response.total_results, 4)
        self.assertEqual(len(response.documents), 4)

    def test_search_cursor(self):
        # Fetching 1 row per round trip should need 4 iterations...
        cursor = self.solr.search_cursor(q="*:*")
        i = 0
        for response in cursor.fetch(1):
            self.assertEqual(response.status, 200)
            i += 1
        self.assertEqual(i, 4)
        # ...and fetching 4 rows at once exactly one.
        cursor = self.solr.search_cursor(q="*:*")
        i = 0
        for response in cursor.fetch(4):
            self.assertEqual(response.status, 200)
            i += 1
        self.assertEqual(i, 1)

    def test_commit(self):
        response = self.solr.commit()
        self.assertEqual(response.status, 200)

    def test_optimize(self):
        response = self.solr.optimize()
        self.assertEqual(response.status, 200)

    def test_ping(self):
        response = self.solr.ping()
        self.assertEqual(response.status, 200)

    def test_is_up(self):
        response = self.solr.is_up()
        self.assertEqual(response, True)

    def test_update_delete(self):
        # Get total results before modifying the index.
        response = self.solr.search(q="*:*")
        self.assertEqual(response.status, 200)
        total_results = response.total_results
        # Post one document using json
        documents = [{"id": 1}]
        response = self.solr.update(documents, input_type="json")
        self.assertEqual(response.status, 200)
        # Post another document using xml
        documents = [{"id": 2}]
        response = self.solr.update(documents, input_type="xml")
        self.assertEqual(response.status, 200)
        # Compare total results: both posts should now be visible.
        response = self.solr.search(q="*:*")
        self.assertEqual(response.status, 200)
        self.assertEqual(response.total_results, total_results + 2)
        # Now delete the two documents posted above — one by query, one by key.
        query = "id:1"
        key = 2
        response = self.solr.delete_by_query(query)
        self.assertEqual(response.status, 200)
        response = self.solr.delete_by_key(key)
        self.assertEqual(response.status, 200)
        response = self.solr.search(q="*:*")
        self.assertEqual(response.status, 200)
        self.assertEqual(response.total_results, total_results)

    def tearDown(self):
        pass

    def test_query(self):
        pass
solr = Solr('http://foo.bar:9090/solr/') # If the server is secured with HTTP basic authentication you can connect by using auth parameter. from mysolr import Solr solr = Solr(auth=('admin', 'admin')) #Further information about auth parameter in requests docs #Queriying to Solr #Making a query to Solr is very easy, just call search method with your query. from mysolr import Solr solr = Solr() # Search for all documents response = solr.search(q='*:*') # Get documents documents = response.documents #Besides, all available Solr query params are supported. So making a query using pagination would be as simple as from mysolr import Solr solr = Solr() # Get 10 documents response = solr.search(q='*:*', rows=10, start=0) #Some parameters contain a period. In those cases you have to use a dictionary to build the query: from mysolr import Solr solr = Solr()
def query_solr(query):
    """Run ``query`` against the default Solr instance and return the
    matching documents."""
    return Solr().search(q=query).documents
#coding:utf8 from mysolr import Solr solr = Solr('http://localhost:8983/solr/shinsho_jawiki') # all available Solr query params are supported response = solr.search(q='*:*') response = solr.search(q='*:*', rows=10, start=0) query = {'q' : '*:*', 'facet' : 'true', 'facet.field' : 'foo'} response = solr.search(**query) query = {'q' : '*:*', 'facet' : 'true', 'facet.field' : ['foo', 'bar']} # リスト response = solr.search(**query) documents = response.documents # Concurrent searchs # As mysolr is using requests, it is posible to make concurrent queries thanks to grequest # See installation section for further information about how to install this feature. queries = [ { 'q' : '*:*' }, { 'q' : 'foo:bar' } ] # using 10 threads responses = solr.async_search(queries, size=10)
def get_solr_count(query):
    """Return the total number of documents matching ``query`` (a dict of
    Solr search parameters) on the configured SOLR_SERVER."""
    return Solr(SOLR_SERVER).search(**query).total_results