Ejemplo n.º 1
0
 def sciencedata(self):
     """Return summary records for the LabCAS collections, sorted by name.

     Queries the local ``collections`` Solr core with a match-all query.
     For every returned document that has both a ``CollectionName`` and an
     ``id``, the ``datasets`` core is queried with a filter on that id and
     the matching documents are handed to ``self.countDatasets``.

     Returns:
         A list of dicts with the keys ``collectionname``, ``description``,
         ``url``, ``leadpi``, ``organ``, ``discipline``, ``protocol``,
         ``qastate``, ``species`` and ``datasetcount``, ordered by
         ``collectionname``.
     """
     sciencedata_prefix = "https://labcas-dev.jpl.nasa.gov/collections/collections/"
     results = []
     solr_collection = Solr(base_url='http://localhost:8983/solr/collections', version=4)
     solr_dataset = Solr(base_url='http://localhost:8983/solr/datasets', version=4)
     # NOTE(review): no ``rows`` parameter is sent with either search, so
     # only the default page of results is seen -- confirm this is intended.
     collection_query = {'q': '*:*'}
     collection_response = solr_collection.search(**collection_query)
     for obj in collection_response.documents:
         collection_id = obj.get("id")
         # Skip collections that cannot be named or linked to.
         if not (obj.get("CollectionName") and collection_id):
             continue
         dataset_query = {'q': '*:*', 'fq': "CollectionId='{}'".format(collection_id)}
         dataset_response = solr_dataset.search(**dataset_query)
         datasetcount = self.countDatasets(dataset_response.documents)
         results.append(dict(
             collectionname=obj["CollectionName"],
             description=obj.get("CollectionDescription", "None"),
             url=sciencedata_prefix + collection_id,
             leadpi=obj.get("LeadPI", ["None"]),
             organ=obj.get("OrganSite", ["No Organ info"]),
             discipline=obj.get("Discipline", ["None"]),
             protocol=obj.get("ProtocolId", ["None"]),
             qastate=obj.get("QAState", ["None"]),
             species=obj.get("Species", ["None"]),
             datasetcount=datasetcount,
         ))
     # A key function works on Python 2 and 3; the comparator form with
     # ``cmp`` only exists on Python 2.
     results.sort(key=lambda entry: entry['collectionname'])
     return results
Ejemplo n.º 2
0
 def test_search_persistent(self):
     # Run the same match-all search ten times through one client created
     # with persistent=True and use_get=True; every response must report
     # status 200, the expected total and a page of ten documents.
     solr = Solr(os.getenv('SOLR_URL'), persistent=True, use_get=True)
     # ``range`` exists on Python 2 and 3; ``xrange`` is Python 2 only.
     for _ in range(10):
         response = solr.search(q='*:*')
         self.assertEqual(response.status, 200)
         self.assertEqual(response.total_results, 4563722)
         self.assertEqual(len(response.documents), 10)
Ejemplo n.º 3
0
def query_solr():
    """Search Solr with the query described by the current request body.

    The request payload is decoded as JSON and turned into a query string
    by ``parse_json``. The documents of the Solr response are returned
    serialized as a JSON string.
    """
    payload = json.loads(request.data)
    query_string = parse_json(payload)

    client = Solr("http://52.76.188.127:8983/solr/clickstream_event_shard1_replica1/")
    documents = client.search(q=query_string).documents

    return json.dumps(documents)
Ejemplo n.º 4
0
class VIVOService(object):
    """Look up entities by name through a Solr index."""

    def __init__(self):
        """Create the Solr client from the URL returned by ``get_env('SOLR_URL')``."""
        from mysolr import Solr
        surl = get_env('SOLR_URL')
        self.solr = Solr(surl)

    def get(self, query, class_type):
        """Search for documents of ``class_type`` whose stemmed name matches ``query``.

        Args:
            query: Text matched against the ``acNameStemmed`` field.
            class_type: Value matched against the ``type`` field.

        Returns:
            A list of dicts, one per document (at most 20 are requested),
            with the keys ``uri`` and ``id`` (both the document's ``URI``)
            and ``text`` (``"<first nameRaw> - <first PREFERRED_TITLE>"``).

        Raises:
            KeyError: If a document lacks ``URI``, ``nameRaw`` or
                ``PREFERRED_TITLE``.
        """
        # Uses acNameStemmed for now; a more intelligent query can be
        # constructed later if necessary.
        params = {
            'q': u'acNameStemmed:{0} type:{1}'.format(query, class_type),
            'fl': 'URI,nameRaw,PREFERRED_TITLE',
            'rows': 20
        }
        response = self.solr.search(**params)
        # Massage the Solr response.
        out = []
        for doc in response.documents:
            out.append({
                'uri': doc['URI'],
                'id': doc['URI'],
                # Unicode template: on Python 2 a byte-string template raises
                # UnicodeEncodeError when a field contains non-ASCII text.
                'text': u"{} - {}".format(
                    doc['nameRaw'][0],
                    doc['PREFERRED_TITLE'][0]
                ),
            })
        return out
class call_number_app(object):
    """Client for the JSON interface of the Aristotle Library Apps call number app."""

    def __init__(self, **kwargs):
        """
        The `call_number_app` takes a number of optional parameters
        including an URL where the Aristotle Library Apps instance
        is currently running.

        :param url: URL of Aristotle Library Apps path to the call
                    number app, defaults to
                    http://0.0.0.0/apps/call_number/json/.
        """
        self.call_number_url = kwargs.get(
            "url", "http://0.0.0.0/apps/call_number/json/")
        self.solr = Solr(base_url=settings.SOLR_URL)

    def json_search(self, request):
        """
        Performs a call number search using the JSON interface of the call
        number app and looks up every returned bib number in Solr.

        :param request: Django request; reads the ``q`` (call number) and
                        optional ``number_type`` (default ``lccn``)
                        parameters.
        :returns: context dict with the keys ``docs`` (list of Solr
                  documents, each with an added ``record_url``, or None
                  when no bib numbers were returned), ``current_sort``,
                  ``sorts``, ``start_number`` and ``end_number``.
        """
        # A missing ``q`` is treated as an empty call number instead of
        # failing on ``None.strip()``.
        call_number = (request.REQUEST.get('q') or '').strip()
        number_type = request.REQUEST.get('number_type', 'lccn')
        items_per_page = int(settings.ITEMS_PER_PAGE)
        context = {'docs': None}
        # Plain string handling instead of os.path.join, which is meant
        # for filesystem paths rather than URLs.
        term_search_url = self.call_number_url.rstrip('/') + '/term_search'
        # NOTE(review): call_number and number_type are inserted without
        # URL-encoding, as before -- confirm whether quoting is needed.
        json_search_url = "{0}?call_number={1}&slice-size={2}&type={3}".format(
            term_search_url,
            call_number,
            items_per_page - 3,
            number_type)

        # Fetch once (the URL used to be requested twice, with the first
        # response thrown away) and always release the connection.
        http_response = urllib2.urlopen(json_search_url)
        try:
            results = json.load(http_response)
        finally:
            http_response.close()

        bib_numbers = results.get("bib_numbers") or []
        if bib_numbers:
            docs = []
            for bib_num in bib_numbers:
                query = {"q": bib_num,
                         "qt": "dismax",
                         "fl": "*"}
                docs.extend(self.solr.search(**query).documents)
            # Iterate through and create record_urls
            for doc in docs:
                doc['record_url'] = settings.CATALOG_RECORD_URL.format(doc['id'])
            context['docs'] = docs
        context['current_sort'] = None
        context['sorts'] = [x[0] for x in settings.SORTS]
        context['start_number'] = 1
        # The number of documents found caps the last item number; the old
        # code compared the whole ``results`` dict with the page size.
        context['end_number'] = min(len(context['docs'] or []),
                                    items_per_page)
        return context
Ejemplo n.º 6
0
Archivo: plugin.py Proyecto: RAPD/RAPD
 def solr_search(self, query):
     """Search Solr with ``query`` and index the hits by PDB id.

     ``query`` is a dict of Solr parameters that is expanded into keyword
     arguments of the search call. The result maps the upper-cased
     ``pdb_id`` of every returned document to
     ``{'description': <first molecule_name entry>}``.
     """
     by_pdb_id = {}
     client = Solr(self.server, version=4)
     # Per the original note, mysolr returns only 10 rows by default, so
     # the caller has to pass ``rows`` in ``query`` to get more.
     hits = client.search(**query).documents
     for hit in hits:
         entry = {'description': hit.get('molecule_name')[0]}
         by_pdb_id[hit.get('pdb_id').upper()] = entry
     return by_pdb_id
Ejemplo n.º 7
0
def getSingleObjects(id_list, start):
    """Collect the ids of all discoverable objects and write them to a sitemap.

    Searches the ``fedobjs`` core for documents matching
    ``rels_isDiscoverable:True`` (up to 50000 rows), appends each
    document's ``id`` to ``id_list`` and passes the list to
    ``writeSitemapXML`` with sitemap number 1.

    Args:
        id_list: List that is extended in place with the ids found.
        start: Not used; the search always begins at offset 0.
            NOTE(review): confirm whether this was meant to be the Solr
            ``start`` offset.
    """
    smCount = 1
    solr = Solr('http://localhost:8080/solr4/fedobjs')
    query = {'q': 'rels_isDiscoverable:True', 'fl': 'id', 'rows': 50000, 'start': 0}
    response = solr.search(**query)
    # Single-argument print calls produce the same output on Python 2 and 3.
    print("Num Results: %s" % response.total_results)
    already_present = len(id_list)
    id_list.extend(each['id'] for each in response.documents)
    print("Writing %s results..." % (len(id_list) - already_present))
    writeSitemapXML(id_list, smCount)
Ejemplo n.º 8
0
 def solr_search(self, query):
     """Run a Solr search and return the hits keyed by PDB id.

     The ``query`` dict is expanded into keyword arguments of the search
     call. Each returned document contributes one entry: its upper-cased
     ``pdb_id`` mapped to a dict holding the first ``molecule_name`` value
     under ``'description'``.
     """
     results = {}
     # Connect to the configured server.
     connection = Solr(self.server, version=4)
     # Per the original note, mysolr returns only 10 rows unless ``rows``
     # is supplied in ``query``.
     response = connection.search(**query)
     for document in response.documents:
         description = document.get('molecule_name')[0]
         pdb_id = document.get('pdb_id').upper()
         results[pdb_id] = {'description': description}
     return results
Ejemplo n.º 9
0
    def _readLabcasSolr(self, labcasurl, labcas_sourceurl_prefix):
        '''Fetch documents from the Solr core at ``labcasurl``, keyed by id.

        A match-all query is sent to the core. Every returned document gets
        an extra ``sourceurl`` entry, built by appending its ``id`` to
        ``labcas_sourceurl_prefix``, and is stored in the result dictionary
        under that ``id``.'''
        connection = Solr(base_url=labcasurl, version=4)
        response = connection.search(q='*:*')
        indexed = {}
        for document in response.documents:
            identifier = document.get("id")
            document['sourceurl'] = labcas_sourceurl_prefix + identifier
            indexed[identifier] = document

        return indexed
Ejemplo n.º 10
0
    sys.exit(1)

# Database holding the documents; db_conn is created earlier in the script.
db = db_conn['fashion_ip']

# Connection to Solr for faster full text searching
solr = Solr('http://localhost:8080/solr')

# The search term is the first command-line argument.
qstring = sys.argv[1]

# NOTE(review): qstring is inserted into the pattern unescaped, and pir_re is
# not used in the visible part of this script.
pir_re = re.compile(r'.* ' + qstring + '.*', re.IGNORECASE)
porter = nltk.PorterStemmer()

# Search each year from 1900 through 2012 separately.
for year in range(1900,2013):
	print '\nYEAR: ', year
	
	# Ask only for the id and score of up to 10000 hits for this year.
	response = solr.search(q=qstring + ' year:' + str(year), fl='_id,score', rows=10000, start=0)
	documents = response.documents
	
	for doc in documents:
		# Load the content and summary of the hit from the docs collection.
		ref = db.docs.find_one({'_id':ObjectId(doc['_id'])},{'content':True,'ref_summary':True})
		s = ref['content']
		
		# Blank line, then the summary, before the concordance output.
		print
		print ref['ref_summary']
		
		# Tokenize the lower-cased content, encoded as UTF-8 bytes.
		tokens = nltk.word_tokenize(s.lower().encode('utf-8'))
		
		# Stemmed version
		text = IndexedText(porter, tokens)
		text.concordance(qstring)
		
Ejemplo n.º 11
0
class QueryResultTestCase(unittest.TestCase):
    """Integration tests for the Solr client against a live server.

    The assertions expect the index at ``http://localhost:8983/solr`` to
    hold exactly four documents when a test starts.
    """

    def setUp(self):
        # Every test gets its own client.
        self.solr = Solr('http://localhost:8983/solr')

    def test_search(self):
        # A match-all search succeeds and returns all four documents.
        result = self.solr.search(q='*:*')
        self.assertEqual(result.status, 200)
        self.assertEqual(result.total_results, 4)
        self.assertEqual(len(result.documents), 4)

    def test_search_cursor(self):
        # Fetching 1 document at a time yields 4 responses; fetching 4 at
        # a time yields a single response.
        for batch_size, expected_batches in ((1, 4), (4, 1)):
            cursor = self.solr.search_cursor(q='*:*')
            batches = 0
            for batch in cursor.fetch(batch_size):
                self.assertEqual(batch.status, 200)
                batches += 1
            self.assertEqual(batches, expected_batches)

    def test_commit(self):
        outcome = self.solr.commit()
        self.assertEqual(outcome.status, 200)

    def test_optimize(self):
        outcome = self.solr.optimize()
        self.assertEqual(outcome.status, 200)

    def test_ping(self):
        outcome = self.solr.ping()
        self.assertEqual(outcome.status, 200)

    def test_is_up(self):
        alive = self.solr.is_up()
        self.assertEqual(alive, True)

    def test_update_delete(self):
        # Record the index size before touching it.
        baseline = self.solr.search(q='*:*')
        self.assertEqual(baseline.status, 200)
        initial_total = baseline.total_results

        # Post one document as JSON and another one as XML.
        for doc_id, input_format in ((1, 'json'), (2, 'xml')):
            posted = self.solr.update([{'id': doc_id}],
                                      input_type=input_format)
            self.assertEqual(posted.status, 200)

        # Both new documents must be counted.
        grown = self.solr.search(q='*:*')
        self.assertEqual(grown.status, 200)
        self.assertEqual(grown.total_results, initial_total + 2)

        # Remove them again: the first by query, the second by key.
        removed = self.solr.delete_by_query('id:1')
        self.assertEqual(removed.status, 200)
        removed = self.solr.delete_by_key(2)
        self.assertEqual(removed.status, 200)

        # The index is back at its initial size.
        restored = self.solr.search(q='*:*')
        self.assertEqual(restored.status, 200)
        self.assertEqual(restored.total_results, initial_total)

    def tearDown(self):
        # Nothing to clean up.
        pass

    def test_query(self):
        # Placeholder: no query-specific assertions yet.
        pass
Ejemplo n.º 12
0
# Database holding the documents; db_conn is created earlier in the script.
db = db_conn['fashion_ip']

# Connection to Solr for faster full text searching
solr = Solr('http://localhost:8080/solr')

# The search term is the first command-line argument.
qstring = sys.argv[1]

# NOTE(review): qstring is inserted into the pattern unescaped, and pir_re is
# not used in the visible part of this script.
pir_re = re.compile(r'.* ' + qstring + '.*', re.IGNORECASE)
porter = nltk.PorterStemmer()

# Search each year from 1900 through 2012 separately.
for year in range(1900, 2013):
    print '\nYEAR: ', year

    # Ask only for the id and score of up to 10000 hits for this year.
    response = solr.search(q=qstring + ' year:' + str(year),
                           fl='_id,score',
                           rows=10000,
                           start=0)
    documents = response.documents

    for doc in documents:
        # Load the content and summary of the hit from the docs collection.
        ref = db.docs.find_one({'_id': ObjectId(doc['_id'])}, {
            'content': True,
            'ref_summary': True
        })
        s = ref['content']

        # Blank line, then the summary of the reference.
        print
        print ref['ref_summary']

        # Tokenize the lower-cased content, encoded as UTF-8 bytes.
        tokens = nltk.word_tokenize(s.lower().encode('utf-8'))
Ejemplo n.º 13
0
class eBsolr:
    """Convenience wrapper around a ``Solr`` client held in ``self.cursor``."""

    # Replaced by a Solr client in __init__.
    cursor = None

    # Base string type of the running interpreter: ``basestring`` on
    # Python 2, ``str`` on Python 3 (where ``basestring`` does not exist).
    try:
        _STRING_TYPES = basestring  # noqa: F821
    except NameError:
        _STRING_TYPES = str

    def __init__(self, urls, config, version=4):
        """Create the underlying Solr client.

        :param urls: passed to ``Solr`` as its first argument
        :param config: accepted but not used by this class
        :param version: passed to ``Solr`` as ``version``
        """
        self.cursor = Solr(urls, version=version)

    def update(self, documents, input_type='json', commit=False):
        """Send ``documents`` to the index and return the client's response."""
        return self.cursor.update(documents, input_type, commit)

    def deleteById(self, tid, commit=False):
        """Delete the document with key ``tid``; returns the client's response."""
        return self.cursor.delete_by_key(tid, commit=commit)

    def deleteByQuery(self, query, commit=False):
        """Delete all documents matching ``query``; returns the client's response."""
        return self.cursor.delete_by_query(query=query, commit=commit)

    def deleteAll(self, commit=False):
        """Delete every document (query ``*:*``); returns the client's response."""
        return self.cursor.delete_by_query("*:*", commit=commit)

    def getResponse(self, search, fields=None, start=0, rows=None, sort=None, fq=None):
        """Run a search and return the raw client response.

        :param search: value of the Solr ``q`` parameter
        :param fields: field list, either a comma-separated string or an
                       iterable of field names
        :param start: offset of the first row
        :param rows: maximum number of rows; when None, ``_MAXROWS`` is
                     requested first and, if the index reports more
                     results than that, the search is repeated asking for
                     all of them
        :param sort: value of the Solr ``sort`` parameter
        :param fq: value of the Solr ``fq`` parameter
        :raises Exception: when the response status is 400 or higher
        """
        query = {'q': search}
        if fields:
            if isinstance(fields, self._STRING_TYPES):
                query['fl'] = fields
            else:
                query['fl'] = ",".join(fields)
        if sort:
            query['sort'] = sort
        if fq:
            query['fq'] = fq

        limit = _MAXROWS if rows is None else rows
        query['start'] = start
        query['rows'] = limit

        response = self.cursor.search(**query)
        if int(response.status) >= 400:
            raise Exception('Error Solr {}: {}'.format(response.status, response.extract_errmessage()))
        if rows is None and response.total_results > limit:
            # The caller set no limit and the default page was too small:
            # ask again for everything.
            query['rows'] = response.total_results
            response = self.cursor.search(**query)

        return response

    def get_language_query(self, language):
        """Build a ``language:a OR language:b`` clause from ``"a;b"``.

        Returns None when ``language`` is None or empty.
        """
        if language is None or language == "":
            return None
        return " OR ".join("language:%s" % lang for lang in language.split(';'))

    def getDocs(self, search, fields=None, start=0, rows=None, sort=None, fq=None):
        """Run a search and return its documents and total count.

        search: query syntax, e.g. "field:keys,field2:keys2"
        fields: fields to retrieve (list), e.g. ['field', 'field2']
        start: start row
        rows: maximum number of rows
        sort: row ordering, e.g. "field asc, field2 desc"

        Returns a dict with the keys ``docs`` and ``count``.
        """
        response = self.getResponse(search, fields, start, rows, sort, fq)

        return {"docs": response.documents, "count": response.total_results}

    def getFacetList(self, facets, facetField):
        """Pick the requested fields out of ``facets['facet_fields']``.

        ``facetField`` is a list of names or a comma-separated string;
        empty names are skipped.
        """
        if not isinstance(facetField, list):
            facetField = facetField.split(",")
        return {facet: facets['facet_fields'][facet]
                for facet in facetField if facet}

    def getFacetPivotGeneral(self, query, facetField, pivotField, limit=None, fq=None):
        """Request a facet pivot of ``facetField`` by ``pivotField``.

        Sends a GET request to the ``select`` handler below the client's
        ``base_url`` and returns the ``facet_pivot`` entry for
        ``"<facetField>,<pivotField>"``. Best effort: on any error the
        problem is printed and None is returned.
        """
        try:
            pivot = '{0},{1}'.format(facetField, pivotField)
            url = '{}select'.format(self.cursor.base_url)
            params = {'q': query,
                      'rows': 0,
                      'wt': 'json',
                      'indent': 'true',
                      'facet': 'true',
                      'facet.pivot': pivot}

            if limit:
                params['facet.limit'] = limit
            if fq:
                params['fq'] = fq
            http_response = requests.get(url, params=params)

            return http_response.json()['facet_counts']['facet_pivot'][pivot]
        except Exception as e:
            # Single-argument print calls behave the same on Python 2 and 3.
            print("Error parsing facet pivot...")
            print(e)
        return None
from mysolr import Solr

# Connect to the "barcore" core of the Solr server on localhost:8983
solr = Solr("http://localhost:8983/solr/barcore")

# All solr params are supported!
query = {'q' : '*:*', 'facet' : 'true', 'facet.field' : 'zip'}
response = solr.search(**query)

# do stuff with documents
for document in response.documents:
    # set field 'rating' on every returned document
    document['rating'] = 2.0

# update index with modified documents
solr.update(response.documents, commit=True)
Ejemplo n.º 15
0
class QueryResultTestCase(unittest.TestCase):
    """Live-server tests for the Solr client.

    All expectations assume that ``http://localhost:8983/solr`` serves an
    index containing exactly four documents.
    """

    def setUp(self):
        # A new client is created before each test.
        self.solr = Solr('http://localhost:8983/solr')

    def tearDown(self):
        # No cleanup required.
        pass

    def test_search(self):
        # Match-all search: status OK, four hits, four documents returned.
        found = self.solr.search(q='*:*')
        self.assertEqual(found.status, 200)
        self.assertEqual(found.total_results, 4)
        self.assertEqual(len(found.documents), 4)

    def test_search_cursor(self):
        # One document per fetch: four responses expected.
        one_by_one = self.solr.search_cursor(q='*:*')
        seen = 0
        for page in one_by_one.fetch(1):
            self.assertEqual(page.status, 200)
            seen += 1
        self.assertEqual(seen, 4)

        # Four documents per fetch: a single response expected.
        all_at_once = self.solr.search_cursor(q='*:*')
        seen = 0
        for page in all_at_once.fetch(4):
            self.assertEqual(page.status, 200)
            seen += 1
        self.assertEqual(seen, 1)

    def test_commit(self):
        self.assertEqual(self.solr.commit().status, 200)

    def test_optimize(self):
        self.assertEqual(self.solr.optimize().status, 200)

    def test_ping(self):
        self.assertEqual(self.solr.ping().status, 200)

    def test_is_up(self):
        self.assertEqual(self.solr.is_up(), True)

    def test_update_delete(self):
        # Remember how many documents exist up front.
        before = self.solr.search(q='*:*')
        self.assertEqual(before.status, 200)
        starting_count = before.total_results

        # Add a document via JSON.
        json_docs = [{'id': 1}]
        added = self.solr.update(json_docs, input_type='json')
        self.assertEqual(added.status, 200)

        # Add another document via XML.
        xml_docs = [{'id': 2}]
        added = self.solr.update(xml_docs, input_type='xml')
        self.assertEqual(added.status, 200)

        # The total must have grown by two.
        during = self.solr.search(q='*:*')
        self.assertEqual(during.status, 200)
        self.assertEqual(during.total_results, starting_count + 2)

        # Delete the first document by query and the second by key.
        deleted = self.solr.delete_by_query('id:1')
        self.assertEqual(deleted.status, 200)
        deleted = self.solr.delete_by_key(2)
        self.assertEqual(deleted.status, 200)

        # The total must be back where it started.
        after = self.solr.search(q='*:*')
        self.assertEqual(after.status, 200)
        self.assertEqual(after.total_results, starting_count)

    def test_query(self):
        # Placeholder without assertions.
        pass
Ejemplo n.º 16
0
from mysolr import Solr

# Connect to the "barcore" core of the Solr server on localhost:8983
solr = Solr("http://localhost:8983/solr/barcore")

# All solr params are supported!
query = {'q': '*:*', 'facet': 'true', 'facet.field': 'zip'}
response = solr.search(**query)

# do stuff with documents
for document in response.documents:
    # set field 'rating' on every returned document
    document['rating'] = 2.0

# update index with modified documents
solr.update(response.documents, commit=True)
Ejemplo n.º 17
0
class QueryResultTestCase(unittest.TestCase):
    """Tests of the Solr client against the server named by ``SOLR_URL``.

    The expected counts assume that the index holds exactly four documents
    when a test starts.
    """

    def setUp(self):
        # The server address comes from the SOLR_URL environment variable.
        self.solr = Solr(os.getenv("SOLR_URL"))

    def test_search(self):
        # Searching for everything returns the four indexed documents.
        everything = self.solr.search(q="*:*")
        self.assertEqual(everything.status, 200)
        self.assertEqual(everything.total_results, 4)
        self.assertEqual(len(everything.documents), 4)

    def test_search_cursor(self):
        # (rows per fetch, number of responses expected)
        expectations = [(1, 4), (4, 1)]
        for rows_per_fetch, expected_responses in expectations:
            cursor = self.solr.search_cursor(q="*:*")
            count = 0
            for chunk in cursor.fetch(rows_per_fetch):
                self.assertEqual(chunk.status, 200)
                count += 1
            self.assertEqual(count, expected_responses)

    def test_commit(self):
        reply = self.solr.commit()
        self.assertEqual(reply.status, 200)

    def test_optimize(self):
        reply = self.solr.optimize()
        self.assertEqual(reply.status, 200)

    def test_ping(self):
        reply = self.solr.ping()
        self.assertEqual(reply.status, 200)

    def test_is_up(self):
        reply = self.solr.is_up()
        self.assertEqual(reply, True)

    def test_update_delete(self):
        # Total number of documents before any change.
        first = self.solr.search(q="*:*")
        self.assertEqual(first.status, 200)
        original_total = first.total_results

        # Post document 1 as JSON, then document 2 as XML.
        reply = self.solr.update([{"id": 1}], input_type="json")
        self.assertEqual(reply.status, 200)
        reply = self.solr.update([{"id": 2}], input_type="xml")
        self.assertEqual(reply.status, 200)

        # Two more documents than before.
        second = self.solr.search(q="*:*")
        self.assertEqual(second.status, 200)
        self.assertEqual(second.total_results, original_total + 2)

        # Delete document 1 by query and document 2 by key.
        reply = self.solr.delete_by_query("id:1")
        self.assertEqual(reply.status, 200)
        reply = self.solr.delete_by_key(2)
        self.assertEqual(reply.status, 200)

        # Same number of documents as at the start.
        third = self.solr.search(q="*:*")
        self.assertEqual(third.status, 200)
        self.assertEqual(third.total_results, original_total)

    def tearDown(self):
        # Nothing to release.
        pass

    def test_query(self):
        # Placeholder without assertions.
        pass
Ejemplo n.º 18
0
solr = Solr('http://foo.bar:9090/solr/')

# If the server is secured with HTTP basic authentication, you can connect by using the auth parameter.
from mysolr import Solr

solr = Solr(auth=('admin', 'admin'))

# Further information about the auth parameter can be found in the requests docs.
# Querying Solr

# Making a query to Solr is very easy: just call the search method with your query.
from mysolr import Solr

solr = Solr()
# Search for all documents
response = solr.search(q='*:*')
# Get documents
documents = response.documents

# Besides, all available Solr query params are supported, so making a query using pagination is as simple as:
from mysolr import Solr

solr = Solr()

# Get 10 documents
response = solr.search(q='*:*', rows=10, start=0)

# Some parameters contain a period. In those cases you have to use a dictionary to build the query:
from mysolr import Solr

solr = Solr()
Ejemplo n.º 19
0
def query_solr(query):
    """Return the documents Solr finds for ``query``.

    A client created with default settings is used, and ``query`` is sent
    as the ``q`` parameter.
    """
    return Solr().search(q=query).documents
Ejemplo n.º 20
0
#coding:utf8

from mysolr import Solr

solr = Solr('http://localhost:8983/solr/shinsho_jawiki')

# all available Solr query params are supported
response = solr.search(q='*:*')
response = solr.search(q='*:*', rows=10, start=0)
# parameters whose name contains a period are passed through a dict
query = {'q' : '*:*', 'facet' : 'true', 'facet.field' : 'foo'}
response = solr.search(**query)
query = {'q' : '*:*', 'facet' : 'true', 'facet.field' : ['foo', 'bar']} # a list of values
response = solr.search(**query)

documents = response.documents

# Concurrent searches
# As mysolr is using requests, it is possible to make concurrent queries thanks to grequests
# See installation section for further information about how to install this feature.

queries = [
    {
        'q' : '*:*'
    },
    {
        'q' : 'foo:bar'
    }
]

# using 10 threads
responses = solr.async_search(queries, size=10)
Ejemplo n.º 21
0
def get_solr_count(query):
    """Return the total number of results Solr reports for ``query``.

    ``query`` is a dict of Solr parameters; it is expanded into keyword
    arguments of the search call on the server at ``SOLR_SERVER``.
    """
    client = Solr(SOLR_SERVER)
    return client.search(**query).total_results