def test_highlight_inclusion(self):
    """Highlights are attached to docs only when the query asks for them."""
    with canned_solr_response_data():
        with self.app.test_request_context():
            # plain query: no highlight data on the doc
            plain_doc = solr.query("foo").get_doc(0)
            self.assertNotIn('highlights', plain_doc)
            # same query with a highlight spec: the doc carries highlights
            hl_resp = solr.query("foo", highlights=[('abstract',)])
            self.assertIn('highlights', hl_resp.get_doc(0))
def test_error_response_pagination(self):
    """Tests the get functions in case of an error coming from SOLR"""
    error_response = {
        "responseHeader": {"status": 400, "QTime": 1,},
        "error": {
            "msg": "org.apache.lucene.queryparser.classic.ParseException: undefined field authsorfsd",
            "code": 400,
        },
    }
    with canned_solr_response_data(error_response):
        with self.app.test_request_context():
            resp = solr.query("foo")
            # all counts collapse to zero on an errored response
            self.assertEqual(resp.get_count(), 0)
            self.assertEqual(resp.get_hits(), 0)
            self.assertEqual(resp.get_start_count(), 0)
            # pagination dictionary expected for an empty result set
            expected_pagination = {
                'max_pagination_len': 5,
                'num_total_pages': 0,
                'current_page': 1,
                'pages_before': [],
                'pages_after': [],
            }
            self.assertEqual(resp.get_pagination(), expected_pagination)
def alladin_lite():
    """
    View that creates the data for alladin lite

    Uses explicit bibcodes from the request when present; otherwise re-runs
    the current search (restricted to the bibcode field) to collect them.
    """
    # 'in' replaces the deprecated, py3-incompatible has_key()
    if 'bibcode' in request.values:
        bibcodes = request.values.getlist('bibcode')
    else:
        try:
            query_components = json.loads(request.values.get('current_search_parameters'))
        except (TypeError, JSONDecodeError):
            #@todo: logging of the error
            return render_template('errors/generic_error.html', error_message='Error. Please try later.')
        # update the query parameters to return only what is necessary
        query_components.update({
            'facets': [],
            'fields': ['bibcode'],
            'highlights': [],
            'rows': str(config.SEARCH_DEFAULT_ROWS)
        })
        resp = solr.query(**query_components)
        if resp.is_error():
            return render_template('errors/generic_error.html', error_message='Error while creating the objects skymap. Please try later.')
        bibcodes = [x.bibcode for x in resp.get_docset_objects()]
    return render_template('alladin_lite_embedded.html', bibcodes={'bibcodes': bibcodes})
def get_references(**args):
    """
    Get the references for a set of bibcodes

    Expects args['bibcodes']; accumulates every referenced bibcode
    (multiplicity preserved) into `papers`.
    """
    papers = []
    # This information can be retrieved with one single Solr query
    # (just an 'OR' query of a list of bibcodes)
    # To restrict the size of the query URL, we split the list of
    # bibcodes up in a list of smaller lists
    biblists = list(chunks(args['bibcodes'], config.METRICS_CHUNK_SIZE))
    for biblist in biblists:
        q = " OR ".join("bibcode:%s" % a for a in biblist)
        try:
            # We only need the contents of the 'reference' field (i.e. the
            # list of bibcodes referenced by the paper at hand)
            resp = solr.query(q, rows=config.BIBUTILS_MAX_HITS, fields=['reference'])
        except SolrReferenceQueryError as e:  # 'as' syntax: valid on py2.6+ and py3
            app.logger.error("Solr references query for %s blew up (%s)" % (q, e))
            raise
        # Collect all bibcodes in a list (do NOT remove multiplicity)
        search_results = resp.search_response()
        for doc in search_results['results']['docs']:
            if 'reference' in doc:
                papers += doc['reference']
def get_document(identifier, **kwargs):
    """Fetch the single document matching *identifier*, or None if the
    match is not unique (zero or several hits)."""
    resp = solr.query("identifier:%s" % identifier,
                      rows=1,
                      fields=config.SOLR_SEARCH_DEFAULT_FIELDS,
                      **kwargs)
    # exactly one hit means an unambiguous match
    return resp.get_doc_object(0) if resp.get_hits() == 1 else None
def get_publications_from_query(q):
    """Run Solr query *q*, fetching only the 'reference' field.

    Raises SolrReferenceQueryError after logging if the query fails.
    """
    try:
        # Get the information from Solr
        resp = solr.query(q, rows=config.BIBUTILS_MAX_HITS, fields=['reference'])
    except SolrReferenceQueryError as e:  # 'as' syntax: valid on py2.6+ and py3
        app.logger.error("Solr publications query for %s blew up (%s)" % (q, e))
        raise
    # NOTE(review): resp is never returned or used here — confirm whether
    # the function body was truncated or a `return resp` is missing
def word_cloud():
    """
    View that creates the data for the word cloud
    """
    query_url = config.SOLRQUERY_URL
    # the term-vector handler lives at .../tvrh next to the select handler
    tvrh_query_url = query_url.rsplit('/', 1)[0] + '/tvrh'
    try:
        query_components = json.loads(request.values.get('current_search_parameters'))
    except (TypeError, JSONDecodeError):
        #@todo: logging of the error
        return render_template('errors/generic_error.html', error_message='Error while creating the word cloud (code #1). Please try later.')
    # checked bibcodes will be input as
    # 'in' replaces the deprecated, py3-incompatible has_key()
    if 'bibcode' in request.values:
        bibcodes = request.values.getlist('bibcode')
        query_components['q'] = ' OR '.join(["bibcode:%s" % b for b in bibcodes])
    query_components.update({
        'facets': [],
        'fields': ['id'],
        'highlights': [],
        'defType': 'aqp',
        'rows': str(config.WORD_CLOUD_DEFAULT_FIRST_RESULTS),
        'tv.all': 'true',
        'tv.fl': 'abstract',
    })
    resp = solr.query(query_url=tvrh_query_url, **query_components)
    if resp.is_error():
        return render_template('errors/generic_error.html', error_message='Error while creating the word cloud (code #2). Please try later.')
    return render_template('word_cloud_embedded.html', wordcloud_data=wc_json(resp.raw_response()))
def author_network():
    """
    View that creates the data for the author network
    """
    #if there are not bibcodes, there should be a query to extract the authors
    try:
        query_components = json.loads(request.values.get('current_search_parameters'))
    except (TypeError, JSONDecodeError):
        #@todo: logging of the error
        return render_template('errors/generic_error.html', error_message='Error while creating the author network (code #1). Please try later.')
    # checked bibcodes will be input as
    # 'in' replaces the deprecated, py3-incompatible has_key()
    if 'bibcode' in request.values:
        bibcodes = request.values.getlist('bibcode')
        query_components['q'] = ' OR '.join(["bibcode:%s" % b for b in bibcodes])
    # update the query parameters to return only what is necessary
    query_components.update({
        'facets': [],
        'fields': ['author_norm'],
        'highlights': [],
        'rows': str(config.AUTHOR_NETWORK_DEFAULT_FIRST_RESULTS)
    })
    resp = solr.query(**query_components)
    if resp.is_error():
        return render_template('errors/generic_error.html', error_message='Error while creating the author network (code #2). Please try later.')
    # extract the authors
    lists_of_authors = [doc.author_norm for doc in resp.get_docset_objects() if doc.author_norm]
    return render_template('author_network_embedded.html', network_data=get_authorsnetwork(lists_of_authors))
def export_to_other_formats():
    """
    view that exports a set of papers
    the input is a format and a list of bibcodes or a variable
    containing the parameters for a solr query
    """
    # extract the format
    export_format = request.values.getlist('export_format')
    list_type = request.values.get('list_type')
    # list of bibcodes to extract
    bibcodes_to_export = []
    # flag to check if everything has been extracted
    all_extracted = True
    num_hits = None
    # if there are no bibcodes, there must first be a query to extract them
    # ('in' replaces the deprecated, py3-incompatible has_key())
    if 'bibcode' not in request.values:
        #@todo: code to query solr with the same query parameters but override the fields to retrieve
        try:
            query_components = json.loads(request.values.get('current_search_parameters'))
        except (TypeError, JSONDecodeError):
            #@todo: logging of the error
            return render_template('errors/generic_error.html', error_message='Error while exporting records (code #1). Please try later.')
        # update the query parameters to return only what is necessary
        query_components.update({'facets': [],
                                 'fields': ['bibcode'],
                                 'highlights': [],
                                 'rows': str(config.EXPORT_DEFAULT_ROWS)})
        # execute the query
        if list_type == 'similar':
            resp = get_document_similar(**query_components)
        else:
            resp = solr.query(**query_components)
        if resp.is_error():
            return render_template('errors/generic_error.html', error_message='Error while exporting records (code #2). Please try later.')
        # extract the bibcodes
        for doc in resp.get_docset_objects():
            bibcodes_to_export.append(doc.bibcode)
        # check if all the results of the query have been extracted
        # (num results <= max to extract)
        if resp.get_hits() > config.EXPORT_DEFAULT_ROWS:
            all_extracted = False
            num_hits = resp.get_hits()
    else:
        # extract all the bibcodes
        bibcodes_to_export = request.values.getlist('bibcode')
    # actually export the records
    if bibcodes_to_export:
        export_str = get_classic_records_export(bibcodes_to_export, export_format)
    else:
        export_str = ''
    # if not everything has been extracted, show message on top
    if not all_extracted:
        export_str = 'Exported first %s results of %s total. \n\n\n%s' % (config.EXPORT_DEFAULT_ROWS, num_hits, export_str)
    else:
        export_str = 'Exported %s records \n\n\n%s' % (len(bibcodes_to_export), export_str)
    return Response(export_str, mimetype='text/plain')
def get_citations(self, **kwargs):
    """
    Returns the list of citations
    """
    doc_id = self.data[config.SOLR_DOCUMENT_ID_FIELD]
    q = "citations(%s:%s)" % (config.SOLR_DOCUMENT_ID_FIELD, doc_id)
    # time the round trip for monitoring
    with statsd.timer("core.solr.citations.query_response_time"):
        return solr.query(q, **kwargs)
def get_document_similar(q, **kwargs):
    """Run a MoreLikeThis query for *q* against the /mlt endpoint."""
    params = dict(config.SOLR_MLT_PARAMS)
    params['mlt.fl'] = ','.join(config.SOLR_MLT_FIELDS)
    params.update(**kwargs)
    # TODO: someday maybe flask-solrquery can be made to know about different endpoints
    mlt_query_url = config.SOLRQUERY_URL.rsplit('/', 1)[0] + '/mlt'
    return solr.query(q, query_url=mlt_query_url, **params)
def get_document(identifier, **kwargs):
    """Fetch the single document matching *identifier*, or None if the
    match is not unique (zero or several hits)."""
    q = "identifier:%s" % identifier
    # time the round trip for monitoring
    with statsd.timer("core.solr.document.query_response_time"):
        resp = solr.query(q, rows=1, fields=config.SOLR_SEARCH_DEFAULT_FIELDS, **kwargs)
    # exactly one hit means an unambiguous match
    return resp.get_doc_object(0) if resp.get_hits() == 1 else None
def test_response_content_with_facets(self):
    """A faceted query exposes facet queries and facet fields in the response."""
    with canned_solr_response_data():
        with self.app.test_request_context():
            faceted = solr.query("foo", facets=[('year',)])
            # facet data must be present in the search response payload
            self.assertIn('facets', faceted.search_response()['results'])
            self.assertEqual(faceted.get_all_facet_queries(),
                             {'year:[2000 TO 2003]': 13})
            self.assertEqual(faceted.get_all_facet_fields(),
                             {'bibstem_facet': ['ApJ', 10, 'ArXiv', 8],
                              'year': ['2009', 3, '2008', 5]})
def facets():
    """
    returns facet sets for a search query
    """
    form = QueryForm.init_with_defaults(request.values)
    if form.validate():
        query_components = QueryBuilderSearch.build(form, request.values,
                                                    facets_components=True)
        resp = solr.query(**query_components)
        return render_template(
            'facets_sublevel.html',
            resp=resp,
            facet_field_interf_id=query_components['facet_field_interf_id'],
        )
    # NOTE(review): when validation fails this falls through and returns
    # None implicitly — confirm that is the intended behaviour
def solr_search():
    """Run a fixed astronomy search for the user-supplied 'q' parameter
    and render the results page."""
    try:
        q = request.args.get('q')
    except Exception:
        # narrowed from a bare `except:`, which also traps SystemExit
        # and KeyboardInterrupt
        abort(400)
    resp = solr.query(q,
                      rows=200,
                      fields=['bibcode', 'title', 'score'],
                      sort=(config.SOLR_SORT_OPTIONS['DATE'], 'desc'),
                      filters=['database:ASTRONOMY'])
    search_results = resp.search_response()
    return render_template('results.html',
                           results=search_results['results'],
                           type='solr',
                           search_url=resp.request.url)
def test_response_content(self):
    """A successful canned response has results but no facet section."""
    with canned_solr_response_data():
        with self.app.test_request_context():
            response = solr.query("foo")
            # HTTP layer: 200 and no error flag
            self.assertEqual(response.get_http_status(), 200)
            self.assertFalse(response.is_error())
            payload = response.search_response()
            self.assertIn('results', payload)
            # no facets were requested, so none should appear
            self.assertNotIn('facets', payload['results'])
def facets():
    """
    returns facet sets for a search query

    Wraps the Solr call so that any transport failure surfaces as an
    AdsabsSolrqueryException carrying the original exc_info.
    """
    form = QueryForm.init_with_defaults(request.values)
    if form.validate():
        query_components = QueryBuilderSearch.build(form, request.values, facets_components=True)
        try:
            resp = solr.query(**query_components)
        except Exception:
            # was `except Exception, e:` (py2-only syntax); `e` was unused —
            # the original traceback travels via sys.exc_info()
            raise AdsabsSolrqueryException("Error communicating with search service", sys.exc_info())
        return render_template('facets_sublevel.html',
                               resp=resp,
                               facet_field_interf_id=query_components['facet_field_interf_id'])
def test_02_solr_request_http_method(self):
    """The configured HTTP method propagates to the prepared request."""
    req = SearchRequest("foo")
    prepared = req.prepare("http://example.com/select")
    self.assertEqual(prepared.method, 'GET')
    prepared = req.prepare("http://example.com/select", method='POST')
    self.assertEqual(prepared.method, 'POST')
    with self.app.test_request_context():
        solr.request_http_method = 'POST'
        try:
            with fake_solr_http_response():
                resp = solr.query(**{'q': "black holes"})
                self.assertEqual(resp.request.prepared.method, 'POST')
        finally:
            # always restore the default: without the finally a failing
            # assertion leaked the POST setting into subsequent tests
            solr.request_http_method = 'GET'
def get_toc(self, **kwargs):
    """
    Returns the table of contents
    It queries SOLR for the first 13 characters of the bibcode and "*"
    If the 14th character is a "E" I add also this before the "*"
    """
    prefix_len = 14 if self.bibcode[13] == 'E' else 13
    q = "bibcode:%s*" % self.bibcode[:prefix_len]
    return solr.query(q, **kwargs)
def get_article_data(biblist, check_references=True):
    '''
    Get basic article metadata for a list of bibcodes

    Raises SolrQueryError after logging if the query fails.
    '''
    # build the OR query directly; the old code bound it to a variable
    # named `list`, shadowing the builtin
    q = " OR ".join("bibcode:%s" % a for a in biblist)
    fl = ['bibcode', 'title', 'first_author', 'keyword_norm',
          'reference', 'citation_count', 'pubdate']
    try:
        # Get the information from Solr
        resp = solr.query(q,
                          sort=[["pubdate", "desc"], ["bibcode", "desc"]],
                          rows=config.BIBUTILS_MAX_HITS,
                          fields=fl)
    except SolrQueryError as e:  # 'as' syntax: valid on py2.6+ and py3
        app.logger.error("Solr article data query for %s blew up (%s)" % (str(biblist), e))
        raise
def get_normalized_keywords(bibc):
    '''
    For a given publication, construct a list of normalized keywords of this
    publication and its references
    '''
    keywords = []
    # NOTE(review): lowercase 'or' — Lucene boolean operators are normally
    # uppercase 'OR'; confirm the query parser in use accepts lowercase here
    q = 'bibcode:%s or references(bibcode:%s)' % (bibc, bibc)
    try:
        # Get the information from Solr
        resp = solr.query(q, rows=config.BIBUTILS_MAX_HITS, fields=['keyword_norm'])
    except SolrQueryError as e:  # 'as' syntax: valid on py2.6+ and py3
        app.logger.error("Solr keywords query for %s blew up (%s)" % (bibc, e))
        raise
def test_error_response_parsing_02(self):
    """Error strings without a Solr exception class pass through unparsed."""
    # case of possible error string not containing a solr class exception
    error_response = {
        "responseHeader": {"status": 500, "QTime": 1,},
        "error": {"msg": "random string here", "code": 500},
    }
    with canned_solr_response_data(error_response, 500):
        with self.app.test_request_context('/'):
            response = solr.query("foo")
            self.assertEqual(response.get_http_status(), 500)
            self.assertTrue(response.is_error())
            # both accessors return the raw message unchanged
            self.assertEqual(response.get_error(), "random string here")
            self.assertEqual(response.get_error_message(), "random string here")
def get_toc(self, **kwargs):
    """
    Returns the table of contents
    It queries SOLR for the first 13 characters of the bibcode and "*"
    If the 14th character is a "E" I add also this before the "*"
    """
    prefix_len = 14 if self.bibcode[13] == 'E' else 13
    q = "bibcode:%s*" % self.bibcode[:prefix_len]
    # time the round trip for monitoring
    with statsd.timer("core.solr.toc.query_response_time"):
        resp = solr.query(q, **kwargs)
    return resp
def get_references(**args):
    """
    Get the references for a set of bibcodes

    Expects args['bibcodes']. Raises SolrReferenceQueryError after logging
    if the query fails.
    """
    papers = []
    # This information can be retrieved with one single Solr query
    # (just an 'OR' query of a list of bibcodes)
    q = " OR ".join("bibcode:%s" % a for a in args['bibcodes'])
    try:
        # Get the information from Solr
        # We only need the contents of the 'reference' field (i.e. the list
        # of bibcodes referenced by the paper at hand)
        resp = solr.query(q, rows=config.BIBUTILS_MAX_HITS, fields=['reference'])
    except SolrReferenceQueryError as e:  # 'as' syntax: valid on py2.6+ and py3
        app.logger.error("Solr references query for %s blew up (%s)" % (q, e))
        raise
def get_meta_data(**args):
    """
    Get the meta data for a set of bibcodes

    Expects args['results'] as a list of (bibcode, score) tuples.
    Raises SolrMetaDataQueryError after logging if the query fails.
    """
    data_dict = {}
    # This information can be retrieved with one single Solr query
    # (just an 'OR' query of a list of bibcodes)
    bibcodes = [bibcode for (bibcode, score) in args['results']]
    # join directly; the old code bound this to a variable named `list`,
    # shadowing the builtin
    q = " OR ".join("bibcode:%s" % a for a in bibcodes)
    try:
        # Get the information from Solr
        # NOTE(review): fields is a single comma-joined string in a list;
        # confirm flask-solrquery accepts this form
        resp = solr.query(q, rows=config.BIBUTILS_MAX_HITS, fields=['bibcode,title,first_author'])
    except SolrMetaDataQueryError as e:  # 'as' syntax: valid on py2.6+ and py3
        # log the whole query: the old message interpolated the leaked
        # comprehension variable (only the last bibcode) and mislabeled
        # this as a "references" query
        app.logger.error("Solr metadata query for %s blew up (%s)" % (q, e))
        raise
def test_error_response_parsing_01(self):
    """Tests the get functions in case of an error coming from SOLR"""
    # error coming from query parser on SOLR
    error_response = {
        "responseHeader": {"status": 400, "QTime": 1,},
        "error": {
            "msg": "org.apache.lucene.queryparser.classic.ParseException: undefined field authsorfsd",
            "code": 400,
        },
    }
    with canned_solr_response_data(error_response, 400):
        with self.app.test_request_context():
            response = solr.query("foo")
            self.assertEqual(response.get_http_status(), 400)
            self.assertTrue(response.is_error())
            # get_error() returns the raw message; get_error_message()
            # strips the Java exception class prefix
            self.assertEqual(response.get_error(),
                             "org.apache.lucene.queryparser.classic.ParseException: undefined field authsorfsd")
            self.assertEqual(response.get_error_message(), "undefined field authsorfsd")
def run(self):
    """Worker loop: pull bibcode tasks off the queue, fetch each paper's
    citations from Solr, and push citation statistics onto the result
    queue. A None task is the shutdown sentinel."""
    while True:
        bbc = self.task_queue.get()
        if bbc is None:
            break  # sentinel: stop the worker
        # tasks may be "bibcode/Nauths"; a plain bibcode defaults to 1 author
        Nauths = 1
        try:
            bibcode, Nauths = bbc.split('/')
        except ValueError:  # narrowed from bare except: only a bad split lands here
            bibcode = bbc
        q = 'citations(bibcode:%s)' % bibcode
        fl = 'bibcode,property,reference'
        try:
            if sys.platform == 'darwin':
                # development on OS X queries Solr directly
                resp = solr_req(config.SOLR_URL + '/select', q=q, fl=fl, rows=config.BIBUTILS_MAX_HITS)
                result_field = 'response'
            else:
                result_field = 'results'
                # do the query and filter out the results without the bibcode field
                # (publications without citations return an empty document)
                resp = solr.query(q, rows=config.BIBUTILS_MAX_HITS, fields=fl.split(','))
            search_results = resp.search_response()
            # gather citations and put them into the results queue
            citations = []
            cits = []
            ref_cits = []
            non_ref_cits = []
            for doc in search_results[result_field]['docs']:
                if 'bibcode' not in doc:
                    continue
                pubyear = int(bibcode[:4])
                try:
                    Nrefs = len(doc['reference'])
                except (KeyError, TypeError):  # narrowed from bare except
                    Nrefs = 0
                citations.append(doc['bibcode'])
                entry = (doc['bibcode'], Nrefs, int(Nauths), pubyear)
                cits.append(entry)
                if 'REFEREED' in doc['property']:
                    ref_cits.append(entry)
                else:
                    non_ref_cits.append(entry)
            self.result_queue.put({'bibcode': bibcode,
                                   'citations': citations,
                                   'cit_info': cits,
                                   'ref_cit_info': ref_cits,
                                   'non_ref_cit_info': non_ref_cits})
        except SolrCitationQueryError as e:  # 'as' syntax: valid on py2.6+ and py3
            app.logger.error("Solr citation query for %s blew up (%s)" % (bibcode, e))
            raise
def run(self):
    """Worker loop: pull bibcode lists off the queue, fetch their metadata
    from Solr, and push the raw result docs onto the result queue. A None
    task is the shutdown sentinel."""
    while True:
        biblist = self.task_queue.get()
        if biblist is None:
            break  # sentinel: stop the worker
        q = " OR ".join("bibcode:%s" % a for a in biblist)
        fl = 'bibcode,reference,author_norm,property,read_count'
        try:
            result_field = 'results'
            # do the query and filter out the results without the bibcode field
            # (publications without citations return an empty document)
            resp = solr.query(q, rows=config.BIBUTILS_MAX_HITS, fields=fl.split(','))
            search_results = resp.search_response()
            # gather citations and put them into the results queue
            self.result_queue.put(search_results[result_field]['docs'])
        except SolrCitationQueryError as e:  # 'as' syntax: valid on py2.6+ and py3
            app.logger.error("Solr data query for %s blew up (%s)" % (q, e))
            raise
def search():
    """
    returns the results of a search
    """
    if not len(request.values):
        form = QueryForm(csrf_enabled=False)
        # prefill the database select menu option
        form.db_f.default = config.SEARCH_DEFAULT_DATABASE
    else:
        form = QueryForm.init_with_defaults(request.values)
        if form.validate():
            query_components = QueryBuilderSearch.build(form, request.values)
            resp = solr.query(**query_components)
            if resp.is_error():
                flash(resp.get_error_message(), 'error')
            return render_template('search_results.html', resp=resp, form=form)
        else:
            # items() instead of py2-only iteritems(); iteration is identical
            for field_name, errors_list in form.errors.items():
                flash('errors in the form validation: %s.' % '; '.join(errors_list), 'error')
    return render_template('search.html', form=form)
def search(): """ returns the results of a search """ if not len(request.values): form = QueryForm(csrf_enabled=False) # prefill the database select menu option form.db_f.default = config.SEARCH_DEFAULT_DATABASE else: form = QueryForm.init_with_defaults(request.values) if form.validate(): query_components = QueryBuilderSearch.build(form, request.values) try: resp = solr.query(**query_components) except Exception, e: raise AdsabsSolrqueryException("Error communicating with search service", sys.exc_info()) if resp.is_error(): flash(resp.get_error_message(), 'error') return render_template('search_results.html', resp=resp, form=form, query_components=query_components) else:
def test_01_reqest_context(self):
    """A query inside a request context parses the canned hit count."""
    with self.app.test_request_context('/'):
        with fake_solr_http_response():
            response = solr.query(**{'q': "black holes"})
            self.assertEqual(response.get_hits(), 13)