Exemple #1
0
 def test_highlight_inclusion(self):
     """Highlights show up in a document only when the query requests them."""
     with canned_solr_response_data(), self.app.test_request_context():
         # plain query: the first document must not carry highlights
         plain_doc = solr.query("foo").get_doc(0)
         self.assertNotIn('highlights', plain_doc)

         # same query, this time asking for highlights on the abstract
         highlighted_doc = solr.query("foo", highlights=[('abstract',)]).get_doc(0)
         self.assertIn('highlights', highlighted_doc)
Exemple #2
0
 def test_error_response_pagination(self):
     """Tests the count and pagination getters when SOLR answers with an error"""
     error_response = {
         "responseHeader": {"status": 400, "QTime": 1},
         "error": {
             "msg": "org.apache.lucene.queryparser.classic.ParseException: undefined field authsorfsd",
             "code": 400,
         },
     }
     # pagination dictionary expected for an error response
     expected_pagination = {
         'max_pagination_len': 5,
         'num_total_pages': 0,
         'current_page': 1,
         'pages_before': [],
         'pages_after': [],
     }

     with canned_solr_response_data(error_response), self.app.test_request_context():
         resp = solr.query("foo")

         # every counter falls back to zero
         self.assertEqual(resp.get_count(), 0)
         self.assertEqual(resp.get_hits(), 0)
         self.assertEqual(resp.get_start_count(), 0)

         self.assertEqual(resp.get_pagination(), expected_pagination)
Exemple #3
0
def alladin_lite():
    """
    View that creates the data for alladin lite

    When the request carries 'bibcode' values they are used directly.
    Otherwise the JSON in 'current_search_parameters' is decoded, restricted
    to return only the bibcode field, and executed against Solr to collect
    the bibcodes of the matching documents.

    Returns the rendered 'alladin_lite_embedded.html' template, or the
    generic error template if the parameters cannot be decoded or the Solr
    response is an error.
    """
    # `in` instead of has_key(): has_key() does not exist on Python 3 mappings
    if 'bibcode' in request.values:
        bibcodes = request.values.getlist('bibcode')
    else:
        #if there are not bibcodes, there should be a query to extract them
        try:
            # a missing parameter yields None, which json.loads rejects with TypeError
            query_components = json.loads(request.values.get('current_search_parameters'))
        except (TypeError, JSONDecodeError):
            #@todo: logging of the error
            return render_template('errors/generic_error.html', error_message='Error. Please try later.')

        #update the query parameters to return only what is necessary
        query_components.update({
            'facets': [],
            'fields': ['bibcode'],
            'highlights': [],
            'rows': str(config.SEARCH_DEFAULT_ROWS)
            })

        resp = solr.query(**query_components)

        if resp.is_error():
            return render_template('errors/generic_error.html', error_message='Error while creating the objects skymap. Please try later.')

        bibcodes = [x.bibcode for x in resp.get_docset_objects()]

    return render_template('alladin_lite_embedded.html', bibcodes={'bibcodes':bibcodes})
Exemple #4
0
def get_references(**args):
    """
    Get the references for a set of bibcodes

    Expects the keyword argument 'bibcodes' (an iterable of bibcodes).
    The bibcodes are queried in chunks of config.METRICS_CHUNK_SIZE and the
    contents of the 'reference' field of every returned document are
    accumulated, duplicates included.

    Re-raises SolrReferenceQueryError after logging it.
    """
    # NOTE(review): the visible code accumulates `papers` but never returns
    # it; the snippet may be truncated -- confirm against the full source.
    papers = []
    # This information can be retrieved with one single Solr query
    # (just an 'OR' query of a list of bibcodes)
    # To restrict the size of the query URL, we split the list of
    # bibcodes up in a list of smaller lists
    for biblist in chunks(args['bibcodes'], config.METRICS_CHUNK_SIZE):
        q = " OR ".join(["bibcode:%s" % a for a in biblist])
        try:
            # Get the information from Solr
            # We only need the contents of the 'reference' field (i.e. the list of bibcodes
            # referenced by the paper at hand)
            resp = solr.query(q, rows=config.BIBUTILS_MAX_HITS, fields=['reference'])
        except SolrReferenceQueryError as e:
            app.logger.error("Solr references query for %s blew up (%s)" % (q,e))
            raise
        # Collect all bibcodes in a list (do NOT remove multiplicity)
        search_results = resp.search_response()
        for doc in search_results['results']['docs']:
            if 'reference' in doc:
                papers.extend(doc['reference'])
Exemple #5
0
def get_document(identifier, **kwargs):
    """
    Look up a single document by identifier.

    Runs an 'identifier:<identifier>' query limited to one row with the
    default search fields; extra keyword arguments are passed to solr.query.
    Returns the first document object when the query reports exactly one
    hit, otherwise None.
    """
    resp = solr.query(
        "identifier:%s" % identifier,
        rows=1,
        fields=config.SOLR_SEARCH_DEFAULT_FIELDS,
        **kwargs
    )
    if resp.get_hits() != 1:
        return None
    return resp.get_doc_object(0)
Exemple #6
0
def get_publications_from_query(q):
    """
    Run the query `q` against Solr, requesting only the 'reference' field.

    Re-raises SolrReferenceQueryError after logging it.
    """
    # NOTE(review): the visible code does nothing with `resp`; the snippet
    # may be truncated -- confirm against the full source.
    try:
        # Get the information from Solr
        resp = solr.query(q, rows=config.BIBUTILS_MAX_HITS, fields=['reference'])
    except SolrReferenceQueryError as e:
        app.logger.error("Solr publications query for %s blew up (%s)" % (q,e))
        raise
Exemple #7
0
def word_cloud():
    """
    View that creates the data for the word cloud

    Decodes the JSON in 'current_search_parameters', optionally replaces the
    query with an OR of the 'bibcode' request values, and sends it to the
    '/tvrh' endpoint derived from config.SOLRQUERY_URL with term-vector
    parameters for the abstract field.

    Returns the rendered 'word_cloud_embedded.html' template, or the generic
    error template if the parameters cannot be decoded (code #1) or the Solr
    response is an error (code #2).
    """

    query_url = config.SOLRQUERY_URL
    # swap the last path segment of the query URL for the tvrh handler
    tvrh_query_url = query_url.rsplit('/', 1)[0] + '/tvrh'

    try:
        # a missing parameter yields None, which json.loads rejects with TypeError
        query_components = json.loads(request.values.get('current_search_parameters'))
    except (TypeError, JSONDecodeError):
        #@todo: logging of the error
        return render_template('errors/generic_error.html', error_message='Error while creating the word cloud (code #1). Please try later.')

    # checked bibcodes will be input as 'bibcode' request values and
    # override the query; `in` replaces the Python-2-only has_key()
    if 'bibcode' in request.values:
        bibcodes = request.values.getlist('bibcode')
        query_components['q'] = ' OR '.join(["bibcode:%s" % b for b in bibcodes])

    query_components.update({
        'facets': [],
        'fields': ['id'],
        'highlights': [],
        'defType':'aqp',
        'rows': str(config.WORD_CLOUD_DEFAULT_FIRST_RESULTS),
        'tv.all': 'true',
        'tv.fl':'abstract',
    })

    resp = solr.query(query_url=tvrh_query_url, **query_components)

    if resp.is_error():
        return render_template('errors/generic_error.html', error_message='Error while creating the word cloud (code #2). Please try later.')

    return render_template('word_cloud_embedded.html', wordcloud_data=wc_json(resp.raw_response()))
Exemple #8
0
def author_network():
    """
    View that creates the data for the author network

    Decodes the JSON in 'current_search_parameters', optionally replaces the
    query with an OR of the 'bibcode' request values, and asks Solr only for
    the 'author_norm' field of the matching documents.

    Returns the rendered 'author_network_embedded.html' template, or the
    generic error template if the parameters cannot be decoded (code #1) or
    the Solr response is an error (code #2).
    """

    try:
        # a missing parameter yields None, which json.loads rejects with TypeError
        query_components = json.loads(request.values.get('current_search_parameters'))
    except (TypeError, JSONDecodeError):
        #@todo: logging of the error
        return render_template('errors/generic_error.html', error_message='Error while creating the author network (code #1). Please try later.')

    # checked bibcodes will be input as 'bibcode' request values and
    # override the query; `in` replaces the Python-2-only has_key()
    if 'bibcode' in request.values:
        bibcodes = request.values.getlist('bibcode')
        query_components['q'] = ' OR '.join(["bibcode:%s" % b for b in bibcodes])

    #update the query parameters to return only what is necessary
    query_components.update({
        'facets': [],
        'fields': ['author_norm'],
        'highlights': [],
        'rows': str(config.AUTHOR_NETWORK_DEFAULT_FIRST_RESULTS)
        })

    resp = solr.query(**query_components)

    if resp.is_error():
        return render_template('errors/generic_error.html', error_message='Error while creating the author network (code #2). Please try later.')

    #extract the authors, skipping documents with an empty author list
    lists_of_authors = [doc.author_norm for doc in resp.get_docset_objects() if doc.author_norm]

    return render_template('author_network_embedded.html', network_data=get_authorsnetwork(lists_of_authors))
Exemple #9
0
def export_to_other_formats():
    """
    view that exports a set of papers
    the input is a format and
    a list of bibcodes or a variable containing the parameters for a solr query

    Returns a text/plain Response whose first line reports how many records
    were exported, or the generic error template if the query parameters
    cannot be decoded (code #1) or the Solr response is an error (code #2).
    """
    #extract the format
    export_format = request.values.getlist('export_format')
    list_type = request.values.get('list_type')

    #flag to check if everything has been extracted
    all_extracted = True
    num_hits = None

    # `in` replaces the Python-2-only has_key()
    if 'bibcode' in request.values:
        #extract all the bibcodes
        bibcodes_to_export = request.values.getlist('bibcode')
    else:
        #if there are not bibcodes, there should be first a query to extract them
        #@todo: code to query solr with the same query parameters but override the fields to retrieve
        try:
            # a missing parameter yields None, which json.loads rejects with TypeError
            query_components = json.loads(request.values.get('current_search_parameters'))
        except (TypeError, JSONDecodeError):
            #@todo: logging of the error
            return render_template('errors/generic_error.html', error_message='Error while exporting records (code #1). Please try later.')

        #update the query parameters to return only what is necessary
        query_components.update({'facets':[], 'fields': ['bibcode'], 'highlights':[], 'rows': str(config.EXPORT_DEFAULT_ROWS)})
        #execute the query
        if list_type == 'similar':
            resp = get_document_similar(**query_components)
        else:
            resp = solr.query(**query_components)
        if resp.is_error():
            return render_template('errors/generic_error.html', error_message='Error while exporting records (code #2). Please try later.')
        #extract the bibcodes
        bibcodes_to_export = [doc.bibcode for doc in resp.get_docset_objects()]
        #check if all the results of the query have been extracted ( num results <= max to extract )
        if resp.get_hits() > config.EXPORT_DEFAULT_ROWS:
            all_extracted = False
            num_hits = resp.get_hits()

    #actually export the records
    if bibcodes_to_export:
        export_str = get_classic_records_export(bibcodes_to_export, export_format)
    else:
        export_str = ''

    #if not everything has been extracted, show message on top
    if not all_extracted:
        export_str = 'Exported first %s results of %s total. \n\n\n%s' % (config.EXPORT_DEFAULT_ROWS, num_hits, export_str)
    else:
        export_str = 'Exported %s records \n\n\n%s' % (len(bibcodes_to_export), export_str)

    return Response(export_str, mimetype='text/plain')
Exemple #10
0
 def get_citations(self, **kwargs):
     """
     Query Solr for the citations of this document.

     Builds a 'citations(<id field>:<id value>)' query from self.data and
     returns the response of solr.query; extra keyword arguments are
     forwarded to it. The call is timed with statsd.
     """
     id_field = config.SOLR_DOCUMENT_ID_FIELD
     citations_query = "citations(%s:%s)" % (id_field, self.data[id_field])
     with statsd.timer("core.solr.citations.query_response_time"):
         return solr.query(citations_query, **kwargs)
Exemple #11
0
def get_document_similar(q, **kwargs):
    """
    Run `q` against the '/mlt' endpoint derived from config.SOLRQUERY_URL.

    The request parameters start from config.SOLR_MLT_PARAMS, get 'mlt.fl'
    set to the comma-joined config.SOLR_MLT_FIELDS, and are then overridden
    by any keyword arguments passed in.
    """
    # TODO: someday maybe flask-solrquery can be made to know about different endpoints
    base_url = config.SOLRQUERY_URL.rsplit('/', 1)[0]
    mlt_query_url = base_url + '/mlt'

    mlt_params = dict(config.SOLR_MLT_PARAMS)
    mlt_params['mlt.fl'] = ','.join(config.SOLR_MLT_FIELDS)
    mlt_params.update(kwargs)

    return solr.query(q, query_url=mlt_query_url, **mlt_params)
Exemple #12
0
def get_document(identifier, **kwargs):
    """
    Look up a single document by identifier, timing the Solr call with statsd.

    Runs an 'identifier:<identifier>' query limited to one row with the
    default search fields; extra keyword arguments are passed to solr.query.
    Returns the first document object when the query reports exactly one
    hit, otherwise None.
    """
    identifier_query = "identifier:%s" % identifier
    with statsd.timer("core.solr.document.query_response_time"):
        resp = solr.query(
            identifier_query,
            rows=1,
            fields=config.SOLR_SEARCH_DEFAULT_FIELDS,
            **kwargs
        )
    if resp.get_hits() != 1:
        return None
    return resp.get_doc_object(0)
Exemple #13
0
 def test_response_content_with_facets(self):
     """A query that requests facets exposes them in the search response."""
     expected_facet_queries = {'year:[2000 TO 2003]': 13}
     expected_facet_fields = {
         'bibstem_facet': ['ApJ', 10, 'ArXiv', 8],
         'year': ['2009', 3, '2008', 5],
     }
     with canned_solr_response_data(), self.app.test_request_context():
         resp = solr.query("foo", facets=[('year',)])
         results = resp.search_response()['results']
         self.assertIn('facets', results)
         self.assertEqual(resp.get_all_facet_queries(), expected_facet_queries)
         self.assertEqual(resp.get_all_facet_fields(), expected_facet_fields)
Exemple #14
0
def facets():
    """
    returns facet sets for a search query

    Builds the query components from the validated form with
    facets_components=True, runs the Solr query and renders
    'facets_sublevel.html'. Returns None when the form does not validate.
    """
    form = QueryForm.init_with_defaults(request.values)
    if not form.validate():
        return None

    query_components = QueryBuilderSearch.build(form, request.values, facets_components=True)
    solr_response = solr.query(**query_components)
    return render_template(
        'facets_sublevel.html',
        resp=solr_response,
        facet_field_interf_id=query_components['facet_field_interf_id'],
    )
Exemple #15
0
def solr_search():
    """
    Run the 'q' request argument against Solr and render the results page.

    Aborts with HTTP 400 when no 'q' argument is supplied. The query asks for
    200 rows of bibcode/title/score, sorted by date descending and filtered
    to the ASTRONOMY database.
    """
    # request.args.get() returns None for a missing key instead of raising,
    # so the missing-parameter case must be tested explicitly (the previous
    # bare `except` around the lookup could never trigger the 400).
    q = request.args.get('q')
    if q is None:
        abort(400)
    resp = solr.query(q, rows=200, fields=['bibcode','title','score'],
                      sort=(config.SOLR_SORT_OPTIONS['DATE'], 'desc'),
                      filters=['database:ASTRONOMY'])
    search_results = resp.search_response()
    return render_template('results.html', results=search_results['results'], type='solr', search_url=resp.request.url)
Exemple #16
0
    def test_response_content(self):
        """A plain query succeeds and returns results without facets."""
        with canned_solr_response_data(), self.app.test_request_context():
            resp = solr.query("foo")

            # transport-level checks
            self.assertEqual(resp.get_http_status(), 200)
            self.assertFalse(resp.is_error())

            # payload checks
            payload = resp.search_response()
            self.assertIn('results', payload)
            self.assertNotIn('facets', payload['results'])
Exemple #17
0
def facets():
    """
    returns facet sets for a search query

    Builds the query components from the validated form with
    facets_components=True, runs the Solr query and renders
    'facets_sublevel.html'. Returns None when the form does not validate.

    Raises AdsabsSolrqueryException (carrying sys.exc_info()) if the Solr
    query raises any exception.
    """
    form = QueryForm.init_with_defaults(request.values)
    if form.validate():
        query_components = QueryBuilderSearch.build(form, request.values, facets_components=True)
        try:
            resp = solr.query(**query_components)
        except Exception:
            # the original exception is handed over via sys.exc_info(), so it
            # does not need to be bound to a name here
            raise AdsabsSolrqueryException("Error communicating with search service", sys.exc_info())
        return render_template('facets_sublevel.html', resp=resp, facet_field_interf_id=query_components['facet_field_interf_id'] )
Exemple #18
0
    def test_02_solr_request_http_method(self):
        """The HTTP method can be set per prepare() call and on the solr object."""
        req = SearchRequest("foo")
        prepared = req.prepare("http://example.com/select")
        self.assertEqual(prepared.method, 'GET')
        prepared = req.prepare("http://example.com/select", method='POST')
        self.assertEqual(prepared.method, 'POST')

        with self.app.test_request_context():
            solr.request_http_method = 'POST'
            try:
                with fake_solr_http_response():
                    resp = solr.query(**{'q': "black holes"})
                    self.assertEqual(resp.request.prepared.method, 'POST')
            finally:
                # restore the shared solr object even if the assertion fails,
                # so the POST setting cannot leak into other tests
                solr.request_http_method = 'GET'
Exemple #19
0
 def get_toc(self, **kwargs):
     """
     Returns the table of contents

     Queries SOLR for 'bibcode:<prefix>*', where the prefix is the first 13
     characters of self.bibcode, or the first 14 when the 14th character
     is an "E". Extra keyword arguments are forwarded to solr.query.
     """
     bibcode = self.bibcode
     prefix_length = 14 if bibcode[13] == 'E' else 13
     return solr.query("bibcode:%s*" % bibcode[:prefix_length], **kwargs)
def get_article_data(biblist, check_references=True):
    '''
    Get basic article metadata for a list of bibcodes

    Builds an OR query over `biblist` and asks Solr for a fixed set of
    fields, sorted by pubdate and then bibcode, both descending.
    Re-raises SolrQueryError after logging it.
    '''
    # NOTE(review): `check_references` and `resp` are not used in the visible
    # code; the snippet may be truncated -- confirm against the full source.
    # (local renamed from `list` so the builtin is no longer shadowed)
    q = " OR ".join(["bibcode:%s" % a for a in biblist])
    fl = ['bibcode','title','first_author','keyword_norm','reference','citation_count','pubdate']
    try:
        # Get the information from Solr
        resp = solr.query(q, sort=[["pubdate", "desc"], ["bibcode", "desc"]], rows=config.BIBUTILS_MAX_HITS, fields=fl)
    except SolrQueryError as e:
        app.logger.error("Solr article data query for %s blew up (%s)" % (str(biblist),e))
        raise
def get_normalized_keywords(bibc):
    '''
    For a given publication, construct a list of normalized keywords of this
    publication and its references

    Queries Solr for the 'keyword_norm' field of the publication `bibc` and
    of the documents matched by references(bibcode:<bibc>).
    Re-raises SolrQueryError after logging it.
    '''
    # NOTE(review): `keywords` and `resp` are not used in the visible code;
    # the snippet may be truncated -- confirm against the full source.
    keywords = []
    # NOTE(review): the boolean operator is a lowercase 'or'; confirm the
    # query parser in use treats it as an operator rather than a search term.
    q = 'bibcode:%s or references(bibcode:%s)' % (bibc,bibc)
    try:
        # Get the information from Solr
        resp = solr.query(q, rows=config.BIBUTILS_MAX_HITS, fields=['keyword_norm'])
    except SolrQueryError as e:
        app.logger.error("Solr keywords query for %s blew up (%s)" % (bibc,e))
        raise
    def test_02_solr_request_http_method(self):
        """The HTTP method can be set per prepare() call and on the solr object."""
        req = SearchRequest("foo")
        prepared = req.prepare("http://example.com/select")
        self.assertEqual(prepared.method, 'GET')
        prepared = req.prepare("http://example.com/select", method='POST')
        self.assertEqual(prepared.method, 'POST')

        with self.app.test_request_context():
            solr.request_http_method = 'POST'
            try:
                with fake_solr_http_response():
                    resp = solr.query(**{'q': "black holes"})
                    self.assertEqual(resp.request.prepared.method, 'POST')
            finally:
                # restore the shared solr object even if the assertion fails,
                # so the POST setting cannot leak into other tests
                solr.request_http_method = 'GET'
Exemple #23
0
 def test_error_response_parsing_02(self):
     """Error message that does not contain a solr exception class name"""
     message = "random string here"
     error_response = {
         "responseHeader": {"status": 500, "QTime": 1},
         "error": {"msg": message, "code": 500},
     }
     with canned_solr_response_data(error_response, 500), self.app.test_request_context('/'):
         resp = solr.query("foo")

         self.assertEqual(resp.get_http_status(), 500)
         self.assertTrue(resp.is_error())
         # raw error and cleaned-up message are both the plain string
         self.assertEqual(resp.get_error(), message)
         self.assertEqual(resp.get_error_message(), message)
Exemple #24
0
 def get_toc(self, **kwargs):
     """
     Returns the table of contents

     Queries SOLR for 'bibcode:<prefix>*', where the prefix is the first 13
     characters of self.bibcode, or the first 14 when the 14th character
     is an "E". Extra keyword arguments are forwarded to solr.query and
     the call is timed with statsd.
     """
     bibcode = self.bibcode
     prefix_length = 14 if bibcode[13] == 'E' else 13
     toc_query = "bibcode:%s*" % bibcode[:prefix_length]
     with statsd.timer("core.solr.toc.query_response_time"):
         return solr.query(toc_query, **kwargs)
Exemple #25
0
def get_references(**args):
    """
    Get the references for a set of bibcodes

    Expects the keyword argument 'bibcodes' (an iterable of bibcodes) and
    queries Solr once for the 'reference' field of all of them.
    Re-raises SolrReferenceQueryError after logging it.
    """
    # NOTE(review): `papers` and `resp` are not used in the visible code;
    # the snippet may be truncated -- confirm against the full source.
    papers = []
    # This information can be retrieved with one single Solr query
    # (just an 'OR' query of a list of bibcodes)
    q = " OR ".join(["bibcode:%s" % a for a in args['bibcodes']])
    try:
        # Get the information from Solr
        # We only need the contents of the 'reference' field (i.e. the list of bibcodes
        # referenced by the paper at hand)
        resp = solr.query(q, rows=config.BIBUTILS_MAX_HITS, fields=['reference'])
    except SolrReferenceQueryError as e:
        app.logger.error("Solr references query for %s blew up (%s)" % (q,e))
        raise
Exemple #26
0
def get_meta_data(**args):
    """
    Get the meta data for a set of bibcodes

    Expects the keyword argument 'results', an iterable of (bibcode, score)
    pairs, and queries Solr once for the bibcode, title and first author of
    all of them.
    Re-raises SolrMetaDataQueryError after logging it.
    """
    # NOTE(review): `data_dict` and `resp` are not used in the visible code;
    # the snippet may be truncated -- confirm against the full source.
    data_dict = {}
    # This information can be retrieved with one single Solr query
    # (just an 'OR' query of a list of bibcodes)
    bibcodes = [bibcode for (bibcode, score) in args['results']]
    q = " OR ".join(["bibcode:%s" % b for b in bibcodes])
    try:
        # Get the information from Solr
        # NOTE(review): the field list is one comma-separated string, unlike
        # other callers that pass one list item per field -- confirm intended.
        resp = solr.query(q, rows=config.BIBUTILS_MAX_HITS, fields=['bibcode,title,first_author'])
    except SolrMetaDataQueryError as e:
        # Log the query itself: the old code logged `bibcode`, a variable
        # leaked from the list comprehension that only held the last bibcode
        # on Python 2 and is undefined on Python 3 or with empty results.
        app.logger.error("Solr meta data query for %s blew up (%s)" % (q,e))
        raise
Exemple #27
0
 def test_error_response_parsing_01(self):
     """Tests the error getters for an error raised by the SOLR query parser"""
     raw_message = "org.apache.lucene.queryparser.classic.ParseException: undefined field authsorfsd"
     error_response = {
         "responseHeader": {"status": 400, "QTime": 1},
         "error": {"msg": raw_message, "code": 400},
     }
     with canned_solr_response_data(error_response, 400), self.app.test_request_context():
         resp = solr.query("foo")

         self.assertEqual(resp.get_http_status(), 400)
         self.assertTrue(resp.is_error())
         # get_error keeps the exception class, get_error_message drops it
         self.assertEqual(resp.get_error(), raw_message)
         self.assertEqual(resp.get_error_message(), "undefined field authsorfsd")
Exemple #28
0
 def run(self):
     """
     Consume tasks from self.task_queue until a None sentinel is received.

     Each task is either '<bibcode>' or '<bibcode>/<number of authors>'.
     For every task the citations of the bibcode are fetched from Solr and a
     dictionary with the citing bibcodes, plus per-citation info tuples
     (bibcode, number of references, number of authors, publication year)
     split into refereed and non-refereed, is put on self.result_queue.

     Re-raises SolrCitationQueryError after logging it.
     """
     while True:
         bbc = self.task_queue.get()
         if bbc is None:
             break
         Nauths = 1
         try:
             bibcode, Nauths = bbc.split('/')
         except (ValueError, AttributeError):
             # not exactly one '/' separator (or not a string): the task is
             # taken to be the bare bibcode
             bibcode = bbc
         q = 'citations(bibcode:%s)' % bibcode
         fl = 'bibcode,property,reference'
         try:
             if sys.platform == 'darwin':
                 # NOTE(review): the old code never assigned `search_results`
                 # on this branch (NameError below). This assumes solr_req
                 # returns the decoded Solr JSON, whose documents live under
                 # 'response' -- confirm against solr_req.
                 search_results = solr_req(config.SOLR_URL + '/select', q=q, fl=fl, rows=config.BIBUTILS_MAX_HITS)
                 result_field = 'response'
             else:
                 result_field = 'results'
                 # do the query and filter out the results without the bibcode field
                 # (publications without citations return an empty document)
                 resp = solr.query(q, rows=config.BIBUTILS_MAX_HITS, fields=fl.split(','))
                 search_results = resp.search_response()
             # gather citations and put them into the results queue
             citations = []
             cits = []
             ref_cits = []
             non_ref_cits = []
             for doc in search_results[result_field]['docs']:
                 if 'bibcode' not in doc:
                     continue
                 # the year comes from the queried bibcode, not the citing one
                 pubyear = int(bibcode[:4])
                 try:
                     Nrefs = len(doc['reference'])
                 except (KeyError, TypeError):
                     # no reference field, or a value without a length
                     Nrefs = 0
                 citations.append(doc['bibcode'])
                 cit_info = (doc['bibcode'], Nrefs, int(Nauths), pubyear)
                 cits.append(cit_info)
                 if 'REFEREED' in doc['property']:
                     ref_cits.append(cit_info)
                 else:
                     non_ref_cits.append(cit_info)
             self.result_queue.put({'bibcode':bibcode,'citations':citations,'cit_info':cits,'ref_cit_info':ref_cits,'non_ref_cit_info':non_ref_cits})
         except SolrCitationQueryError as e:
             app.logger.error("Solr citation query for %s blew up (%s)" % (bibcode,e))
             raise
Exemple #29
0
 def run(self):
     """
     Consume lists of bibcodes from self.task_queue until a None sentinel
     is received.

     For each list an OR query over the bibcodes is sent to Solr and the
     returned documents are put on self.result_queue.

     Re-raises SolrCitationQueryError after logging it.
     """
     while True:
         biblist = self.task_queue.get()
         if biblist is None:
             break
         q = " OR ".join(["bibcode:%s" % a for a in biblist])
         fl = 'bibcode,reference,author_norm,property,read_count'
         try:
             result_field = 'results'
             # run the query for the requested fields
             resp = solr.query(q, rows=config.BIBUTILS_MAX_HITS, fields=fl.split(','))
             search_results = resp.search_response()
             # put the returned documents into the results queue
             self.result_queue.put(search_results[result_field]['docs'])
         except SolrCitationQueryError as e:
             app.logger.error("Solr data query for %s blew up (%s)" % (q,e))
             raise
Exemple #30
0
def search():
    """
    returns the results of a search

    Without request values an empty form is rendered with the default
    database preselected. With request values the form is validated: on
    success the Solr query is run and 'search_results.html' is rendered
    (flashing the Solr error message, if any); on failure the validation
    errors are flashed and the search form is rendered again.
    """
    if not len(request.values):
        form = QueryForm(csrf_enabled=False)
        # prefill the database select menu option
        form.db_f.default = config.SEARCH_DEFAULT_DATABASE
    else:
        form = QueryForm.init_with_defaults(request.values)
        if form.validate():
            query_components = QueryBuilderSearch.build(form, request.values)
            resp = solr.query(**query_components)
            if resp.is_error():
                flash(resp.get_error_message(), 'error')
            return render_template('search_results.html', resp=resp, form=form)
        else:
            # values() instead of the Python-2-only iteritems(); the field
            # names were never used in the message
            for errors_list in form.errors.values():
                flash('errors in the form validation: %s.' % '; '.join(errors_list), 'error')
    return render_template('search.html', form=form)
Exemple #31
0
def search():
    """
    returns the results of a search

    Without request values an empty form is created with the default
    database preselected. With request values the form is validated and, on
    success, the Solr query is run and 'search_results.html' is rendered
    together with the query components.
    """
    # NOTE(review): this snippet is cut off after the final `else:`; the
    # handling of an invalid form is not visible here.
    if not len(request.values):
        form = QueryForm(csrf_enabled=False)
        # prefill the database select menu option
        form.db_f.default = config.SEARCH_DEFAULT_DATABASE
    else:
        form = QueryForm.init_with_defaults(request.values)
        if form.validate():
            query_components = QueryBuilderSearch.build(form, request.values)
            try:
                resp = solr.query(**query_components)
            except Exception, e:
                # any failure of the query is wrapped, passing along sys.exc_info()
                raise AdsabsSolrqueryException("Error communicating with search service", sys.exc_info())
            # an error reported inside the Solr response is shown to the user
            if resp.is_error():
                flash(resp.get_error_message(), 'error')
            return render_template('search_results.html', resp=resp, form=form, query_components=query_components)
        else:
    def test_01_reqest_context(self):
        """A query issued inside a request context reports the faked hit count."""
        with self.app.test_request_context('/'), fake_solr_http_response():
            query_args = {'q': "black holes"}
            resp = solr.query(**query_args)
            self.assertEqual(resp.get_hits(), 13)