Ejemplo n.º 1
0
 def get_similar(self, **kwargs):
     """
     Run the "similar documents" lookup for this document.

     The query string is "<id field>:<value>", where the field name comes
     from config.SOLR_DOCUMENT_ID_FIELD and the value is read from
     self.data under that same field name. All keyword arguments are
     forwarded unchanged to get_document_similar, and whatever it returns
     is handed back to the caller.
     """
     # Imported at call time rather than at module level, as before.
     from adsabs.core.solr import get_document_similar
     id_field = config.SOLR_DOCUMENT_ID_FIELD
     query = "%s:%s" % (id_field, self.data[id_field])
     # Time only the lookup itself.
     with statsd.timer("core.solr.similar.query_response_time"):
         return get_document_similar(query, **kwargs)
Ejemplo n.º 2
0
 def get_citations(self, **kwargs):
     """
     Query solr for the citations of this document.

     Wraps "<id field>:<value>" (field name from
     config.SOLR_DOCUMENT_ID_FIELD, value from self.data) in the
     citations(...) operator, forwards all keyword arguments to
     solr.query and returns its response.
     """
     id_field = config.SOLR_DOCUMENT_ID_FIELD
     document_clause = "%s:%s" % (id_field, self.data[id_field])
     query = "citations(%s)" % document_clause
     # Time only the solr round trip.
     with statsd.timer("core.solr.citations.query_response_time"):
         return solr.query(query, **kwargs)
Ejemplo n.º 3
0
def get_document(identifier, **kwargs):
    """
    Fetch a single document by identifier.

    Sends "identifier:<identifier>" to solr.query, asking for one row and
    the fields listed in config.SOLR_SEARCH_DEFAULT_FIELDS; extra keyword
    arguments are passed through. Returns the first document object when
    the response reports exactly one hit, otherwise None.
    """
    query = "identifier:%s" % identifier
    with statsd.timer("core.solr.document.query_response_time"):
        response = solr.query(
            query,
            rows=1,
            fields=config.SOLR_SEARCH_DEFAULT_FIELDS,
            **kwargs
        )
    # Anything other than exactly one hit is treated as "not found".
    if response.get_hits() != 1:
        return None
    return response.get_doc_object(0)
Ejemplo n.º 4
0
 def get_toc(self, **kwargs):
     """
     Return the table of contents this document belongs to.

     Queries solr for every bibcode that starts with the first 13
     characters of self.bibcode. When the 14th character is "E", that
     character is included in the prefix as well. All keyword arguments
     are forwarded to solr.query and its response is returned.
     """
     bibcode = self.bibcode
     # Compare a one-character slice instead of indexing: bibcode[13]
     # raises IndexError on a bibcode shorter than 14 characters, while
     # the slice simply yields "" and falls through to the 13-char prefix.
     prefix_length = 14 if bibcode[13:14] == 'E' else 13
     q = "bibcode:%s*" % bibcode[:prefix_length]
     with statsd.timer("core.solr.toc.query_response_time"):
         resp = solr.query(q, **kwargs)
     return resp
Ejemplo n.º 5
0
def get_suggestions(**args):
    """
    Suggest papers related to a list of bibcodes.

    Candidates are the papers returned by get_citing_papers and
    get_references for the input bibcodes, minus the input bibcodes
    themselves. Each candidate is scored by how many times it occurs
    (multiplicity matters), and candidates are ranked most frequent first.

    Keyword arguments:
      bibcodes -- input bibcodes; an empty or missing value yields []
      Nsuggest -- maximum number of suggestions
                  (default: config.BIBUTILS_DEFAULT_SUGGESTIONS)
      fmt      -- 'score' to report the frequency as the score; any other
                  value reports 'NA' (default: config.BIBUTILS_DEFAULT_FORMAT)

    Returns a list of dicts with the keys 'bibcode', 'score', 'title' and
    'author'. Candidates without an entry in the metadata returned by
    get_meta_data are left out.
    """
    bibcodes = args.get('bibcodes', [])
    # Nothing to do without input; bail out before starting a timer that
    # would otherwise never be stopped.
    if not bibcodes:
        return []

    timer = statsd.timer("bibutils.get_suggestions.generate_time")
    timer.start()

    # Any overrides for default values?
    max_suggestions = args.get('Nsuggest', config.BIBUTILS_DEFAULT_SUGGESTIONS)
    output_format = args.get('fmt', config.BIBUTILS_DEFAULT_FORMAT)

    # Get rid of potential surrounding whitespace and cap the input size.
    # List comprehensions (rather than map/filter) keep real lists, so the
    # slicing and concatenation below do not depend on the builtins'
    # return type.
    bibcodes = [b.strip() for b in bibcodes][:config.BIBUTILS_MAX_INPUT]

    # Citations and references for all publications; keeping multiplicity
    # is essential because it is what the score is built from.
    cits = [c for c in get_citing_papers(bibcodes=bibcodes) if len(c) > 0]
    refs = [r for r in get_references(bibcodes=bibcodes) if len(r) > 0]

    # Remove papers from the original list to get candidates (set lookup
    # avoids rescanning the input list for every candidate).
    known = set(bibcodes)
    papers = [p for p in cits + refs if p not in known]

    # Establish frequencies of papers in the results, most frequent first.
    # The sort is stable, so ties stay in sorted-bibcode order.
    paper_freq = [(k, len(list(g))) for k, g in groupby(sorted(papers))]
    paper_freq.sort(key=operator.itemgetter(1), reverse=True)

    # Keep only frequencies above the threshold and below the input size.
    input_size = len(bibcodes)
    paper_freq = [
        entry for entry in paper_freq
        if config.BIBUTILS_THRESHOLD_FREQUENCY < entry[1] < input_size
    ]

    # Get metadata for the suggestions we are going to report.
    top = paper_freq[:max_suggestions]
    meta_dict = get_meta_data(results=top)
    timer.stop()

    # Return results in the required format.
    with_score = output_format == 'score'
    return [
        {
            'bibcode': bibcode,
            'score': freq if with_score else 'NA',
            'title': meta_dict[bibcode]['title'],
            'author': meta_dict[bibcode]['author'],
        }
        for (bibcode, freq) in top
        if bibcode in meta_dict
    ]
Ejemplo n.º 6
0
def generate_metrics(**args):
    """
    Compute metrics for a set of publications.

    The attributes for all publications involved are gathered with
    get_attributes(args), attached to every model returned by
    metricsmodels.data_models, and the models are then evaluated in a
    process pool via generate_data. The combined results are shaped by
    format_results.

    Keyword arguments read here:
      fmt   -- 'legacy' returns legacy_format(results); 'API' returns the
               results with spaces in keys replaced by underscores; any
               other value returns the results unchanged (default: '')
      types -- comma-separated list of model types
               (default: config.METRICS_DEFAULT_MODELS)

    The full keyword dictionary is also handed to get_attributes.
    """
    timer = statsd.timer("bibutils.generate_metrics.generate_time")
    timer.start()

    # First we gather the necessary 'attributes' for all publications involved
    attr_list, num_cit, num_cit_ref = get_attributes(args)
    # Determine the output format (really only used to get the 'legacy format')
    output_format = args.get('fmt', '')
    # What types of metrics are we gathering (everything by default)
    model_types = args.get('types', config.METRICS_DEFAULT_MODELS)

    # Instantiate the metrics classes, defined in the 'models' module
    stats_models = []
    for model_class in metricsmodels.data_models(models=model_types.split(',')):
        model_class.attributes = attr_list
        model_class.num_citing = num_cit
        model_class.num_citing_ref = num_cit_ref
        model_class.results = {}
        stats_models.append(model_class)

    # The metrics calculations are sent off in parallel. The pool is always
    # shut down afterwards so worker processes do not pile up across calls.
    po = Pool()
    try:
        model_results = po.map_async(generate_data, stats_models).get()
    finally:
        po.terminate()
        po.join()

    # Now shape the results in the final format
    results = format_results(model_results)
    timer.stop()

    # Send the result back to our caller
    if output_format == 'legacy':
        return legacy_format(results)

    if output_format == 'API':
        # Iterate over snapshots of the keys: the dicts are modified while
        # their keys are being renamed.
        for key in list(results.keys()):
            newkey = key.replace(' ', '_')
            results[newkey] = results.pop(key)
            # NOTE(review): this condition is only false for a key that
            # contains both 'histogram' and 'series'; 'and' may have been
            # intended. Left unchanged to preserve the current output.
            if 'histogram' not in key or 'series' not in key:
                section = results[newkey]
                for kee in list(section.keys()):
                    section[kee.replace(' ', '_')] = section.pop(kee)

    return results
Ejemplo n.º 7
0
def search():
    """
    returns the results of a search
    """
    if not len(request.values):
        form = QueryForm(csrf_enabled=False)
        # prefill the database select menu option
        form.db_f.default = config.SEARCH_DEFAULT_DATABASE
    else:
        form = QueryForm.init_with_defaults(request.values)
        if form.validate():
            query_components = QueryBuilderSearch.build(form, request.values)
            bigquery_id = request.values.get('bigquery')
            try:
                
                req = solr.create_request(**query_components)
                url = None
                if bigquery_id:
                    prepare_bigquery_request(req, request.values['bigquery'])
                    url = config.SOLRBIGQUERY_URL
                    
                req = solr.set_defaults(req, query_url=url)
                
                with statsd.timer("search.solr.query_response_time"):
                    resp = solr.get_response(req)
                
                statsd.incr("search.solr.executed")
                if bigquery_id:
                    facets = resp.get_facet_parameters()
                    facets.append(('bigquery', bigquery_id))
                
            except Exception, e:
                statsd.incr("search.solr.failed")
                raise AdsabsSolrqueryException("Error communicating with search service", sys.exc_info())
            if resp.is_error():
                statsd.incr("search.solr.error")
                flash(resp.get_error_message(), 'error')

            return render_template('search_results.html', resp=resp, form=form, 
                                   query_components=query_components, bigquery_id=bigquery_id)
        else:
Ejemplo n.º 8
0
 def set_statsd_context():
     """
     Record the statsd context for the current request and start timing it.

     The context is "<request.endpoint>.<request.method>" and is stored on
     g.statsd_context. A statsd timer named "<context>.response_time" is
     stored on g.total_request_timer and started here; this function does
     not stop it.
     """
     context = "%s.%s" % (request.endpoint, request.method)
     g.statsd_context = context
     request_timer = statsd.timer(context + ".response_time")
     g.total_request_timer = request_timer
     request_timer.start()
Ejemplo n.º 9
0
 def get_coreads(self, **kwargs):
     """
     Return the results of the 'trending' 2nd order operator.

     The operator is applied to "<id field>:<value>", with the field name
     taken from config.SOLR_DOCUMENT_ID_FIELD and the value from self.data.
     Keyword arguments are forwarded to solr.query, whose response is
     returned.
     """
     id_field = config.SOLR_DOCUMENT_ID_FIELD
     document_clause = "%s:%s" % (id_field, self.data[id_field])
     query = "trending(%s)" % document_clause
     # Time only the solr round trip.
     with statsd.timer("core.solr.coreads.query_response_time"):
         return solr.query(query, **kwargs)