# Shared module-level dependencies, reconstructed from usage; app-specific
# helpers (fulltext_search_title, abort_json, str_to_bool, elapsed, log_query,
# get_synonym, get_term_lookup, ...) are defined elsewhere in the repo.
import re
from time import time

from flask import jsonify, request


def get_search_query():
    query = request.args.get("query", None)
    is_oa = request.args.get("is_oa", None)

    if is_oa is not None:
        try:
            is_oa = str_to_bool(is_oa)
        except ValueError:
            if is_oa == 'null':
                is_oa = None
            else:
                abort_json(400, "is_oa must be 'true' or 'false'")

    if not query:
        abort_json(400, "query parameter is required")

    start_time = time()
    response = fulltext_search_title(query, is_oa)
    sorted_response = sorted(response, key=lambda k: k['score'], reverse=True)

    # append the version suffix, e.g. "(V2)", to titles of versioned DOIs
    for api_response in sorted_response:
        doi = api_response['response']['doi']
        version_suffix = re.findall(ur'[./](v\d+)$', doi, re.IGNORECASE)

        if version_suffix:
            title = api_response['response']['title']
            title = u'{} ({})'.format(title, version_suffix[0].upper())
            api_response['response']['title'] = title

    elapsed_time = elapsed(start_time, 3)

    return jsonify({"results": sorted_response, "elapsed_seconds": elapsed_time})
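# The handler above relies on a str_to_bool helper that raises ValueError on
# unrecognized input (the except ValueError branch depends on that contract).
# A minimal sketch of such a helper, assuming only what the call sites show;
# the repo's actual implementation may differ:
def str_to_bool(value):
    if value.lower() in ("true", "yes", "1"):
        return True
    if value.lower() in ("false", "no", "0"):
        return False
    raise ValueError(u"invalid boolean string: {}".format(value))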
def get_search_query(query):
    start_time = time()
    my_pubs = fulltext_search_title(query)
    response = [my_pub.to_dict_search() for my_pub in my_pubs]
    sorted_response = sorted(response, key=lambda k: k['score'], reverse=True)
    elapsed_time = elapsed(start_time, 3)

    return jsonify({
        "results": sorted_response,
        "elapsed_seconds": elapsed_time
    })
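# From this version on the handler takes `query` as an argument instead of
# reading request.args, which suggests it is bound to a parameterized route.
# A hypothetical registration -- the route path, the endpoint name, and the
# `app` Flask object are assumptions, not taken from the repo:
@app.route("/search/<path:query>")
def search_endpoint(query):
    return get_search_query(query)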
def get_search_query(query):
    start_time = time()
    my_pubs = fulltext_search_title(query)

    print "building response"
    response = [my_pub.to_dict_serp() for my_pub in my_pubs]
    sorted_response = sorted(response, key=lambda k: k['score'], reverse=True)
    print "done building response"

    print "getting synonyms"
    synonym = get_synonym(query)
    print "done getting synonyms"

    print "getting terms"
    term_lookup = get_term_lookup(query)
    # if the raw query has no term entry, fall back to looking up its synonym
    if synonym and not term_lookup:
        term_lookup = get_term_lookup(synonym)
    print "done getting terms"

    elapsed_time = elapsed(start_time, 3)

    return jsonify({
        "results": sorted_response,
        "synonym": synonym,
        "term_lookup": term_lookup,
        "elapsed_seconds": elapsed_time
    })
def get_search_query(query):
    start_time = time()
    query = query.replace(u"_", u" ")

    # log the query with the caller's IP unless it came from an automated client
    if request.headers.getlist("X-Forwarded-For"):
        ip = request.headers.getlist("X-Forwarded-For")[0]
    else:
        ip = request.remote_addr
    if not request.args.get("automated", None):
        log_query(query, ip)

    no_live_calls = request.args.get("no-live-calls", "")
    nocache = request.args.get("nocache", "true")

    return_full_api_response = True
    if request.args.get("minimum", ""):
        return_full_api_response = False

    query_entities = get_entities_from_query(query)
    print "query_entities", query_entities
    getting_entity_lookup_elapsed = elapsed(start_time, 3)

    # page starts at 1, not 0
    page = 1
    try:
        page = int(request.args.get("page"))
    except (TypeError, ValueError):
        pass
    if page > 10:
        abort_json(400, u"Page too large. The API currently supports only 10 pages.")

    if request.args.get("pagesize"):
        pagesize = int(request.args.get("pagesize"))
    else:
        pagesize = 10
    if pagesize > 100:
        abort_json(400, u"pagesize too large; max 100")

    try:
        oa_only = str_to_bool(request.args.get("oa", "false"))
    except ValueError:
        oa_only = False

    # note: nocache defaults to the (truthy) string "true", so the cache is
    # only consulted when the caller explicitly passes an empty ?nocache=
    if nocache:
        print u"skipping cache"
    else:
        # single-entity, first-page queries can be served from the cache
        if query_entities and len(query_entities) == 1 and page == 1:
            cached_response = get_cached_api_response(query_entities[0], oa_only)
            if cached_response and cached_response[0]:
                (api_response, collected_date) = cached_response
                total_time = elapsed(start_time, 3)
                api_response["_cached_on"] = collected_date.isoformat()
                api_response["_timing"] = {"total": total_time}
                print "got cached response"
                return jsonify(api_response)

    (pubs_to_sort, time_to_pmids_elapsed, time_for_pubs_elapsed) = fulltext_search_title(
        query, query_entities, oa_only, full=return_full_api_response)

    initializing_publist_start_time = time()

    # sorted_pubs = sorted(pubs_to_sort, key=lambda k: k.adjusted_score, reverse=True)
    # selected_pubs = sorted_pubs[(pagesize * (page-1)):(pagesize * page)]
    # selected_pmids = [p.pmid for p in selected_pubs]

    # sort all candidates by relevance, then slice out the requested page
    sorted_pubs = sorted(pubs_to_sort, key=lambda k: k["adjusted_score"], reverse=True)
    selected_pubs = sorted_pubs[(pagesize * (page - 1)):(pagesize * page)]
    selected_dois = [p["doi"] for p in selected_pubs]
    print selected_dois

    # load the full records for just the selected page of DOIs
    selected_pubs_full = []
    if selected_dois:
        selected_pubs_full += db.session.query(PubDoi).filter(
            PubDoi.doi.in_(selected_dois)).options(
            orm.undefer_group('full')).all()

    # get rid of retracted ones
    selected_pubs_full = [p for p in selected_pubs_full if not p.suppress]

    for my_pub in selected_pubs_full:
        my_pub.adjusted_score = [
            p["adjusted_score"] for p in sorted_pubs
            if p["doi"] == my_pub.display_doi
        ][0]

    my_pub_list = PubList(pubs=selected_pubs_full)
    initializing_publist_elapsed = elapsed(initializing_publist_start_time, 3)

    set_dandelions_start_time = time()
    if not no_live_calls:
        my_pub_list.set_dandelions()
    set_dandelions_elapsed = elapsed(set_dandelions_start_time)

    set_pictures_start_time = time()
    my_pub_list.set_pictures()
    set_pictures_elapsed = elapsed(set_pictures_start_time)

    to_dict_start_time = time()
    results = my_pub_list.to_dict_serp_list(full=return_full_api_response)
    response = {
        "results": results,
        "page": page,
        "oa_only": oa_only,
        "total_num_pubs": min(100, len(pubs_to_sort)),
        "query_entities": query_entities
    }
    if return_full_api_response:
        response["annotations"] = my_pub_list.to_dict_annotation_metadata()
    to_dict_elapsed = elapsed(to_dict_start_time, 3)

    total_time = elapsed(start_time, 3)
    response["_timing"] = {
        "9 total": total_time,
        "1 getting_entity_lookup_elapsed": getting_entity_lookup_elapsed,
        "2 identify_pmids_for_top_100": time_to_pmids_elapsed,
        "3 loading_top_100_data_for_sorting": time_for_pubs_elapsed,
        "4 loading_final_10_full_pubs": initializing_publist_elapsed,
        "5 set_dandelions_elapsed": set_dandelions_elapsed,
        "6 set_pictures_elapsed": set_pictures_elapsed,
        "7 to_dict_elapsed": to_dict_elapsed,
    }

    print u"finished query for {}: took {} seconds".format(
        query, elapsed(start_time))

    return jsonify(response)
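# Every version above times itself with an elapsed(start, precision) helper.
# Judging by the call sites (elapsed(start_time, 3) feeding "elapsed_seconds",
# plus calls with the precision argument omitted), it returns seconds since
# `start`, rounded. A plausible sketch; the repo's actual helper may differ:
def elapsed(since, round_places=2):
    return round(time() - since, round_places)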