def _compare_searches(self, invenio_syntax, spires_syntax):
    """Determine if two queries parse to the same search command.

    For comparison of actual search results (regression testing),
    see the tests in the Inspire module.
    """
    parser = search_engine_query_parser.SearchQueryParenthesisedParser()
    converter = search_engine_query_parser.SpiresToInvenioSyntaxConverter()

    parsed_query = parser.parse_query(converter.convert_query(spires_syntax))
    # parse_query removes any parens that convert_query added, but then
    # we have to rejoin the list it returns and create basic searches
    result_obtained = create_basic_search_units(
        None,
        ' '.join(parsed_query).replace('+ ', ''),
        '',
        None
    )

    # in case the desired result has parens
    parsed_wanted = parser.parse_query(invenio_syntax)
    result_wanted = create_basic_search_units(
        None,
        ' '.join(parsed_wanted).replace('+ ', ''),
        '',
        None
    )

    assert result_obtained == result_wanted, \
        "SPIRES parsed as %s instead of %s" % \
        (repr(result_obtained), repr(result_wanted))
    return
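# A minimal usage sketch for _compare_searches, assuming the converter's usual
# SPIRES-to-Invenio mapping; the queries 'find a ellis' / 'author:ellis' are
# illustrative examples, not cases taken from the suite itself.
def test_spires_simple_author_search(self):
    """Sketch: a simple SPIRES author query should parse like its Invenio form."""
    self._compare_searches('author:ellis', 'find a ellis')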
def calculate_hosted_collections_search_params(req,
                                               pattern_list,
                                               field,
                                               hosted_collections,
                                               verbosity_level=0):
    """Calculate the searching parameters for the selected hosted collections,
    i.e. the actual hosted search engines and the basic search units."""

    from invenio.search_engine import create_basic_search_units
    assert req
    vprint = get_verbose_print(req, 'Hosted collections (calculate_hosted_collections_search_params): ', verbosity_level)

    pattern = bind_patterns(pattern_list)
    vprint(3, 'pattern = ' + pattern)

    # if for any strange reason there is no pattern, just return
    # UPDATE: let the search go on even when there is no pattern (an empty pattern_list and field)
    #if not pattern: return (None, None)

    # calculate the basic search units
    basic_search_units = create_basic_search_units(None, pattern, field)
    vprint(3, 'basic_search_units = ' + str(basic_search_units))

    # calculate the set of hosted search engines
    hosted_search_engines = select_hosted_search_engines(hosted_collections)
    vprint(3, 'hosted_search_engines = ' + str(hosted_search_engines))

    # no need really to print out a sorted list of the hosted search engines, is there? I'll leave this commented out
    #hosted_search_engines_list = external_collection_sort_engine_by_name(hosted_search_engines)
    #vprint(3, 'hosted_search_engines_list (sorted) : ' + str(hosted_search_engines_list))

    return (hosted_search_engines, basic_search_units)
def _check(self, p, f, m, result_wanted):
    "Internal checking function calling create_basic_search_units."
    result_obtained = search_engine.create_basic_search_units(None, p, f, m)
    assert result_obtained == result_wanted, \
        'obtained %s instead of %s' % (repr(result_obtained), repr(result_wanted))
    return
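# As the tuple unpacking elsewhere in this section suggests, each basic search
# unit appears to be a 4-tuple (operator, pattern, field, match_type).  A sketch
# of how _check might be exercised; the expected unit below is illustrative,
# not a recorded parser output.
def test_check_single_word_in_title(self):
    """Sketch: one word restricted to the title field."""
    self._check('ellis', 'title', None, [('+', 'ellis', 'title', 'w')])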
def print_external_results_overview(req, current_collection, pattern_list, field,
                                    external_collection, verbosity_level=0,
                                    lang=CFG_SITE_LANG):
    """Print the external collection overview box.
    Return the selected external collections and parsed query."""
    from invenio.search_engine import create_basic_search_units
    assert req
    vprint = get_verbose_print(req, 'External collection (print_external_results_overview): ', verbosity_level)

    pattern = bind_patterns(pattern_list)
    vprint(3, 'pattern = ' + pattern)

    if not pattern:
        return (None, None, None, None)

    basic_search_units = create_basic_search_units(None, pattern, field)
    vprint(3, 'basic_search_units = ' + str(basic_search_units))

    (search_engines, seealso_engines) = select_external_engines(current_collection, external_collection)
    vprint(3, 'search_engines = ' + str(search_engines))
    vprint(3, 'seealso_engines = ' + str(seealso_engines))

    search_engines_list = external_collection_sort_engine_by_name(search_engines)
    vprint(3, 'search_engines_list (sorted) : ' + str(search_engines_list))

    html = template.external_collection_overview(lang, search_engines_list)
    req.write(html)

    return (search_engines, seealso_engines, pattern, basic_search_units)
def test_search_Nucl_Phys_B75_1974_461_with_spaces(self):
    """websearch - search ' Nucl. Phys. B75 (1974) 461 ', with JournalHintService"""
    user_info = collect_user_info(1)
    pattern = ' Nucl. Phys. B75 (1974) 461 '
    search_units = create_basic_search_units(None, pattern, '')
    response = self.plugin.answer(req=user_info, user_info=user_info, of='hb',
                                  cc=CFG_SITE_NAME, colls_to_search='',
                                  p=pattern, f='', search_units=search_units, ln='en')
    self.assertEqual(response, (0, ''))
def test_search_D_S_Salopek_J_R_Bond_and_J_M_Bardeen_Phys_Rev_D40_1989_1753(self):
    """websearch - search 'D.S. Salopek, J.R.Bond and J.M.Bardeen,Phys.Rev.D40(1989)1753.', with JournalHintService"""
    user_info = collect_user_info(1)
    pattern = 'D.S. Salopek, J.R.Bond and J.M.Bardeen,Phys.Rev.D40(1989)1753.'
    search_units = create_basic_search_units(None, pattern, '')
    response = self.plugin.answer(req=user_info, user_info=user_info, of='hb',
                                  cc=CFG_SITE_NAME, colls_to_search='',
                                  p=pattern, f='', search_units=search_units, ln='en')
    self.assertEqual(response, (0, ''))
def test_search_restricted_submission(self):
    """websearch - search for restricted submission, with SubmissionNameSearchService"""
    user_info = collect_user_info(get_uid_from_email('*****@*****.**'))
    pattern = 'submit thesis'
    search_units = create_basic_search_units(None, pattern, '')
    response = self.plugin.answer(req=user_info, user_info=user_info, of='hb',
                                  cc=CFG_SITE_NAME, colls_to_search='',
                                  p=pattern, f='', search_units=search_units, ln='en')
    self.assert_(response[0] >= 50)
    self.assert_('doctype=DEMOTHE' in response[1])
def test_search_restricted_submission_as_guest(self):
    """websearch - search for restricted submission as guest, with SubmissionNameSearchService"""
    user_info = collect_user_info(0)
    pattern = 'submit thesis'
    search_units = create_basic_search_units(None, pattern, '')
    response = self.plugin.answer(req=user_info, user_info=user_info, of='hb',
                                  cc=CFG_SITE_NAME, colls_to_search='',
                                  p=pattern, f='', search_units=search_units, ln='en')
    self.assertEqual(response, (0, ''))
def test_search_author_Tom(self):
    """websearch - search for an author using Invenio syntax, with JournalHintService"""
    user_info = collect_user_info(0)
    pattern = 'author:Tom'
    search_units = create_basic_search_units(None, pattern, '')
    response = self.plugin.answer(req=user_info, user_info=user_info, of='hb',
                                  cc=CFG_SITE_NAME, colls_to_search='',
                                  p=pattern, f='', search_units=search_units, ln='en')
    self.assertEqual(response, (0, ''))
def test_search_Pais_Valencia_utf8(self):
    """websearch - search 'País Valencià' utf8, with JournalHintService"""
    user_info = collect_user_info(0)
    pattern = u'País Valencià'.encode('utf8')
    search_units = create_basic_search_units(None, pattern, '')
    response = self.plugin.answer(req=user_info, user_info=user_info, of='hb',
                                  cc=CFG_SITE_NAME, colls_to_search='',
                                  p=pattern, f='', search_units=search_units, ln='en')
    self.assertEqual(response, (0, ''))
def test_search_public_collection_as_guest(self):
    """websearch - search for public collection as guest, from search page"""
    user_info = collect_user_info(0)
    pattern = 'Atlantis Times Arts'
    search_units = create_basic_search_units(None, pattern, '')
    response = self.plugin.answer(req=user_info, user_info=user_info, of='hb',
                                  cc=CFG_SITE_NAME, colls_to_search='',
                                  p=pattern, f='', search_units=search_units, ln='en')
    self.assert_(response[0] > 50)
    self.assert_('collection/Atlantis%20Times%20Arts' in response[1])
def test_search_Nucl_Instrum_Methods_Phys_Res_A_445_2000_456_462(self):
    """websearch - search 'Nucl. Instrum. Methods Phys. Res., A :445 2000 456-462', with JournalHintService"""
    user_info = collect_user_info(1)
    pattern = 'Nucl. Instrum. Methods Phys. Res., A :445 2000 456-462'
    search_units = create_basic_search_units(None, pattern, '')
    response = self.plugin.answer(req=user_info, user_info=user_info, of='hb',
                                  cc=CFG_SITE_NAME, colls_to_search='',
                                  p=pattern, f='', search_units=search_units, ln='en')
    self.assert_(response[0] >= 50)
    self.assert_('Development of photon beam diagnostics for VUV radiation from a SASE FEL' in response[1])
def test_search_public_submission_as_guest(self):
    """websearch - search for public submission as guest, with SubmissionNameSearchService"""
    user_info = collect_user_info(0)
    pattern = 'submit article'
    search_units = create_basic_search_units(None, pattern, '')
    response = self.plugin.answer(req=user_info, user_info=user_info, of='hb',
                                  cc=CFG_SITE_NAME, colls_to_search='',
                                  p=pattern, f='', search_units=search_units, ln='en')
    self.assert_(response[0] >= 50)
    self.assert_('doctype=DEMOART' in response[1])
def test_search_empty_string(self):
    """websearch - search empty string, with JournalHintService"""
    user_info = collect_user_info(0)
    pattern = ''
    search_units = create_basic_search_units(None, pattern, '')
    response = self.plugin.answer(req=user_info, user_info=user_info, of='hb',
                                  cc=CFG_SITE_NAME, colls_to_search='',
                                  p=pattern, f='', search_units=search_units, ln='en')
    self.assertEqual(response, (0, ''))
def test_search_restricted_submission_category(self):
    """websearch - search for restricted submission, with SubmissionNameSearchService"""
    user_info = collect_user_info(1)
    pattern = 'submit news'
    search_units = create_basic_search_units(None, pattern, '')
    response = self.plugin.answer(req=user_info, user_info=user_info, of='hb',
                                  cc=CFG_SITE_NAME, colls_to_search='',
                                  p=pattern, f='', search_units=search_units, ln='en')
    self.assert_(response[0] >= 50)
    self.assert_('doctype=DEMOJRN' in response[1])
def calculate_external_search_params(pattern_list, field, hosted_colls):
    """Function that calculates the basic search units given the search pattern.
    Also returns a set of hosted collections engines."""
    from invenio.search_engine import create_basic_search_units
    from invenio.websearch_external_collections import bind_patterns
    from invenio.websearch_external_collections import select_hosted_search_engines as select_external_search_engines

    pattern = bind_patterns(pattern_list)
    basic_search_units = create_basic_search_units(None, pattern, field)
    external_search_engines = select_external_search_engines(hosted_colls)

    return (external_search_engines, basic_search_units)
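# A minimal sketch of calling the helper above; the pattern_list layout and the
# hosted collection names are hypothetical (bind_patterns combines the
# individual search-box patterns into a single pattern string).
hypothetical_hosted_colls = ['Hosted Collection A', 'Hosted Collection B']
(engines, units) = calculate_external_search_params(pattern_list=['higgs boson'],
                                                    field='title',
                                                    hosted_colls=hypothetical_hosted_colls)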
def test_search_restricted_submission(self):
    """websearch - search for restricted submission, with SubmissionNameSearchService"""
    user_info = collect_user_info(get_uid_from_email('*****@*****.**'))
    pattern = 'submit thesis'
    search_units = create_basic_search_units(None, pattern, '')
    response = self.plugin.answer(req=user_info, user_info=user_info, of='hb',
                                  cc=CFG_SITE_NAME, colls_to_search='',
                                  p=pattern, f='', search_units=search_units, ln='en')
    self.assert_(response[0] >= 50)
    self.assert_('doctype=DEMOTHE' in response[1])
def get_fulltext_terms_from_search_pattern(search_pattern):
    keywords = []
    if search_pattern is not None:
        from invenio.search_engine import create_basic_search_units
        for unit in create_basic_search_units(None, search_pattern.encode('utf-8'), None):
            bsu_o, bsu_p, bsu_f, bsu_m = unit[0], unit[1], unit[2], unit[3]
            if (bsu_o != '-' and bsu_f in [None, 'fulltext']):
                if bsu_m == 'a' and bsu_p.startswith('%') and bsu_p.endswith('%'):
                    # remove leading and trailing `%' representing partial phrase search
                    keywords.append(bsu_p[1:-1])
                else:
                    keywords.append(bsu_p)
    return keywords
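# A usage sketch for the helper above; the query string is illustrative and the
# exact keywords returned depend on how create_basic_search_units splits it.
# Negated units ('-') and units bound to fields other than 'fulltext' are
# skipped, so 'ellis' below would not appear in the result.
terms = get_fulltext_terms_from_search_pattern(u'dark matter -author:ellis')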
def print_external_results_overview(req, current_collection, pattern_list,
                                    field, external_collection,
                                    verbosity_level=0, lang=CFG_SITE_LANG,
                                    print_overview=True):
    """Print the external collection overview box.
    Return the selected external collections and parsed query."""
    from invenio.search_engine import create_basic_search_units
    assert req
    vprint = get_verbose_print(
        req,
        'External collection (print_external_results_overview): ',
        verbosity_level)

    pattern = bind_patterns(pattern_list)
    vprint(3, 'pattern = %s' % cgi.escape(pattern))

    if not pattern:
        return (None, None, None, None)

    basic_search_units = create_basic_search_units(None, pattern, field)
    vprint(3, 'basic_search_units = %s' % cgi.escape(repr(basic_search_units)))

    (search_engines, seealso_engines) = select_external_engines(current_collection, external_collection)
    vprint(3, 'search_engines = ' + str(search_engines))
    vprint(3, 'seealso_engines = ' + str(seealso_engines))

    search_engines_list = external_collection_sort_engine_by_name(search_engines)
    vprint(3, 'search_engines_list (sorted) : ' + str(search_engines_list))

    if print_overview:
        html = template.external_collection_overview(lang, search_engines_list)
        req.write(html)

    return (search_engines, seealso_engines, pattern, basic_search_units)
def word_similarity_solr(pattern, hitset, params, verbose, explicit_field, ranked_result_amount):
    """
    Rank records containing the specified words and return a sorted list.

    input:
        hitset - a list of hits for the query found by search_engine
        verbose - verbose value
        explicit_field - field to search (selected in GUI)
        ranked_result_amount - amount of results to be ranked

    output:
        recset - a list of sorted records: [[23,34], [344,24], [1,01]]
        prefix - what to show before the rank value
        postfix - what to show after the rank value
        voutput - contains extra information, content dependent on verbose value
    """
    voutput = ""
    search_units = []

    if not len(hitset):
        return ([], "", "", voutput)

    if pattern:
        pattern = " ".join(map(str, pattern))
        from invenio.search_engine import create_basic_search_units
        search_units = create_basic_search_units(None, pattern, explicit_field)
    else:
        return (None, "Records not ranked. The query is not detailed enough, or not enough records found, for ranking to be possible.", "", voutput)

    if verbose > 0:
        voutput += "Hitset: %s<br/>" % hitset
        voutput += "Pattern: %s<br/>" % pattern
        voutput += "Search units: %s<br/>" % search_units

    query = ""
    (ranked_result, matched_recs) = (None, None)

    # Ranks similar records
    if search_units[0][2] == 'recid':
        recid = search_units[0][1]
        if verbose > 0:
            voutput += "Ranked amount: %s<br/>" % ranked_result_amount
        try:
            (ranked_result, matched_recs) = solr_get_similar_ranked(recid, hitset, params, ranked_result_amount)
        except:
            register_exception()
            return (None, "Records not ranked. An error occurred. Please check the query.", "", voutput)

        # Cuts off a potentially large hitset
        it = itertools.islice(hitset, params['find_similar_to_recid']['hitset_cutoff'])
        hitset = intbitset(list(it))

    # Regular word similarity ranking
    else:
        for (operator, pattern, field, unit_type) in search_units:
            # Any field
            if field == '':
                field = 'global'
            # Field might not exist
            elif field not in params["fields"].keys():
                field = params["default_field"]

            if unit_type == "a":
                # Eliminates leading and trailing %
                if pattern[0] == "%":
                    pattern = pattern[1:-1]
                pattern = "\"" + pattern + "\""

            weighting = "^" + str(params["fields"][field]["weight"])

            if ':' in pattern:
                pattern = pattern.rsplit(':', 1)[1]

            query_part = field + ":" + pattern + weighting

            # Considers the boolean operator from the second part on, allows negation from the first part on
            if query or operator == "-":
                query += " " + BOOLEAN_EQUIVALENTS[operator] + " "

            query += query_part + " "

        if verbose > 0:
            voutput += "Solr query: %s<br/>" % query

        try:
            (ranked_result, matched_recs) = solr_get_ranked(query, hitset, params, ranked_result_amount)
        except:
            register_exception()
            return (None, "Records not ranked. An error occurred. Please check the query.", "", voutput)

    if verbose > 0:
        voutput += "All matched records: %s<br/>" % matched_recs

    # Considers not ranked records
    not_ranked = hitset.difference(matched_recs)
    if not_ranked:
        lrecIDs = list(not_ranked)
        ranked_result = zip(lrecIDs, [0] * len(lrecIDs)) + ranked_result

    if verbose > 0:
        voutput += "Not ranked: %s<br/>" % not_ranked

    return (ranked_result, params["prefix"], params["postfix"], voutput)
def word_similarity_solr(pattern, hitset, params, verbose, explicit_field, ranked_result_amount):
    """
    Rank records containing the specified words and return a sorted list.

    input:
        hitset - a list of hits for the query found by search_engine
        verbose - verbose value
        explicit_field - field to search (selected in GUI)
        ranked_result_amount - amount of results to be ranked

    output:
        recset - a list of sorted records: [[23,34], [344,24], [1,01]]
        prefix - what to show before the rank value
        postfix - what to show after the rank value
        voutput - contains extra information, content dependent on verbose value
    """
    voutput = ""
    search_units = []

    if not len(hitset):
        return ([], "", "", voutput)

    if pattern:
        pattern = " ".join(map(str, pattern))
        from invenio.search_engine import create_basic_search_units
        search_units = create_basic_search_units(None, pattern, explicit_field)
    else:
        return (None, "Records not ranked. The query is not detailed enough, or not enough records found, for ranking to be possible.", "", voutput)

    if verbose > 0:
        voutput += "Hitset: %s<br/>" % hitset
        voutput += "Pattern: %s<br/>" % pattern
        voutput += "Search units: %s<br/>" % search_units

    query = ""
    (ranked_result, matched_recs) = (None, None)

    # Ranks similar records
    if search_units[0][2] == 'recid':
        recid = search_units[0][1]
        if verbose > 0:
            voutput += "Ranked amount: %s<br/>" % ranked_result_amount
        try:
            (ranked_result, matched_recs) = solr_get_similar_ranked(recid, hitset, params, ranked_result_amount)
        except:
            register_exception()
            return (None, "Records not ranked. An error occurred. Please check the query.", "", voutput)

        # Cuts off a potentially large hitset
        it = itertools.islice(hitset, params['find_similar_to_recid']['hitset_cutoff'])
        hitset = intbitset(list(it))

    # Regular word similarity ranking
    else:
        for (operator, pattern, field, unit_type) in search_units:
            # Any field
            if field == '':
                field = 'global'
            # Field might not exist
            elif field not in params["fields"].keys():
                field = params["default_field"]

            if unit_type == "a":
                # Eliminates leading and trailing %
                if pattern[0] == "%":
                    pattern = pattern[1:-1]
                pattern = "\"" + pattern + "\""

            weighting = "^" + str(params["fields"][field]["weight"])

            if ':' in pattern:
                pattern = pattern.rsplit(':', 1)[1]

            query_part = field + ":" + pattern + weighting

            # Considers the boolean operator from the second part on, allows negation from the first part on
            if query or operator == "-":
                query += " " + BOOLEAN_EQUIVALENTS[operator] + " "

            query += query_part + " "

        if verbose > 0:
            voutput += "Solr query: %s<br/>" % query

        try:
            (ranked_result, matched_recs) = solr_get_ranked(query, hitset, params, ranked_result_amount)
        except:
            register_exception()
            return (None, "Records not ranked. An error occurred. Please check the query.", "", voutput)

    if verbose > 0:
        voutput += "All matched records: %s<br/>" % matched_recs

    # Considers not ranked records
    not_ranked = hitset.difference(matched_recs)
    if not_ranked:
        lrecIDs = list(not_ranked)
        ranked_result = zip(lrecIDs, [0] * len(lrecIDs)) + ranked_result

    if verbose > 0:
        voutput += "Not ranked: %s<br/>" % not_ranked

    # Similar-to-recid requires reverse order
    if search_units[0][2] == 'recid':
        ranked_result.reverse()

    return (ranked_result, params["prefix"], params["postfix"], voutput)
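# A self-contained sketch of the query-assembly step used above, with a
# hypothetical operator mapping (the module's real BOOLEAN_EQUIVALENTS constant
# may differ) and hypothetical per-field weights.
HYPOTHETICAL_BOOLEAN_EQUIVALENTS = {"+": "AND", "|": "OR", "-": "NOT"}

def _sketch_build_solr_query(search_units, weights):
    """Assemble a Solr query string from (operator, pattern, field, type) units."""
    query = ""
    for (operator, pattern, field, _unit_type) in search_units:
        query_part = "%s:%s^%s" % (field, pattern, weights.get(field, 1.0))
        # boolean keyword from the second unit on; negation allowed from the first
        if query or operator == "-":
            query += " " + HYPOTHETICAL_BOOLEAN_EQUIVALENTS[operator] + " "
        query += query_part + " "
    return query.strip()

# _sketch_build_solr_query([('+', 'higgs', 'title', 'w'),
#                           ('-', 'review', 'abstract', 'w')],
#                          {'title': 10, 'abstract': 2})
# yields a query along the lines of: title:higgs^10 NOT abstract:review^2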
def format_template_show_preview_or_save(
    req,
    bft,
    ln=CFG_SITE_LANG,
    code=None,
    ln_for_preview=CFG_SITE_LANG,
    pattern_for_preview="",
    content_type_for_preview="text/html",
    save_action=None,
    navtrail="",
):
    """
    Print the preview of a record with a format template. To be included inside Format template editor.
    If the save_action has a value, then the code should also be saved at the same time.

    @param req: the request object
    @param code: the code of a template to use for formatting
    @param ln: language
    @param ln_for_preview: the language for the preview (for bfo)
    @param pattern_for_preview: the search pattern to be used for the preview (for bfo)
    @param content_type_for_preview: the content-type to use to serve the preview page
    @param save_action: has a value if the code has to be saved
    @param bft: the filename of the template to save
    @param navtrail: navigation trail
    @return: a web page
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    (auth_code, auth_msg) = check_user(req, "cfgbibformat")
    if not auth_code:
        user_info = collect_user_info(req)
        uid = user_info["uid"]
        bft = wash_url_argument(bft, "str")
        if save_action is not None and code is not None:
            # save
            bibformatadminlib.update_format_template_code(bft, code=code)
            bibformat_engine.clear_caches()
        if code is None:
            code = bibformat_engine.get_format_template(bft)["code"]

        ln_for_preview = wash_language(ln_for_preview)
        pattern_for_preview = wash_url_argument(pattern_for_preview, "str")
        if pattern_for_preview == "":
            try:
                recID = search_pattern(p="-collection:DELETED").pop()
            except KeyError:
                return page(
                    title="No Document Found",
                    body="",
                    uid=uid,
                    language=ln_for_preview,
                    navtrail="",
                    lastupdated=__lastupdated__,
                    req=req,
                    navmenuid="search",
                )
            pattern_for_preview = "recid:%s" % recID
        else:
            try:
                recID = search_pattern(p=pattern_for_preview + " -collection:DELETED").pop()
            except KeyError:
                return page(
                    title="No Record Found for %s" % pattern_for_preview,
                    body="",
                    uid=uid,
                    language=ln_for_preview,
                    navtrail="",
                    lastupdated=__lastupdated__,
                    req=req,
                )

        units = create_basic_search_units(None, pattern_for_preview, None)
        keywords = [unit[1] for unit in units if unit[0] != "-"]
        bfo = bibformat_engine.BibFormatObject(
            recID=recID, ln=ln_for_preview, search_pattern=keywords, xml_record=None, user_info=user_info
        )
        (body, errors) = bibformat_engine.format_with_format_template(bft, bfo, verbose=7, format_template_code=code)

        if content_type_for_preview == "text/html":
            # Standard page display with CDS headers, etc.
            return page(
                title="",
                body=body,
                uid=uid,
                language=ln_for_preview,
                navtrail=navtrail,
                lastupdated=__lastupdated__,
                req=req,
                navmenuid="search",
            )
        else:
            # Output with chosen content-type.
            req.content_type = content_type_for_preview
            req.send_http_header()
            req.write(body)
    else:
        return page_not_authorized(req=req, text=auth_msg)
def format_template_show_preview_or_save(req, bft, ln=CFG_SITE_LANG,
                                         code=None,
                                         ln_for_preview=CFG_SITE_LANG,
                                         pattern_for_preview="",
                                         content_type_for_preview='text/html',
                                         save_action=None,
                                         navtrail=""):
    """
    Print the preview of a record with a format template. To be included inside Format template editor.
    If the save_action has a value, then the code should also be saved at the same time.

    @param req: the request object
    @param code: the code of a template to use for formatting
    @param ln: language
    @param ln_for_preview: the language for the preview (for bfo)
    @param pattern_for_preview: the search pattern to be used for the preview (for bfo)
    @param content_type_for_preview: the content-type to use to serve the preview page
    @param save_action: has a value if the code has to be saved
    @param bft: the filename of the template to save
    @param navtrail: navigation trail
    @return: a web page
    """
    ln = wash_language(ln)
    _ = gettext_set_language(ln)
    (auth_code, auth_msg) = check_user(req, 'cfgbibformat')
    if not auth_code:
        user_info = collect_user_info(req)
        uid = user_info['uid']
        bft = wash_url_argument(bft, 'str')
        if save_action is not None and code is not None:
            # save
            bibformatadminlib.update_format_template_code(bft, code=code)
            bibformat_engine.clear_caches()
        if code is None:
            code = bibformat_engine.get_format_template(bft)['code']

        ln_for_preview = wash_language(ln_for_preview)
        pattern_for_preview = wash_url_argument(pattern_for_preview, 'str')
        if pattern_for_preview == "":
            try:
                recID = search_pattern(p='-collection:DELETED').pop()
            except KeyError:
                return page(title="No Document Found",
                            body="",
                            uid=uid,
                            language=ln_for_preview,
                            navtrail="",
                            lastupdated=__lastupdated__,
                            req=req,
                            navmenuid='search')
            pattern_for_preview = "recid:%s" % recID
        else:
            try:
                recID = search_pattern(p=pattern_for_preview + \
                                       ' -collection:DELETED').pop()
            except KeyError:
                return page(title="No Record Found for %s" % pattern_for_preview,
                            body="",
                            uid=uid,
                            language=ln_for_preview,
                            navtrail="",
                            lastupdated=__lastupdated__,
                            req=req)

        units = create_basic_search_units(None, pattern_for_preview, None)
        keywords = [unit[1] for unit in units if unit[0] != '-']
        bfo = bibformat_engine.BibFormatObject(recID=recID,
                                               ln=ln_for_preview,
                                               search_pattern=keywords,
                                               xml_record=None,
                                               user_info=user_info)
        body = format_with_format_template(bft, bfo, verbose=7, format_template_code=code)

        if content_type_for_preview == 'text/html':
            # Standard page display with CDS headers, etc.
            return page(title="",
                        body=body,
                        uid=uid,
                        language=ln_for_preview,
                        navtrail=navtrail,
                        lastupdated=__lastupdated__,
                        req=req,
                        navmenuid='search')
        else:
            # Output with chosen content-type.
            req.content_type = content_type_for_preview
            req.send_http_header()
            req.write(body)
    else:
        return page_not_authorized(req=req, text=auth_msg)
def word_similarity_xapian(pattern, hitset, params, verbose, field, ranked_result_amount):
    """
    Rank records containing the specified words and return a sorted list.

    input:
        hitset - a list of hits for the query found by search_engine
        verbose - verbose value
        field - field to search (selected in GUI)
        ranked_result_amount - amount of results to be ranked

    output:
        recset - a list of sorted records: [[23,34], [344,24], [1,01]]
        prefix - what to show before the rank value
        postfix - what to show after the rank value
        voutput - contains extra information, content dependent on verbose value
    """
    voutput = ""
    search_units = []

    if pattern:
        xapian_init_databases()
        pattern = " ".join(map(str, pattern))
        from invenio.search_engine import create_basic_search_units
        search_units = create_basic_search_units(None, pattern, field)

    if verbose > 0:
        voutput += "Hitset: %s<br/>" % hitset
        voutput += "Pattern: %s<br/>" % pattern
        voutput += "Search units: %s<br/>" % search_units

    all_ranked_results = []
    included_hits = intbitset()
    excluded_hits = intbitset()

    for (operator, pattern, field, unit_type) in search_units: #@UnusedVariable
        # Field might not exist
        if field not in params["fields"].keys():
            field = params["default_field"]

        if unit_type == "a":
            # Eliminates leading and trailing %
            if pattern[0] == "%":
                pattern = pattern[1:-1]
            pattern = "\"" + pattern + "\""

        (ranked_result_part, matched_recs) = xapian_get_ranked_index(field, pattern, params["fields"][field], hitset, ranked_result_amount)

        if verbose > 0:
            voutput += "Index %s: %s<br/>" % (field, ranked_result_part)
            voutput += "Index records %s: %s<br/>" % (field, matched_recs)

        # Excludes - results
        if operator == "-":
            excluded_hits = excluded_hits.union(matched_recs)
        # + and | are interpreted as OR
        else:
            included_hits = included_hits.union(matched_recs)
            all_ranked_results.extend(ranked_result_part)

    ranked_result = []
    if hitset:
        # Removes the excluded records
        result_hits = included_hits.difference(excluded_hits)

        # Avoids duplicate results and normalises scores
        ranked_result = get_greatest_ranked_records(all_ranked_results)
        ranked_result = get_normalized_ranking_scores(ranked_result)

        # Considers not ranked records
        not_ranked = hitset.difference(result_hits)
        if not_ranked:
            lrecIDs = list(not_ranked)
            ranked_result = zip(lrecIDs, [0] * len(lrecIDs)) + ranked_result

        if verbose > 0:
            voutput += "All matched records: %s<br/>" % result_hits
            voutput += "All ranked records: %s<br/>" % ranked_result
            voutput += "All not ranked records: %s<br/>" % not_ranked

        ranked_result.sort(lambda x, y: cmp(x[1], y[1]))
        return (ranked_result, params["prefix"], params["postfix"], voutput)

    return (ranked_result, "", "", voutput)