def sort_and_rank_records(recids, so=None, rm=None, p=''):
    """Return the record ids in ``recids`` as an ordered list.

    :param recids: collection of record ids exposing ``tolist()``
        (presumably an ``intbitset`` -- confirm against callers).
    :param so: sort order flag; when truthy the ids are returned in
        reversed ``tolist()`` order and no ranking is attempted.
    :param rm: rank method code handed to ``rank_records``; only consulted
        when ``so`` is falsy.
    :param p: query pattern; split on whitespace and passed to
        ``rank_records`` as the list of terms.
    :return: list of record ids.
    """
    output = recids.tolist()
    if so:
        output.reverse()
    elif rm:
        # Imported lazily, as in the original code, so the ranking module is
        # only loaded when ranking is actually requested.
        from invenio.legacy.bibrank.record_sorter import rank_records
        ranked = rank_records(rm, 0, output, p.split())
        if ranked[0]:
            output = ranked[0]
            output.reverse()
        # Otherwise ranking produced no ids: keep ``output`` as it is.  It is
        # already a plain list, so the former ``output.tolist()`` call here
        # raised AttributeError instead of returning the unranked ids.
    else:
        output.reverse()
    return output
def find_similar_videos(recid, collection="Videos", threshold=75, maximum=3,
                        shuffle=True):
    """Return ids of records in ``collection`` ranked as similar to ``recid``.

    The records of ``collection`` are ranked with the 'wrd' method against
    the query ``recid:<recid>``; every record whose score reaches
    ``threshold`` is kept as a match.

    :param recid: id of the reference record
    :param collection: name of the collection searched for candidates
    :param threshold: minimum score (inclusive) for a record to be kept
    :param maximum: upper bound on the number of ids returned
    :param shuffle: when true, return a random sample of the matches;
        otherwise return the first ``maximum`` matches in ranking order
    :return: list of record ids
    """
    candidates = intbitset(perform_request_search(cc=collection))
    query = ['recid:' + str(recid)]
    # Example result: ([6, 7], [81, 100], '(', ')', '') -- element 0 is read
    # as the record ids, element 1 as the matching scores.
    ranking = rank_records('wrd', 0, candidates, query)
    matches = [
        ranking[0][position]
        for position, score in enumerate(ranking[1])
        if score >= threshold
    ]
    if not shuffle:
        return matches[:maximum]
    # random.sample() refuses a sample size larger than the population.
    return random.sample(matches, min(maximum, len(matches)))
def find_similar_videos(recid, collection="Videos", threshold=75, maximum=3,
                        shuffle=True):
    """Return ids of records in ``collection`` ranked as similar to ``recid``.

    The records of ``collection`` are ranked with the 'wrd' method against
    the query ``recid:<recid>``; every record whose score reaches
    ``threshold`` is kept as a match.

    :param recid: id of the reference record
    :param collection: name of the collection searched for candidates
    :param threshold: minimum score (inclusive) for a record to be kept
    :param maximum: upper bound on the number of ids returned
    :param shuffle: when true, return a random sample of the matches;
        otherwise return the first ``maximum`` matches in ranking order
    :return: list of record ids
    """
    candidates = intbitset(perform_request_search(cc=collection))
    query = ['recid:' + str(recid)]
    # Example result: ([6, 7], [81, 100], '(', ')', '') -- element 0 is read
    # as the record ids, element 1 as the matching scores.
    ranking = rank_records('wrd', 0, candidates, query)
    matches = [
        ranking[0][position]
        for position, score in enumerate(ranking[1])
        if score >= threshold
    ]
    if not shuffle:
        return matches[:maximum]
    # random.sample() refuses a sample size larger than the population.
    return random.sample(matches, min(maximum, len(matches)))
def sort_records_bibsort(recIDs, sort_method, sort_field='', sort_order='d',
                         rg=None, jrec=1, sort_or_rank='s',
                         sorting_methods=SORTING_METHODS):
    """Order the list based on a sorting method using the BibSortDataCacher.

    :param recIDs: record ids to order
    :param sort_method: name of the sorting/ranking method; used as key into
        ``sorting_methods`` and ``CACHE_SORTED_DATA``
    :param sort_field: only forwarded to ``sort_records_bibxxx`` when the
        function falls back to it
    :param sort_order: ``'d'`` for descending order, anything else ascending
    :param rg: forwarded with ``jrec`` to
        ``get_interval_for_records_to_sort`` and ``slice_records`` to select
        the window of records to return
    :param jrec: position of the first record to return; a falsy value is
        treated as 1
    :param sort_or_rank: ``'r'`` to rank (scores are returned alongside the
        ids), anything else to sort
    :param sorting_methods: known sorting methods; a falsy value falls back
        to ``SORTING_METHODS``
    :return: the sliced list of record ids, or a ``(recids, scores)`` tuple
        when ``sort_or_rank == 'r'``.  When ``sort_method`` is unknown or its
        buckets are incomplete, the result of ``rank_records`` /
        ``sort_records_bibxxx`` is returned instead.
    """
    sorting_methods = sorting_methods or SORTING_METHODS

    if not jrec:
        jrec = 1

    # sanity check
    if sort_method not in sorting_methods:
        if sort_or_rank == 'r':
            return rank_records(rank_method_code=sort_method,
                                rank_limit_relevance=0,
                                hitset=recIDs)
        return sort_records_bibxxx(recIDs, None, sort_field, sort_order,
                                   '', rg, jrec)

    # we should return sorted records up to irec_max(exclusive)
    _, irec_max = get_interval_for_records_to_sort(len(recIDs), jrec, rg)
    solution = intbitset()
    input_recids = intbitset(recIDs)
    CACHE_SORTED_DATA[sort_method].recreate_cache_if_needed()
    sort_cache = CACHE_SORTED_DATA[sort_method].cache
    bucket_data = sort_cache['bucket_data']

    # check if all buckets have been constructed
    if len(bucket_data) != cfg['CFG_BIBSORT_BUCKETS']:
        if sort_or_rank == 'r':
            return rank_records(rank_method_code=sort_method,
                                rank_limit_relevance=0, hitset=recIDs)
        return sort_records_bibxxx(recIDs, None, sort_field,
                                   sort_order, rg=rg, jrec=jrec)

    descending = sort_order == 'd'

    # Visit the buckets in bucket-number order (reversed for descending
    # sorts) so that collecting can stop once enough records were gathered.
    # Sort explicitly: dict key order is not a contract to rely on, and on
    # Python 3 ``keys()`` is a view without a ``reverse()`` method.
    bucket_numbers = sorted(bucket_data, reverse=descending)
    for bucket_no in bucket_numbers:
        solution.union_update(input_recids & bucket_data[bucket_no])
        if len(solution) >= irec_max:
            break

    dict_solution = {}
    missing_records = intbitset()
    data_dict_ordered = sort_cache['data_dict_ordered']
    for recid in solution:
        try:
            dict_solution[recid] = data_dict_ordered[recid]
        except KeyError:
            # recid is in buckets, but not in the bsrMETHODDATA,
            # maybe because the value has been deleted, but the change has not
            # yet been propagated to the buckets
            missing_records.add(recid)

    # check if there are recids that are not in any bucket -> to be added at
    # the end/top, ordered by insertion date
    if len(solution) < irec_max:
        # some records have not been yet inserted in the bibsort structures
        # or, some records have no value for the sort_method
        missing_records += input_recids - solution

    if (sort_method.strip().lower() == cfg['CFG_BIBSORT_DEFAULT_FIELD']
            and descending):
        # If we want to sort the records on their insertion date, add the
        # missing records at the top.
        solution = sorted(missing_records, reverse=True) + \
            sorted(dict_solution, key=dict_solution.__getitem__, reverse=True)
    else:
        solution = sorted(dict_solution, key=dict_solution.__getitem__,
                          reverse=descending) + sorted(missing_records)

    # Only keep records, we are going to display
    solution = slice_records(solution, jrec, rg)

    if sort_or_rank == 'r':
        # We need the recids, with their ranking score; records without a
        # cached value get a score of 0.
        return solution, [dict_solution.get(record, 0) for record in solution]
    return solution
def sort_records_bibsort(recIDs, sort_method, sort_field='', sort_order='d',
                         rg=None, jrec=1, sort_or_rank='s',
                         sorting_methods=SORTING_METHODS):
    """Order the list based on a sorting method using the BibSortDataCacher.

    :param recIDs: record ids to order
    :param sort_method: name of the sorting/ranking method; used as key into
        ``sorting_methods`` and ``CACHE_SORTED_DATA``
    :param sort_field: only forwarded to ``sort_records_bibxxx`` when the
        function falls back to it
    :param sort_order: ``'d'`` for descending order, anything else ascending
    :param rg: forwarded with ``jrec`` to
        ``get_interval_for_records_to_sort`` and ``slice_records`` to select
        the window of records to return
    :param jrec: position of the first record to return; a falsy value is
        treated as 1
    :param sort_or_rank: ``'r'`` to rank (scores are returned alongside the
        ids), anything else to sort
    :param sorting_methods: known sorting methods; a falsy value falls back
        to ``SORTING_METHODS``
    :return: the sliced list of record ids, or a ``(recids, scores)`` tuple
        when ``sort_or_rank == 'r'``.  When ``sort_method`` is unknown or its
        buckets are incomplete, the result of ``rank_records`` /
        ``sort_records_bibxxx`` is returned instead.
    """
    sorting_methods = sorting_methods or SORTING_METHODS

    if not jrec:
        jrec = 1

    # sanity check
    if sort_method not in sorting_methods:
        if sort_or_rank == 'r':
            return rank_records(rank_method_code=sort_method,
                                rank_limit_relevance=0,
                                hitset=recIDs)
        return sort_records_bibxxx(recIDs, None, sort_field, sort_order,
                                   '', rg, jrec)

    # we should return sorted records up to irec_max(exclusive)
    _, irec_max = get_interval_for_records_to_sort(len(recIDs), jrec, rg)
    solution = intbitset()
    input_recids = intbitset(recIDs)
    CACHE_SORTED_DATA[sort_method].recreate_cache_if_needed()
    sort_cache = CACHE_SORTED_DATA[sort_method].cache
    bucket_data = sort_cache['bucket_data']

    # check if all buckets have been constructed
    if len(bucket_data) != cfg['CFG_BIBSORT_BUCKETS']:
        if sort_or_rank == 'r':
            return rank_records(rank_method_code=sort_method,
                                rank_limit_relevance=0, hitset=recIDs)
        return sort_records_bibxxx(recIDs, None, sort_field,
                                   sort_order, rg=rg, jrec=jrec)

    descending = sort_order == 'd'

    # Visit the buckets in bucket-number order (reversed for descending
    # sorts) so that collecting can stop once enough records were gathered.
    # Sort explicitly: dict key order is not a contract to rely on, and on
    # Python 3 ``keys()`` is a view without a ``reverse()`` method.
    bucket_numbers = sorted(bucket_data, reverse=descending)
    for bucket_no in bucket_numbers:
        solution.union_update(input_recids & bucket_data[bucket_no])
        if len(solution) >= irec_max:
            break

    dict_solution = {}
    missing_records = intbitset()
    data_dict_ordered = sort_cache['data_dict_ordered']
    for recid in solution:
        try:
            dict_solution[recid] = data_dict_ordered[recid]
        except KeyError:
            # recid is in buckets, but not in the bsrMETHODDATA,
            # maybe because the value has been deleted, but the change has not
            # yet been propagated to the buckets
            missing_records.add(recid)

    # check if there are recids that are not in any bucket -> to be added at
    # the end/top, ordered by insertion date
    if len(solution) < irec_max:
        # some records have not been yet inserted in the bibsort structures
        # or, some records have no value for the sort_method
        missing_records += input_recids - solution

    if (sort_method.strip().lower() == cfg['CFG_BIBSORT_DEFAULT_FIELD']
            and descending):
        # If we want to sort the records on their insertion date, add the
        # missing records at the top.
        solution = sorted(missing_records, reverse=True) + \
            sorted(dict_solution, key=dict_solution.__getitem__, reverse=True)
    else:
        solution = sorted(dict_solution, key=dict_solution.__getitem__,
                          reverse=descending) + sorted(missing_records)

    # Only keep records, we are going to display
    solution = slice_records(solution, jrec, rg)

    if sort_or_rank == 'r':
        # We need the recids, with their ranking score; records without a
        # cached value get a score of 0.
        return solution, [dict_solution.get(record, 0) for record in solution]
    return solution