def api_search(search_form, target_file=None, extra_parameters=False, merging_strategy='merge_optimized'):
    """Run an API sound search and return a 7-tuple of results.

    Returns (ids, count, distance_to_target_data, extra1, note, extra2, extra3);
    the three extra slots are always None in the code paths visible here.

    :param search_form: validated form; this function reads its ``cleaned_data``
        keys 'query', 'filter', 'descriptors_filter', 'target', 'page', 'page_size'.
    :param target_file: optional uploaded file used as similarity target.
    :param extra_parameters: unused in the paths shown here — presumably consumed
        by code outside this view; confirm against the full definition.
    :param merging_strategy: unused in the paths shown here (see note above).
    :raises ServerErrorException, BadRequestException, NotFoundException:
        translated from similarity-server errors.
    """
    # No search input at all -> empty result set (nothing to query).
    if search_form.cleaned_data['query'] is None \
            and search_form.cleaned_data['filter'] is None \
            and not search_form.cleaned_data['descriptors_filter'] \
            and not search_form.cleaned_data['target'] \
            and not target_file:
        return [], 0, None, None, None, None, None

    if search_form.cleaned_data['query'] is None and search_form.cleaned_data['filter'] is None:
        # Standard content-based search (no text query/filter involved).
        try:
            results, count, note = similarity_api_search(
                target=search_form.cleaned_data['target'],
                filter=search_form.cleaned_data['descriptors_filter'],
                num_results=search_form.cleaned_data['page_size'],
                offset=(search_form.cleaned_data['page'] - 1) * search_form.cleaned_data['page_size'],
                target_file=target_file)

            gaia_ids = [result[0] for result in results]
            distance_to_target_data = None
            if search_form.cleaned_data['target'] or target_file:
                # Save sound distance to target into view class so it can be accessed by the serializer.
                # We only do that when a target is specified (otherwise there is no meaningful distance value).
                distance_to_target_data = dict(results)
            gaia_count = count
            return gaia_ids, gaia_count, distance_to_target_data, None, note, None, None

        except SimilarityException as e:
            # Map similarity-server status codes onto the API's own exception types.
            if e.status_code == 500:
                raise ServerErrorException(msg=e.message)
            elif e.status_code == 400:
                raise BadRequestException(msg=e.message)
            elif e.status_code == 404:
                raise NotFoundException(msg=e.message)
            else:
                raise ServerErrorException(msg='Similarity server error: %s' % e.message)
        except Exception as e:
            raise ServerErrorException(msg='The similarity server could not be reached or some unexpected error occurred.')
def get_gaia_results(search_form, target_file, page_size, max_pages, start_page=1, valid_ids=None, offset=None):
    """Fetch similarity ("gaia") results page by page, accumulating sound ids.

    Keeps requesting pages until all ``gaia_count`` matches are collected or
    ``max_pages`` requests have been made.

    :param search_form: validated form; reads cleaned_data 'target' and
        'descriptors_filter'.
    :param target_file: optional uploaded file used as similarity target.
    :param page_size: number of results requested per page.
    :param max_pages: hard cap on the number of page requests.
    :param start_page: 1-based page to start from.
    :param valid_ids: optional id whitelist forwarded as ``in_ids``.
    :param offset: optional explicit result offset for the first request.
    :raises ServerErrorException, BadRequestException, NotFoundException:
        translated from similarity-server errors.

    NOTE(review): as visible here the function ends without returning the
    accumulated gaia_ids/gaia_count/distance_to_target_data/note — confirm the
    full definition returns them. Also, once ``offset`` becomes truthy it is
    never recomputed for later pages, so pages after the first non-zero offset
    repeat the same offset — confirm whether that is intended.
    """
    gaia_ids = list()
    gaia_count = None
    distance_to_target_data = dict()
    note = None
    try:
        current_page = start_page
        n_page_requests = 1
        # Iterate over gaia result pages. The 'is None' test must come first:
        # comparing len() < None would raise a TypeError on Python 3.
        while (gaia_count is None or len(gaia_ids) < gaia_count) and n_page_requests <= max_pages:
            if not offset:
                offset = (current_page - 1) * page_size
            results, count, note = similarity_api_search(
                target=search_form.cleaned_data['target'],
                filter=search_form.cleaned_data['descriptors_filter'],
                num_results=page_size,
                offset=offset,
                target_file=target_file,
                in_ids=valid_ids)
            gaia_ids += [id[0] for id in results]
            gaia_count = count
            if search_form.cleaned_data['target'] or target_file:
                # Save sound distance to target so it can be later used in the view class and added to results.
                distance_to_target_data.update(dict(results))
            current_page += 1
            n_page_requests += 1
    except SimilarityException as e:
        # Map similarity-server status codes onto the API's own exception types.
        if e.status_code == 500:
            raise ServerErrorException(msg=e.message)
        elif e.status_code == 400:
            raise BadRequestException(msg=e.message)
        elif e.status_code == 404:
            raise NotFoundException(msg=e.message)
        else:
            raise ServerErrorException(msg='Similarity server error: %s' % e.message)
def api_search(search_form, target_file=None, extra_parameters=False, merging_strategy='merge_optimized'):
    """Run an API sound search and return a 7-tuple of results.

    Returns (ids, count, distance_to_target_data, extra1, note, extra2, extra3);
    the three extra slots are always None in the code paths visible here.

    :param search_form: validated form; this function reads its ``cleaned_data``
        keys 'query', 'filter', 'descriptors_filter', 'target', 'page', 'page_size'.
    :param target_file: optional uploaded file used as similarity target.
    :param extra_parameters: unused in the paths shown here — presumably consumed
        by code outside this view; confirm against the full definition.
    :param merging_strategy: unused in the paths shown here (see note above).
    :raises ServerErrorException, BadRequestException, NotFoundException:
        translated from similarity-server errors.
    """
    # No search input at all -> empty result set (nothing to query).
    if search_form.cleaned_data['query'] is None \
            and search_form.cleaned_data['filter'] is None \
            and not search_form.cleaned_data['descriptors_filter'] \
            and not search_form.cleaned_data['target'] \
            and not target_file:
        return [], 0, None, None, None, None, None

    if search_form.cleaned_data['query'] is None and search_form.cleaned_data['filter'] is None:
        # Standard content-based search (no text query/filter involved).
        try:
            results, count, note = similarity_api_search(
                target=search_form.cleaned_data['target'],
                filter=search_form.cleaned_data['descriptors_filter'],
                num_results=search_form.cleaned_data['page_size'],
                offset=(search_form.cleaned_data['page'] - 1) * search_form.cleaned_data['page_size'],
                target_file=target_file)

            gaia_ids = [result[0] for result in results]
            distance_to_target_data = None
            if search_form.cleaned_data['target'] or target_file:
                # Save sound distance to target into view class so it can be accessed by the serializer.
                # We only do that when a target is specified (otherwise there is no meaningful distance value).
                distance_to_target_data = dict(results)
            gaia_count = count
            return gaia_ids, gaia_count, distance_to_target_data, None, note, None, None

        except SimilarityException as e:
            # Map similarity-server status codes onto the API's own exception types.
            if e.status_code == 500:
                raise ServerErrorException(msg=e.message)
            elif e.status_code == 400:
                raise BadRequestException(msg=e.message)
            elif e.status_code == 404:
                raise NotFoundException(msg=e.message)
            else:
                raise ServerErrorException(msg='Similarity server error: %s' % e.message)
        except Exception as e:
            raise ServerErrorException(
                msg='The similarity server could not be reached or some unexpected error occurred.'
            )
def get_gaia_results(search_form, target_file, page_size, max_pages, start_page=1, valid_ids=None, offset=None):
    """Fetch similarity ("gaia") results page by page, accumulating sound ids.

    Keeps requesting pages until all ``gaia_count`` matches are collected or
    ``max_pages`` requests have been made.

    :param search_form: validated form; reads cleaned_data 'target' and
        'descriptors_filter'.
    :param target_file: optional uploaded file used as similarity target.
    :param page_size: number of results requested per page.
    :param max_pages: hard cap on the number of page requests.
    :param start_page: 1-based page to start from.
    :param valid_ids: optional id whitelist forwarded as ``in_ids``.
    :param offset: optional explicit result offset for the first request.
    :raises ServerErrorException, BadRequestException, NotFoundException:
        translated from similarity-server errors.

    NOTE(review): as visible here the function ends without returning the
    accumulated gaia_ids/gaia_count/distance_to_target_data/note — confirm the
    full definition returns them. Also, once ``offset`` becomes truthy it is
    never recomputed for later pages, so pages after the first non-zero offset
    repeat the same offset — confirm whether that is intended.
    """
    gaia_ids = list()
    gaia_count = None
    distance_to_target_data = dict()
    note = None
    try:
        current_page = start_page
        n_page_requests = 1
        # Iterate over gaia result pages. The 'is None' test must come first:
        # comparing len() < None would raise a TypeError on Python 3.
        while (gaia_count is None or len(gaia_ids) < gaia_count) and n_page_requests <= max_pages:
            if not offset:
                offset = (current_page - 1) * page_size
            results, count, note = similarity_api_search(
                target=search_form.cleaned_data["target"],
                filter=search_form.cleaned_data["descriptors_filter"],
                num_results=page_size,
                offset=offset,
                target_file=target_file,
                in_ids=valid_ids,
            )
            gaia_ids += [id[0] for id in results]
            gaia_count = count
            if search_form.cleaned_data["target"] or target_file:
                # Save sound distance to target so it can be later used in the view class and added to results.
                distance_to_target_data.update(dict(results))
            current_page += 1
            n_page_requests += 1
    except SimilarityException as e:
        # Map similarity-server status codes onto the API's own exception types.
        if e.status_code == 500:
            raise ServerErrorException(msg=e.message)
        elif e.status_code == 400:
            raise BadRequestException(msg=e.message)
        elif e.status_code == 404:
            raise NotFoundException(msg=e.message)
        else:
            raise ServerErrorException(msg="Similarity server error: %s" % e.message)
def api_search(search_form, target_file=None):
    """Run an API sound search and return a 5-tuple of results.

    Returns (ids, count, distance_to_target_data, extra, note); the extra slot
    is always None in the code path visible here. If a text query/filter is
    present, this visible portion falls through without an explicit return —
    presumably the full definition continues; confirm against it.

    :param search_form: validated form; this function reads its ``cleaned_data``
        keys 'query', 'filter', 'descriptors_filter', 'target', 'page', 'page_size'.
    :param target_file: optional uploaded file used as similarity target.
    :raises ServerErrorException, InvalidUrlException, NotFoundException:
        translated from similarity-server errors.
    """
    distance_to_target_data = None

    # No search input at all -> empty result set (nothing to query).
    if not search_form.cleaned_data['query'] \
            and not search_form.cleaned_data['filter'] \
            and not search_form.cleaned_data['descriptors_filter'] \
            and not search_form.cleaned_data['target'] \
            and not target_file:
        return [], 0, None, None, None

    if not search_form.cleaned_data['query'] and not search_form.cleaned_data['filter']:
        # Standard content-based search (no text query/filter involved).
        try:
            results, count, note = similarity_api_search(
                target=search_form.cleaned_data['target'],
                filter=search_form.cleaned_data['descriptors_filter'],
                num_results=search_form.cleaned_data['page_size'],
                offset=(search_form.cleaned_data['page'] - 1) * search_form.cleaned_data['page_size'],
                target_file=target_file)

            gaia_ids = [result[0] for result in results]
            if search_form.cleaned_data['target'] or target_file:
                # Save sound distance to target into view class so it can be accessed by the serializer.
                # We only do that when a target is specified (otherwise there is no meaningful distance value).
                distance_to_target_data = dict(results)
            gaia_count = count
            return gaia_ids, gaia_count, distance_to_target_data, None, note

        except SimilarityException as e:
            # Map similarity-server status codes onto the API's own exception types.
            if e.status_code == 500:
                raise ServerErrorException(msg=e.message)
            elif e.status_code == 400:
                raise InvalidUrlException(msg=e.message)
            elif e.status_code == 404:
                raise NotFoundException(msg=e.message)
            else:
                raise ServerErrorException(msg=e.message)
        except Exception as e:
            # Only leak the underlying error message in DEBUG mode.
            if settings.DEBUG:
                raise ServerErrorException(msg=e.message)
            else:
                raise ServerErrorException()
def api_search(search_form, target_file=None, max_repeat=False, max_solr_filter_ids=False): MERGE_STRATEGY = "filter_solr_results_repeat" MAX_SOLR_FILTER_IDS = 350 if max_solr_filter_ids: MAX_SOLR_FILTER_IDS = min(int(max_solr_filter_ids), MAX_SOLR_FILTER_IDS * 2) MAX_REPEAT = 7 if max_repeat: MAX_REPEAT = min(int(max_repeat), MAX_REPEAT * 2) """ In combined search queries we need to merge solr and gaia results. MERGE_STRATEGY determines which strategy we follow to approach this: - 'merge_all': merge all strategy will get all results from solr and all results from gaia and then combine the ids in a unique list. The advantage of this strategy is that it returns the exact total number of matches for the query. The disadvantage is that depending on the query it can become really slow, and sometimes throwing timeouts. - 'filter_solr_results': in this strategy we first get gaia results and then perform a solr query restricted to the results returned by gaia. Given that filtering in solr results must be done using OR clauses in a filter id field, we can not pass a very big number of ids as the performance is severely affected. The standard limit of OR clauses in a solr query is 1024 (parameter <maxBooleanClauses>1024</maxBooleanClauses> in solrconfig.xml). Therefore, the query can return a maximum of 1024 results. We actually set this parameter using MAX_SOLR_FILTER_IDS, so we can control the performance. This strategy is faster than 'merge_all' and the response time is under control, but we can not get all possible query matches. - 'filter_solr_results_repeat': is like the previous strategy but repeating the whole process MAX_REPEAT times so that we increase the probability of obtaining matches. 
""" distance_to_target_data = None if ( not search_form.cleaned_data["query"] and not search_form.cleaned_data["filter"] and not search_form.cleaned_data["descriptors_filter"] and not search_form.cleaned_data["target"] and not target_file ): # No input data for search, return empty results return [], 0, None, None, None if not search_form.cleaned_data["query"] and not search_form.cleaned_data["filter"]: # Standard content-based search try: results, count, note = similarity_api_search( target=search_form.cleaned_data["target"], filter=search_form.cleaned_data["descriptors_filter"], num_results=search_form.cleaned_data["page_size"], offset=(search_form.cleaned_data["page"] - 1) * search_form.cleaned_data["page_size"], target_file=target_file, ) gaia_ids = [result[0] for result in results] distance_to_target_data = None if search_form.cleaned_data["target"] or target_file: # Save sound distance to target into view class so it can be accessed by the serializer # We only do that when a target is specified (otherwise there is no meaningful distance value) distance_to_target_data = dict(results) gaia_count = count return gaia_ids, gaia_count, distance_to_target_data, None, note except SimilarityException, e: if e.status_code == 500: raise ServerErrorException(msg=e.message) elif e.status_code == 400: raise BadRequestException(msg=e.message) elif e.status_code == 404: raise NotFoundException(msg=e.message) else: raise ServerErrorException(msg="Similarity server error: %s" % e.message) except Exception, e: raise ServerErrorException( msg="The similarity server could not be reached or some unexpected error occurred." )
else: # Combined search (there is at least one of query/filter and one of descriptors_filter/target) # Get gaia results try: max_gaia_results = 99999999 if MERGE_STRATEGY == "filter_solr_results": # If using 'filter_solr_results' strategy there is no need to get all gaia results as we will only # be able to use MAX_SOLR_FILTER_IDS when filtering in solr max_gaia_results = MAX_SOLR_FILTER_IDS elif MERGE_STRATEGY == "filter_solr_results_repeat": max_gaia_results = MAX_SOLR_FILTER_IDS * MAX_REPEAT results, count, note = similarity_api_search( target=search_form.cleaned_data["target"], filter=search_form.cleaned_data["descriptors_filter"], num_results=max_gaia_results, offset=0, target_file=target_file, ) gaia_ids = [id[0] for id in results] distance_to_target_data = None if search_form.cleaned_data["target"] or target_file: # Save sound distance to target into view class so it can be accessed by the serializer # We only do that when a target is specified (otherwise there is no meaningful distance value) distance_to_target_data = dict(results) except SimilarityException, e: if e.status_code == 500: raise ServerErrorException(msg=e.message) elif e.status_code == 400: raise BadRequestException(msg=e.message)
# If grouping option is on, store grouping info in a dictionary that we can add when serializing sounds more_from_pack_data.update(dict([(int(element['id']), [element['more_from_pack'], element['pack_id'], element['pack_name'], element['other_ids']]) for element in result.docs])) current_page += 1 except SolrException, e: raise InvalidUrlException(msg='Solr exception: %s' % e.message) except Exception, e: if settings.DEBUG: raise ServerErrorException(msg=e.message) else: raise ServerErrorException() # Get gaia results try: results, count, note = similarity_api_search(target=search_form.cleaned_data['target'], filter=search_form.cleaned_data['descriptors_filter'], num_results=99999999, # Return all sounds in one page offset=0, target_file=target_file) gaia_ids = [id[0] for id in results] distance_to_target_data = None if search_form.cleaned_data['target'] or target_file: # Save sound distance to target into view class so it can be accessed by the serializer # We only do that when a target is specified (otherwise there is no meaningful distance value) distance_to_target_data = dict(results) if search_form.cleaned_data['group_by_pack']: # If results were grouped by pack, we need to update the counts of the 'more_from_pack' property, as they do not # consider the gaia search result and will not be accurate. keys_to_remove = [] for key, value in more_from_pack_data.items(): ids_from_pack_in_gaia_results = list(set(more_from_pack_data[key][3]).intersection(gaia_ids))